| from huggingface_hub import list_models, model_info |
| from datetime import datetime |
| from datasets import Dataset, load_dataset |
| import pandas as pd |
| import os |
| import globals |
| from typing import List, Tuple |
|
|
|
|
def get_models_providers() -> List[Tuple[str, List[str]]]:
    """Return (model_id, [provider, ...]) pairs for text-generation models.

    Queries the Hugging Face Hub for up to ``globals.NUM_MODELS_RUN``
    text-generation models sorted by likes (``direction=-1``), asking the
    Hub to expand the inference-provider mapping. Models that have no
    mapping, or an empty one, are left out of the result.
    """
    hub_models = list_models(
        filter="text-generation",
        sort="likes",
        direction=-1,
        limit=globals.NUM_MODELS_RUN,
        expand="inferenceProviderMapping"
    )

    pairs = []
    for hub_model in hub_models:
        # A missing attribute and an empty mapping are both skipped.
        mapping = getattr(hub_model, 'inference_provider_mapping', None)
        if not mapping:
            continue
        provider_names = [entry.provider for entry in mapping]
        pairs.append((hub_model.id, provider_names))
    return pairs
|
|
|
|
def initialize_models_providers_file(file_path: str = globals.LOCAL_CONFIG_FILE) -> Tuple[str, str]:
    """Write the models/providers config file and return a summary of it.

    The file is (re)created at ``file_path`` with a commented header followed
    by one ``model_name provider_name`` line per model-provider combination
    returned by ``get_models_providers()``.

    Args:
        file_path: Destination of the config file. Defaults to
            ``globals.LOCAL_CONFIG_FILE``.

    Returns:
        A ``(status_message, listing)`` tuple: a human-readable message with
        the number of combinations written, and the content of the file that
        was just written, formatted by ``load_models_providers_str``.
    """
    model_to_providers = get_models_providers()

    count = 0
    with open(file_path, 'w') as f:
        f.write("# Models and Providers Configuration\n")
        f.write("# Format: model_name provider_name\n")
        f.write(f"# Auto-generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        for model_id, providers in model_to_providers:
            # Best effort: a failure on one model must not abort the others.
            try:
                for provider in providers:
                    f.write(f"{model_id} {provider}\n")
                    count += 1
            except Exception as e:
                print(f"Error processing model {model_id}: {e}")
                continue

    print(f"Successfully wrote {count} model-provider combinations to {file_path}")

    # Read back the file that was actually written. Passing file_path matters
    # when a non-default path is used: otherwise the listing would describe
    # the default config file instead of this one.
    return f"Initialized {count} model-provider combinations", load_models_providers_str(file_path)
|
|
|
|
def load_models_providers_str(file_path: str = globals.LOCAL_CONFIG_FILE) -> str:
    """Return the configured pairs as newline-separated ``model : provider`` lines."""
    rendered = []
    for model, provider in load_models_providers(file_path):
        rendered.append(f"{model} : {provider}")
    return "\n".join(rendered)
|
|
|
|
def load_models_providers(file_path: str = globals.LOCAL_CONFIG_FILE) -> List[Tuple[str, str]]:
    """Read (model, provider) pairs from the config file.

    If the file does not exist it is first generated through
    ``initialize_models_providers_file``. Blank lines, lines starting with
    ``#`` and lines with fewer than two whitespace-separated fields are
    ignored; only the first two fields of a line are used.

    Any exception is printed (with traceback) rather than raised, and the
    pairs collected up to that point are returned.
    """
    pairs = []
    try:
        if not os.path.exists(file_path):
            print(f"Config file {file_path} not found. Initializing...")
            initialize_models_providers_file(file_path)

        with open(file_path, 'r') as config:
            for raw_line in config:
                entry = raw_line.strip()
                # Skip blanks and comment lines.
                if not entry or entry.startswith('#'):
                    continue
                fields = entry.split()
                if len(fields) < 2:
                    continue
                pairs.append((fields[0], fields[1]))
    except Exception as exc:
        print(f"Error loading model providers: {str(exc)}")
        import traceback
        traceback.print_exc()
    return pairs
|
|
|
|
def save_results() -> None:
    """Push the in-memory job results to the Hugging Face results dataset.

    Does nothing (besides printing a notice) when there are no results.
    Errors are printed, not raised.
    """
    try:
        if not globals.job_results:
            print("No results to save")
            return

        # One row per stored job entry.
        rows = list(globals.job_results.values())
        frame = pd.DataFrame(rows)
        hub_dataset = Dataset.from_pandas(frame)

        hub_dataset.push_to_hub(
            globals.RESULTS_DATASET_NAME,
            token=os.getenv("HF_TOKEN"),
            private=False,
        )
        print(f"Saved {len(rows)} results to dataset")
    except Exception as exc:
        print(f"Error saving results to dataset: {exc}")
|
|
|
|
def load_results() -> None:
    """Populate ``globals.job_results`` from the Hugging Face results dataset.

    Each row of the ``train`` split is stored under the key produced by
    ``globals.get_model_provider_key(model, provider)``. If loading fails for
    any reason, the error is printed and the function returns normally.
    """
    # Fields read with row[...]: a missing one raises and ends up in the handler below.
    required_fields = (
        "model",
        "provider",
        "last_run",
        "status",
        "current_score",
        "previous_score",
        "job_id",
    )
    try:
        dataset = load_dataset(
            globals.RESULTS_DATASET_NAME,
            split="train",
            token=os.getenv("HF_TOKEN"),
        )

        for row in dataset:
            key = globals.get_model_provider_key(row["model"], row["provider"])

            entry = {field: row[field] for field in required_fields}
            # Optional fields: read with .get so absent keys do not raise.
            entry["start_time"] = row.get("start_time")
            entry["duration"] = row.get("duration")
            entry["completed_at"] = row.get("completed_at")
            entry["runs"] = row.get("runs", [])
            entry["score_variance"] = row.get("score_variance")

            globals.job_results[key] = entry

        print(f"Loaded {len(globals.job_results)} results from dataset")
    except Exception as exc:
        print(f"No existing dataset found or error loading: {exc}")
        print("Starting with empty results")
|
|
def style_status(val):
    """Return the CSS background rule for a status cell, or '' if unstyled."""
    status_styles = (
        ("COMPLETED", 'background-color: green'),
        ("ERROR", 'background-color: red'),
        ("RUNNING", 'background-color: blue'),
    )
    # Compare with == (rather than a dict lookup) so any cell value,
    # hashable or not, is accepted.
    for status, css in status_styles:
        if val == status:
            return css
    return ''
|
|
def get_summary_stats():
    """Return a one-line Markdown summary of job counts by status.

    Counts entries of ``globals.job_results`` whose status is RUNNING,
    COMPLETED, or ERROR/FAILED (the last two are reported together as
    "Failed"). Returns a placeholder message when there are no jobs.
    """
    if not globals.job_results:
        return "📊 **Status:** No jobs yet"

    statuses = [entry.get("status") for entry in globals.job_results.values()]

    total = len(globals.job_results)
    running = statuses.count("RUNNING")
    completed = statuses.count("COMPLETED")
    failed = statuses.count("ERROR") + statuses.count("FAILED")

    return f"📊 **Total:** {total} | 🔵 **Running:** {running} | ✅ **Completed:** {completed} | ❌ **Failed:** {failed}"
|
|
|
|
# Column headers shared by the empty and the populated table so both have
# the same shape.
_RESULTS_TABLE_COLUMNS = [
    "Model",
    "Provider",
    "Runs",
    "Last Run",
    "Status",
    "Mean Score",
    "Variance",
    "Previous Score",
    "Duration",
    "Completed At",
    "Job Id and Logs",
    "Actions",
]


def _format_metric(value, precision):
    """Format a numeric metric with ``precision`` decimals; None becomes "N/A"."""
    if value is None:
        return "N/A"
    if isinstance(value, (int, float)):
        return f"{value:.{precision}f}"
    # Non-numeric values (e.g. an "N/A" placeholder) are shown unchanged.
    return value


def _format_duration(duration):
    """Format a duration in seconds as "<m>m <s>s", or "N/A" if unusable."""
    if not isinstance(duration, (int, float)):
        return "N/A"
    minutes, seconds = divmod(duration, 60)
    try:
        return f"{int(minutes)}m {int(seconds)}s"
    except (ValueError, OverflowError):
        # NaN / infinite durations cannot be converted to int.
        return "N/A"


def _format_job_link(job_id):
    """Return the job id followed by an HTML link to its page, or "N/A"."""
    if job_id is None or job_id == "N/A":
        return "N/A"
    job_url = f"https://hf.co/jobs/{globals.NAMESPACE}/{job_id}"
    return f'{job_id}: <a href="{job_url}" target="_blank">📄</a> '


def get_results_table():
    """Return job results as a styled pandas DataFrame for Gradio DataFrame.

    Returns:
        When ``globals.job_results`` is empty, an empty ``DataFrame`` with
        the table's column headers. Otherwise a pandas ``Styler`` with one
        row per job entry and the "Status" column coloured by
        ``style_status``.

    Raises:
        KeyError: If an entry lacks "model", "provider", "last_run" or
            "status".
    """
    if not globals.job_results:
        return pd.DataFrame(columns=_RESULTS_TABLE_COLUMNS)

    table_data = []
    for info in globals.job_results.values():
        # "runs" can be stored as None; treat that like "no runs" instead of
        # failing on iteration.
        runs = info.get("runs") or []
        completed_runs = sum(1 for run in runs if run.get("status") == "COMPLETED")
        runs_str = f"{completed_runs}/{len(runs)}"

        completed_at = info.get("completed_at")
        if completed_at is None:
            completed_at = "N/A"

        table_data.append([
            info["model"],
            info["provider"],
            runs_str,
            info["last_run"],
            info["status"],
            _format_metric(info.get("current_score", "N/A"), 4),
            _format_metric(info.get("score_variance", "N/A"), 6),
            _format_metric(info.get("previous_score", "N/A"), 4),
            _format_duration(info.get("duration")),
            completed_at,
            _format_job_link(info.get("job_id", "N/A")),
            '🔄 Relaunch',
        ])

    df = pd.DataFrame(table_data, columns=_RESULTS_TABLE_COLUMNS)

    # Colour only the Status column.
    return df.style.map(style_status, subset=['Status'])
|
|
|
|