| """ |
| Resume benchmark from partial results, then run medium and large networks too. |
| Optimized for CPU: reduced timeouts, skip heavy combos. |
| """ |
import os
import sys
import logging
import warnings

import pandas as pd

warnings.filterwarnings('ignore')
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
logger = logging.getLogger(__name__)

# Keep causal-learn's own INFO-level logging out of the benchmark output.
logging.getLogger('causallearn').setLevel(logging.WARNING)

sys.path.insert(0, '/app')
from causal_selection.data.generator import (
    load_bn_model, get_true_dag_adjmat, dag_to_cpdag, sample_dataset,
    SMALL_NETWORKS, MEDIUM_NETWORKS, LARGE_NETWORKS, get_network_tier
)
from causal_selection.discovery.algorithms import run_algorithm, ALGORITHM_POOL
from causal_selection.discovery.evaluator import evaluate_algorithm_result
from causal_selection.features.extractor import extract_all_features, FEATURE_NAMES

ALGO_NAMES = list(ALGORITHM_POOL.keys())
RESULTS_DIR = '/app/causal_selection/data/results'
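
# Per-tier sample sizes: larger networks get fewer and smaller datasets so the
# benchmark stays tractable on CPU (the "skip heavy combos" in the docstring).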
SAMPLE_SIZES_FAST = {
    'small': [500, 1000, 2000, 5000],
    'medium': [500, 1000, 2000],
    'large': [500, 1000],
}
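
# Per-algorithm timeout budgets in seconds, keyed by network tier; each value
# is passed to run_algorithm as timeout_sec, so every algorithm in
# ALGORITHM_POOL needs an entry here.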
ALGO_TIMEOUT = {
    'PC_discrete': {'small': 30, 'medium': 120, 'large': 300},
    'FCI': {'small': 30, 'medium': 120, 'large': 300},
    'GES': {'small': 30, 'medium': 120, 'large': 300},
    'BOSS': {'small': 30, 'medium': 120, 'large': 300},
    'GRaSP': {'small': 30, 'medium': 120, 'large': 300},
    'HC': {'small': 30, 'medium': 60, 'large': 120},
    'Tabu': {'small': 30, 'medium': 60, 'large': 120},
    'MMHC': {'small': 30, 'medium': 60, 'large': 120},
    'K2': {'small': 20, 'medium': 30, 'large': 60},
}
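
# Random seeds per (network, sample size) configuration.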
SEEDS = 2


def load_existing_results():
    """Load existing partial results to avoid re-running."""
    existing = set()
    partial_path = os.path.join(RESULTS_DIR, 'configs_partial.csv')
    final_path = os.path.join(RESULTS_DIR, 'configs.csv')

    for path in [partial_path, final_path]:
        if os.path.exists(path):
            df = pd.read_csv(path)
            for _, row in df.iterrows():
                key = (row['network'], int(row['n_samples']), int(row['seed']))
                existing.add(key)

    return existing
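
# Resume keys are (network, n_samples, seed) triples, e.g. ('alarm', 1000, 0);
# the example name is illustrative. Real names come from the generator's
# SMALL/MEDIUM/LARGE_NETWORKS lists.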


def run_benchmark():
    """Run full benchmark with resume capability."""
    existing = load_existing_results()
    logger.info(f"Found {len(existing)} existing configs")

    def _load_rows(prefixes):
        """Return rows from the first existing CSV among the given prefixes."""
        for prefix in prefixes:
            path = os.path.join(RESULTS_DIR, f'{prefix}.csv')
            if os.path.exists(path):
                return pd.read_csv(path).to_dict('records')
        return []

    # Reload any previously saved rows so new results append to them; the
    # _partial checkpoint files take precedence over the final ones.
    all_features = _load_rows(['meta_features_partial', 'meta_features'])
    all_shd = _load_rows(['shd_matrix_partial', 'shd_matrix'])
    # _save_results writes normalized_shd_matrix{suffix}.csv, so the partial
    # file is normalized_shd_matrix_partial.csv (not normalized_shd_partial).
    all_nshd = _load_rows(['normalized_shd_matrix_partial', 'normalized_shd_matrix'])
    all_configs = _load_rows(['configs_partial', 'configs'])

    logger.info(f"Starting with {len(all_configs)} existing results")

    # Enumerate every (network, sample size, seed) combination not yet done.
    all_networks = SMALL_NETWORKS + MEDIUM_NETWORKS + LARGE_NETWORKS
    configs_to_run = []

    for net in all_networks:
        tier = get_network_tier(net)
        for n_samples in SAMPLE_SIZES_FAST[tier]:
            for seed in range(SEEDS):
                key = (net, n_samples, seed)
                if key not in existing:
                    configs_to_run.append((net, n_samples, seed, tier))

    logger.info(f"Configs to run: {len(configs_to_run)}")
    total = len(configs_to_run)
    for idx, (network, n_samples, seed, tier) in enumerate(configs_to_run):
        logger.info(f"\n[{idx+1}/{total}] {network} N={n_samples} seed={seed}")

        try:
            # Ground truth: the network's DAG and its CPDAG equivalence class.
            model = load_bn_model(network)
            true_dag, node_names = get_true_dag_adjmat(model)
            true_cpdag = dag_to_cpdag(true_dag)

            # Sample a dataset from the Bayesian network.
            df = sample_dataset(model, n_samples, seed=seed)

            # Meta-features of the sampled dataset.
            features = extract_all_features(df, n_probe_triplets=80)

            # Run every algorithm with its tier-specific timeout.
            algo_metrics = {}
            for algo_name in ALGO_NAMES:
                timeout = ALGO_TIMEOUT[algo_name][tier]
                result = run_algorithm(algo_name, df, timeout_sec=timeout)
                metrics = evaluate_algorithm_result(result, true_cpdag)
                algo_metrics[algo_name] = metrics

                s = metrics['status']
                if s == 'success':
                    logger.info(f"  {algo_name:12s}: SHD={metrics['shd']:3d} F1={metrics['skeleton_f1']:.3f} t={metrics['runtime']:.1f}s")
                else:
                    logger.info(f"  {algo_name:12s}: {s} t={metrics['runtime']:.1f}s")
            # Append the four row types in lockstep so the output tables stay
            # aligned row-for-row with configs.csv.
            feat_row = {name: features.get(name, 0.0) for name in FEATURE_NAMES}
            all_features.append(feat_row)

            shd_row = {algo: algo_metrics[algo]['shd'] for algo in ALGO_NAMES}
            nshd_row = {algo: algo_metrics[algo]['normalized_shd'] for algo in ALGO_NAMES}
            all_shd.append(shd_row)
            all_nshd.append(nshd_row)

            config = {
                'network': network,
                'n_samples': n_samples,
                'seed': seed,
                'n_variables': len(node_names),
                # Undirected skeleton edge count of the true CPDAG.
                'n_true_edges': int(((true_cpdag + true_cpdag.T) > 0).sum() // 2),
            }
            all_configs.append(config)

            # Checkpoint every three configs so progress survives a crash.
            if (idx + 1) % 3 == 0:
                _save_results(all_features, all_shd, all_nshd, all_configs, partial=True)
        except Exception:
            # Log the failure with full traceback and move on to the next config.
            logger.exception(f"FAILED {network} N={n_samples} seed={seed}")

    # Final save without the _partial suffix.
    _save_results(all_features, all_shd, all_nshd, all_configs, partial=False)

    # Summary of results across all configs.
    Y_shd = pd.DataFrame(all_shd)
    configs_df = pd.DataFrame(all_configs)

    print("\n" + "=" * 80)
    print("BENCHMARK COMPLETE")
    print("=" * 80)
    print(f"Total configs: {len(all_configs)}")
    print(f"Networks: {configs_df['network'].unique()}")
    print("\nMean SHD per algorithm:")
    print(Y_shd.mean().sort_values())
    print("\nTimes each algorithm achieved the lowest SHD:")
    print(Y_shd.idxmin(axis=1).value_counts())


def _save_results(features, shds, nshds, configs, partial=True):
    """Write the four result tables to CSV; checkpoints get a _partial suffix."""
    os.makedirs(RESULTS_DIR, exist_ok=True)
    suffix = '_partial' if partial else ''
    pd.DataFrame(features).to_csv(os.path.join(RESULTS_DIR, f'meta_features{suffix}.csv'), index=False)
    pd.DataFrame(shds).to_csv(os.path.join(RESULTS_DIR, f'shd_matrix{suffix}.csv'), index=False)
    pd.DataFrame(nshds).to_csv(os.path.join(RESULTS_DIR, f'normalized_shd_matrix{suffix}.csv'), index=False)
    pd.DataFrame(configs).to_csv(os.path.join(RESULTS_DIR, f'configs{suffix}.csv'), index=False)
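
# Downstream use (sketch): the matrices align row-by-row with configs.csv, so
#     Y = pd.read_csv(os.path.join(RESULTS_DIR, 'shd_matrix.csv'))
#     Y.iloc[i]
# gives each algorithm's SHD for the dataset described by configs.csv row i.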


if __name__ == '__main__':
    run_benchmark()