""" Dispatch AI — Model Comparison Visualizer Pick 2 models → side-by-side comparison of size, speed, quality, RAM. Visual charts using matplotlib. """ import gradio as gr import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np # --------------------------------------------------------------------------- # Model database — from our phone farm benchmarks + public info # --------------------------------------------------------------------------- MODELS = { "Qwen2.5-0.5B-Instruct": { "params_b": 0.5, "size_mb": 450, "gen_tps": 19.2, "prompt_tps": 65.3, "ram_mb": 4100, "load_s": 0.9, "quality_score": 5.2, "license": "Apache 2.0", "context": 32768, "arabic": "Good", }, "Qwen2.5-1.5B-Instruct": { "params_b": 1.5, "size_mb": 1060, "gen_tps": 16.9, "prompt_tps": 57.8, "ram_mb": 3500, "load_s": 1.8, "quality_score": 6.5, "license": "Apache 2.0", "context": 32768, "arabic": "Very Good", }, "Llama-3.2-1B-Instruct": { "params_b": 1.0, "size_mb": 890, "gen_tps": 16.3, "prompt_tps": 57.8, "ram_mb": 3500, "load_s": 1.5, "quality_score": 6.0, "license": "Llama 3.2", "context": 131072, "arabic": "Fair", }, "Llama-3.2-3B-Instruct": { "params_b": 3.0, "size_mb": 2100, "gen_tps": 12.4, "prompt_tps": 45.2, "ram_mb": 2800, "load_s": 3.2, "quality_score": 7.2, "license": "Llama 3.2", "context": 131072, "arabic": "Good", }, "Gemma-2-2B-IT": { "params_b": 2.0, "size_mb": 1600, "gen_tps": 13.8, "prompt_tps": 48.6, "ram_mb": 3200, "load_s": 2.5, "quality_score": 6.8, "license": "Gemma", "context": 8192, "arabic": "Fair", }, "Phi-3.5-mini": { "params_b": 3.8, "size_mb": 2300, "gen_tps": 14.2, "prompt_tps": 50.1, "ram_mb": 2900, "load_s": 2.8, "quality_score": 7.5, "license": "MIT", "context": 131072, "arabic": "Fair", }, "SmolLM2-1.7B": { "params_b": 1.7, "size_mb": 1200, "gen_tps": 17.1, "prompt_tps": 60.2, "ram_mb": 3400, "load_s": 1.4, "quality_score": 5.8, "license": "Apache 2.0", "context": 8192, "arabic": "Poor", }, "SmolLM2-135M": { "params_b": 0.135, "size_mb": 85, "gen_tps": 22.8, "prompt_tps": 89.5, "ram_mb": 4500, "load_s": 0.3, "quality_score": 3.0, "license": "Apache 2.0", "context": 8192, "arabic": "Poor", }, "TinyLlama-1.1B": { "params_b": 1.1, "size_mb": 700, "gen_tps": 18.5, "prompt_tps": 62.4, "ram_mb": 3800, "load_s": 1.1, "quality_score": 4.5, "license": "Apache 2.0", "context": 2048, "arabic": "Poor", }, } # Dark theme colors for matplotlib BG = "#0A0F1A" CARD = "#0E1424" ACCENT = "#1FE0E6" ACCENT2 = "#FF6B9D" WHITE = "#FFFFFF" GRAY = "#8A8F9C" def create_comparison_chart(model1_name, model2_name): """Create a grouped bar chart comparing two models across key metrics.""" if model1_name not in MODELS or model2_name not in MODELS: fig, ax = plt.subplots(figsize=(10, 6)) ax.text(0.5, 0.5, "Select two models", ha="center", va="center", color=ACCENT, fontsize=16) ax.set_facecolor(BG) fig.patch.set_facecolor(BG) plt.close(fig) return fig m1 = MODELS[model1_name] m2 = MODELS[model2_name] # Normalized metrics (0-10 scale for comparison) metrics = ["Size\n(smaller=better)", "Gen Speed\n(faster=better)", "Prompt Speed\n(faster=better)", "RAM Free\n(more=better)", "Load Time\n(faster=better)", "Quality\n(higher=better)"] # Normalize: higher is better for speed, ram, quality; lower is better for size, load time max_size = max(m["size_mb"] for m in MODELS.values()) max_load = max(m["load_s"] for m in MODELS.values()) m1_vals = [ 10 * (1 - m1["size_mb"] / max_size), # smaller = higher score m1["gen_tps"] / 25 * 10, m1["prompt_tps"] / 100 * 10, m1["ram_mb"] / 5000 * 10, 10 * (1 - m1["load_s"] / max_load), m1["quality_score"], ] m2_vals = [ 10 * (1 - m2["size_mb"] / max_size), m2["gen_tps"] / 25 * 10, m2["prompt_tps"] / 100 * 10, m2["ram_mb"] / 5000 * 10, 10 * (1 - m2["load_s"] / max_load), m2["quality_score"], ] x = np.arange(len(metrics)) width = 0.35 fig, ax = plt.subplots(figsize=(12, 6)) fig.patch.set_facecolor(BG) ax.set_facecolor(CARD) bars1 = ax.bar(x - width/2, m1_vals, width, label=model1_name, color=ACCENT, edgecolor=WHITE, linewidth=0.5) bars2 = ax.bar(x + width/2, m2_vals, width, label=model2_name, color=ACCENT2, edgecolor=WHITE, linewidth=0.5) ax.set_ylabel("Score (0-10, higher = better)", color=WHITE, fontsize=12) ax.set_title(f"Model Comparison: {model1_name} vs {model2_name}", color=WHITE, fontsize=14, pad=15) ax.set_xticks(x) ax.set_xticklabels(metrics, color=WHITE, fontsize=9) ax.set_ylim(0, 12) ax.tick_params(axis="y", colors=GRAY) ax.spines["bottom"].set_color(GRAY) ax.spines["left"].set_color(GRAY) ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) ax.grid(axis="y", color=GRAY, alpha=0.2, linestyle="--") legend = ax.legend(facecolor=CARD, edgecolor=ACCENT, labelcolor=WHITE, fontsize=10) legend.get_frame().set_alpha(0.9) # Add value labels for bar in bars1 + bars2: height = bar.get_height() ax.annotate(f"{height:.1f}", xy=(bar.get_x() + bar.get_width() / 2, height), xytext=(0, 3), textcoords="offset points", ha="center", va="bottom", color=WHITE, fontsize=8) plt.tight_layout() plt.close(fig) return fig def create_radar_chart(model1_name, model2_name): """Create a radar/spider chart comparing two models.""" if model1_name not in MODELS or model2_name not in MODELS: fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection="polar")) ax.set_facecolor(BG) fig.patch.set_facecolor(BG) plt.close(fig) return fig m1 = MODELS[model1_name] m2 = MODELS[model2_name] categories = ["Compact", "Speed", "RAM\nEfficient", "Fast\nLoad", "Quality", "Arabic\nSupport"] N = len(categories) max_size = max(m["size_mb"] for m in MODELS.values()) max_load = max(m["load_s"] for m in MODELS.values()) arabic_scores = {"Poor": 2, "Fair": 5, "Good": 7, "Very Good": 9} m1_vals = [ 1 - m1["size_mb"] / max_size, m1["gen_tps"] / 25, m1["ram_mb"] / 5000, 1 - m1["load_s"] / max_load, m1["quality_score"] / 10, arabic_scores.get(m1["arabic"], 5) / 10, ] m2_vals = [ 1 - m2["size_mb"] / max_size, m2["gen_tps"] / 25, m2["ram_mb"] / 5000, 1 - m2["load_s"] / max_load, m2["quality_score"] / 10, arabic_scores.get(m2["arabic"], 5) / 10, ] angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist() m1_vals += m1_vals[:1] m2_vals += m2_vals[:1] angles += angles[:1] fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection="polar")) fig.patch.set_facecolor(BG) ax.set_facecolor(CARD) ax.plot(angles, m1_vals, "o-", color=ACCENT, linewidth=2, label=model1_name) ax.fill(angles, m1_vals, color=ACCENT, alpha=0.15) ax.plot(angles, m2_vals, "o-", color=ACCENT2, linewidth=2, label=model2_name) ax.fill(angles, m2_vals, color=ACCENT2, alpha=0.15) ax.set_xticks(angles[:-1]) ax.set_xticklabels(categories, color=WHITE, fontsize=10) ax.set_ylim(0, 1) ax.set_yticks([0.2, 0.4, 0.6, 0.8, 1.0]) ax.set_yticklabels(["0.2", "0.4", "0.6", "0.8", "1.0"], color=GRAY, fontsize=8) ax.grid(color=GRAY, alpha=0.3) ax.spines["polar"].set_color(GRAY) ax.set_title("Model Capability Radar", color=WHITE, fontsize=14, pad=20) legend = ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1), facecolor=CARD, edgecolor=ACCENT, labelcolor=WHITE, fontsize=10) legend.get_frame().set_alpha(0.9) plt.tight_layout() plt.close(fig) return fig def get_comparison_table(model1_name, model2_name): """Return a text comparison table.""" if model1_name not in MODELS or model2_name not in MODELS: return "Please select two models." m1 = MODELS[model1_name] m2 = MODELS[model2_name] rows = [ ("Parameters (B)", f"{m1['params_b']}", f"{m2['params_b']}"), ("Model Size (MB)", f"{m1['size_mb']}", f"{m2['size_mb']}"), ("Gen Speed (t/s)", f"{m1['gen_tps']}", f"{m2['gen_tps']}"), ("Prompt Speed (t/s)", f"{m1['prompt_tps']}", f"{m2['prompt_tps']}"), ("RAM Free (MB)", f"{m1['ram_mb']}", f"{m2['ram_mb']}"), ("Load Time (s)", f"{m1['load_s']}", f"{m2['load_s']}"), ("Quality Score", f"{m1['quality_score']}/10", f"{m2['quality_score']}/10"), ("Context Length", f"{m1['context']:,}", f"{m2['context']:,}"), ("Arabic Support", m1["arabic"], m2["arabic"]), ("License", m1["license"], m2["license"]), ] # Build winner indicators result = f"### Side-by-Side Comparison\n\n" result += f"| Metric | {model1_name} | {model2_name} | Winner |\n" result += f"|--------|-------------|-------------|--------|\n" # Define which is better (higher/lower) higher_better = {"Gen Speed (t/s)", "Prompt Speed (t/s)", "RAM Free (MB)", "Quality Score", "Context Length"} lower_better = {"Model Size (MB)", "Load Time (s)"} for metric, v1, v2 in rows: winner = "" if metric in higher_better or metric in lower_better: try: f1 = float(v1.split("/")[0].replace(",", "")) f2 = float(v2.split("/")[0].replace(",", "")) if metric in higher_better: winner = model1_name if f1 > f2 else (model2_name if f2 > f1 else "tie") else: winner = model1_name if f1 < f2 else (model2_name if f2 < f1 else "tie") winner = "🟢" if winner == model1_name else ("🔵" if winner == model2_name else "➖") except ValueError: pass result += f"| {metric} | {v1} | {v2} | {winner} |\n" return result # --- UI ----------------------------------------------------------------------- CSS = """ #dispatch-header h1 { color: #FFFFFF; font-size: 2.2rem; margin: 0; background: linear-gradient(90deg, #1FE0E6 0%, #FFFFFF 60%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; } #dispatch-header p { color: #1FE0E6; font-size: 1.05rem; margin: 6px 0 0 0; } .dispatch-footer { text-align: center; color: #8A8F9C; font-size: 0.9rem; padding-top: 8px; } """ with gr.Blocks( title="Dispatch AI — Model Comparison Visualizer", theme=gr.themes.Base( primary_hue="cyan", secondary_hue="cyan", neutral_hue="slate", font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"], ).set( body_background_fill="#0A0F1A", body_background_fill_dark="#0A0F1A", body_text_color="#FFFFFF", body_text_color_dark="#FFFFFF", block_background_fill="#0E1424", block_background_fill_dark="#0E1424", block_border_color="#1FE0E6", block_border_width="1px", block_label_text_color="#1FE0E6", block_title_text_color="#1FE0E6", button_primary_background_fill="#1FE0E6", button_primary_background_fill_dark="#1FE0E6", button_primary_text_color="#0A0F1A", button_primary_border_color="#1FE0E6", input_background_fill="#0E1424", input_background_fill_dark="#0E1424", input_border_color="#1FE0E6", input_border_width="1px", ), css=CSS, ) as demo: with gr.Column(elem_id="dispatch-header"): gr.Markdown( """ # Dispatch AI — Model Comparison Visualizer Compare mobile AI models side-by-side with visual charts · Dispatch AI (FZE) · UAE """ ) gr.Markdown( """ Pick two models to compare size, speed, quality, RAM, and more. Data from our 80-phone farm. 🟢 = Model 1 wins · 🔵 = Model 2 wins · ➖ = tie """ ) with gr.Row(): model1 = gr.Dropdown(list(MODELS.keys()), label="Model 1 (🟢)", value="Qwen2.5-1.5B-Instruct") model2 = gr.Dropdown(list(MODELS.keys()), label="Model 2 (🔵)", value="Llama-3.2-3B-Instruct") compare_btn = gr.Button("⚔️ Compare Models", variant="primary") with gr.Row(): bar_chart = gr.Plot(label="Bar Chart Comparison") radar_chart = gr.Plot(label="Radar Chart Comparison") comparison_table = gr.Markdown() # Events compare_btn.click( fn=lambda m1, m2: (create_comparison_chart(m1, m2), create_radar_chart(m1, m2), get_comparison_table(m1, m2)), inputs=[model1, model2], outputs=[bar_chart, radar_chart, comparison_table], ) # Also update on dropdown change model1.change( fn=lambda m1, m2: (create_comparison_chart(m1, m2), create_radar_chart(m1, m2), get_comparison_table(m1, m2)), inputs=[model1, model2], outputs=[bar_chart, radar_chart, comparison_table], ) model2.change( fn=lambda m1, m2: (create_comparison_chart(m1, m2), create_radar_chart(m1, m2), get_comparison_table(m1, m2)), inputs=[model1, model2], outputs=[bar_chart, radar_chart, comparison_table], ) gr.Markdown( """
""" ) if __name__ == "__main__": demo.queue() demo.launch()