Spaces:

dispatchAI
/

model-comparison

Runtime error

App Files Files Community

model-comparison / app.py

3morixd

Upload app.py with huggingface_hub

7e798e3 verified 3 days ago

Raw

History Blame Contribute Delete

13.8 kB

	"""
	Dispatch AI — Model Comparison Visualizer
	Pick 2 models → side-by-side comparison of size, speed, quality, RAM.
	Visual charts using matplotlib.
	"""

	import gradio as gr
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	import numpy as np

	# ---------------------------------------------------------------------------
	# Model database — from our phone farm benchmarks + public info
	# ---------------------------------------------------------------------------
	# One record per model, keyed by display name. Fields:
	#   params_b       parameter count, in billions
	#   size_mb        model size in MB (lower is treated as better)
	#   gen_tps        generation speed, tokens/s
	#   prompt_tps     prompt speed, tokens/s
	#   ram_mb         shown in the UI as "RAM Free (MB)" (higher is treated as better)
	#   load_s         load time in seconds (lower is treated as better)
	#   quality_score  quality rating out of 10
	#   license        license label
	#   context        context length
	#   arabic         Arabic-support rating: "Poor", "Fair", "Good" or "Very Good"
	# Insertion order is the order the dropdowns list the models in.
	MODELS = {
	    "Qwen2.5-0.5B-Instruct": {
	        "params_b": 0.5,
	        "size_mb": 450,
	        "gen_tps": 19.2,
	        "prompt_tps": 65.3,
	        "ram_mb": 4100,
	        "load_s": 0.9,
	        "quality_score": 5.2,
	        "license": "Apache 2.0",
	        "context": 32768,
	        "arabic": "Good",
	    },
	    "Qwen2.5-1.5B-Instruct": {
	        "params_b": 1.5,
	        "size_mb": 1060,
	        "gen_tps": 16.9,
	        "prompt_tps": 57.8,
	        "ram_mb": 3500,
	        "load_s": 1.8,
	        "quality_score": 6.5,
	        "license": "Apache 2.0",
	        "context": 32768,
	        "arabic": "Very Good",
	    },
	    "Llama-3.2-1B-Instruct": {
	        "params_b": 1.0,
	        "size_mb": 890,
	        "gen_tps": 16.3,
	        "prompt_tps": 57.8,
	        "ram_mb": 3500,
	        "load_s": 1.5,
	        "quality_score": 6.0,
	        "license": "Llama 3.2",
	        "context": 131072,
	        "arabic": "Fair",
	    },
	    "Llama-3.2-3B-Instruct": {
	        "params_b": 3.0,
	        "size_mb": 2100,
	        "gen_tps": 12.4,
	        "prompt_tps": 45.2,
	        "ram_mb": 2800,
	        "load_s": 3.2,
	        "quality_score": 7.2,
	        "license": "Llama 3.2",
	        "context": 131072,
	        "arabic": "Good",
	    },
	    "Gemma-2-2B-IT": {
	        "params_b": 2.0,
	        "size_mb": 1600,
	        "gen_tps": 13.8,
	        "prompt_tps": 48.6,
	        "ram_mb": 3200,
	        "load_s": 2.5,
	        "quality_score": 6.8,
	        "license": "Gemma",
	        "context": 8192,
	        "arabic": "Fair",
	    },
	    "Phi-3.5-mini": {
	        "params_b": 3.8,
	        "size_mb": 2300,
	        "gen_tps": 14.2,
	        "prompt_tps": 50.1,
	        "ram_mb": 2900,
	        "load_s": 2.8,
	        "quality_score": 7.5,
	        "license": "MIT",
	        "context": 131072,
	        "arabic": "Fair",
	    },
	    "SmolLM2-1.7B": {
	        "params_b": 1.7,
	        "size_mb": 1200,
	        "gen_tps": 17.1,
	        "prompt_tps": 60.2,
	        "ram_mb": 3400,
	        "load_s": 1.4,
	        "quality_score": 5.8,
	        "license": "Apache 2.0",
	        "context": 8192,
	        "arabic": "Poor",
	    },
	    "SmolLM2-135M": {
	        "params_b": 0.135,
	        "size_mb": 85,
	        "gen_tps": 22.8,
	        "prompt_tps": 89.5,
	        "ram_mb": 4500,
	        "load_s": 0.3,
	        "quality_score": 3.0,
	        "license": "Apache 2.0",
	        "context": 8192,
	        "arabic": "Poor",
	    },
	    "TinyLlama-1.1B": {
	        "params_b": 1.1,
	        "size_mb": 700,
	        "gen_tps": 18.5,
	        "prompt_tps": 62.4,
	        "ram_mb": 3800,
	        "load_s": 1.1,
	        "quality_score": 4.5,
	        "license": "Apache 2.0",
	        "context": 2048,
	        "arabic": "Poor",
	    },
	}

	# Dark theme colors for matplotlib
	# BG and CARD use the same hex values as the page/block background fills
	# configured on the Gradio theme further down in this file.
	BG = "#0A0F1A"  # figure background
	CARD = "#0E1424"  # axes and legend background
	ACCENT = "#1FE0E6"  # first model's series, legend border, placeholder text
	ACCENT2 = "#FF6B9D"  # second model's series
	WHITE = "#FFFFFF"  # titles, axis/tick labels, bar outlines, value labels
	GRAY = "#8A8F9C"  # y ticks, spines and grid lines


	def _bar_scores(model, max_size, max_load):
	    """Return the six 0-10 bar heights for one ``MODELS`` entry.

	    The order matches the chart's x axis: size, generation speed, prompt
	    speed, RAM free, load time, quality. Size and load time are inverted
	    against the largest value found in ``MODELS`` so that smaller/faster
	    scores higher (the largest model therefore scores 0). Speeds and RAM are
	    divided by fixed full-scale values of 25, 100 and 5000 respectively.
	    """
	    return [
	        10 * (1 - model["size_mb"] / max_size),  # smaller = higher score
	        model["gen_tps"] / 25 * 10,
	        model["prompt_tps"] / 100 * 10,
	        model["ram_mb"] / 5000 * 10,
	        10 * (1 - model["load_s"] / max_load),  # faster load = higher score
	        model["quality_score"],  # already on a 0-10 scale
	    ]


	def create_comparison_chart(model1_name, model2_name):
	    """Create a grouped bar chart comparing two models across key metrics.

	    Args:
	        model1_name: Key into ``MODELS`` for the first (cyan) series.
	        model2_name: Key into ``MODELS`` for the second (pink) series.

	    Returns:
	        A matplotlib ``Figure``. When either name is not in ``MODELS`` the
	        figure only carries a "Select two models" hint. The figure is closed
	        in pyplot before being returned, so pyplot does not keep accumulating
	        figures across calls; the returned object can still be rendered.
	    """
	    if model1_name not in MODELS or model2_name not in MODELS:
	        fig, ax = plt.subplots(figsize=(10, 6))
	        ax.text(0.5, 0.5, "Select two models", ha="center", va="center", color=ACCENT, fontsize=16)
	        ax.set_facecolor(BG)
	        fig.patch.set_facecolor(BG)
	        plt.close(fig)
	        return fig

	    # Normalized metrics (0-10 scale for comparison)
	    metrics = ["Size\n(smaller=better)", "Gen Speed\n(faster=better)", "Prompt Speed\n(faster=better)",
	               "RAM Free\n(more=better)", "Load Time\n(faster=better)", "Quality\n(higher=better)"]

	    # "Lower is better" metrics are scored relative to the worst model in the table.
	    max_size = max(m["size_mb"] for m in MODELS.values())
	    max_load = max(m["load_s"] for m in MODELS.values())

	    m1_vals = _bar_scores(MODELS[model1_name], max_size, max_load)
	    m2_vals = _bar_scores(MODELS[model2_name], max_size, max_load)

	    x = np.arange(len(metrics))
	    width = 0.35

	    fig, ax = plt.subplots(figsize=(12, 6))
	    fig.patch.set_facecolor(BG)
	    ax.set_facecolor(CARD)

	    # Two bars per metric, centred on the tick.
	    bars1 = ax.bar(x - width / 2, m1_vals, width, label=model1_name, color=ACCENT, edgecolor=WHITE, linewidth=0.5)
	    bars2 = ax.bar(x + width / 2, m2_vals, width, label=model2_name, color=ACCENT2, edgecolor=WHITE, linewidth=0.5)

	    ax.set_ylabel("Score (0-10, higher = better)", color=WHITE, fontsize=12)
	    ax.set_title(f"Model Comparison: {model1_name} vs {model2_name}", color=WHITE, fontsize=14, pad=15)
	    ax.set_xticks(x)
	    ax.set_xticklabels(metrics, color=WHITE, fontsize=9)
	    ax.set_ylim(0, 12)  # headroom above 10 for the value labels
	    ax.tick_params(axis="y", colors=GRAY)
	    ax.spines["bottom"].set_color(GRAY)
	    ax.spines["left"].set_color(GRAY)
	    ax.spines["top"].set_visible(False)
	    ax.spines["right"].set_visible(False)
	    ax.grid(axis="y", color=GRAY, alpha=0.2, linestyle="--")

	    legend = ax.legend(facecolor=CARD, edgecolor=ACCENT, labelcolor=WHITE, fontsize=10)
	    legend.get_frame().set_alpha(0.9)

	    # Add value labels
	    for bar in (*bars1, *bars2):
	        height = bar.get_height()
	        ax.annotate(
	            f"{height:.1f}",
	            xy=(bar.get_x() + bar.get_width() / 2, height),
	            xytext=(0, 3), textcoords="offset points",
	            ha="center", va="bottom", color=WHITE, fontsize=8,
	        )

	    # Lay out this figure explicitly: plt.tight_layout() targets pyplot's
	    # global "current figure", which is not guaranteed to be ``fig`` if
	    # another figure has been created in the meantime.
	    fig.tight_layout()
	    plt.close(fig)
	    return fig


	def _radar_scores(model, max_size, max_load):
	    """Return the six 0-1 radar values for one ``MODELS`` entry.

	    The order matches the radar categories: compactness, generation speed,
	    RAM, load speed, quality, Arabic support. Size and load time are inverted
	    against the largest value in ``MODELS``; the Arabic rating label is
	    mapped to a number, with unknown labels falling back to 5.
	    """
	    arabic_scores = {"Poor": 2, "Fair": 5, "Good": 7, "Very Good": 9}
	    return [
	        1 - model["size_mb"] / max_size,
	        model["gen_tps"] / 25,
	        model["ram_mb"] / 5000,
	        1 - model["load_s"] / max_load,
	        model["quality_score"] / 10,
	        arabic_scores.get(model["arabic"], 5) / 10,
	    ]


	def create_radar_chart(model1_name, model2_name):
	    """Create a radar/spider chart comparing two models.

	    Args:
	        model1_name: Key into ``MODELS`` for the first (cyan) outline.
	        model2_name: Key into ``MODELS`` for the second (pink) outline.

	    Returns:
	        A matplotlib ``Figure`` with a single polar axes. When either name is
	        not in ``MODELS`` the axes only carry a "Select two models" hint. The
	        figure is closed in pyplot before being returned.
	    """
	    if model1_name not in MODELS or model2_name not in MODELS:
	        fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection="polar"))
	        # Same hint as the bar chart; axes coordinates keep it centred on a polar plot.
	        ax.text(0.5, 0.5, "Select two models", transform=ax.transAxes,
	                ha="center", va="center", color=ACCENT, fontsize=16)
	        ax.set_facecolor(BG)
	        fig.patch.set_facecolor(BG)
	        plt.close(fig)
	        return fig

	    categories = ["Compact", "Speed", "RAM\nEfficient", "Fast\nLoad", "Quality", "Arabic\nSupport"]

	    max_size = max(m["size_mb"] for m in MODELS.values())
	    max_load = max(m["load_s"] for m in MODELS.values())

	    m1_vals = _radar_scores(MODELS[model1_name], max_size, max_load)
	    m2_vals = _radar_scores(MODELS[model2_name], max_size, max_load)

	    # One spoke per category; repeat the first point so each outline closes.
	    spokes = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
	    angles = spokes + spokes[:1]
	    m1_vals = m1_vals + m1_vals[:1]
	    m2_vals = m2_vals + m2_vals[:1]

	    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection="polar"))
	    fig.patch.set_facecolor(BG)
	    ax.set_facecolor(CARD)

	    for values, colour, label in ((m1_vals, ACCENT, model1_name), (m2_vals, ACCENT2, model2_name)):
	        ax.plot(angles, values, "o-", color=colour, linewidth=2, label=label)
	        ax.fill(angles, values, color=colour, alpha=0.15)

	    ax.set_xticks(spokes)
	    ax.set_xticklabels(categories, color=WHITE, fontsize=10)
	    ax.set_ylim(0, 1)
	    ax.set_yticks([0.2, 0.4, 0.6, 0.8, 1.0])
	    ax.set_yticklabels(["0.2", "0.4", "0.6", "0.8", "1.0"], color=GRAY, fontsize=8)
	    ax.grid(color=GRAY, alpha=0.3)
	    ax.spines["polar"].set_color(GRAY)

	    ax.set_title("Model Capability Radar", color=WHITE, fontsize=14, pad=20)
	    legend = ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1),
	                       facecolor=CARD, edgecolor=ACCENT, labelcolor=WHITE, fontsize=10)
	    legend.get_frame().set_alpha(0.9)

	    # Lay out this figure explicitly: plt.tight_layout() targets pyplot's
	    # global "current figure", which is not guaranteed to be ``fig``.
	    fig.tight_layout()
	    plt.close(fig)
	    return fig


	def get_comparison_table(model1_name, model2_name):
	    """Return a Markdown table comparing two models row by row.

	    Args:
	        model1_name: Key into ``MODELS`` for the left-hand column.
	        model2_name: Key into ``MODELS`` for the right-hand column.

	    Returns:
	        Markdown text: a heading followed by a four-column table
	        (metric, model 1 value, model 2 value, winner). The winner cell is
	        "🟢" when model 1 wins, "🔵" when model 2 wins, "➖" on a tie, and
	        empty for rows that have no better/worse direction. If either name is
	        not in ``MODELS`` the string "Please select two models." is returned.
	    """
	    if model1_name not in MODELS or model2_name not in MODELS:
	        return "Please select two models."

	    m1 = MODELS[model1_name]
	    m2 = MODELS[model2_name]

	    # (label, MODELS key, display format, direction)
	    # direction: +1 = higher wins, -1 = lower wins, 0 = no winner for this row.
	    rows = [
	        ("Parameters (B)", "params_b", "{}", 0),
	        ("Model Size (MB)", "size_mb", "{}", -1),
	        ("Gen Speed (t/s)", "gen_tps", "{}", 1),
	        ("Prompt Speed (t/s)", "prompt_tps", "{}", 1),
	        ("RAM Free (MB)", "ram_mb", "{}", 1),
	        ("Load Time (s)", "load_s", "{}", -1),
	        ("Quality Score", "quality_score", "{}/10", 1),
	        ("Context Length", "context", "{:,}", 1),
	        ("Arabic Support", "arabic", "{}", 0),
	        ("License", "license", "{}", 0),
	    ]

	    # Plain "|" separators: a backslash-escaped pipe is an invalid Python
	    # escape sequence and, in Markdown, a literal pipe rather than a column
	    # boundary, so the table would not render.
	    lines = [
	        "### Side-by-Side Comparison",
	        "",
	        f"| Metric | {model1_name} | {model2_name} | Winner |",
	        "|--------|-------------|-------------|--------|",
	    ]

	    for label, key, fmt, direction in rows:
	        v1, v2 = m1[key], m2[key]
	        if direction == 0:
	            winner = ""
	        else:
	            # Compare the raw numbers instead of re-parsing the formatted text.
	            lead = (v1 - v2) * direction
	            winner = "🟢" if lead > 0 else ("🔵" if lead < 0 else "➖")
	        lines.append(f"| {label} | {fmt.format(v1)} | {fmt.format(v2)} | {winner} |")

	    return "\n".join(lines) + "\n"


	# --- UI -----------------------------------------------------------------------
	# Custom stylesheet handed to gr.Blocks(css=...):
	#   #dispatch-header h1 / p  - gradient-filled title and cyan tagline inside
	#                              the column created with elem_id="dispatch-header"
	#   .dispatch-footer         - centred gray footer text
	CSS = """
	#dispatch-header h1 {
	color: #FFFFFF; font-size: 2.2rem; margin: 0;
	background: linear-gradient(90deg, #1FE0E6 0%, #FFFFFF 60%);
	-webkit-background-clip: text; -webkit-text-fill-color: transparent;
	}
	#dispatch-header p { color: #1FE0E6; font-size: 1.05rem; margin: 6px 0 0 0; }
	.dispatch-footer { text-align: center; color: #8A8F9C; font-size: 0.9rem; padding-top: 8px; }
	"""

	def _run_comparison(first_name, second_name):
	    """Build the bar chart, radar chart and Markdown table for one model pair."""
	    return (
	        create_comparison_chart(first_name, second_name),
	        create_radar_chart(first_name, second_name),
	        get_comparison_table(first_name, second_name),
	    )


	# Page layout: header, intro text, model pickers, the two charts, the
	# comparison table and a footer, all styled with a dark cyan theme.
	with gr.Blocks(
	    title="Dispatch AI — Model Comparison Visualizer",
	    theme=gr.themes.Base(
	        primary_hue="cyan", secondary_hue="cyan", neutral_hue="slate",
	        font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"],
	    ).set(
	        body_background_fill="#0A0F1A", body_background_fill_dark="#0A0F1A",
	        body_text_color="#FFFFFF", body_text_color_dark="#FFFFFF",
	        block_background_fill="#0E1424", block_background_fill_dark="#0E1424",
	        block_border_color="#1FE0E6", block_border_width="1px",
	        block_label_text_color="#1FE0E6", block_title_text_color="#1FE0E6",
	        button_primary_background_fill="#1FE0E6", button_primary_background_fill_dark="#1FE0E6",
	        button_primary_text_color="#0A0F1A", button_primary_border_color="#1FE0E6",
	        input_background_fill="#0E1424", input_background_fill_dark="#0E1424",
	        input_border_color="#1FE0E6", input_border_width="1px",
	    ),
	    css=CSS,
	) as demo:
	    with gr.Column(elem_id="dispatch-header"):
	        gr.Markdown(
	            """
	            # Dispatch AI — Model Comparison Visualizer
	            Compare mobile AI models side-by-side with visual charts · Dispatch AI (FZE) · UAE
	            """
	        )

	    gr.Markdown(
	        """
	        Pick two models to compare size, speed, quality, RAM, and more. Data from our 80-phone farm.
	        🟢 = Model 1 wins · 🔵 = Model 2 wins · ➖ = tie
	        """
	    )

	    with gr.Row():
	        model1 = gr.Dropdown(list(MODELS.keys()), label="Model 1 (🟢)", value="Qwen2.5-1.5B-Instruct")
	        model2 = gr.Dropdown(list(MODELS.keys()), label="Model 2 (🔵)", value="Llama-3.2-3B-Instruct")
	        # NOTE(review): button assumed to sit in the same row as the two
	        # dropdowns - confirm against the intended layout.
	        compare_btn = gr.Button("⚔️ Compare Models", variant="primary")

	    with gr.Row():
	        bar_chart = gr.Plot(label="Bar Chart Comparison")
	        radar_chart = gr.Plot(label="Radar Chart Comparison")

	    comparison_table = gr.Markdown()

	    # Events: the button and a change to either dropdown all refresh the
	    # same three outputs, so they share a single callback.
	    for register in (compare_btn.click, model1.change, model2.change):
	        register(
	            fn=_run_comparison,
	            inputs=[model1, model2],
	            outputs=[bar_chart, radar_chart, comparison_table],
	        )

	    gr.Markdown(
	        """
	        <div class="dispatch-footer">
	        © 2026 Dispatch AI (FZE) · Sharjah, UAE · License 10818 ·
	        Benchmarks from 80-device phone farm · Q4_K_M quants · llama.cpp
	        </div>
	        """
	    )

	if __name__ == "__main__":
	    # Script entry point: turn on request queuing, then start the app server.
	    demo.queue().launch()