Spaces:
Build error
Build error
| import gradio as gr | |
| import pandas as pd | |
| import requests | |
| from io import StringIO | |
# Static HTML fragments shown by the Gradio UI (each is passed to gr.HTML).
# NOTE(review): the emoji glyphs inside these strings look mis-encoded in this
# copy of the file — confirm against the original source; they are left as-is.

# Page title banner.
DESCRIPTION = """
<h2 style='text-align: center; color: #00480a !important; text-shadow: 2px 2px 4px rgba(0,0,0,0.1);'>๐ LLM Inference Leaderboard: Pushing the Boundaries of Performance ๐</h2>
"""

# Introductory panel: goal of the leaderboard and the list of inference engines.
INTRODUCTION = """
<div style='background-color: #e6ffd9; padding: 20px; border-radius: 15px; margin-bottom: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
<h3 style='color: #00480a;'>๐ฌ Our Exciting Quest</h3>
<p style='color: #00480a;'>We're on a thrilling journey to help developers discover the perfect LLMs and libraries for their innovative projects! We've put these models through their paces using six cutting-edge inference engines:</p>
<ul style='color: #00480a;'>
<li>๐ vLLM</li>
<li>๐ TGI</li>
<li>โก TensorRT-LLM</li>
<li>๐ฎ Tritonvllm</li>
<li>๐ Deepspeed-mii</li>
<li>๐ฏ ctranslate</li>
</ul>
<p style='color: #00480a;'>All our tests were conducted on state-of-the-art A100 GPUs hosted on Azure, ensuring a fair and neutral battleground!</p>
<p style='color: #00480a; font-weight: bold;'>Our mission: Empower developers, researchers, and AI enthusiasts to find their perfect LLM match for both development and production environments!</p>
</div>
"""

# Two panels: the testing methodology, followed by links to the companion blog posts.
HOW_WE_TESTED = """
<div style='background-color: #cbff4d; padding: 20px; border-radius: 15px; margin-top: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
<h3 style='color: #00480a;'>๐งช Our Rigorous Testing Process</h3>
<p style='color: #00480a;'>We left no stone unturned in our quest for reliable benchmarks:</p>
<ul style='color: #00480a;'>
<li><strong>๐ฅ๏ธ Platform:</strong> A100 GPUs from Azure - the ultimate testing ground!</li>
<li><strong>๐ณ Setup:</strong> Docker containers for each library, ensuring a pristine environment.</li>
<li><strong>โ๏ธ Configuration:</strong> Standardized settings (temperature 0.5, top_p 1) for laser-focused performance comparisons.</li>
<li><strong>๐ Prompts & Token Ranges:</strong> Six diverse prompts, input lengths from 20 to 2,000 tokens, and generation lengths of 100, 200, and 500 tokens - pushing the boundaries of flexibility!</li>
<li><strong>๐ค Models & Libraries Tested:</strong> We put the best through their paces: Phi-3-medium-128k-instruct, Meta-Llama-3.1-8B-Instruct, Mistral-7B-Instruct-v0.3, Qwen2-7B-Instruct, and Gemma-2-9b-it, using TGI, vLLM, DeepSpeed Mii, CTranslate2, Triton with vLLM Backend, and TensorRT-LLM.</li>
</ul>
</div>
<div style='background-color: #e6ffd9; padding: 20px; border-radius: 15px; margin-top: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
<h3 style='color: #00480a;'>๐ Additional Resources</h3>
<p style='color: #00480a;'>For a deeper dive into LLM speed benchmarks and independent analysis, check out these complete blogs:</p>
<ul style='color: #00480a;'>
<li><a href="https://www.inferless.com/learn/exploring-llms-speed-benchmarks-independent-analysis---part-3" target="_blank" style="color: #006400;">Exploring LLMs Speed Benchmarks: Independent Analysis - Part 3</a></li>
<li><a href="https://www.inferless.com/learn/exploring-llms-speed-benchmarks-independent-analysis---part-2" target="_blank" style="color: #006400;">Exploring LLMs Speed Benchmarks: Independent Analysis - Part 2</a></li>
<li><a href="https://www.inferless.com/learn/exploring-llms-speed-benchmarks-independent-analysis" target="_blank" style="color: #006400;">Exploring LLMs Speed Benchmarks: Independent Analysis</a></li>
</ul>
</div>
"""
# Default location of the benchmark CSV (a Hugging Face dataset path).
# NOTE(review): pandas resolves `hf://` paths through fsspec, which needs the
# `huggingface_hub` package installed — confirm it is in the Space requirements.
CSV_URL = "hf://datasets/rbgo/llm-inference-benchmark/LLM-inference-benchmark-3.csv"


def load_and_process_csv(source=None):
    """Load the benchmark CSV and return only the leaderboard columns, in display order.

    Args:
        source: Anything ``pandas.read_csv`` accepts (path, URL or file-like
            object). When omitted, the module-level ``CSV_URL`` is used; it is
            looked up at call time, so rebinding ``CSV_URL`` is honoured.

    Returns:
        A DataFrame with exactly the leaderboard columns, in the fixed order
        below. Columns missing from the CSV are created and filled with
        ``pd.NA``; columns not in the list are dropped.
    """
    columns_order = [
        "Model_Name",
        "Library",
        "TTFT",
        "Tokens-per-Second",
        "Latency",
        "Token_Count",
        "input_length",
        "output_length",
    ]
    frame = pd.read_csv(CSV_URL if source is None else source)
    # Guarantee every expected column exists so the selection below cannot
    # raise KeyError when the CSV schema is incomplete.
    for col in columns_order:
        if col not in frame.columns:
            frame[col] = pd.NA
    return frame[columns_order]
# Load the benchmark table once, at import time; the callbacks and the UI
# definition in this module all read this module-level DataFrame.
df = load_and_process_csv()
def get_leaderboard_df() -> pd.DataFrame:
    """Return the module-level benchmark DataFrame ``df`` as-is (no copy is made)."""
    return df
def filter_and_search(model_filter, library_filter, data=None):
    """Return the leaderboard rows matching the selected model and library.

    Args:
        model_filter: Value to match against the ``Model_Name`` column.
            ``"All"`` or ``None`` (nothing selected) disables this filter.
        library_filter: Value to match against the ``Library`` column.
            ``"All"`` or ``None`` disables this filter.
        data: DataFrame to filter. Defaults to the module-level ``df``.

    Returns:
        A new DataFrame with the matching rows; the input frame is never
        modified.
    """
    source = df if data is None else data
    result = source.copy()
    selections = (("Model_Name", model_filter), ("Library", library_filter))
    for column, wanted in selections:
        # A missing selection means "no filter", same as "All"; comparing the
        # column against None would otherwise yield an empty table.
        if wanted is None or wanted == "All":
            continue
        result = result[result[column] == wanted]
    return result
# Stylesheet handed to gr.Blocks(css=...): green page theme, container width,
# prose text colour, tab bar / tab button states, and primary button colours.
custom_css = """
body {
background-color: #f0fff0;
font-family: 'Roboto', sans-serif;
}
.gradio-container {
max-width: 1200px !important;
}
.gradio-container .prose * {
color: #00480a !important;
}
.gradio-container .prose h2,
.gradio-container .prose h3 {
color: #00480a !important;
}
.tabs {
background-color: #e6ffd9;
border-radius: 15px;
overflow: hidden;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
.tab-nav {
background-color: #00480a;
padding: 10px;
}
.tab-nav button {
color: #cbff4d !important;
background-color: #006400;
border: none;
padding: 10px 20px;
margin-right: 5px;
border-radius: 10px;
cursor: pointer;
transition: all 0.3s ease;
}
.tab-nav button:hover {
background-color: #cbff4d;
color: #00480a !important;
}
.tab-nav button.selected {
background-color: #cbff4d;
color: #00480a !important;
font-weight: bold;
}
.gr-button-primary {
background-color: #00480a !important;
border-color: #00480a !important;
color: #cbff4d !important;
}
.gr-button-primary:hover {
background-color: #cbff4d !important;
color: #00480a !important;
}
"""
# Build the Gradio page: header HTML, one leaderboard tab with two filter
# dropdowns and the results table, then the methodology HTML.
# NOTE(review): indentation was lost in this copy of the file; the nesting below
# is reconstructed from the statement order — confirm against the original,
# in particular whether HOW_WE_TESTED sits inside the tab or below the tabs.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(DESCRIPTION)
    gr.HTML(INTRODUCTION)
    with gr.Tabs():
        with gr.TabItem("๐ Leaderboard"):
            with gr.Row():
                # Dropdown choices are "All" plus the distinct values found in the loaded data.
                model_dropdown = gr.Dropdown(choices=["All"] + df['Model_Name'].unique().tolist(), label="๐ท๏ธ Filter by Model_Name", value="All")
                library_dropdown = gr.Dropdown(choices=["All"] + df['Library'].unique().tolist(), label="๐ท๏ธ Filter by Library", value="All")
            leaderboard = gr.DataFrame(df)
            gr.HTML(HOW_WE_TESTED)
    # Changing either dropdown re-filters the table using both current selections.
    model_dropdown.change(filter_and_search, inputs=[model_dropdown, library_dropdown], outputs=leaderboard)
    library_dropdown.change(filter_and_search, inputs=[model_dropdown, library_dropdown], outputs=leaderboard)
    # On page load, populate the table with the full, unfiltered DataFrame.
    demo.load(get_leaderboard_df, outputs=[leaderboard])

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()