Spaces:
Running
Running
yuhangzang Claude Opus 4.7 (1M context) committed on
Commit ·
5bda49b
1
Parent(s): d5ebc54
Add modality/size/open-source filters and new model results
Browse files
Add Claude 4.5 Opus, Gemini-3 Pro, and Qwen3.5-397B-A17B; introduce
modality and params_b fields with Gradio filter controls.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- app.py +56 -31
- leaderboard_data.json +64 -0
- src/json_leaderboard.py +60 -19
app.py
CHANGED
|
@@ -2,7 +2,12 @@ import gradio as gr
|
|
| 2 |
import pandas as pd
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
-
from src.json_leaderboard import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from src.about import (
|
| 7 |
CITATION_BUTTON_LABEL,
|
| 8 |
CITATION_BUTTON_TEXT,
|
|
@@ -14,39 +19,35 @@ from src.about import (
|
|
| 14 |
from src.display.css_html_js import custom_css
|
| 15 |
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
headers=list(df.columns),
|
| 28 |
-
datatype=["html", "str", "html", "str", "str", "str", "str"],
|
| 29 |
-
interactive=False,
|
| 30 |
-
wrap=True
|
| 31 |
)
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
def get_stats_display():
|
| 35 |
"""Get formatted statistics for display"""
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
if not stats:
|
| 40 |
return "No statistics available"
|
| 41 |
-
|
| 42 |
-
|
| 43 |
### 📊 Leaderboard Statistics
|
| 44 |
- **Total Models**: {stats['total_models']}
|
| 45 |
- **Best Score**: {stats['max_acc']:.1f}
|
| 46 |
- **Lowest Score**: {stats['min_acc']:.1f}
|
| 47 |
"""
|
| 48 |
-
|
| 49 |
-
return stats_text
|
| 50 |
|
| 51 |
|
| 52 |
# Create the Gradio interface
|
|
@@ -56,17 +57,41 @@ with demo:
|
|
| 56 |
gr.HTML(TITLE)
|
| 57 |
gr.HTML(LINKS_AND_INFO)
|
| 58 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
| 59 |
-
|
| 60 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 61 |
with gr.TabItem("🏅 Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
|
| 62 |
-
# Statistics display
|
| 63 |
stats_display = gr.Markdown(get_stats_display())
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
with gr.Row():
|
| 71 |
with gr.Column():
|
| 72 |
gr.Markdown("## 📙 Citation")
|
|
@@ -82,4 +107,4 @@ with demo:
|
|
| 82 |
|
| 83 |
|
| 84 |
if __name__ == "__main__":
|
| 85 |
-
demo.launch()
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
+
from src.json_leaderboard import (
|
| 6 |
+
create_leaderboard_df,
|
| 7 |
+
get_leaderboard_stats,
|
| 8 |
+
SIZE_BANDS,
|
| 9 |
+
MODALITIES,
|
| 10 |
+
)
|
| 11 |
from src.about import (
|
| 12 |
CITATION_BUTTON_LABEL,
|
| 13 |
CITATION_BUTTON_TEXT,
|
|
|
|
| 19 |
from src.display.css_html_js import custom_css
|
| 20 |
|
| 21 |
|
| 22 |
+
JSON_PATH = str(Path(__file__).parent / "leaderboard_data.json")
|
| 23 |
+
TABLE_DATATYPES = ["html", "str", "html", "str", "str", "str", "str"]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def filter_leaderboard(open_source_only, size_band, modality):
|
| 27 |
+
df = create_leaderboard_df(
|
| 28 |
+
JSON_PATH,
|
| 29 |
+
open_source_only=open_source_only,
|
| 30 |
+
size_band=size_band,
|
| 31 |
+
modality=modality,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
)
|
| 33 |
+
if df.empty:
|
| 34 |
+
return pd.DataFrame({"Result": ["No models match the selected filters."]})
|
| 35 |
+
return df
|
| 36 |
|
| 37 |
|
| 38 |
def get_stats_display():
|
| 39 |
"""Get formatted statistics for display"""
|
| 40 |
+
stats = get_leaderboard_stats(JSON_PATH)
|
| 41 |
+
|
|
|
|
| 42 |
if not stats:
|
| 43 |
return "No statistics available"
|
| 44 |
+
|
| 45 |
+
return f"""
|
| 46 |
### 📊 Leaderboard Statistics
|
| 47 |
- **Total Models**: {stats['total_models']}
|
| 48 |
- **Best Score**: {stats['max_acc']:.1f}
|
| 49 |
- **Lowest Score**: {stats['min_acc']:.1f}
|
| 50 |
"""
|
|
|
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
# Create the Gradio interface
|
|
|
|
| 57 |
gr.HTML(TITLE)
|
| 58 |
gr.HTML(LINKS_AND_INFO)
|
| 59 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
| 60 |
+
|
| 61 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 62 |
with gr.TabItem("🏅 Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
|
|
|
|
| 63 |
stats_display = gr.Markdown(get_stats_display())
|
| 64 |
+
|
| 65 |
+
with gr.Row():
|
| 66 |
+
open_source_filter = gr.Checkbox(
|
| 67 |
+
label="Open-source only",
|
| 68 |
+
value=False,
|
| 69 |
+
)
|
| 70 |
+
size_filter = gr.Dropdown(
|
| 71 |
+
choices=SIZE_BANDS,
|
| 72 |
+
value="All",
|
| 73 |
+
label="Model size (activated params)",
|
| 74 |
+
)
|
| 75 |
+
modality_filter = gr.Radio(
|
| 76 |
+
choices=MODALITIES,
|
| 77 |
+
value="All",
|
| 78 |
+
label="Modality",
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
+
leaderboard_table = gr.Dataframe(
|
| 82 |
+
value=filter_leaderboard(False, "All", "All"),
|
| 83 |
+
datatype=TABLE_DATATYPES,
|
| 84 |
+
interactive=False,
|
| 85 |
+
wrap=True,
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
for control in (open_source_filter, size_filter, modality_filter):
|
| 89 |
+
control.change(
|
| 90 |
+
fn=filter_leaderboard,
|
| 91 |
+
inputs=[open_source_filter, size_filter, modality_filter],
|
| 92 |
+
outputs=leaderboard_table,
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
with gr.Row():
|
| 96 |
with gr.Column():
|
| 97 |
gr.Markdown("## 📙 Citation")
|
|
|
|
| 107 |
|
| 108 |
|
| 109 |
if __name__ == "__main__":
|
| 110 |
+
demo.launch()
|
leaderboard_data.json
CHANGED
|
@@ -1,10 +1,48 @@
|
|
| 1 |
{
|
| 2 |
"leaderboard": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
{
|
| 4 |
"model": "GLM-4.6V",
|
| 5 |
"link": "https://huggingface.co/zai-org/GLM-4.6V",
|
| 6 |
"hf": "https://huggingface.co/zai-org/GLM-4.6V",
|
| 7 |
"params": "12B activated (106B total)",
|
|
|
|
|
|
|
| 8 |
"open_source": true,
|
| 9 |
"acc": 54.9,
|
| 10 |
"release_date": "2025-12",
|
|
@@ -15,6 +53,8 @@
|
|
| 15 |
"link": "https://www.teleai.com.cn/",
|
| 16 |
"hf": "-",
|
| 17 |
"params": "-",
|
|
|
|
|
|
|
| 18 |
"open_source": false,
|
| 19 |
"acc": 56.1,
|
| 20 |
"release_date": "2026-01",
|
|
@@ -25,6 +65,8 @@
|
|
| 25 |
"link": "https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Thinking",
|
| 26 |
"hf": "https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Thinking",
|
| 27 |
"params": "22B activated (235B total)",
|
|
|
|
|
|
|
| 28 |
"open_source": true,
|
| 29 |
"acc": 56.2,
|
| 30 |
"release_date": "2025-09",
|
|
@@ -35,6 +77,8 @@
|
|
| 35 |
"link": "https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Instruct",
|
| 36 |
"hf": "https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Instruct",
|
| 37 |
"params": "22B activated (235B total)",
|
|
|
|
|
|
|
| 38 |
"open_source": true,
|
| 39 |
"acc": 57.0,
|
| 40 |
"release_date": "2025-09",
|
|
@@ -45,6 +89,8 @@
|
|
| 45 |
"link": "https://arxiv.org/pdf/2507.01006",
|
| 46 |
"hf": "https://huggingface.co/zai-org/GLM-4.5V",
|
| 47 |
"params": "12B activated (106B total)",
|
|
|
|
|
|
|
| 48 |
"open_source": true,
|
| 49 |
"acc": 44.7,
|
| 50 |
"release_date": "2025-07",
|
|
@@ -55,6 +101,8 @@
|
|
| 55 |
"link": "https://arxiv.org/pdf/2507.01006",
|
| 56 |
"hf": "https://huggingface.co/THUDM/GLM-4.1V-9B-Thinking",
|
| 57 |
"params": "9B",
|
|
|
|
|
|
|
| 58 |
"open_source": true,
|
| 59 |
"acc": 42.4,
|
| 60 |
"release_date": "2025-07",
|
|
@@ -65,6 +113,8 @@
|
|
| 65 |
"link": "https://openai.com/index/gpt-4-1/",
|
| 66 |
"hf": "-",
|
| 67 |
"params": "-",
|
|
|
|
|
|
|
| 68 |
"open_source": false,
|
| 69 |
"acc": 49.7,
|
| 70 |
"release_date": "2025-04",
|
|
@@ -75,6 +125,8 @@
|
|
| 75 |
"link": "https://arxiv.org/abs/2410.21276",
|
| 76 |
"hf": "-",
|
| 77 |
"params": "-",
|
|
|
|
|
|
|
| 78 |
"open_source": false,
|
| 79 |
"acc": 46.3,
|
| 80 |
"release_date": "2024-11",
|
|
@@ -85,6 +137,8 @@
|
|
| 85 |
"link": "https://arxiv.org/pdf/2504.07491",
|
| 86 |
"hf": "https://huggingface.co/moonshotai/Kimi-VL-A3B-Thinking-2506",
|
| 87 |
"params": "2.8B activated (16B total)",
|
|
|
|
|
|
|
| 88 |
"open_source": true,
|
| 89 |
"acc": 42.1,
|
| 90 |
"release_date": "2025-06",
|
|
@@ -95,6 +149,8 @@
|
|
| 95 |
"link": "https://arxiv.org/pdf/2504.07491",
|
| 96 |
"hf": "https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct",
|
| 97 |
"params": "2.8B activated (16B total)",
|
|
|
|
|
|
|
| 98 |
"open_source": true,
|
| 99 |
"acc": 35.1,
|
| 100 |
"release_date": "2025-04",
|
|
@@ -105,6 +161,8 @@
|
|
| 105 |
"link": "https://arxiv.org/abs/2502.13923",
|
| 106 |
"hf": "https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct",
|
| 107 |
"params": "72B",
|
|
|
|
|
|
|
| 108 |
"open_source": true,
|
| 109 |
"acc": 35.2,
|
| 110 |
"release_date": "2025-02",
|
|
@@ -115,6 +173,8 @@
|
|
| 115 |
"link": "https://arxiv.org/abs/2502.13923",
|
| 116 |
"hf": "https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct",
|
| 117 |
"params": "7B",
|
|
|
|
|
|
|
| 118 |
"open_source": true,
|
| 119 |
"acc": 25.1,
|
| 120 |
"release_date": "2025-02",
|
|
@@ -125,6 +185,8 @@
|
|
| 125 |
"link": "https://arxiv.org/pdf/2501.08313",
|
| 126 |
"hf": "https://huggingface.co/MiniMaxAI/MiniMax-VL-01",
|
| 127 |
"params": "45.9B activated (456B total)",
|
|
|
|
|
|
|
| 128 |
"open_source": true,
|
| 129 |
"acc": 32.5,
|
| 130 |
"release_date": "2025-01",
|
|
@@ -135,6 +197,8 @@
|
|
| 135 |
"link": "https://arxiv.org/pdf/2410.05993",
|
| 136 |
"hf": "https://huggingface.co/rhymes-ai/Aria",
|
| 137 |
"params": "3.9B activated (25.3B total)",
|
|
|
|
|
|
|
| 138 |
"open_source": true,
|
| 139 |
"acc": 28.3,
|
| 140 |
"release_date": "2024-10",
|
|
|
|
| 1 |
{
|
| 2 |
"leaderboard": [
|
| 3 |
+
{
|
| 4 |
+
"model": "Claude 4.5 Opus",
|
| 5 |
+
"link": "https://www.anthropic.com/news/claude-opus-4-5",
|
| 6 |
+
"hf": "-",
|
| 7 |
+
"params": "-",
|
| 8 |
+
"params_b": null,
|
| 9 |
+
"modality": "VL",
|
| 10 |
+
"open_source": false,
|
| 11 |
+
"acc": 61.9,
|
| 12 |
+
"release_date": "2025-11",
|
| 13 |
+
"moe": "-"
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"model": "Gemini-3 Pro",
|
| 17 |
+
"link": "https://blog.google/technology/google-deepmind/gemini-3/",
|
| 18 |
+
"hf": "-",
|
| 19 |
+
"params": "-",
|
| 20 |
+
"params_b": null,
|
| 21 |
+
"modality": "VL",
|
| 22 |
+
"open_source": false,
|
| 23 |
+
"acc": 60.5,
|
| 24 |
+
"release_date": "2025-11",
|
| 25 |
+
"moe": "-"
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"model": "Qwen3.5-397B-A17B",
|
| 29 |
+
"link": "https://huggingface.co/Qwen/Qwen3.5-397B-A17B",
|
| 30 |
+
"hf": "https://huggingface.co/Qwen/Qwen3.5-397B-A17B",
|
| 31 |
+
"params": "17B activated (397B total)",
|
| 32 |
+
"params_b": 17,
|
| 33 |
+
"modality": "VL",
|
| 34 |
+
"open_source": true,
|
| 35 |
+
"acc": 61.5,
|
| 36 |
+
"release_date": "2026-02",
|
| 37 |
+
"moe": true
|
| 38 |
+
},
|
| 39 |
{
|
| 40 |
"model": "GLM-4.6V",
|
| 41 |
"link": "https://huggingface.co/zai-org/GLM-4.6V",
|
| 42 |
"hf": "https://huggingface.co/zai-org/GLM-4.6V",
|
| 43 |
"params": "12B activated (106B total)",
|
| 44 |
+
"params_b": 12,
|
| 45 |
+
"modality": "VL",
|
| 46 |
"open_source": true,
|
| 47 |
"acc": 54.9,
|
| 48 |
"release_date": "2025-12",
|
|
|
|
| 53 |
"link": "https://www.teleai.com.cn/",
|
| 54 |
"hf": "-",
|
| 55 |
"params": "-",
|
| 56 |
+
"params_b": null,
|
| 57 |
+
"modality": "VL",
|
| 58 |
"open_source": false,
|
| 59 |
"acc": 56.1,
|
| 60 |
"release_date": "2026-01",
|
|
|
|
| 65 |
"link": "https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Thinking",
|
| 66 |
"hf": "https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Thinking",
|
| 67 |
"params": "22B activated (235B total)",
|
| 68 |
+
"params_b": 22,
|
| 69 |
+
"modality": "VL",
|
| 70 |
"open_source": true,
|
| 71 |
"acc": 56.2,
|
| 72 |
"release_date": "2025-09",
|
|
|
|
| 77 |
"link": "https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Instruct",
|
| 78 |
"hf": "https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Instruct",
|
| 79 |
"params": "22B activated (235B total)",
|
| 80 |
+
"params_b": 22,
|
| 81 |
+
"modality": "VL",
|
| 82 |
"open_source": true,
|
| 83 |
"acc": 57.0,
|
| 84 |
"release_date": "2025-09",
|
|
|
|
| 89 |
"link": "https://arxiv.org/pdf/2507.01006",
|
| 90 |
"hf": "https://huggingface.co/zai-org/GLM-4.5V",
|
| 91 |
"params": "12B activated (106B total)",
|
| 92 |
+
"params_b": 12,
|
| 93 |
+
"modality": "VL",
|
| 94 |
"open_source": true,
|
| 95 |
"acc": 44.7,
|
| 96 |
"release_date": "2025-07",
|
|
|
|
| 101 |
"link": "https://arxiv.org/pdf/2507.01006",
|
| 102 |
"hf": "https://huggingface.co/THUDM/GLM-4.1V-9B-Thinking",
|
| 103 |
"params": "9B",
|
| 104 |
+
"params_b": 9,
|
| 105 |
+
"modality": "VL",
|
| 106 |
"open_source": true,
|
| 107 |
"acc": 42.4,
|
| 108 |
"release_date": "2025-07",
|
|
|
|
| 113 |
"link": "https://openai.com/index/gpt-4-1/",
|
| 114 |
"hf": "-",
|
| 115 |
"params": "-",
|
| 116 |
+
"params_b": null,
|
| 117 |
+
"modality": "VL",
|
| 118 |
"open_source": false,
|
| 119 |
"acc": 49.7,
|
| 120 |
"release_date": "2025-04",
|
|
|
|
| 125 |
"link": "https://arxiv.org/abs/2410.21276",
|
| 126 |
"hf": "-",
|
| 127 |
"params": "-",
|
| 128 |
+
"params_b": null,
|
| 129 |
+
"modality": "Omni",
|
| 130 |
"open_source": false,
|
| 131 |
"acc": 46.3,
|
| 132 |
"release_date": "2024-11",
|
|
|
|
| 137 |
"link": "https://arxiv.org/pdf/2504.07491",
|
| 138 |
"hf": "https://huggingface.co/moonshotai/Kimi-VL-A3B-Thinking-2506",
|
| 139 |
"params": "2.8B activated (16B total)",
|
| 140 |
+
"params_b": 2.8,
|
| 141 |
+
"modality": "VL",
|
| 142 |
"open_source": true,
|
| 143 |
"acc": 42.1,
|
| 144 |
"release_date": "2025-06",
|
|
|
|
| 149 |
"link": "https://arxiv.org/pdf/2504.07491",
|
| 150 |
"hf": "https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct",
|
| 151 |
"params": "2.8B activated (16B total)",
|
| 152 |
+
"params_b": 2.8,
|
| 153 |
+
"modality": "VL",
|
| 154 |
"open_source": true,
|
| 155 |
"acc": 35.1,
|
| 156 |
"release_date": "2025-04",
|
|
|
|
| 161 |
"link": "https://arxiv.org/abs/2502.13923",
|
| 162 |
"hf": "https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct",
|
| 163 |
"params": "72B",
|
| 164 |
+
"params_b": 72,
|
| 165 |
+
"modality": "VL",
|
| 166 |
"open_source": true,
|
| 167 |
"acc": 35.2,
|
| 168 |
"release_date": "2025-02",
|
|
|
|
| 173 |
"link": "https://arxiv.org/abs/2502.13923",
|
| 174 |
"hf": "https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct",
|
| 175 |
"params": "7B",
|
| 176 |
+
"params_b": 7,
|
| 177 |
+
"modality": "VL",
|
| 178 |
"open_source": true,
|
| 179 |
"acc": 25.1,
|
| 180 |
"release_date": "2025-02",
|
|
|
|
| 185 |
"link": "https://arxiv.org/pdf/2501.08313",
|
| 186 |
"hf": "https://huggingface.co/MiniMaxAI/MiniMax-VL-01",
|
| 187 |
"params": "45.9B activated (456B total)",
|
| 188 |
+
"params_b": 45.9,
|
| 189 |
+
"modality": "VL",
|
| 190 |
"open_source": true,
|
| 191 |
"acc": 32.5,
|
| 192 |
"release_date": "2025-01",
|
|
|
|
| 197 |
"link": "https://arxiv.org/pdf/2410.05993",
|
| 198 |
"hf": "https://huggingface.co/rhymes-ai/Aria",
|
| 199 |
"params": "3.9B activated (25.3B total)",
|
| 200 |
+
"params_b": 3.9,
|
| 201 |
+
"modality": "VL",
|
| 202 |
"open_source": true,
|
| 203 |
"acc": 28.3,
|
| 204 |
"release_date": "2024-10",
|
src/json_leaderboard.py
CHANGED
|
@@ -3,6 +3,10 @@ import pandas as pd
|
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
def load_leaderboard_from_json(json_path="leaderboard_data.json"):
|
| 7 |
"""Load leaderboard data from JSON file"""
|
| 8 |
try:
|
|
@@ -17,20 +21,58 @@ def load_leaderboard_from_json(json_path="leaderboard_data.json"):
|
|
| 17 |
return []
|
| 18 |
|
| 19 |
|
| 20 |
-
def
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
leaderboard_data = load_leaderboard_from_json(json_path)
|
| 23 |
-
|
| 24 |
if not leaderboard_data:
|
| 25 |
return pd.DataFrame()
|
| 26 |
-
|
| 27 |
-
# Convert to DataFrame
|
| 28 |
df = pd.DataFrame(leaderboard_data)
|
| 29 |
-
|
| 30 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
df = df.sort_values('acc', ascending=False).reset_index(drop=True)
|
| 32 |
-
|
| 33 |
-
# Add ranking icons and make model names clickable links to papers
|
| 34 |
def add_ranking_icon_and_link(index, model_name, paper_link):
|
| 35 |
if index == 0:
|
| 36 |
return f'🥇 <a href="{paper_link}" target="_blank">{model_name}</a>'
|
|
@@ -40,37 +82,36 @@ def create_leaderboard_df(json_path="leaderboard_data.json"):
|
|
| 40 |
return f'🥉 <a href="{paper_link}" target="_blank">{model_name}</a>'
|
| 41 |
else:
|
| 42 |
return f'<a href="{paper_link}" target="_blank">{model_name}</a>'
|
| 43 |
-
|
| 44 |
-
# Format the DataFrame for display
|
| 45 |
display_df = pd.DataFrame({
|
| 46 |
'Model': [add_ranking_icon_and_link(i, model, link) for i, (model, link) in enumerate(zip(df['model'], df['link']))],
|
| 47 |
'Release Date': df['release_date'],
|
| 48 |
'HF Model': df['hf'].apply(lambda x: f'<a href="{x}" target="_blank">🤗</a>' if x != "-" else "-"),
|
| 49 |
-
'
|
| 50 |
'Parameters': df['params'],
|
| 51 |
'Open Source': df['open_source'].apply(lambda x: '✓' if x else '✗'),
|
| 52 |
'ACC Score': df['acc'].apply(lambda x: f"{x:.1f}")
|
| 53 |
})
|
| 54 |
-
|
| 55 |
return display_df
|
| 56 |
|
| 57 |
|
| 58 |
def get_leaderboard_stats(json_path="leaderboard_data.json"):
|
| 59 |
"""Get statistics about the leaderboard"""
|
| 60 |
leaderboard_data = load_leaderboard_from_json(json_path)
|
| 61 |
-
|
| 62 |
if not leaderboard_data:
|
| 63 |
return {}
|
| 64 |
-
|
| 65 |
df = pd.DataFrame(leaderboard_data)
|
| 66 |
-
|
| 67 |
stats = {
|
| 68 |
'total_models': len(df),
|
| 69 |
-
'open_source_models': df['open_source'].sum(),
|
| 70 |
-
'moe_models': df['moe'].apply(lambda x: 1 if x is True else 0).sum(),
|
| 71 |
'avg_acc': df['acc'].mean(),
|
| 72 |
'max_acc': df['acc'].max(),
|
| 73 |
'min_acc': df['acc'].min()
|
| 74 |
}
|
| 75 |
-
|
| 76 |
return stats
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
|
| 6 |
+
SIZE_BANDS = ["All", "<10B", "10-32B", "32-100B", ">100B", "Unknown"]
|
| 7 |
+
MODALITIES = ["All", "VL", "Omni"]
|
| 8 |
+
|
| 9 |
+
|
| 10 |
def load_leaderboard_from_json(json_path="leaderboard_data.json"):
|
| 11 |
"""Load leaderboard data from JSON file"""
|
| 12 |
try:
|
|
|
|
| 21 |
return []
|
| 22 |
|
| 23 |
|
| 24 |
+
def _in_size_band(params_b, band):
|
| 25 |
+
if band == "All":
|
| 26 |
+
return True
|
| 27 |
+
if params_b is None or pd.isna(params_b):
|
| 28 |
+
return band == "Unknown"
|
| 29 |
+
if band == "<10B":
|
| 30 |
+
return params_b < 10
|
| 31 |
+
if band == "10-32B":
|
| 32 |
+
return 10 <= params_b < 32
|
| 33 |
+
if band == "32-100B":
|
| 34 |
+
return 32 <= params_b < 100
|
| 35 |
+
if band == ">100B":
|
| 36 |
+
return params_b >= 100
|
| 37 |
+
if band == "Unknown":
|
| 38 |
+
return False
|
| 39 |
+
return True
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def create_leaderboard_df(
|
| 43 |
+
json_path="leaderboard_data.json",
|
| 44 |
+
open_source_only=False,
|
| 45 |
+
size_band="All",
|
| 46 |
+
modality="All",
|
| 47 |
+
):
|
| 48 |
+
"""Create a pandas DataFrame from JSON leaderboard data, with optional filters."""
|
| 49 |
leaderboard_data = load_leaderboard_from_json(json_path)
|
| 50 |
+
|
| 51 |
if not leaderboard_data:
|
| 52 |
return pd.DataFrame()
|
| 53 |
+
|
|
|
|
| 54 |
df = pd.DataFrame(leaderboard_data)
|
| 55 |
+
|
| 56 |
+
# Backfill optional columns for older JSON entries
|
| 57 |
+
if 'params_b' not in df.columns:
|
| 58 |
+
df['params_b'] = None
|
| 59 |
+
if 'modality' not in df.columns:
|
| 60 |
+
df['modality'] = "VL"
|
| 61 |
+
|
| 62 |
+
# Apply filters
|
| 63 |
+
if open_source_only:
|
| 64 |
+
df = df[df['open_source'] == True]
|
| 65 |
+
if modality and modality != "All":
|
| 66 |
+
df = df[df['modality'] == modality]
|
| 67 |
+
if size_band and size_band != "All":
|
| 68 |
+
df = df[df['params_b'].apply(lambda v: _in_size_band(v, size_band))]
|
| 69 |
+
|
| 70 |
+
if df.empty:
|
| 71 |
+
return pd.DataFrame()
|
| 72 |
+
|
| 73 |
+
# Sort by ACC score (descending) so medal icons reflect the filtered view
|
| 74 |
df = df.sort_values('acc', ascending=False).reset_index(drop=True)
|
| 75 |
+
|
|
|
|
| 76 |
def add_ranking_icon_and_link(index, model_name, paper_link):
|
| 77 |
if index == 0:
|
| 78 |
return f'🥇 <a href="{paper_link}" target="_blank">{model_name}</a>'
|
|
|
|
| 82 |
return f'🥉 <a href="{paper_link}" target="_blank">{model_name}</a>'
|
| 83 |
else:
|
| 84 |
return f'<a href="{paper_link}" target="_blank">{model_name}</a>'
|
| 85 |
+
|
|
|
|
| 86 |
display_df = pd.DataFrame({
|
| 87 |
'Model': [add_ranking_icon_and_link(i, model, link) for i, (model, link) in enumerate(zip(df['model'], df['link']))],
|
| 88 |
'Release Date': df['release_date'],
|
| 89 |
'HF Model': df['hf'].apply(lambda x: f'<a href="{x}" target="_blank">🤗</a>' if x != "-" else "-"),
|
| 90 |
+
'Modality': df['modality'],
|
| 91 |
'Parameters': df['params'],
|
| 92 |
'Open Source': df['open_source'].apply(lambda x: '✓' if x else '✗'),
|
| 93 |
'ACC Score': df['acc'].apply(lambda x: f"{x:.1f}")
|
| 94 |
})
|
| 95 |
+
|
| 96 |
return display_df
|
| 97 |
|
| 98 |
|
| 99 |
def get_leaderboard_stats(json_path="leaderboard_data.json"):
|
| 100 |
"""Get statistics about the leaderboard"""
|
| 101 |
leaderboard_data = load_leaderboard_from_json(json_path)
|
| 102 |
+
|
| 103 |
if not leaderboard_data:
|
| 104 |
return {}
|
| 105 |
+
|
| 106 |
df = pd.DataFrame(leaderboard_data)
|
| 107 |
+
|
| 108 |
stats = {
|
| 109 |
'total_models': len(df),
|
| 110 |
+
'open_source_models': int(df['open_source'].sum()),
|
| 111 |
+
'moe_models': int(df['moe'].apply(lambda x: 1 if x is True else 0).sum()),
|
| 112 |
'avg_acc': df['acc'].mean(),
|
| 113 |
'max_acc': df['acc'].max(),
|
| 114 |
'min_acc': df['acc'].min()
|
| 115 |
}
|
| 116 |
+
|
| 117 |
return stats
|