| import copy as cp |
| import json |
| from collections import defaultdict |
| from urllib.request import urlopen |
|
|
| import gradio as gr |
| import numpy as np |
| import pandas as pd |
|
|
| from meta_data import META_FIELDS, URL, DATASETS_ALL, DATASETS_ESS |
|
|
|
|
def listinstr(lst, s):
    """Report whether any element of ``lst`` occurs inside ``s``.

    Args:
        lst: A ``list`` of candidates; each one is tested with ``in``.
        s: The object searched (a string, or any container supporting ``in``).

    Returns:
        ``True`` if at least one element of ``lst`` is found in ``s``,
        otherwise ``False`` (including when ``lst`` is empty).

    Raises:
        AssertionError: If ``lst`` is not a ``list``.
    """
    assert isinstance(lst, list)
    return any(candidate in s for candidate in lst)
|
|
|
|
def upper_key(k):
    """Turn a snake_case key into a human-readable label.

    Rules, applied in order:
      * the exact key ``'ocr'`` becomes ``'OCR'``;
      * a key containing ``'_'`` is split on underscores, the first
        character of every segment is upper-cased, and the segments are
        re-joined with single spaces;
      * any other key is returned unchanged.

    Args:
        k: The key string to convert.

    Returns:
        The display label as a string.
    """
    if k == 'ocr':
        return 'OCR'
    if '_' not in k:
        return k
    # Slice with [:1] instead of indexing [0]: a leading, trailing or doubled
    # underscore yields an empty segment, and indexing it raised IndexError.
    return ' '.join(part[:1].upper() + part[1:] for part in k.split('_'))
| |
|
|
def load_results():
    """Download the results document from ``URL`` and decode it as JSON.

    Returns:
        The Python object produced by ``json.loads`` for the response body.
    """
    # Use the response as a context manager so the underlying connection is
    # closed deterministically instead of whenever the object is collected.
    with urlopen(URL) as response:
        return json.loads(response.read())
|
|
|
|
def nth_large(val, vals):
    """Return the 1-based rank of ``val`` within ``vals``, largest first.

    The rank is one more than the number of entries in ``vals`` that are
    strictly greater than ``val``, so equal values share the same rank.

    Args:
        val: The value being ranked.
        vals: An iterable of values to compare against.

    Returns:
        The rank as an ``int`` (``1`` when nothing in ``vals`` is larger).
    """
    larger = [other for other in vals if other > val]
    return len(larger) + 1
|
|
|
|
def model_size_flag(sz, FIELDS):
    """Decide whether a model size passes the selected size filters.

    Args:
        sz: The model size; missing values (as detected by ``pd.isna``)
            are treated as "unknown".
        FIELDS: The selected filter labels. Recognised labels are
            ``'Unknown'``, ``'<4B'``, ``'4B-10B'``, ``'10B-20B'``,
            ``'20B-40B'`` and ``'>40B'``.

    Returns:
        ``True`` if the size falls into one of the selected buckets,
        ``False`` otherwise.
    """
    # A missing size only matches the dedicated 'Unknown' filter.
    if pd.isna(sz):
        return 'Unknown' in FIELDS

    size = int(sz)
    # (label, inclusive lower bound, exclusive upper bound)
    buckets = (
        ('<4B', float('-inf'), 4),
        ('4B-10B', 4, 10),
        ('10B-20B', 10, 20),
        ('20B-40B', 20, 40),
        ('>40B', 40, float('inf')),
    )
    return any(
        label in FIELDS and low <= size < high
        for label, low, high in buckets
    )
|
|
|
|
def model_type_flag(line, FIELDS):
    """Decide whether a row passes the selected model-type filters.

    Args:
        line: A row-like mapping; ``line['OpenSource']`` is compared
            against ``'Yes'`` / ``'No'``.
        FIELDS: The selected filter labels (``'OpenSource'`` and/or ``'API'``).

    Returns:
        ``True`` if ``'OpenSource'`` is selected and the row is marked
        ``'Yes'``, or ``'API'`` is selected and the row is marked ``'No'``;
        ``False`` otherwise.
    """
    # Filter label -> value of line['OpenSource'] that satisfies it.
    accepted = (('OpenSource', 'Yes'), ('API', 'No'))
    for label, marker in accepted:
        # The row is only inspected when the corresponding filter is selected.
        if label in FIELDS and line['OpenSource'] == marker:
            return True
    return False
|
|
|
|
def BUILD_L1_DF(results):
    """Build the main leaderboard table and its column-selection metadata.

    Args:
        results: The results mapping, passed unchanged to ``generate_table``.

    Returns:
        A ``(df, check_box)`` tuple. ``df`` is the frame returned by
        ``generate_table``. ``check_box`` is a dict with:
          * ``'essential'``: the fixed model-description columns;
          * ``'required'``: ``'Overall'`` followed by ``DATASETS_ESS``;
          * ``'all'``: ``'Overall'`` followed by ``DATASETS_ALL``;
          * ``'type_map'``: column name -> display type, defaulting to
            ``'number'`` for any column not listed explicitly.
    """
    column_types = defaultdict(lambda: 'number')
    for html_column in ('Method', 'Language Model', 'Vision Model', 'Org'):
        column_types[html_column] = 'html'
    for text_column in ('OpenSource', 'Verified'):
        column_types[text_column] = 'str'

    check_box = {
        'essential': ['Method', 'Org', 'Param (B)', 'Language Model', 'Vision Model'],
        'required': ['Overall'] + DATASETS_ESS,
        'all': ['Overall'] + DATASETS_ALL,
        'type_map': column_types,
    }
    return generate_table(results), check_box
|
|
|
|
def BUILD_L2_DF(results, dataset):
    """Build the per-dataset leaderboard table and its column metadata.

    Args:
        results: Mapping of model name -> record. Each record is read via
            ``record['META']`` and, for models evaluated on ``dataset``,
            ``record[dataset]`` (a mapping of metric name -> value).
        dataset: Name of the dataset to build the table for.

    Returns:
        A ``(df, check_box)`` tuple. ``df`` holds one row per model that has
        an entry for ``dataset``, sorted best-first. ``check_box`` is a dict
        with the keys ``'essential'``, ``'required'``, ``'all'`` and
        ``'type_map'`` describing which columns exist and how to render them.

    Raises:
        AssertionError: If no model in ``results`` has an entry for
            ``dataset``.
    """
    res = defaultdict(list)
    sub = [v for v in results.values() if dataset in v]
    assert len(sub), dataset
    # The metric names of the first evaluated model define the columns.
    fields = list(sub[0][dataset].keys())

    # Split metrics into headline ("overall") and detail columns. WeMath is
    # partitioned on the substring 'Score' instead of 'Overall'.
    if dataset == 'WeMath':
        non_overall_fields = [x for x in fields if 'Score' in x]
        overall_fields = [x for x in fields if 'Score' not in x]
    else:
        non_overall_fields = [x for x in fields if 'Overall' not in x]
        overall_fields = [x for x in fields if 'Overall' in x]
    all_fields = overall_fields + non_overall_fields

    for item in sub:
        meta = item['META']
        for k in META_FIELDS:
            if k == 'Param (B)':
                param = meta['Parameters']
                # An empty parameter string means the size is unknown.
                res[k].append(float(param.replace('B', '')) if param != '' else None)
            elif k == 'Method':
                name, url = meta['Method']
                res[k].append(f'<a href="{url}">{name}</a>')
            else:
                # Render newlines and ' & ' separators as HTML line breaks.
                s = meta[k].replace('\n', '<br>')
                s = s.replace(' & ', '<br>')
                res[k].append(s)

        # Headline metrics first, then detail metrics (fixes column order).
        for d in all_fields:
            res[d].append(float(item[dataset][d]))

    df = pd.DataFrame(res)

    # Without any headline metric, show the first five detail metrics.
    required_fields = overall_fields if overall_fields else non_overall_fields[:5]

    # Choose the ranking column. Datasets with neither 'Score (Strict)' nor
    # an exact 'Overall' column used to fail with KeyError in sort_values;
    # fall back to the first required column for them.
    if 'Score (Strict)' in df:
        sort_key = 'Score (Strict)'
    elif 'Overall' in df:
        sort_key = 'Overall'
    else:
        sort_key = required_fields[0] if required_fields else None

    if sort_key is not None:
        # Ascending sort followed by a reversal yields best-first order.
        df = df.sort_values(sort_key)
        df = df.iloc[::-1]

    check_box = {}
    check_box['essential'] = ['Method', 'Org', 'Param (B)', 'Language Model', 'Vision Model']
    check_box['required'] = required_fields
    check_box['all'] = all_fields
    # Columns not listed explicitly are rendered as numbers.
    type_map = defaultdict(lambda: 'number')
    type_map['Method'] = 'html'
    type_map['Language Model'] = type_map['Vision Model'] = type_map['Org'] = 'html'
    type_map['OpenSource'] = type_map['Verified'] = 'str'
    check_box['type_map'] = type_map
    return df, check_box
|
|
|
|
def generate_table(results):
    """Build the main leaderboard frame from the raw results mapping.

    For every model, the columns are the ``META_FIELDS`` entries, one score
    per dataset in ``DATASETS_ALL`` (``None`` when the model has no entry for
    it) and an ``'Overall'`` column: the mean of the scores over
    ``DATASETS_ESS``, or ``None`` if any of those datasets is missing.

    Args:
        results: Mapping of model name -> record. Each record is read via
            ``record['META']`` and ``record[<dataset>]``.

    Returns:
        A ``pandas.DataFrame`` with models that have an ``'Overall'`` value
        first (highest first), followed by the remaining models ordered by
        their ``'MathVista'`` score (highest first). An empty ``results``
        mapping yields an empty frame.
    """
    res = defaultdict(list)
    for item in results.values():
        meta = item['META']
        for k in META_FIELDS:
            if k == 'Param (B)':
                param = meta['Parameters']
                # An empty parameter string means the size is unknown.
                res[k].append(float(param.replace('B', '')) if param != '' else None)
            elif k == 'Method':
                name, url = meta['Method']
                res[k].append(f'<a href="{url}">{name}</a>')
            else:
                # Render newlines and ' & ' separators as HTML line breaks.
                s = meta[k].replace('\n', '<br>')
                s = s.replace(' & ', '<br>')
                res[k].append(s)

        # Running sum over the essential datasets; switches to None (and
        # stays None) as soon as one essential dataset is missing.
        avg = 0
        for d in DATASETS_ALL:
            # WeMath reports its headline number under a different key.
            key_name = 'Score (Strict)' if d == 'WeMath' else 'Overall'
            if d in item:
                val = float(item[d][key_name])
                # Keep one decimal place, as displayed in the table.
                res[d].append(float(f'{val:.1f}'))
            else:
                res[d].append(None)
            if d in DATASETS_ESS:
                if d in item and avg is not None:
                    avg += res[d][-1]
                else:
                    avg = None

        if avg is not None:
            avg = float(f'{avg / len(DATASETS_ESS):.1f}')
        res['Overall'].append(avg)

    df = pd.DataFrame(res)
    # No models means no columns at all; indexing df['Overall'] below would
    # raise KeyError, so return the empty frame as-is.
    if df.empty:
        return df

    missing_overall = pd.isna(df['Overall'])
    # Ascending sort followed by a reversal yields highest-first order.
    overall_notna = df[~missing_overall].sort_values('Overall').iloc[::-1]
    overall_isna = df[missing_overall]
    # Models without an overall score are ordered by MathVista; skip that
    # ordering if the column is not part of the table.
    if 'MathVista' in overall_isna:
        overall_isna = overall_isna.sort_values('MathVista').iloc[::-1]
    return pd.concat([overall_notna, overall_isna])
|
|