import copy as cp
import json
from collections import defaultdict
from urllib.request import urlopen

import gradio as gr
import numpy as np
import pandas as pd

from meta_data import MMBENCH_FIELDS, META_FIELDS, URL


def listinstr(lst, s):
    """Return True if any item of ``lst`` is contained in ``s``.

    ``lst`` must be a ``list``; anything else fails the assertion.
    """
    assert isinstance(lst, list)
    return any(item in s for item in lst)


def upper_key(k):
    """Turn a raw result key into a display label.

    ``'ocr'`` becomes ``'OCR'``; a key containing underscores is split on
    them, each piece gets its first character upper-cased, and the pieces
    are joined with spaces. Any other key is returned unchanged.
    """
    if k == 'ocr':
        return 'OCR'
    if '_' in k:
        # Slicing (part[:1]) instead of indexing keeps empty pieces, produced
        # by leading, trailing or doubled underscores, from raising IndexError.
        return ' '.join(part[:1].upper() + part[1:] for part in k.split('_'))
    return k


def load_results():
    """Download the results JSON from ``URL`` and merge scores into META_MAP.

    For every dataset name listed below, the records under
    ``data[<name> + '_Data']`` are keyed by the first element of their
    ``'Method'`` entry, stripped of the metadata keys in ``skip_keys``, and
    stored as ``META_MAP[<method>][<name>]``.

    Returns:
        The ``'META_MAP'`` dict of the downloaded payload, updated in place.

    Raises:
        KeyError: if a dataset section is missing from the payload, or a
            record names a method that is not present in ``META_MAP``.
    """
    # The context manager closes the HTTP response even if parsing fails.
    with urlopen(URL) as response:
        data = json.loads(response.read())

    names = [
        'MMBench_TEST_EN_V11', 'MMBench_TEST_CN_V11', 'CCBench',
        'MMBench_TEST_EN', 'MMBench_TEST_CN',
    ]
    skip_keys = [
        'Method', 'Parameters', 'Language Model', 'Vision Model',
        'Org', 'Time', 'Verified', 'OpenSource', 'key',
    ]
    META_MAP = data['META_MAP']
    for n in names:
        print(n)
        res_map = {
            x['Method'][0]: {
                upper_key(k): v for k, v in x.items() if k not in skip_keys
            }
            for x in data[n + '_Data']
        }
        for method, scores in res_map.items():
            META_MAP[method][n] = scores
    return META_MAP


def nth_large(val, vals):
    """Return the 1-based rank of ``val`` in ``vals`` (1 = nothing is larger)."""
    return sum(1 for v in vals if v > val) + 1


def model_size_flag(sz, FIELDS):
    """Tell whether a model size falls into one of the selected size buckets.

    Args:
        sz: Model size, or NaN/None when unknown. Presumably in billions of
            parameters, matching the 'Param (B)' column -- confirm.
        FIELDS: Container of selected bucket labels (``'<4B'``, ``'4B-10B'``,
            ``'10B-20B'``, ``'20B-40B'``, ``'>40B'``, ``'Unknown'``).

    Returns:
        True if ``sz`` belongs to a selected bucket, False otherwise.
    """
    if pd.isna(sz):
        return 'Unknown' in FIELDS
    sz = int(sz)
    if '<4B' in FIELDS and sz < 4:
        return True
    if '4B-10B' in FIELDS and 4 <= sz < 10:
        return True
    if '10B-20B' in FIELDS and 10 <= sz < 20:
        return True
    if '20B-40B' in FIELDS and 20 <= sz < 40:
        return True
    if '>40B' in FIELDS and sz >= 40:
        return True
    return False


def model_type_flag(line, FIELDS):
    """Tell whether a table row matches one of the selected model types.

    ``line`` is indexed by ``'OpenSource'`` and ``'Verified'``, whose values
    are compared against the strings ``'Yes'`` / ``'No'``.
    """
    if 'Public' in FIELDS and line['OpenSource'] == 'Yes':
        return True
    if 'Private' in FIELDS and line['OpenSource'] == 'No':
        return True
    if 'Verified' in FIELDS and line['Verified'] == 'Yes':
        return True
    return False


def _default_type_map():
    """Build the column -> display-type map shared by both table builders."""
    type_map = defaultdict(lambda: 'number')
    type_map['Method'] = 'html'
    type_map['Language Model'] = type_map['Vision Model'] = type_map['Org'] = 'html'
    type_map['OpenSource'] = type_map['Verified'] = 'str'
    return type_map


def _append_meta_fields(res, item):
    """Append one model's META_FIELDS values to the column lists in ``res``."""
    for k in META_FIELDS:
        if k == 'Param (B)':
            param = item['Parameters']
            res[k].append(float(param.replace('B', '')) if param != '' else None)
        elif k == 'Method':
            # NOTE(review): the second element (a URL, judging by the name the
            # code gave it) is unpacked but never used although this column is
            # typed 'html' -- confirm whether a link was intended.
            name, _url = item['Method']
            res[k].append(f'{name}')
        else:
            # NOTE(review): the replacement literal was a raw line break in
            # the reviewed source, which is not a valid string literal; '<br>'
            # is assumed because these columns are typed 'html' -- confirm.
            s = item[k].replace('\n', '<br>')
            s = s.replace(' & ', '<br>')
            res[k].append(s)


def _mean_score(item, first, second, key_name):
    """Average two datasets' ``key_name`` scores to one decimal, or None."""
    if first not in item or second not in item:
        return None
    # float() on both operands: scores may be strings, and adding two strings
    # would concatenate them instead of summing.
    val = (float(item[first][key_name]) + float(item[second][key_name])) / 2
    return float(f'{val:.1f}')


def BUILD_L1_DF(results):
    """Build the main leaderboard table and its column-selection metadata.

    Returns:
        ``(df, check_box)`` where ``df`` comes from ``generate_table`` and
        ``check_box`` holds the 'essential', 'required', 'avg' and 'all'
        column lists plus the 'type_map'.
    """
    check_box = {}
    check_box['essential'] = ['Method', 'Org', 'Param (B)', 'Language Model', 'Vision Model']
    # Revise here to change the datasets shown by default.
    check_box['required'] = ['MMBench_TEST_V11', 'MMBench_TEST', 'CCBench']
    check_box['avg'] = ['MMBench_TEST_V11', 'MMBench_TEST']
    check_box['all'] = check_box['avg'] + MMBENCH_FIELDS
    check_box['type_map'] = _default_type_map()

    df = generate_table(results)
    return df, check_box


def BUILD_L2_DF(results, dataset):
    """Build the per-dataset table and its column-selection metadata.

    Only models that have an entry for ``dataset`` are included. The score
    columns are taken from the first such model, with the columns whose name
    contains 'Overall' placed first.

    Returns:
        ``(df, check_box)``; ``df`` is sorted from best to worst overall
        score when an overall column exists.

    Raises:
        AssertionError: if no model has results for ``dataset``.
    """
    res = defaultdict(list)
    sub = [v for v in results.values() if dataset in v]
    assert len(sub)
    fields = list(sub[0][dataset].keys())

    non_overall_fields = [x for x in fields if 'Overall' not in x]
    overall_fields = [x for x in fields if 'Overall' in x]

    for m in results:
        item = results[m]
        if dataset not in item:
            continue
        _append_meta_fields(res, item)
        for d in overall_fields:
            res[d].append(float(item[dataset][d]))
        for d in non_overall_fields:
            res[d].append(float(item[dataset][d]))

    df = pd.DataFrame(res)
    all_fields = overall_fields + non_overall_fields
    # Without any overall column, the first 5 other fields are required.
    required_fields = overall_fields if len(overall_fields) else non_overall_fields[:5]

    # Sorting on a hard-coded 'Overall' raised KeyError for datasets without
    # that exact column; fall back to the first overall-like column, and keep
    # insertion order when there is none.
    if 'Overall' in df.columns:
        sort_key = 'Overall'
    elif overall_fields:
        sort_key = overall_fields[0]
    else:
        sort_key = None
    if sort_key is not None:
        df = df.sort_values(sort_key)
        df = df.iloc[::-1]

    check_box = {}
    check_box['essential'] = ['Method', 'Org', 'Param (B)', 'Language Model', 'Vision Model']
    check_box['required'] = required_fields
    check_box['all'] = all_fields
    check_box['type_map'] = _default_type_map()
    return df, check_box


def generate_table(results):
    """Build the main leaderboard DataFrame from the merged results.

    Each model contributes its META_FIELDS values plus one score per dataset.
    'MMBench_TEST_V11' and 'MMBench_TEST' are the mean of their EN and CN
    variants; a missing score is stored as None.

    Rows are ordered in tiers: models with a 'MMBench_TEST_V11' score come
    first (best to worst), then the remaining models ranked by the next
    column of the priority list, and so on. Models with no score in any of
    those columns are not included in the returned table.
    """
    res = defaultdict(list)
    for item in results.values():
        _append_meta_fields(res, item)
        for d in [
            'MMBench_TEST_V11', 'MMBench_TEST_EN_V11', 'MMBench_TEST_CN_V11',
            'CCBench', 'MMBench_TEST', 'MMBench_TEST_EN', 'MMBench_TEST_CN',
        ]:
            # 'OCRBench' is not in the list above, so this is always 'Overall'.
            key_name = 'Overall' if d != 'OCRBench' else 'Final Score'
            if d == 'MMBench_TEST_V11':
                res[d].append(
                    _mean_score(item, 'MMBench_TEST_EN_V11', 'MMBench_TEST_CN_V11', key_name)
                )
            elif d == 'MMBench_TEST':
                res[d].append(
                    _mean_score(item, 'MMBench_TEST_EN', 'MMBench_TEST_CN', key_name)
                )
            elif d in item:
                val = float(item[d][key_name])
                res[d].append(float(f'{val:.1f}'))
            else:
                res[d].append(None)

    df = pd.DataFrame(res)

    df_list = []
    for k in [
        'MMBench_TEST_V11', 'MMBench_TEST',
        'MMBench_TEST_EN_V11', 'MMBench_TEST_CN_V11',
        'MMBench_TEST_EN', 'MMBench_TEST_CN', 'CCBench',
    ]:
        if len(df) == 0:
            break
        has_score = ~pd.isna(df[k])
        valid, missing = df[has_score], df[~has_score]
        valid = valid.sort_values(k)
        valid = valid.iloc[::-1]
        df_list.append(valid)
        # Rows without a score for k are ranked by the next column instead.
        df = missing

    if not df_list:
        # pd.concat([]) raises ValueError; with no models, return the empty
        # frame built above.
        return df
    return pd.concat(df_list)