import copy as cp
import json
from collections import defaultdict
from urllib.request import urlopen
import gradio as gr
import numpy as np
import pandas as pd
from meta_data import MMBENCH_FIELDS, META_FIELDS, URL
def listinstr(lst, s):
    """Report whether any entry of ``lst`` occurs inside ``s``.

    Args:
        lst: List of candidates; each one is tested with ``candidate in s``.
        s: Object to search in (anything supporting the ``in`` operator).

    Returns:
        True if at least one candidate is contained in ``s``, otherwise False.

    Raises:
        AssertionError: If ``lst`` is not a ``list``.
    """
    assert isinstance(lst, list)
    return any(candidate in s for candidate in lst)
def upper_key(k):
    """Convert a raw result key into a display label.

    ``'ocr'`` becomes ``'OCR'``. A key containing underscores is split on
    them, the first character of every piece is upper-cased, and the pieces
    are joined with single spaces. Any other key is returned unchanged.

    Args:
        k: The key string to convert.

    Returns:
        The display label as a string.
    """
    if k == 'ocr':
        return 'OCR'
    if '_' not in k:
        return k
    # Slice with [:1] rather than index with [0]: leading, trailing or doubled
    # underscores produce empty pieces, and indexing those raises IndexError.
    return ' '.join(part[:1].upper() + part[1:] for part in k.split('_'))
def load_results():
    """Download the results JSON from ``URL`` and merge scores into its meta map.

    The payload's ``'META_MAP'`` entry is used as the base mapping. For each
    dataset name handled here, the records under ``'<name>_Data'`` are read;
    every record is keyed by the first element of its ``'Method'`` entry, and
    its non-metadata fields (with keys passed through ``upper_key``) are stored
    under ``META_MAP[method][name]``.

    Returns:
        The ``META_MAP`` dict from the payload, updated in place.

    Raises:
        KeyError: If the payload lacks ``'META_MAP'`` or a ``'<name>_Data'``
            entry, or if a record's method key is absent from ``META_MAP``.
    """
    # Use the response as a context manager so the connection is closed
    # deterministically instead of being left to the garbage collector.
    with urlopen(URL) as response:
        data = json.loads(response.read())

    names = ['MMBench_TEST_EN_V11', 'MMBench_TEST_CN_V11', 'CCBench', 'MMBench_TEST_EN', 'MMBench_TEST_CN']
    # Metadata fields that must not be copied into the per-dataset score dict.
    skip_keys = ['Method', 'Parameters', 'Language Model', 'Vision Model', 'Org', 'Time', 'Verified', 'OpenSource', 'key']
    META_MAP = data['META_MAP']
    for n in names:
        print(n)
        for record in data[n + '_Data']:
            method = record['Method'][0]
            # A later record for the same method overwrites an earlier one.
            META_MAP[method][n] = {
                upper_key(k): v for k, v in record.items() if k not in skip_keys
            }
    return META_MAP
def nth_large(val, vals):
    """Return the 1-based rank of ``val`` among ``vals``, largest first.

    The rank is one more than the number of entries in ``vals`` that are
    strictly greater than ``val``, so tied values share the same rank.
    """
    strictly_greater = sum(1 for candidate in vals if candidate > val)
    return 1 + strictly_greater
def model_size_flag(sz, FIELDS):
    """Decide whether a model size falls into one of the selected size buckets.

    Args:
        sz: Model size; a missing value (per ``pd.isna``) counts as unknown.
            Non-missing values are truncated with ``int`` before comparison.
        FIELDS: Collection of selected bucket labels, drawn from ``'<4B'``,
            ``'4B-10B'``, ``'10B-20B'``, ``'20B-40B'``, ``'>40B'`` and
            ``'Unknown'``.

    Returns:
        True if the size matches a selected bucket (or is missing while
        ``'Unknown'`` is selected), otherwise False.
    """
    if pd.isna(sz):
        # A missing size only matches the dedicated 'Unknown' bucket.
        return 'Unknown' in FIELDS

    size = int(sz)
    # (label, inclusive lower bound, exclusive upper bound)
    buckets = (
        ('<4B', float('-inf'), 4),
        ('4B-10B', 4, 10),
        ('10B-20B', 10, 20),
        ('20B-40B', 20, 40),
        ('>40B', 40, float('inf')),
    )
    return any(
        label in FIELDS and low <= size < high
        for label, low, high in buckets
    )
def model_type_flag(line, FIELDS):
    """Decide whether a model row matches one of the selected model types.

    Args:
        line: Mapping-like row; ``'OpenSource'`` and ``'Verified'`` are read
            only when the corresponding type is selected.
        FIELDS: Collection of selected type labels, drawn from ``'Public'``,
            ``'Private'`` and ``'Verified'``.

    Returns:
        True if any selected type matches the row, otherwise False.
    """
    # (selected label, column to inspect, value that counts as a match)
    rules = (
        ('Public', 'OpenSource', 'Yes'),
        ('Private', 'OpenSource', 'No'),
        ('Verified', 'Verified', 'Yes'),
    )
    return any(
        label in FIELDS and line[column] == expected
        for label, column, expected in rules
    )
def BUILD_L1_DF(results):
    """Build the main table together with its check-box configuration.

    Args:
        results: Passed unchanged to ``generate_table``.

    Returns:
        A ``(df, check_box)`` tuple. ``df`` is whatever ``generate_table``
        returns. ``check_box`` is a dict with the keys ``'essential'``,
        ``'required'``, ``'avg'``, ``'all'`` and ``'type_map'``; ``'type_map'``
        is a ``defaultdict`` whose default column type is ``'number'``.
    """
    column_types = defaultdict(lambda: 'number')
    for column in ('Method', 'Language Model', 'Vision Model', 'Org'):
        column_types[column] = 'html'
    for column in ('OpenSource', 'Verified'):
        column_types[column] = 'str'

    averaged = ['MMBench_TEST_V11', 'MMBench_TEST']
    check_box = {
        'essential': ['Method', 'Org', 'Param (B)', 'Language Model', 'Vision Model'],
        # Revise this entry to set the default datasets.
        'required': ['MMBench_TEST_V11', 'MMBench_TEST', 'CCBench'],
        'avg': averaged,
        'all': averaged + MMBENCH_FIELDS,
        'type_map': column_types,
    }
    return generate_table(results), check_box
def BUILD_L2_DF(results, dataset):
    """Build the per-dataset table and its check-box configuration.

    Args:
        results: Mapping from model key to that model's record. For every
            record containing ``dataset`` this reads ``'Parameters'``,
            ``'Method'`` (unpacked as a two-element sequence), each remaining
            entry of ``META_FIELDS``, and the score dict under ``dataset``.
        dataset: Key of the dataset whose scores are tabulated.

    Returns:
        A ``(df, check_box)`` tuple. ``df`` has one row per record containing
        ``dataset``, with the ``META_FIELDS`` columns followed by the score
        columns (those whose name contains ``'Overall'`` first). Rows are in
        descending ``'Overall'`` order when that column exists. ``check_box``
        is a dict with the keys ``'essential'``, ``'required'``, ``'all'``
        and ``'type_map'``.

    Raises:
        AssertionError: If no record in ``results`` contains ``dataset``.
    """
    res = defaultdict(list)
    sub = [v for v in results.values() if dataset in v]
    assert len(sub), f'no results available for dataset {dataset!r}'

    # The score columns are taken from the first record that has this dataset.
    fields = list(sub[0][dataset].keys())
    non_overall_fields = [x for x in fields if 'Overall' not in x]
    overall_fields = [x for x in fields if 'Overall' in x]

    # NOTE(review): the replacement strings were split across physical lines in
    # the original source (a SyntaxError); they are spelled here as an escaped
    # newline, which is what the visible characters amount to. The type map
    # below declares these columns as 'html', so an HTML line-break tag may
    # have been intended instead - confirm.
    line_break = '\n'

    for m in results:
        item = results[m]
        if dataset not in item:
            continue
        for k in META_FIELDS:
            if k == 'Param (B)':
                param = item['Parameters']
                # An empty string means the parameter count is not available.
                res[k].append(float(param.replace('B', '')) if param != '' else None)
            elif k == 'Method':
                # NOTE(review): the second element is unpacked but not used,
                # although 'Method' is typed as 'html' - confirm whether a
                # link was intended.
                name, _url = item['Method']
                res[k].append(f'{name}')
            else:
                s = item[k].replace('\n', line_break)
                s = s.replace(' & ', line_break)
                res[k].append(s)
        for d in overall_fields:
            res[d].append(float(item[dataset][d]))
        for d in non_overall_fields:
            res[d].append(float(item[dataset][d]))

    df = pd.DataFrame(res)
    all_fields = overall_fields + non_overall_fields
    # Without any overall field, fall back to the first 5 non-overall fields.
    required_fields = overall_fields if len(overall_fields) else non_overall_fields[:5]

    # Only sort when the column exists: a dataset without an exact 'Overall'
    # field is handled above and must not fail here with a KeyError.
    if 'Overall' in df.columns:
        df = df.sort_values('Overall')
        df = df.iloc[::-1]

    check_box = {}
    check_box['essential'] = ['Method', 'Org', 'Param (B)', 'Language Model', 'Vision Model']
    check_box['required'] = required_fields
    check_box['all'] = all_fields
    type_map = defaultdict(lambda: 'number')
    type_map['Method'] = 'html'
    type_map['Language Model'] = type_map['Vision Model'] = type_map['Org'] = 'html'
    type_map['OpenSource'] = type_map['Verified'] = 'str'
    check_box['type_map'] = type_map
    return df, check_box
def _mean_overall(item, datasets):
    """Return the mean 'Overall' score over ``datasets``, rounded to one decimal.

    Returns None unless every name in ``datasets`` is present in ``item``.
    """
    if not all(name in item for name in datasets):
        return None
    mean = sum(float(item[name]['Overall']) for name in datasets) / len(datasets)
    return float(f'{mean:.1f}')


def generate_table(results):
    """Build the main leaderboard table from the merged results.

    Args:
        results: Mapping from model key to that model's record. Each record is
            read for ``'Parameters'``, ``'Method'`` (unpacked as a two-element
            sequence), each remaining entry of ``META_FIELDS``, and the
            per-dataset score dicts (their ``'Overall'`` entry).

    Returns:
        A ``pandas.DataFrame`` with the ``META_FIELDS`` columns followed by one
        column per dataset. Rows are grouped by the first ranking column for
        which they have a value, and ordered by descending value within each
        group. Rows with no value in any ranking column are not included.
        For empty ``results`` an empty frame with the same columns is returned.
    """
    # NOTE(review): the replacement strings were split across physical lines in
    # the original source (a SyntaxError); they are spelled here as an escaped
    # newline, which is what the visible characters amount to. This file's type
    # maps declare these columns as 'html', so an HTML line-break tag may have
    # been intended instead - confirm.
    line_break = '\n'

    # Output column -> datasets whose 'Overall' scores are averaged into it.
    # The two aggregate columns average their EN and CN parts; the other
    # columns read a single dataset. Insertion order is the column order.
    score_sources = {
        'MMBench_TEST_V11': ['MMBench_TEST_EN_V11', 'MMBench_TEST_CN_V11'],
        'MMBench_TEST_EN_V11': ['MMBench_TEST_EN_V11'],
        'MMBench_TEST_CN_V11': ['MMBench_TEST_CN_V11'],
        'CCBench': ['CCBench'],
        'MMBench_TEST': ['MMBench_TEST_EN', 'MMBench_TEST_CN'],
        'MMBench_TEST_EN': ['MMBench_TEST_EN'],
        'MMBench_TEST_CN': ['MMBench_TEST_CN'],
    }

    res = defaultdict(list)
    for m in results:
        item = results[m]
        for k in META_FIELDS:
            if k == 'Param (B)':
                param = item['Parameters']
                # An empty string means the parameter count is not available.
                res[k].append(float(param.replace('B', '')) if param != '' else None)
            elif k == 'Method':
                # NOTE(review): the second element is unpacked but not used,
                # although 'Method' is typed as 'html' - confirm whether a
                # link was intended.
                name, _url = item['Method']
                res[k].append(f'{name}')
            else:
                s = item[k].replace('\n', line_break)
                s = s.replace(' & ', line_break)
                res[k].append(s)
        for column, sources in score_sources.items():
            # Scores are converted with float() for every column, so string
            # scores are averaged numerically rather than concatenated.
            res[column].append(_mean_overall(item, sources))

    df = pd.DataFrame(res)
    df_list = []
    # Ranking priority: a row is placed in the group of the first column for
    # which it has a value.
    for k in [
        'MMBench_TEST_V11', 'MMBench_TEST',
        'MMBench_TEST_EN_V11', 'MMBench_TEST_CN_V11',
        'MMBench_TEST_EN', 'MMBench_TEST_CN', 'CCBench'
    ]:
        if len(df) == 0:
            break
        valid, missing = df[~pd.isna(df[k])], df[pd.isna(df[k])]
        valid = valid.sort_values(k)
        valid = valid.iloc[::-1]
        df_list.append(valid)
        df = missing

    if not df_list:
        # pd.concat raises ValueError on an empty list, which happens when
        # ``results`` is empty; return an empty table with the usual columns.
        return pd.DataFrame(columns=[*META_FIELDS, *score_sources])
    return pd.concat(df_list)