from ola_vlm.eval.mmstar.smp import *  # provides the load, dump, get_logger and tqdm helpers used below
from copy import deepcopy

def MMStar_eval(eval_file):
    # Per-category (L1) and per-subtask (L2) correct-answer counters for MMStar.
    MMStar_score_l2 = {
        'coarse perception': {
            'image scene and topic': 0,
            'image style & quality': 0,
            'image emotion': 0
        },
        'fine-grained perception': {
            'object counting': 0,
            'recognition': 0,
            'localization': 0
        },
        'instance reasoning': {
            'single-instance reasoning': 0,
            'cross-instance attribute reasoning': 0,
            'cross-instance relation reasoning': 0
        },
        'logical reasoning': {
            'code & sequence reasoning': 0,
            'diagram reasoning': 0,
            'common reasoning': 0
        },
        'science & technology': {
            'biology & chemistry & physics': 0,
            'electronics & energy & mechanical eng.': 0,
            'geography & earth science & agriculture': 0
        },
        'math': {
            'geometry': 0,
            'numeric commonsense and calculation': 0,
            'statistical reasoning': 0
        },
    }
    # Mirror the score structure to count how many samples fall in each subtask.
    MMStar_counter = deepcopy(MMStar_score_l2)
    logger = get_logger('Evaluation')

    data = load(eval_file)
    lines = [data[i] for i in range(len(data))]
    for line in tqdm(lines):
        predict = str(line['prediction'])
        answers = str(line['answer'])
        category = str(line['category'])
        l2_category = str(line['l2_category'])
        MMStar_counter[category][l2_category] += 1

        # Normalize both strings before matching the predicted option letter.
        answer = answers.lower().strip().replace('\n', ' ')
        predict = predict.lower().strip().replace('\n', ' ')
        # Count a hit when the answer letter matches the prediction, allowing a
        # few common answer formats: "A", "(A)", "option A", "the answer is A".
        try:
            if answer == predict[0]:
                MMStar_score_l2[category][l2_category] += 1
            elif predict[0] == '(' and answer == predict[1]:
                MMStar_score_l2[category][l2_category] += 1
            elif predict[0:7] == 'option ' and answer == predict[7]:
                MMStar_score_l2[category][l2_category] += 1
            elif predict[0:14] == 'the answer is ' and answer == predict[14]:
                MMStar_score_l2[category][l2_category] += 1
        except IndexError:
            # Empty or too-short predictions are simply counted as wrong.
            pass
    # Aggregate: per-subtask accuracy, per-category accuracy, and the final score.
    # MMStar has 1,500 samples in total, 250 per L1 category.
    MMStar_score = {}
    MMStar_score['final score'] = 0
    for k, v in MMStar_score_l2.items():
        MMStar_score[k] = 0
        for l2_k, l2_v in v.items():
            MMStar_score[f'{k}({l2_k})'] = float(l2_v) / float(MMStar_counter[k][l2_k])
            MMStar_score[k] += l2_v
        MMStar_score['final score'] += MMStar_score[k]
        MMStar_score[k] = float(MMStar_score[k]) / 250.0
    MMStar_score['final score'] = float(MMStar_score['final score']) / 1500.0
    # Write the scores next to the predictions file and log a summary.
    score_pth = eval_file.replace('.jsonl', '_score.json')
    dump(MMStar_score, score_pth)
    logger.info(f'MMStar_eval successfully finished evaluating {eval_file}, results saved in {score_pth}')
    logger.info('Score: ')
    for key, value in MMStar_score.items():
        logger.info(f'{key}:{value}')
    return MMStar_score
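

# Minimal usage sketch (an assumption, not part of the original module): run the
# evaluator directly on a predictions file. The .jsonl path is supplied by the
# caller; each record is expected to carry 'prediction', 'answer', 'category'
# and 'l2_category' fields, as read in the loop above.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Score MMStar predictions.')
    parser.add_argument('eval_file', help='Path to a predictions .jsonl file')
    args = parser.parse_args()
    MMStar_eval(args.eval_file)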