Spaces:

AIM-Harvard
/

rabbits-leaderboard

Runtime error

magilogi

new data

cbf54c8 6 months ago

3.41 kB

	import os
	import json
	import pandas as pd
	import sys

	# Add the data/api-results directory to the import path so api_results can be imported
	sys.path.append(os.path.abspath('data/api-results'))

	# Now import the API results
	from api_results import gpt4, gpt4o, gpt35turbo, claude_opus, gemini_15_pro, gemini_pro_1, gemini_15_flash
	from models_info import model_info

	# Folder whose "*.json" files are read as per-model evaluation results.
	directory = "data/raw-eval-outputs"
	# Accumulates one dict per leaderboard row before the DataFrame is built.
	data = []

	# Function to create a clickable hyperlink for the model name
	def model_hyperlink(link, model_name):
	    """Build an HTML anchor that displays ``model_name`` and targets ``link``.

	    Args:
	        link: Value placed in the anchor's ``href`` attribute.
	        model_name: Value shown as the anchor's text.

	    Returns:
	        The anchor element as a string. Both values are interpolated as-is
	        (no HTML escaping is applied) and the link opens in a new tab.
	    """
	    anchor_style = (
	        "color: var(--link-text-color); "
	        "text-decoration: underline;"
	        "text-decoration-style: dotted;"
	    )
	    template = '<a target="_blank" href="{}" style="{}">{}</a>'
	    return template.format(link, anchor_style, model_name)

	# Function to apply the hyperlink creation function to the DataFrame
	def make_clickable_names(df):
	    """Replace each "Model" entry with an HTML hyperlink to that model.

	    Every row's "Model" value is wrapped by ``model_hyperlink`` using the
	    value of the same row's "Link" column. The frame is modified in place
	    and also returned.

	    Args:
	        df: DataFrame containing "Model" and "Link" columns.

	    Returns:
	        The same DataFrame object, with "Model" rewritten.

	    Raises:
	        KeyError: If the "Model" or "Link" column is missing.
	    """
	    # Pair the two columns positionally instead of using a row-wise
	    # DataFrame.apply: apply(axis=1) on a frame with no rows hands back a
	    # DataFrame rather than a Series, which cannot be assigned to a single
	    # column. The list form also avoids building a Series object per row.
	    df["Model"] = [
	        model_hyperlink(link, name)
	        for link, name in zip(df["Link"], df["Model"])
	    ]
	    return df

	# Collect one leaderboard row per "*.json" file in the directory.
	# The listing is sorted because os.listdir() returns entries in arbitrary
	# order, which would otherwise let the row (and discovered column) order of
	# the generated CSV vary between machines and runs.
	for filename in sorted(os.listdir(directory)):
	    if not filename.endswith(".json"):
	        continue

	    filepath = os.path.join(directory, filename)
	    # Read as UTF-8 explicitly instead of relying on the platform default.
	    with open(filepath, 'r', encoding='utf-8') as f:
	        json_data = json.load(f)
	    model_name = filename.replace("_results.json", "")

	    # Extract the accuracy values as percentages rounded to two decimals.
	    results = json_data['results']
	    row = {'Model': model_name}
	    for key, value in results.items():
	        row[key] = round(value['acc,none'] * 100, 2)

	    # Add the tuning type and link to the row.
	    # A model name absent from model_info raises KeyError here.
	    info = model_info[model_name]
	    row['T'] = info['tuning']
	    row['Link'] = info['link']

	    data.append(row)

	# Prepare the API results for integration: leaderboard display name mapped
	# to the score mapping imported from api_results.
	api_models = {
	    "GPT-4": gpt4,
	    "GPT-4o": gpt4o,
	    "GPT-3.5 Turbo": gpt35turbo,
	    "Claude Opus": claude_opus,
	    "Gemini 1.5 Pro": gemini_15_pro,
	    "Gemini Pro 1": gemini_pro_1,
	    "Gemini 1.5 Flash": gemini_15_flash,
	}

	# (output column, key in the API score mapping) for every mandatory score.
	_api_score_columns = (
	    ("medmcqa_g2b", "medmcqa_g2b"),
	    ("medmcqa_orig_filtered", "medmcqa_og"),
	    ("medqa_4options_g2b", "medqa_g2b"),
	    ("medqa_4options_orig_filtered", "medqa_og"),
	)

	for api_name, api_scores in api_models.items():
	    # 'b4bqa' is the only optional score; it is recorded as 0 when absent.
	    entry = {
	        "Model": api_name,
	        "b4bqa": round(api_scores.get("b4bqa", 0) * 100, 2),
	    }
	    # Scores are converted to percentages rounded to two decimals.
	    for column, source_key in _api_score_columns:
	        entry[column] = round(api_scores[source_key] * 100, 2)
	    entry["T"] = model_info[api_name]["tuning"]
	    entry["Link"] = model_info[api_name]["link"]
	    data.append(entry)

	# Create DataFrame from the collected data. The "Link" column is only
	# needed to build the clickable model names, so it is dropped afterwards.
	df = make_clickable_names(pd.DataFrame(data)).drop(columns=["Link"])

	# Calculate differences between specific evaluation metrics:
	# (result column, minuend column, subtrahend column).
	for diff_col, g2b_col, orig_col in (
	    ("medmcqa_diff", "medmcqa_g2b", "medmcqa_orig_filtered"),
	    ("medqa_diff", "medqa_4options_g2b", "medqa_4options_orig_filtered"),
	):
	    df[diff_col] = (df[g2b_col] - df[orig_col]).round(2)

	# Reorder columns: the fixed leading columns first, followed by every
	# remaining column in its existing order.
	leading_cols = [
	    "T",
	    "Model",
	    "b4bqa",
	    "b4b",
	    "medmcqa_g2b",
	    "medmcqa_orig_filtered",
	    "medmcqa_diff",
	    "medqa_4options_g2b",
	    "medqa_4options_orig_filtered",
	    "medqa_diff",
	]
	cols = leading_cols + [col for col in df.columns if col not in leading_cols]
	df = df[cols]

	# Save DataFrame to CSV (without the index column).
	output_csv = 'data/csv/models_data.csv'
	# to_csv does not create missing parent directories, so make sure the
	# target folder exists before writing.
	output_dir = os.path.dirname(output_csv)
	if output_dir:
	    os.makedirs(output_dir, exist_ok=True)
	df.to_csv(output_csv, index=False)

	print(f"DataFrame saved to {output_csv}")