Spaces:
Running
Running
File size: 3,134 Bytes
d317f64 d6ca95d d317f64 d0e8be9 d317f64 d3db3e5 d317f64 d0e8be9 d317f64 d0e8be9 d317f64 381feac d317f64 d0e8be9 d317f64 4ade002 d317f64 455e9dc d317f64 f212dc5 d317f64 d0e8be9 d3db3e5 d317f64 d3db3e5 d317f64 d0e8be9 e348563 7555fc7 d6ca95d 7555fc7 d3db3e5 d6ca95d 103ed5f e348563 d0e8be9 d6ca95d 4ade002 4dd39c5 c30b150 3dc1e8c 55fc7f4 3dc1e8c d6ca95d 3dc1e8c d6ca95d d317f64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import functools
import json
import logging
import os
import time

import pandas as pd
from huggingface_hub import snapshot_download

from src.envs import DATA_PATH, HF_TOKEN_PRIVATE
# Set up root logging once at import time: INFO and above, with each record
# rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
def time_diff_wrapper(func):
    """Decorate *func* so that every successful call logs how long it took.

    The wrapped callable forwards all positional and keyword arguments to
    *func* and returns its result unchanged. The duration is logged at INFO
    level only when *func* returns normally; if it raises, the exception
    propagates and nothing is logged.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that keeps the metadata of *func* (``__name__``,
        ``__doc__``, ``__wrapped__``).
    """

    # functools.wraps keeps the decorated function's name and docstring so
    # introspection and tracebacks do not show every function as "wrapper".
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        logging.info("Time taken for %s: %s seconds", func.__name__, elapsed)
        return result

    return wrapper
@time_diff_wrapper
def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
    """Download a Hub repository snapshot into *local_dir*, retrying on failure.

    The download is best-effort: errors are logged rather than raised, and
    between attempts the function sleeps ``backoff_factor ** attempt``
    seconds (attempt counted from 0).

    Args:
        repo_id: Identifier of the repository to download.
        local_dir: Directory that receives the files; created if missing.
        repo_type: Repository type passed through to ``snapshot_download``.
        max_attempts: Maximum number of download attempts.
        backoff_factor: Base of the exponential wait between attempts.

    Returns:
        ``True`` if an attempt succeeded, ``False`` if every attempt failed
        (or ``max_attempts`` is not positive).
    """
    os.makedirs(local_dir, exist_ok=True)

    for attempt in range(max_attempts):
        try:
            logging.info("Downloading %s to %s", repo_id, local_dir)
            snapshot_download(
                repo_id=repo_id,
                local_dir=local_dir,
                cache_dir='./tmp',
                repo_type=repo_type,
                tqdm_class=None,
                token=HF_TOKEN_PRIVATE,
                etag_timeout=30,
                max_workers=8,
                local_dir_use_symlinks=False,
            )
        except Exception as e:  # deliberate best-effort: log and retry
            if attempt + 1 >= max_attempts:
                # Last attempt: no retry follows, so do not sleep or
                # announce one.
                logging.error("Error downloading %s: %s", repo_id, e)
                break
            wait_time = backoff_factor**attempt
            logging.error("Error downloading %s: %s, retrying in %ss", repo_id, e, wait_time)
            time.sleep(wait_time)
        else:
            logging.info("Download successful")
            return True

    logging.error("Failed to download %s after %s attempts", repo_id, max_attempts)
    return False
def download_openbench():
    """Download the two datasets the leaderboard relies on, one after another.

    First the previously auto-generated leaderboard files go into
    ``DATA_PATH``, then the answers of trusted models go into ``m_data``.
    """
    targets = (
        # previously auto-generated leaderboard files
        ("Vikhrmodels/s-shlepa-metainfo", DATA_PATH),
        # answers of different models that we trust
        ("Vikhrmodels/s-openbench-eval", "m_data"),
    )
    for repo_id, destination in targets:
        download_dataset(repo_id, destination)
def build_leadearboard_df():
    """Build the leaderboard table from ``<DATA_PATH>/leaderboard.json``.

    Steps:
      1. Load the JSON records into a DataFrame.
      2. Make sure a ``mmluproru`` column exists, with missing values set to 0.
      3. Keep the displayed columns and add ``avg``, the row-wise mean of the
         five benchmark scores.
      4. When there are more than three rows, drop rows whose ``mmluproru``
         is 0.
      5. Sort by ``avg`` descending and round numeric columns to 3 decimals.

    Returns:
        A new ``pandas.DataFrame`` with columns ``model``, ``mmluproru``,
        ``moviesmc``, ``musicmc``, ``lawmc``, ``booksmc``, ``model_dtype``,
        ``ppl`` and ``avg``.

    Raises:
        FileNotFoundError: If ``leaderboard.json`` is missing.
        KeyError: If the records lack one of the required columns.
    """
    score_cols = ['moviesmc', 'musicmc', 'lawmc', 'booksmc', 'mmluproru']
    display_cols = ['model', 'mmluproru', 'moviesmc', 'musicmc', 'lawmc', 'booksmc', 'model_dtype', 'ppl']

    with open(f"{os.path.abspath(DATA_PATH)}/leaderboard.json", "r", encoding="utf-8") as eval_file:
        records = json.load(eval_file)
    # Raw payload is only useful when debugging; keep it out of normal output.
    logging.debug("Loaded leaderboard records: %s", records)

    df = pd.DataFrame.from_records(records)
    if 'mmluproru' in df.columns:
        df['mmluproru'] = df['mmluproru'].fillna(0)
    else:
        df['mmluproru'] = 0

    # Explicit copy: the frame is mutated below, and assigning into a bare
    # column slice of ``df`` is chained assignment (SettingWithCopyWarning).
    leaderboard_df = df[display_cols].copy()
    leaderboard_df['avg'] = leaderboard_df[score_cols].mean(axis=1)

    # With more than three entries, hide models without an mmluproru score.
    if len(leaderboard_df) > 3:
        leaderboard_df = leaderboard_df[leaderboard_df['mmluproru'] != 0]

    logging.info("Leaderboard DataFrame shape: %s", leaderboard_df.shape)

    # sort_values returns a fresh frame, so the later column assignment never
    # writes into a filtered view.
    leaderboard_df = leaderboard_df.sort_values(by='avg', ascending=False)
    numeric_cols = leaderboard_df.select_dtypes(include=['number']).columns
    leaderboard_df[numeric_cols] = leaderboard_df[numeric_cols].round(3)
    return leaderboard_df
|