sherzod-hakimov committed on
Commit
18d5ac3
1 Parent(s): 69c36b6
Files changed (3) hide show
  1. app.py +2 -1
  2. requirements.txt +1 -1
  3. src/leaderboard_utils.py +27 -16
app.py CHANGED
@@ -8,7 +8,7 @@ from src.plot_utils import split_models, compare_plots
8
  dataframe_height = 800 # Height of the table in pixels
9
  # Get CSV data
10
  global primary_leaderboard_df, version_dfs, version_names
11
- primary_leaderboard_df, version_dfs, version_names = get_github_data()
12
 
13
  global prev_df
14
  prev_df = version_dfs[0]
@@ -48,6 +48,7 @@ with main_app:
48
  )
49
 
50
  gr.HTML(CLEMSCORE_TEXT)
 
51
 
52
  # Add a dummy leaderboard to handle search queries from the primary_leaderboard_df and not update primary_leaderboard_df
53
  dummy_leaderboard_table = gr.Dataframe(
 
8
  dataframe_height = 800 # Height of the table in pixels
9
  # Get CSV data
10
  global primary_leaderboard_df, version_dfs, version_names
11
+ primary_leaderboard_df, version_dfs, version_names, date = get_github_data()
12
 
13
  global prev_df
14
  prev_df = version_dfs[0]
 
48
  )
49
 
50
  gr.HTML(CLEMSCORE_TEXT)
51
+ gr.HTML(f"Last updated - {date}")
52
 
53
  # Add a dummy leaderboard to handle search queries from the primary_leaderboard_df and not update primary_leaderboard_df
54
  dummy_leaderboard_table = gr.Dataframe(
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
- gradio==3.43.2
2
  pandas==2.0.0
3
  plotly==5.18.0
 
1
+ gradio==4.36.1
2
  pandas==2.0.0
3
  plotly==5.18.0
src/leaderboard_utils.py CHANGED
@@ -2,17 +2,19 @@ import os
2
  import pandas as pd
3
  import requests, json
4
  from io import StringIO
 
 
5
 
6
  def get_github_data():
7
- '''
8
  Get data from csv files on Github
9
  Args:
10
- None
11
- Returns:
12
- latest_df: singular list containing dataframe of the latest version of the leaderboard with only 4 columns
13
  all_dfs: list of dataframes for previous versions + latest version including columns for all games
14
  all_vnames: list of the names for the previous versions + latest version (For Details and Versions Tab Dropdown)
15
- '''
16
  uname = "clembench"
17
  repo = "clembench-runs"
18
  json_url = f"https://raw.githubusercontent.com/{uname}/{repo}/main/benchmark_runs.json"
@@ -27,11 +29,18 @@ def get_github_data():
27
  csv_path = ver['result_file'].split('/')[1:]
28
  csv_path = '/'.join(csv_path)
29
 
30
- #Sort by latest version
31
  float_content = [float(s[1:]) for s in version_names]
32
  float_content.sort(reverse=True)
33
  version_names = ['v'+str(s) for s in float_content]
34
 
 
 
 
 
 
 
 
35
  DFS = []
36
  for version in version_names:
37
  result_url = csv_url+ version + '/' + csv_path
@@ -44,7 +53,7 @@ def get_github_data():
44
  else:
45
  print(f"Failed to read CSV file for version : {version}. Status Code : {resp.status_code}")
46
 
47
- # Only keep relavant columns for the main leaderboard
48
  latest_df_dummy = DFS[0]
49
  all_columns = list(latest_df_dummy.columns)
50
  keep_columns = all_columns[0:4]
@@ -56,22 +65,23 @@ def get_github_data():
56
  for df, name in zip(DFS, version_names):
57
  all_dfs.append(df)
58
  all_vnames.append(name)
59
- return latest_df, all_dfs, all_vnames
60
 
61
  else:
62
  print(f"Failed to read JSON file: Status Code : {resp.status_code}")
63
 
 
64
  def process_df(df: pd.DataFrame) -> pd.DataFrame:
65
- '''
66
- Process dataframe
67
- - Remove repition in model names
68
  - Convert datatypes to sort by "float" instead of "str" for sorting
69
  - Update column names
70
  Args:
71
  df: Unprocessed Dataframe (after using update_cols)
72
  Returns:
73
  df: Processed Dataframe
74
- '''
75
 
76
  # Change column type to float from str
77
  list_column_names = list(df.columns)
@@ -107,15 +117,16 @@ def process_df(df: pd.DataFrame) -> pd.DataFrame:
107
  df = df.rename(columns=map_cols)
108
  return df
109
 
 
110
  def filter_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
111
- '''
112
  Filter the dataframe based on the search query
113
  Args:
114
  df: Unfiltered dataframe
115
  query: a string of queries separated by ";"
116
  Return:
117
- filtered_df: Dataframe containing searched queries in the 'Model' column
118
- '''
119
  queries = query.split(';')
120
  list_cols = list(df.columns)
121
  df_len = len(df)
@@ -134,4 +145,4 @@ def filter_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
134
  if query == "":
135
  return df
136
 
137
- return filtered_df
 
2
  import pandas as pd
3
  import requests, json
4
  from io import StringIO
5
+ from datetime import datetime
6
+
7
 
8
  def get_github_data():
9
+ """
10
  Get data from csv files on Github
11
  Args:
12
+ None
13
+ Returns:
14
+ latest_df: singular list containing dataframe of the latest version of the leaderboard with only 4 columns
15
  all_dfs: list of dataframes for previous versions + latest version including columns for all games
16
  all_vnames: list of the names for the previous versions + latest version (For Details and Versions Tab Dropdown)
17
+ """
18
  uname = "clembench"
19
  repo = "clembench-runs"
20
  json_url = f"https://raw.githubusercontent.com/{uname}/{repo}/main/benchmark_runs.json"
 
29
  csv_path = ver['result_file'].split('/')[1:]
30
  csv_path = '/'.join(csv_path)
31
 
32
+ # Sort by latest version
33
  float_content = [float(s[1:]) for s in version_names]
34
  float_content.sort(reverse=True)
35
  version_names = ['v'+str(s) for s in float_content]
36
 
37
+ # Get date of latest version
38
+ for data in versions:
39
+ if data['version'] == version_names[0]:
40
+ date = data['date'] # Should be in YYYY/MM/DD format
41
+ date_obj = datetime.strptime(date, "%Y/%m/%d")
42
+ date = date_obj.strftime("%d %b %Y")
43
+
44
  DFS = []
45
  for version in version_names:
46
  result_url = csv_url+ version + '/' + csv_path
 
53
  else:
54
  print(f"Failed to read CSV file for version : {version}. Status Code : {resp.status_code}")
55
 
56
+ # Only keep relevant columns for the main leaderboard
57
  latest_df_dummy = DFS[0]
58
  all_columns = list(latest_df_dummy.columns)
59
  keep_columns = all_columns[0:4]
 
65
  for df, name in zip(DFS, version_names):
66
  all_dfs.append(df)
67
  all_vnames.append(name)
68
+ return latest_df, all_dfs, all_vnames, date
69
 
70
  else:
71
  print(f"Failed to read JSON file: Status Code : {resp.status_code}")
72
 
73
+
74
  def process_df(df: pd.DataFrame) -> pd.DataFrame:
75
+ """
76
+ Process dataframe
77
+ - Remove repetition in model names
78
  - Convert datatypes to sort by "float" instead of "str" for sorting
79
  - Update column names
80
  Args:
81
  df: Unprocessed Dataframe (after using update_cols)
82
  Returns:
83
  df: Processed Dataframe
84
+ """
85
 
86
  # Change column type to float from str
87
  list_column_names = list(df.columns)
 
117
  df = df.rename(columns=map_cols)
118
  return df
119
 
120
+
121
  def filter_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
122
+ """
123
  Filter the dataframe based on the search query
124
  Args:
125
  df: Unfiltered dataframe
126
  query: a string of queries separated by ";"
127
  Return:
128
+ filtered_df: Dataframe containing searched queries in the 'Model' column
129
+ """
130
  queries = query.split(';')
131
  list_cols = list(df.columns)
132
  df_len = len(df)
 
145
  if query == "":
146
  return df
147
 
148
+ return filtered_df