alozowski HF staff committed on
Commit
09f61d7
1 Parent(s): 051fb0a

cache-management (#960)

Browse files

- conditionally refresh leaderboard during space restart (cdaca77188f584fc1b5fa9ce1905b6a42867eb01)

Files changed (1) hide show
  1. app.py +34 -29
app.py CHANGED
@@ -60,8 +60,14 @@ NEW_DATA_ON_LEADERBOARD = True
60
  LEADERBOARD_DF = None
61
 
62
  def restart_space():
 
63
  try:
64
- logging.info(f"Attempting to restart space with repo ID: {REPO_ID}")
 
 
 
 
 
65
  API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
66
  logging.info("Space restarted successfully.")
67
  except Exception as e:
@@ -107,23 +113,31 @@ def get_latest_data_leaderboard(leaderboard_initial_df=None):
107
  global NEW_DATA_ON_LEADERBOARD
108
  global LEADERBOARD_DF
109
  if NEW_DATA_ON_LEADERBOARD:
110
- print("Leaderboard updated at reload!")
111
- leaderboard_dataset = datasets.load_dataset(
112
- AGGREGATED_REPO,
113
- "default",
114
- split="train",
115
- cache_dir=None, # Disable cache directory usage
116
- download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD, # Always download fresh data
117
- verification_mode="no_checks"
118
- )
119
- LEADERBOARD_DF = get_leaderboard_df(
120
- leaderboard_dataset=leaderboard_dataset,
121
- cols=COLS,
122
- benchmark_cols=BENCHMARK_COLS,
123
- )
 
 
 
 
 
 
 
124
  NEW_DATA_ON_LEADERBOARD = False
125
  else:
126
  LEADERBOARD_DF = leaderboard_initial_df
 
127
  return LEADERBOARD_DF
128
 
129
 
@@ -450,25 +464,16 @@ webhooks_server = enable_space_ci_and_return_server(ui=main_block)
450
  # Add webhooks
451
  @webhooks_server.add_webhook
452
  def update_leaderboard(payload: WebhookPayload) -> None:
453
- """Redownloads the leaderboard dataset each time it updates."""
454
  if payload.repo.type == "dataset" and payload.event.action == "update":
455
  global NEW_DATA_ON_LEADERBOARD
456
- if NEW_DATA_ON_LEADERBOARD:
457
- logging.info("Leaderboard data is already marked for update, skipping...")
458
- return
459
  logging.info("New data detected, downloading updated leaderboard dataset.")
 
 
460
  NEW_DATA_ON_LEADERBOARD = True
461
 
462
- # Download the latest version of the dataset
463
- datasets.load_dataset(
464
- AGGREGATED_REPO,
465
- "default",
466
- split="train",
467
- cache_dir=HF_HOME,
468
- download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
469
- verification_mode="no_checks"
470
- )
471
- logging.info("Leaderboard dataset successfully downloaded.")
472
 
473
  # The below code is not used at the moment, as we can manage the queue file locally
474
  LAST_UPDATE_QUEUE = datetime.datetime.now()
 
60
  LEADERBOARD_DF = None
61
 
62
  def restart_space():
63
+ logging.info(f"Restarting space with repo ID: {REPO_ID}")
64
  try:
65
+ # Check if new data is pending and download if necessary
66
+ if NEW_DATA_ON_LEADERBOARD:
67
+ logging.info("Fetching latest leaderboard data before restart.")
68
+ get_latest_data_leaderboard()
69
+
70
+ # Now restart the space
71
  API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
72
  logging.info("Space restarted successfully.")
73
  except Exception as e:
 
113
  global NEW_DATA_ON_LEADERBOARD
114
  global LEADERBOARD_DF
115
  if NEW_DATA_ON_LEADERBOARD:
116
+ logging.info("Leaderboard updated at reload!")
117
+ try:
118
+ leaderboard_dataset = datasets.load_dataset(
119
+ AGGREGATED_REPO,
120
+ "default",
121
+ split="train",
122
+ cache_dir=HF_HOME,
123
+ download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD, # Always download fresh data
124
+ verification_mode="no_checks"
125
+ )
126
+ LEADERBOARD_DF = get_leaderboard_df(
127
+ leaderboard_dataset=leaderboard_dataset,
128
+ cols=COLS,
129
+ benchmark_cols=BENCHMARK_COLS,
130
+ )
131
+ logging.info("Leaderboard dataset successfully downloaded.")
132
+ except Exception as e:
133
+ logging.error(f"Failed to download leaderboard dataset: {e}")
134
+ return
135
+
136
+ # Reset the flag after successful download
137
  NEW_DATA_ON_LEADERBOARD = False
138
  else:
139
  LEADERBOARD_DF = leaderboard_initial_df
140
+ logging.info("Using cached leaderboard dataset.")
141
  return LEADERBOARD_DF
142
 
143
 
 
464
  # Add webhooks
465
  @webhooks_server.add_webhook
466
  def update_leaderboard(payload: WebhookPayload) -> None:
467
+ """Redownloads the leaderboard dataset each time it updates"""
468
  if payload.repo.type == "dataset" and payload.event.action == "update":
469
  global NEW_DATA_ON_LEADERBOARD
 
 
 
470
  logging.info("New data detected, downloading updated leaderboard dataset.")
471
+
472
+ # Mark the flag for new data
473
  NEW_DATA_ON_LEADERBOARD = True
474
 
475
+ # Now actually download the latest data immediately
476
+ get_latest_data_leaderboard()
 
 
 
 
 
 
 
 
477
 
478
  # The below code is not used at the moment, as we can manage the queue file locally
479
  LAST_UPDATE_QUEUE = datetime.datetime.now()