Restart the Space every 6 hours as a workaround, refactor the garbage remover, and add apscheduler as a new dependency

#19
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -15,6 +15,8 @@ from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
15
  from mergekit.config import MergeConfiguration
16
 
17
  from clean_community_org import garbage_collect_empty_models
 
 
18
 
19
  has_gpu = torch.cuda.is_available()
20
 
@@ -186,10 +188,29 @@ def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]
186
  )
187
  yield runner.log(f"Model successfully uploaded to HF: {repo_url.repo_id}")
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
  with gr.Blocks() as demo:
191
  gr.Markdown(MARKDOWN_DESCRIPTION)
192
-
 
193
  with gr.Row():
194
  filename = gr.Textbox(visible=False, label="filename")
195
  config = gr.Code(language="yaml", lines=10, label="config.yaml")
@@ -221,18 +242,5 @@ with gr.Blocks() as demo:
221
  button.click(fn=merge, inputs=[config, token, repo_name], outputs=[logs])
222
 
223
 
224
- # Run garbage collection every hour to keep the community org clean.
225
- # Empty models might exists if the merge fails abruptly (e.g. if user leaves the Space).
226
- def _garbage_collect_every_hour():
227
- while True:
228
- try:
229
- garbage_collect_empty_models(token=COMMUNITY_HF_TOKEN)
230
- except Exception as e:
231
- print("Error running garbage collection", e)
232
- time.sleep(3600)
233
-
234
-
235
- pool = ThreadPoolExecutor()
236
- pool.submit(_garbage_collect_every_hour)
237
 
238
  demo.queue(default_concurrency_limit=1).launch()
 
15
  from mergekit.config import MergeConfiguration
16
 
17
  from clean_community_org import garbage_collect_empty_models
18
+ from apscheduler.schedulers.background import BackgroundScheduler
19
+ from datetime import datetime, timezone
20
 
21
  has_gpu = torch.cuda.is_available()
22
 
 
188
  )
189
  yield runner.log(f"Model successfully uploaded to HF: {repo_url.repo_id}")
190
 
191
+ # This is a workaround, as the Space keeps getting stuck.
192
+ def _restart_space():
193
+ huggingface_hub.HfApi().restart_space(repo_id="arcee-ai/mergekit-gui", token=COMMUNITY_HF_TOKEN, factory_reboot=False)
194
+ # Run garbage collection every hour to keep the community org clean.
195
+ # Empty models might exist if the merge fails abruptly (e.g. if the user leaves the Space).
196
+ def _garbage_remover():
197
+ try:
198
+ garbage_collect_empty_models(token=COMMUNITY_HF_TOKEN)
199
+ except Exception as e:
200
+ print("Error running garbage collection", e)
201
+
202
+ scheduler = BackgroundScheduler()
203
+ restart_space_job = scheduler.add_job(_restart_space, "interval", seconds=21600)
204
+ garbage_remover_job = scheduler.add_job(_garbage_remover, "interval", seconds=3600)
205
+ scheduler.start()
206
+ next_run_time_utc = restart_space_job.next_run_time.astimezone(timezone.utc)
207
+
208
+ NEXT_RESTART = f"Next Restart: {next_run_time_utc.strftime('%Y-%m-%d %H:%M:%S')} (UTC)"
209
 
210
  with gr.Blocks() as demo:
211
  gr.Markdown(MARKDOWN_DESCRIPTION)
212
+ gr.Markdown(NEXT_RESTART)
213
+
214
  with gr.Row():
215
  filename = gr.Textbox(visible=False, label="filename")
216
  config = gr.Code(language="yaml", lines=10, label="config.yaml")
 
242
  button.click(fn=merge, inputs=[config, token, repo_name], outputs=[logs])
243
 
244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  demo.queue(default_concurrency_limit=1).launch()