Jae-Won Chung commited on
Commit
76bf85e
1 Parent(s): 4e4fca8

Leaderboard tweaks

Browse files
Files changed (2) hide show
  1. app.py +49 -20
  2. data/diffusion/image-to-video/models.json +1 -1
app.py CHANGED
@@ -229,6 +229,13 @@ class LLMChatTableManager(LLMTableManager):
229
  def get_detail_text(self, detail_mode: bool) -> str:
230
  if detail_mode:
231
  text = """
 
 
 
 
 
 
 
232
  Columns
233
  - **Model**: The name of the model.
234
  - **Params (B)**: Number of parameters in the model.
@@ -242,10 +249,6 @@ class LLMChatTableManager(LLMTableManager):
242
  - **Avg BS**: Average batch size of the serving engine over time.
243
  - **Max BS**: Maximum batch size configuration of the serving engine.
244
 
245
- **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
246
- An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
247
- You can tweak the TPOT slider to adjust the target average TPOT for the models.
248
-
249
  For more detailed information, please take a look at the **About** tab.
250
  """
251
  else:
@@ -290,6 +293,13 @@ class LLMCodeTableManager(LLMTableManager):
290
  def get_detail_text(self, detail_mode: bool) -> str:
291
  if detail_mode:
292
  text = """
 
 
 
 
 
 
 
293
  Columns
294
  - **Model**: The name of the model.
295
  - **Params (B)**: Number of parameters in the model.
@@ -303,10 +313,6 @@ class LLMCodeTableManager(LLMTableManager):
303
  - **Avg BS**: Average batch size of the serving engine over time.
304
  - **Max BS**: Maximum batch size configuration of the serving engine.
305
 
306
- **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
307
- An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
308
- You can tweak the TPOT slider to adjust the target average TPOT for the models.
309
-
310
  For more detailed information, please take a look at the **About** tab.
311
  """
312
  else:
@@ -350,6 +356,13 @@ class VLMChatTableManager(LLMTableManager):
350
  def get_detail_text(self, detail_mode: bool) -> str:
351
  if detail_mode:
352
  text = """
 
 
 
 
 
 
 
353
  Columns
354
  - **Model**: The name of the model.
355
  - **Params (B)**: Number of parameters in the model.
@@ -363,10 +376,6 @@ class VLMChatTableManager(LLMTableManager):
363
  - **Avg BS**: Average batch size of the serving engine over time.
364
  - **Max BS**: Maximum batch size configuration of the serving engine.
365
 
366
- **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
367
- An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
368
- You can tweak the TPOT slider to adjust the target average TPOT for the models.
369
-
370
  For more detailed information, please take a look at the **About** tab.
371
  """
372
  else:
@@ -499,7 +508,7 @@ class DiffusionTableManager(TableManager):
499
  )
500
 
501
  if not detail_mode:
502
- core_columns = ["Model", "Denoising params", "GPU", "Denoising steps", "Resolution", "Frames", self.energy_col]
503
  readable_name_mapping = {
504
  "Denoising params": "Denoising parameters (Billions)",
505
  "GPU": "GPU model",
@@ -521,7 +530,9 @@ class DiffusionT2ITableManager(DiffusionTableManager):
521
 
522
  def get_intro_text(self) -> str:
523
  text = """
524
- <h2>Diffusion text-to-image generation</h2></br>
 
 
525
 
526
  <p style="font-size: 16px">
527
  Diffusion models generate images that align with input text prompts.
@@ -537,6 +548,9 @@ class DiffusionT2ITableManager(DiffusionTableManager):
537
  def get_detail_text(self, detail_mode: bool) -> str:
538
  if detail_mode:
539
  text = """
 
 
 
540
  Columns
541
  - **Model**: The name of the model.
542
  - **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
@@ -557,6 +571,7 @@ class DiffusionT2ITableManager(DiffusionTableManager):
557
  - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the image.
558
  - **GPU model**: Name of the GPU model used for benchmarking.
559
  - **Energy per image (Joules)**: Energy consumed for each generated image in Joules.
 
560
 
561
  Checking "Show more technical details" above the table will reveal more detailed columns.
562
  Also, for more detailed information, please take a look at the **About** tab.
@@ -575,7 +590,9 @@ class DiffusionT2VTableManager(DiffusionTableManager):
575
 
576
  def get_intro_text(self) -> str:
577
  text = """
578
- <h2>Diffusion text-to-video generation</h2></br>
 
 
579
 
580
  <p style="font-size: 16px">
581
  Diffusion models generate videos that align with input text prompts.
@@ -591,6 +608,9 @@ class DiffusionT2VTableManager(DiffusionTableManager):
591
  def get_detail_text(self, detail_mode: bool) -> str:
592
  if detail_mode:
593
  text = """
 
 
 
594
  Columns
595
  - **Model**: The name of the model.
596
  - **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
@@ -612,6 +632,8 @@ class DiffusionT2VTableManager(DiffusionTableManager):
612
  - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
613
  - **GPU model**: Name of the GPU model used for benchmarking.
614
  - **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
 
 
615
 
616
  Checking "Show more technical details" above the table will reveal more detailed columns.
617
  Also, for more detailed information, please take a look at the **About** tab.
@@ -630,7 +652,9 @@ class DiffusionI2VTableManager(DiffusionTableManager):
630
 
631
  def get_intro_text(self) -> str:
632
  text = """
633
- <h2>Diffusion image-to-video generation</h2></br>
 
 
634
 
635
  <p style="font-size: 16px">
636
  Diffusion models generate videos given an input image (and sometimes alongside with text).
@@ -646,6 +670,9 @@ class DiffusionI2VTableManager(DiffusionTableManager):
646
  def get_detail_text(self, detail_mode: bool) -> str:
647
  if detail_mode:
648
  text = """
 
 
 
649
  Columns
650
  - **Model**: The name of the model.
651
  - **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
@@ -667,6 +694,8 @@ class DiffusionI2VTableManager(DiffusionTableManager):
667
  - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
668
  - **GPU model**: Name of the GPU model used for benchmarking.
669
  - **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
 
 
670
 
671
  Checking "Show more technical details" above the table will reveal more detailed columns.
672
  Also, for more detailed information, please take a look at the **About** tab.
@@ -674,7 +703,7 @@ class DiffusionI2VTableManager(DiffusionTableManager):
674
  return text
675
 
676
  def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
677
- return {"Batch latency (s)": (0.0, 120.0, 1.0, 45.0)}
678
 
679
 
680
  class LegacyTableManager:
@@ -718,7 +747,7 @@ class LegacyTableManager:
718
  self.full_df = df
719
 
720
  # Default view of the table is to only show the first options.
721
- self.set_filter_get_df(detail_mode=False)
722
 
723
  def _read_tables(self, data_dir: str) -> pd.DataFrame:
724
  """Read tables."""
@@ -777,7 +806,7 @@ class LegacyTableManager:
777
  gr.Dropdown.update(choices=["None", *columns]),
778
  ]
779
 
780
- def set_filter_get_df(self, detail_mode: bool, *filters) -> pd.DataFrame:
781
  """Set the current set of filters and return the filtered DataFrame."""
782
  # If the filter is empty, we default to the first choice for each key.
783
  if not filters:
@@ -1027,7 +1056,7 @@ def consumed_more_energy_message(energy_a, energy_b):
1027
  # Colosseum event handlers
1028
  def on_load():
1029
  """Intialize the dataframe, shuffle the model preference dropdown choices."""
1030
- dataframe = global_ltbm.set_filter_get_df(detail_mode=False)
1031
  dataframes = [global_tbm.set_filter_get_df(detail_mode=False) for global_tbm in global_tbms]
1032
  return dataframe, *dataframes
1033
 
 
229
  def get_detail_text(self, detail_mode: bool) -> str:
230
  if detail_mode:
231
  text = """
232
+ **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
233
+ An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
234
+ You can tweak the TPOT slider to adjust the target average TPOT for the models.
235
+
236
+ Each row corresponds to one model, given a constraint on the maximum average TPOT.
237
+ If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
238
+
239
  Columns
240
  - **Model**: The name of the model.
241
  - **Params (B)**: Number of parameters in the model.
 
249
  - **Avg BS**: Average batch size of the serving engine over time.
250
  - **Max BS**: Maximum batch size configuration of the serving engine.
251
 
 
 
 
 
252
  For more detailed information, please take a look at the **About** tab.
253
  """
254
  else:
 
293
  def get_detail_text(self, detail_mode: bool) -> str:
294
  if detail_mode:
295
  text = """
296
+ **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
297
+ An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
298
+ You can tweak the TPOT slider to adjust the target average TPOT for the models.
299
+
300
+ Each row corresponds to one model, given a constraint on the maximum average TPOT.
301
+ If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
302
+
303
  Columns
304
  - **Model**: The name of the model.
305
  - **Params (B)**: Number of parameters in the model.
 
313
  - **Avg BS**: Average batch size of the serving engine over time.
314
  - **Max BS**: Maximum batch size configuration of the serving engine.
315
 
 
 
 
 
316
  For more detailed information, please take a look at the **About** tab.
317
  """
318
  else:
 
356
  def get_detail_text(self, detail_mode: bool) -> str:
357
  if detail_mode:
358
  text = """
359
+ **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
360
+ An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
361
+ You can tweak the TPOT slider to adjust the target average TPOT for the models.
362
+
363
+ Each row corresponds to one model, given a constraint on the maximum average TPOT.
364
+ If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
365
+
366
  Columns
367
  - **Model**: The name of the model.
368
  - **Params (B)**: Number of parameters in the model.
 
376
  - **Avg BS**: Average batch size of the serving engine over time.
377
  - **Max BS**: Maximum batch size configuration of the serving engine.
378
 
 
 
 
 
379
  For more detailed information, please take a look at the **About** tab.
380
  """
381
  else:
 
508
  )
509
 
510
  if not detail_mode:
511
+ core_columns = ["Model", "Denoising params", "GPU", "Resolution", "Frames", self.energy_col]
512
  readable_name_mapping = {
513
  "Denoising params": "Denoising parameters (Billions)",
514
  "GPU": "GPU model",
 
530
 
531
  def get_intro_text(self) -> str:
532
  text = """
533
+ <h2>How much energy do GenAI models consume?</h2>
534
+
535
+ <h3>Diffusion text-to-image generation</h3>
536
 
537
  <p style="font-size: 16px">
538
  Diffusion models generate images that align with input text prompts.
 
548
  def get_detail_text(self, detail_mode: bool) -> str:
549
  if detail_mode:
550
  text = """
551
+ Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
552
+ If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per image.
553
+
554
  Columns
555
  - **Model**: The name of the model.
556
  - **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
 
571
  - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the image.
572
  - **GPU model**: Name of the GPU model used for benchmarking.
573
  - **Energy per image (Joules)**: Energy consumed for each generated image in Joules.
574
+ - **Resolution**: Resolution of the generated image.
575
 
576
  Checking "Show more technical details" above the table will reveal more detailed columns.
577
  Also, for more detailed information, please take a look at the **About** tab.
 
590
 
591
  def get_intro_text(self) -> str:
592
  text = """
593
+ <h2>How much energy do GenAI models consume?</h2>
594
+
595
+ <h3>Diffusion text-to-video generation</h3>
596
 
597
  <p style="font-size: 16px">
598
  Diffusion models generate videos that align with input text prompts.
 
608
  def get_detail_text(self, detail_mode: bool) -> str:
609
  if detail_mode:
610
  text = """
611
+ Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
612
+ If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per video.
613
+
614
  Columns
615
  - **Model**: The name of the model.
616
  - **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
 
632
  - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
633
  - **GPU model**: Name of the GPU model used for benchmarking.
634
  - **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
635
+ - **Frames**: Number of frames in the generated video.
636
+ - **Resolution**: Resolution of the generated video.
637
 
638
  Checking "Show more technical details" above the table will reveal more detailed columns.
639
  Also, for more detailed information, please take a look at the **About** tab.
 
652
 
653
  def get_intro_text(self) -> str:
654
  text = """
655
+ <h2>How much energy do GenAI models consume?</h2>
656
+
657
+ <h3>Diffusion image-to-video generation</h3>
658
 
659
  <p style="font-size: 16px">
660
  Diffusion models generate videos given an input image (and sometimes alongside with text).
 
670
  def get_detail_text(self, detail_mode: bool) -> str:
671
  if detail_mode:
672
  text = """
673
+ Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
674
+ If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per video.
675
+
676
  Columns
677
  - **Model**: The name of the model.
678
  - **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
 
694
  - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
695
  - **GPU model**: Name of the GPU model used for benchmarking.
696
  - **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
697
+ - **Frames**: Number of frames in the generated video.
698
+ - **Resolution**: Resolution of the generated video.
699
 
700
  Checking "Show more technical details" above the table will reveal more detailed columns.
701
  Also, for more detailed information, please take a look at the **About** tab.
 
703
  return text
704
 
705
  def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
706
+ return {"Batch latency (s)": (0.0, 120.0, 1.0, 60.0)}
707
 
708
 
709
  class LegacyTableManager:
 
747
  self.full_df = df
748
 
749
  # Default view of the table is to only show the first options.
750
+ self.set_filter_get_df()
751
 
752
  def _read_tables(self, data_dir: str) -> pd.DataFrame:
753
  """Read tables."""
 
806
  gr.Dropdown.update(choices=["None", *columns]),
807
  ]
808
 
809
+ def set_filter_get_df(self, *filters) -> pd.DataFrame:
810
  """Set the current set of filters and return the filtered DataFrame."""
811
  # If the filter is empty, we default to the first choice for each key.
812
  if not filters:
 
1056
  # Colosseum event handlers
1057
  def on_load():
1058
  """Intialize the dataframe, shuffle the model preference dropdown choices."""
1059
+ dataframe = global_ltbm.set_filter_get_df()
1060
  dataframes = [global_tbm.set_filter_get_df(detail_mode=False) for global_tbm in global_tbms]
1061
  return dataframe, *dataframes
1062
 
data/diffusion/image-to-video/models.json CHANGED
@@ -15,7 +15,7 @@
15
  },
16
  "stabilityai/stable-video-diffusion-img2vid-xt": {
17
  "url": "https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt",
18
- "nickname": "Stable Video Diffusion xt",
19
  "total_params": 2.3,
20
  "denoising_params": 1.5,
21
  "resolution": "1024x576"
 
15
  },
16
  "stabilityai/stable-video-diffusion-img2vid-xt": {
17
  "url": "https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt",
18
+ "nickname": "Stable Video Diffusion XT",
19
  "total_params": 2.3,
20
  "denoising_params": 1.5,
21
  "resolution": "1024x576"