diff --git a/.gitignore b/.gitignore
index f9a2da9b3dad854e19140b9aa1808f597703fb71..5b7b7d3df859dca258588f880cba840ab2d3ab23 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,4 +18,4 @@ build/
 
 # Data files
 *.log
-pegasus/consumed.yaml
+figures/
diff --git a/benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml b/benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml
index 12c945c5bf31e6cc46da3946cde930efc0dac3bd..fe18d4d0edf9d7dbe23dabbb07c27e06cc89be00 100644
--- a/benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml
+++ b/benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml
@@ -1,6 +1,6 @@
 - command:
-    - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 8 4 2 1 --power-limits 400 --num-inference-steps 25"
+    - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 4 3 2 1 --power-limits 400 --num-inference-steps 1 2 4 8 16 25 30 40 50"
   model:
-    - '--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt'
-    - '--model stabilityai/stable-video-diffusion-img2vid --num-frames 14'
-    - '--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25'
+    - '--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt --width 1280 --height 720'
+    - '--model stabilityai/stable-video-diffusion-img2vid --num-frames 14 --width 1024 --height 576'
+    - '--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25 --width 1024 --height 576'
diff --git a/benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml b/benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml
index 3602b7c36fdd2b51ce0cdf14554e6ddb27ca1436..724a9edda5e111ecdf1e556dec8e293ca218fb02 100644
--- a/benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml
+++ b/benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml
@@ -1,6 +1,6 @@
 - command:
-    - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_700.json --gpu-ids {{ gpu }} --batch-sizes 64 32 16 8 4 2 1 --power-limits 700 --num-inference-steps 25"
+    - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 4 3 2 1 --power-limits 700 --num-inference-steps 1 2 4 8 16 25 30 40 50"
   model:
-    - '--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt'
-    - '--model stabilityai/stable-video-diffusion-img2vid --num-frames 14'
-    - '--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25'
+    - "--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt --width 1280 --height 720"
+    - "--model stabilityai/stable-video-diffusion-img2vid --num-frames 14 --width 1024 --height 576"
+    - "--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25 --width 1024 --height 576"
diff --git a/benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py b/benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py
index 6f894e8714eab9e6442e76b1423372387f73dc2d..066fa498f28d749ec71d2d996037303403d30f81 100644
--- a/benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py
+++ b/benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py
@@ -15,7 +15,7 @@ def main(results_dir: Path, output_file: Path) -> None:
     for model_dir in sorted(glob(f"{results_dir}/*/*")):
         model_name = "/".join(model_dir.split("/")[-2:])
         print(f" {model_name}")
-        result_file_cand = glob(f"{model_dir}/bs1+*+results.json")
+        result_file_cand = glob(f"{model_dir}/bs1+*+steps25+results.json")
         assert len(result_file_cand) == 1, model_name
         results_data = json.load(open(result_file_cand[0]))
         denosing_module_name = "unet" if "unet" in results_data["num_parameters"] else "transformer"
diff --git a/benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py b/benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py
index 653b92598f67d6323c19b14e278c340796fc37bd..b51442630b719545ea9a0ca670a28121d37b1800 100644
--- a/benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py
+++ b/benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py
@@ -27,10 +27,10 @@ class Results:
     model: str
     num_parameters: dict[str, int]
     gpu_model: str
-    num_infernece_steps: int
-    num_frames: int
     power_limit: int
     batch_size: int
+    num_inference_steps: int
+    num_frames: int
     num_prompts: int
     total_runtime: float = 0.0
     total_energy: float = 0.0
@@ -80,6 +80,7 @@ def load_text_image_prompts(
     path: str,
     batch_size: int,
     num_batches: int | None = None,
+    image_resize: tuple[int, int] | None = None,
 ) -> tuple[int, list[tuple[list[str], list[Image.Image]]]]:
     """Load the dataset to feed the model and return it as a list of batches of prompts.
 
@@ -93,6 +94,9 @@ def load_text_image_prompts(
     dataset = json.load(open(path))
     assert len(dataset["caption"]) == len(dataset["video_id"])
 
+    dataset["caption"] *= 10
+    dataset["video_id"] *= 10
+
     if num_batches is not None:
         if len(dataset["caption"]) < num_batches * batch_size:
             raise ValueError("Not enough data for the requested number of batches.")
@@ -103,6 +107,8 @@ def load_text_image_prompts(
     dataset["first_frame"] = [
         load_image(str(image_path / f"{video_id}.jpg")) for video_id in dataset["video_id"]
     ]
+    if image_resize is not None:
+        dataset["first_frame"] = [image.resize(image_resize) for image in dataset["first_frame"]]
 
     batched = [
         (dataset["caption"][i : i + batch_size], dataset["first_frame"][i : i + batch_size])
@@ -135,8 +141,8 @@ def benchmark(args: argparse.Namespace) -> None:
     results_dir = Path(args.result_root) / args.model
     results_dir.mkdir(parents=True, exist_ok=True)
 
-    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}")
-    video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+generated"
+    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}")
+    video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}+generated"
     video_dir.mkdir(exist_ok=True)
 
     arg_out_filename = f"{benchmark_name}+args.json"
@@ -150,11 +156,16 @@
     pynvml.nvmlInit()
     handle = pynvml.nvmlDeviceGetHandleByIndex(0)
     gpu_model = pynvml.nvmlDeviceGetName(handle)
-    pynvml.nvmlDeviceSetPersistenceMode(handle, pynvml.NVML_FEATURE_ENABLED)
-    pynvml.nvmlDeviceSetPowerManagementLimit(handle, args.power_limit * 1000)
+    # pynvml.nvmlDeviceSetPersistenceMode(handle, pynvml.NVML_FEATURE_ENABLED)
+    # pynvml.nvmlDeviceSetPowerManagementLimit(handle, args.power_limit * 1000)
    pynvml.nvmlShutdown()
 
-    num_prompts, batched_prompts = load_text_image_prompts(args.dataset_path, args.batch_size, args.num_batches)
+    num_prompts, batched_prompts = load_text_image_prompts(
+        args.dataset_path,
+        args.batch_size,
+        args.num_batches,
+        (args.width, args.height),
+    )
 
     pipeline = get_pipeline(args.model)
 
@@ -189,7 +200,7 @@ def benchmark(args: argparse.Namespace) -> None:
         fps_param_name = fps_param_name_candidates[0]
 
     torch.cuda.reset_peak_memory_stats(device="cuda:0")
-    zeus_monitor.begin_window("benchmark", sync_cuda=False)
+    zeus_monitor.begin_window("benchmark", sync_execution=False)
 
     # Build common parameter dict for all batches
     params: dict[str, Any] = dict(
@@ -210,15 +221,15 @@ def benchmark(args: argparse.Namespace) -> None:
         if args.add_text_prompt:
             params["prompt"] = intermediate.prompts
 
-        zeus_monitor.begin_window("batch", sync_cuda=False)
+        zeus_monitor.begin_window("batch", sync_execution=False)
         frames = pipeline(**params).frames
-        batch_measurements = zeus_monitor.end_window("batch", sync_cuda=False)
+        batch_measurements = zeus_monitor.end_window("batch", sync_execution=False)
 
         intermediate.frames = frames
         intermediate.batch_latency = batch_measurements.time
         intermediate.batch_energy = batch_measurements.total_energy
 
-    measurements = zeus_monitor.end_window("benchmark", sync_cuda=False)
+    measurements = zeus_monitor.end_window("benchmark", sync_execution=False)
     peak_memory = torch.cuda.max_memory_allocated(device="cuda:0")
 
     results: list[Result] = []
@@ -255,10 +266,10 @@ def benchmark(args: argparse.Namespace) -> None:
         model=args.model,
         num_parameters=count_parameters(pipeline),
         gpu_model=gpu_model,
-        num_infernece_steps=args.num_inference_steps,
-        num_frames=args.num_frames,
         power_limit=args.power_limit,
         batch_size=args.batch_size,
+        num_inference_steps=args.num_inference_steps,
+        num_frames=args.num_frames,
         num_prompts=num_prompts,
         total_runtime=measurements.time,
         total_energy=measurements.total_energy,
@@ -289,8 +300,8 @@ if __name__ == "__main__":
     parser.add_argument("--num-inference-steps", type=int, default=50, help="The number of denoising steps.")
     parser.add_argument("--num-frames", type=int, default=1, help="The number of frames to generate.")
     parser.add_argument("--fps", type=int, default=16, help="Frames per second for micro-conditioning.")
-    parser.add_argument("--height", type=int, help="Height of the generated video.")
-    parser.add_argument("--width", type=int, help="Width of the generated video.")
+    parser.add_argument("--height", type=int, required=True, help="Height of the generated video.")
+    parser.add_argument("--width", type=int, required=True, help="Width of the generated video.")
     parser.add_argument("--num-batches", type=int, default=None, help="The number of batches to use from the dataset.")
     parser.add_argument("--save-every", type=int, default=10, help="Save generations to file every N prompts.")
     parser.add_argument("--seed", type=int, default=0, help="The seed to use for the RNG.")
diff --git a/benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py b/benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py
index 4fee82fd197fa4fb4fa99116d4c1d22be2318ecd..0bf3aeb8846946033e212b03327ece8890b3ea70 100644
--- a/benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py
+++ b/benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py
@@ -28,44 +28,48 @@ def main(args: argparse.Namespace) -> None:
     print_and_write(outfile, f"Benchmarking {args.model}\n")
     print_and_write(outfile, f"Batch sizes: {args.batch_sizes}\n")
     print_and_write(outfile, f"Power limits: {args.power_limits}\n")
+    print_and_write(outfile, f"Number of inference steps: {args.num_inference_steps}\n")
 
     for batch_size in args.batch_sizes:
         for power_limit in args.power_limits:
-            print_and_write(outfile, f"{batch_size=}, {power_limit=}\n", flush=True)
-            with subprocess.Popen(
-                args=[
-                    "docker", "run",
-                    "--gpus", '"device=' + ','.join(args.gpu_ids) + '"',
-                    "--cap-add", "SYS_ADMIN",
-                    "--name", f"leaderboard-i2v-{''.join(args.gpu_ids)}",
-                    "--rm",
-                    "-v", "/data/leaderboard/hfcache:/root/.cache/huggingface",
-                    "-v", f"{os.getcwd()}:/workspace/image-to-video",
-                    "mlenergy/leaderboard:diffusion-i2v",
-                    "--dataset-path", args.dataset_path,
-                    "--result-root", args.result_root,
-                    "--batch-size", batch_size,
-                    "--num-batches", "10",
-                    "--power-limit", power_limit,
-                    "--model", args.model,
-                    "--huggingface-token", hf_token,
-                    "--num-frames", args.num_frames,
-                    "--num-inference-steps", args.num_inference_steps,
-                ] + (["--add-text-prompt"] if args.add_text_prompt else []),
-                stdout=subprocess.PIPE,
-                stderr=subprocess.STDOUT,
-                text=True,
-            ) as proc:
-                if proc.stdout:
-                    i = 0
-                    for line in proc.stdout:
-                        print_and_write(outfile, line, flush=i % 50 == 0)
-                        i += 1
+            for num_inference_steps in args.num_inference_steps:
+                print_and_write(outfile, f"{batch_size=}, {power_limit=}, {num_inference_steps=}\n", flush=True)
+                with subprocess.Popen(
+                    args=[
+                        "docker", "run",
+                        "--gpus", '"device=' + ','.join(args.gpu_ids) + '"',
+                        "--cap-add", "SYS_ADMIN",
+                        "--name", f"leaderboard-i2v-{''.join(args.gpu_ids)}",
+                        "--rm",
+                        "-v", "/data/leaderboard/hfcache:/root/.cache/huggingface",
+                        "-v", f"{os.getcwd()}:/workspace/image-to-video",
+                        "mlenergy/leaderboard:diffusion-i2v",
+                        "--dataset-path", args.dataset_path,
+                        "--result-root", args.result_root,
+                        "--batch-size", batch_size,
+                        "--num-batches", "8",
+                        "--power-limit", power_limit,
+                        "--model", args.model,
+                        "--huggingface-token", hf_token,
+                        "--num-frames", args.num_frames,
+                        "--num-inference-steps", num_inference_steps,
+                        "--width", str(args.width),
+                        "--height", str(args.height),
+                    ] + (["--add-text-prompt"] if args.add_text_prompt else []),
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.STDOUT,
+                    text=True,
+                ) as proc:
+                    if proc.stdout:
+                        i = 0
+                        for line in proc.stdout:
+                            print_and_write(outfile, line, flush=i % 50 == 0)
+                            i += 1
 
-            # If proc exited with non-zero status, it's probably an OOM.
-            # Move on to the next batch size.
-            if proc.returncode != 0:
-                break
+                # If proc exited with non-zero status, it's probably an OOM.
+                # Move on to the next batch size.
+                if proc.returncode != 0:
+                    break
@@ -77,8 +81,10 @@ if __name__ == "__main__":
     parser.add_argument("--batch-sizes", type=str, nargs="+", default=["8", "4", "2", "1"], help="Batch sizes to benchmark")
     parser.add_argument("--power-limits", type=str, nargs="+", default=["400", "300", "200"], help="Power limits to benchmark")
     parser.add_argument("--num-frames", type=str, help="Number of frames to generate")
-    parser.add_argument("--num-inference-steps", type=str, help="Number of denoising steps")
+    parser.add_argument("--num-inference-steps", type=str, nargs="+", default=["1", "2", "4", "8", "16", "30", "40", "50"], help="Number of inference steps to run")
     parser.add_argument("--add-text-prompt", action="store_true", help="Input text prompt alongside image.")
+    parser.add_argument("--height", type=int, required=True, help="Height of the generated video.")
+    parser.add_argument("--width", type=int, required=True, help="Width of the generated video.")
     parser.add_argument("--dataset-path", type=str, help="Path to the dataset JSON file.")
     args = parser.parse_args()
     main(args)
diff --git a/benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py b/benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py
index 66476538129b458b3174f936a1c351655e451906..ed808b2b28ce203c17b6ebacd09c906767498eac 100644
--- a/benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py
+++ b/benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py
@@ -3,7 +3,7 @@ import json
 
 import cv2
 
-DATASET_PATH = "sharegpt4video_700.json"
+DATASET_PATH = "sharegpt4video_100.json"
 
 
 def main() -> None:
diff --git a/benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml b/benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml
index 7eae3fc38de4c9dac3fef3c8663f2df7204635b4..247f98264d3c65ec5f7ab453384b766a95be822b 100644
--- a/benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml
+++ b/benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml
@@ -1,5 +1,5 @@
 - command:
-    - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --gpu-ids {{ gpu }} --batch-sizes 16 8 4 2 1 --power-limits 400"
+    - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --gpu-ids {{ gpu }} --batch-sizes 64 32 16 8 4 2 1 --num-inference-steps 1 2 4 8 16 25 30 40 50 --power-limits 400"
   model:
     - stabilityai/stable-diffusion-2-1
     - stabilityai/stable-diffusion-xl-base-1.0
diff --git a/benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py b/benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py
index fb4865e75149bbe01526f44fae22ae86771536bc..066fa498f28d749ec71d2d996037303403d30f81 100644
--- a/benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py
+++ b/benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py
@@ -15,7 +15,7 @@ def main(results_dir: Path, output_file: Path) -> None:
     for model_dir in sorted(glob(f"{results_dir}/*/*")):
         model_name = "/".join(model_dir.split("/")[-2:])
         print(f" {model_name}")
-        result_file_cand = glob(f"{model_dir}/bs1+*+results.json")
+        result_file_cand = glob(f"{model_dir}/bs1+*+steps25+results.json")
         assert len(result_file_cand) == 1, model_name
         results_data = json.load(open(result_file_cand[0]))
         denosing_module_name = "unet" if "unet" in results_data["num_parameters"] else "transformer"
@@ -24,6 +24,7 @@ def main(results_dir: Path, output_file: Path) -> None:
             nickname=model_name.split("/")[-1].replace("-", " ").title(),
             total_params=raw_params_to_readable(sum(results_data["num_parameters"].values())),
             denoising_params=raw_params_to_readable(results_data["num_parameters"][denosing_module_name]),
+            resolution="NA",
         )
         assert model_name not in models
         models[model_name] = model_info
diff --git a/benchmark/diffusion/text-to-image/scripts/benchmark_one_datapoint.py b/benchmark/diffusion/text-to-image/scripts/benchmark_one_datapoint.py
index f42d60c54fffc8568c31b99280d2dd9d8cdc46e3..b719d6b088847d9c09314240d7cedc8a6ee4ada5 100644
--- a/benchmark/diffusion/text-to-image/scripts/benchmark_one_datapoint.py
+++ b/benchmark/diffusion/text-to-image/scripts/benchmark_one_datapoint.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
 import os
+import time
 import json
 import argparse
+import multiprocessing as mp
 from pprint import pprint
 from pathlib import Path
 from contextlib import suppress
@@ -11,6 +13,7 @@ from dataclasses import dataclass, field, asdict
 import torch
 import pynvml
 import numpy as np
+import pandas as pd
 from PIL import Image
 from datasets import load_dataset, Dataset
 from transformers.trainer_utils import set_seed
@@ -35,9 +38,9 @@ class Results:
     model: str
     num_parameters: dict[str, int]
     gpu_model: str
-    num_inference_steps: int
     power_limit: int
     batch_size: int
+    num_inference_steps: int
     num_prompts: int
     average_clip_score: float = 0.0
     total_runtime: float = 0.0
@@ -118,6 +121,28 @@ def load_partiprompts(
     return len(batched) * batch_size, batched
 
 
+def power_monitor(csv_path: str, gpu_indices: list[int], chan: mp.SimpleQueue) -> None:
+    pynvml.nvmlInit()
+    handles = [pynvml.nvmlDeviceGetHandleByIndex(i) for i in gpu_indices]
+
+    fields = [
+        (pynvml.NVML_FI_DEV_POWER_AVERAGE, pynvml.NVML_POWER_SCOPE_GPU),
+        (pynvml.NVML_FI_DEV_POWER_AVERAGE, pynvml.NVML_POWER_SCOPE_MEMORY),
+    ]
+
+    columns = ["timestamp"] + sum([[f"gpu{i}", f"vram{i}"] for i in gpu_indices], [])
+    power: list[list] = []
+    while chan.empty():
+        row = [time.monotonic()]
+        values = [pynvml.nvmlDeviceGetFieldValues(h, fields) for h in handles]
+        for value in values:
+            row.extend((value[0].value.uiVal, value[1].value.uiVal))
+        power.append(row)
+        time.sleep(max(0.0, 0.1 - (time.monotonic() - row[0])))
+
+    pd.DataFrame(power, columns=columns).to_csv(csv_path, index=False)
+
+
 def calculate_clip_score(
     model: CLIPModel,
     processor: CLIPProcessor,
@@ -183,8 +208,8 @@ def benchmark(args: argparse.Namespace) -> None:
     results_dir = Path(args.result_root) / args.model
     results_dir.mkdir(parents=True, exist_ok=True)
 
-    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}")
-    image_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+generated"
+    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}")
+    image_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}+generated"
     image_dir.mkdir(exist_ok=True)
 
     arg_out_filename = f"{benchmark_name}+args.json"
@@ -222,27 +247,42 @@ def benchmark(args: argparse.Namespace) -> None:
         ResultIntermediateBatched(prompts=batch) for batch in batched_prompts
     ]
 
+    pmon = None
+    pmon_chan = None
+    if args.monitor_power:
+        pmon_chan = mp.SimpleQueue()
+        pmon = mp.get_context("spawn").Process(
+            target=power_monitor,
+            args=(f"{benchmark_name}+power.csv", [g.gpu_index for g in zeus_monitor.gpus.gpus], pmon_chan),
+        )
+        pmon.start()
+
     torch.cuda.reset_peak_memory_stats(device="cuda:0")
-    zeus_monitor.begin_window("benchmark", sync_cuda=False)
+    zeus_monitor.begin_window("benchmark", sync_execution=False)
 
     for ind, intermediate in enumerate(intermediates):
         print(f"Batch {ind + 1}/{len(intermediates)}")
-        zeus_monitor.begin_window("batch", sync_cuda=False)
+        zeus_monitor.begin_window("batch", sync_execution=False)
         images = pipeline(
             intermediate.prompts,
             generator=rng,
             num_inference_steps=args.num_inference_steps,
             output_type="np",
         ).images
-        batch_measurements = zeus_monitor.end_window("batch", sync_cuda=False)
+        batch_measurements = zeus_monitor.end_window("batch", sync_execution=False)
 
         intermediate.images = images
         intermediate.batch_latency = batch_measurements.time
         intermediate.batch_energy = batch_measurements.total_energy
 
-    measurements = zeus_monitor.end_window("benchmark", sync_cuda=False)
+    measurements = zeus_monitor.end_window("benchmark", sync_execution=False)
     peak_memory = torch.cuda.max_memory_allocated(device="cuda:0")
 
+    if pmon is not None and pmon_chan is not None:
+        pmon_chan.put("stop")
+        pmon.join(timeout=5.0)
+        pmon.terminate()
+
     # Scale images to [0, 256] and convert to uint8
     for intermediate in intermediates:
         intermediate.images = (intermediate.images * 255).astype("uint8")
@@ -292,9 +332,9 @@ def benchmark(args: argparse.Namespace) -> None:
         model=args.model,
         num_parameters=count_parameters(pipeline),
         gpu_model=gpu_model,
-        num_inference_steps=args.num_inference_steps,
        power_limit=args.power_limit,
         batch_size=args.batch_size,
+        num_inference_steps=args.num_inference_steps,
         num_prompts=num_prompts,
         average_clip_score=sum(r.clip_score for r in results) / len(results),
         total_runtime=measurements.time,
@@ -326,6 +366,7 @@ if __name__ == "__main__":
     parser.add_argument("--image-save-every", type=int, default=10, help="Save images to file every N prompts.")
     parser.add_argument("--seed", type=int, default=0, help="The seed to use for the RNG.")
     parser.add_argument("--huggingface-token", type=str, help="The HuggingFace token to use.")
+    parser.add_argument("--monitor-power", default=False, action="store_true", help="Whether to monitor power over time.")
     args = parser.parse_args()
 
     benchmark(args)
diff --git a/benchmark/diffusion/text-to-image/scripts/benchmark_one_model.py b/benchmark/diffusion/text-to-image/scripts/benchmark_one_model.py
index 26deb32d115092779fe96bc4cc968ba3be3ac1e0..a71e245aac763317745303ea6052cf3a50da761b 100644
--- a/benchmark/diffusion/text-to-image/scripts/benchmark_one_model.py
+++ b/benchmark/diffusion/text-to-image/scripts/benchmark_one_model.py
@@ -28,12 +28,13 @@ def main(args: argparse.Namespace) -> None:
     print_and_write(outfile, f"Benchmarking {args.model}\n")
     print_and_write(outfile, f"Batch sizes: {args.batch_sizes}\n")
     print_and_write(outfile, f"Power limits: {args.power_limits}\n")
+    print_and_write(outfile, f"Number of inference steps: {args.num_inference_steps}\n")
 
     for batch_size in args.batch_sizes:
         for power_limit in args.power_limits:
-            print_and_write(outfile, f"{batch_size=}, {power_limit=}\n", flush=True)
-            with subprocess.Popen(
-                args=[
+            for num_inference_steps in args.num_inference_steps:
+                print_and_write(outfile, f"{batch_size=}, {power_limit=}, {num_inference_steps=}\n", flush=True)
+                cmd=[
                     "docker", "run",
                     "--gpus", '"device=' + ','.join(args.gpu_ids) + '"',
                     "--cap-add", "SYS_ADMIN",
@@ -48,22 +49,21 @@ def main(args: argparse.Namespace) -> None:
                     "--power-limit", power_limit,
                     "--model", args.model,
                     "--huggingface-token", hf_token,
-                    "--num-inference-steps", "25",
-                ],
-                stdout=subprocess.PIPE,
-                stderr=subprocess.STDOUT,
-                text=True,
-            ) as proc:
-                if proc.stdout:
-                    i = 0
-                    for line in proc.stdout:
-                        print_and_write(outfile, line, flush=i % 50 == 0)
-                        i += 1
+                    "--num-inference-steps", num_inference_steps,
+                ]
+                if args.monitor_power:
+                    cmd.append("--monitor-power")
+                with subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc:
+                    if proc.stdout:
+                        i = 0
+                        for line in proc.stdout:
+                            print_and_write(outfile, line, flush=i % 50 == 0)
+                            i += 1
 
-            # If proc exited with non-zero status, it's probably an OOM.
-            # Move on to the next batch size.
-            if proc.returncode != 0:
-                break
+                # If proc exited with non-zero status, it's probably an OOM.
+                # Move on to the next batch size.
+                if proc.returncode != 0:
+                    break
@@ -74,5 +74,7 @@ if __name__ == "__main__":
     parser.add_argument("--gpu-ids", type=str, nargs="+", help="GPU IDs to use")
     parser.add_argument("--batch-sizes", type=str, nargs="+", default=["8", "4", "2", "1"], help="Batch sizes to benchmark")
     parser.add_argument("--power-limits", type=str, nargs="+", default=["400", "300", "200"], help="Power limits to benchmark")
+    parser.add_argument("--num-inference-steps", type=str, nargs="+", default=["1", "2", "4", "8", "16", "25", "30", "40", "50"], help="Number of inference steps to run")
+    parser.add_argument("--monitor-power", default=False, action="store_true", help="Whether to monitor power over time.")
     args = parser.parse_args()
     main(args)
diff --git a/benchmark/diffusion/text-to-video/pegasus/A100/queue_1gpu.yaml b/benchmark/diffusion/text-to-video/pegasus/A100/queue_1gpu.yaml
index 32921e5bff8f9298f40019b8952ef0d688a6d2ec..182e9d041cfef87c0350d082fe21588dfcefc1fc 100644
--- a/benchmark/diffusion/text-to-video/pegasus/A100/queue_1gpu.yaml
+++ b/benchmark/diffusion/text-to-video/pegasus/A100/queue_1gpu.yaml
@@ -1,5 +1,5 @@
 - command:
-    - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 16 8 4 2 1 --power-limits 400 --num-inference-steps 25 --num-frames 16"
+    - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 32 16 8 4 2 1 --power-limits 400 --num-inference-steps 1 2 4 8 16 25 30 40 50 --num-frames 16"
   model:
     - ali-vilab/text-to-video-ms-1.7b
     - guoyww/animatediff-motion-adapter-v1-5-3
diff --git a/benchmark/diffusion/text-to-video/pegasus/H100/queue_1gpu.yaml b/benchmark/diffusion/text-to-video/pegasus/H100/queue_1gpu.yaml
index 318690cd6a00096f58e7a23a7630d561639f2777..a7f7520b548bec37f643a0fc63c1c987709ca66d 100644
--- a/benchmark/diffusion/text-to-video/pegasus/H100/queue_1gpu.yaml
+++ b/benchmark/diffusion/text-to-video/pegasus/H100/queue_1gpu.yaml
@@ -1,5 +1,5 @@
 - command:
-    - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_700.json --gpu-ids {{ gpu }} --batch-sizes 64 32 16 8 4 2 1 --power-limits 700 --num-inference-steps 25 --num-frames 16"
+    - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 32 16 8 4 2 1 --power-limits 700 --num-inference-steps 1 2 4 8 16 25 30 40 50 --num-frames 16"
   model:
     - ali-vilab/text-to-video-ms-1.7b
     - guoyww/animatediff-motion-adapter-v1-5-3
diff --git a/benchmark/diffusion/text-to-video/scripts/aggregate_leaderboard_models.py b/benchmark/diffusion/text-to-video/scripts/aggregate_leaderboard_models.py
index fb4865e75149bbe01526f44fae22ae86771536bc..066fa498f28d749ec71d2d996037303403d30f81 100644
--- a/benchmark/diffusion/text-to-video/scripts/aggregate_leaderboard_models.py
+++ b/benchmark/diffusion/text-to-video/scripts/aggregate_leaderboard_models.py
@@ -15,7 +15,7 @@ def main(results_dir: Path, output_file: Path) -> None:
     for model_dir in sorted(glob(f"{results_dir}/*/*")):
         model_name = "/".join(model_dir.split("/")[-2:])
         print(f" {model_name}")
-        result_file_cand = glob(f"{model_dir}/bs1+*+results.json")
+        result_file_cand = glob(f"{model_dir}/bs1+*+steps25+results.json")
         assert len(result_file_cand) == 1, model_name
         results_data = json.load(open(result_file_cand[0]))
         denosing_module_name = "unet" if "unet" in results_data["num_parameters"] else "transformer"
@@ -24,6 +24,7 @@ def main(results_dir: Path, output_file: Path) -> None:
             nickname=model_name.split("/")[-1].replace("-", " ").title(),
             total_params=raw_params_to_readable(sum(results_data["num_parameters"].values())),
             denoising_params=raw_params_to_readable(results_data["num_parameters"][denosing_module_name]),
+            resolution="NA",
         )
         assert model_name not in models
         models[model_name] = model_info
diff --git a/benchmark/diffusion/text-to-video/scripts/benchmark_one_datapoint.py b/benchmark/diffusion/text-to-video/scripts/benchmark_one_datapoint.py
index 7e75928c594307a791beb1168e820316fe8576e7..aeca806c42a368583d11b5d88251a19406a6234e 100644
--- a/benchmark/diffusion/text-to-video/scripts/benchmark_one_datapoint.py
+++ b/benchmark/diffusion/text-to-video/scripts/benchmark_one_datapoint.py
@@ -32,10 +32,10 @@ class Results:
     model: str
     num_parameters: dict[str, int]
     gpu_model: str
-    num_inference_steps: int
-    num_frames: int
     power_limit: int
     batch_size: int
+    num_inference_steps: int
+    num_frames: int
     num_prompts: int
     total_runtime: float = 0.0
     total_energy: float = 0.0
@@ -119,7 +119,7 @@ def load_text_prompts(
     Returns:
         Total number of prompts and a list of batches of prompts.
""" - dataset = json.load(open(path))["caption"] + dataset = json.load(open(path))["caption"] * 10 if num_batches is not None: if len(dataset) < num_batches * batch_size: raise ValueError("Dataset is too small for the given number of batches.") @@ -151,8 +151,8 @@ def benchmark(args: argparse.Namespace) -> None: results_dir = Path(args.result_root) / args.model results_dir.mkdir(parents=True, exist_ok=True) - benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}") - video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+generated" + benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}") + video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}+generated" video_dir.mkdir(exist_ok=True) arg_out_filename = f"{benchmark_name}+args.json" @@ -190,7 +190,7 @@ def benchmark(args: argparse.Namespace) -> None: ] torch.cuda.reset_peak_memory_stats(device="cuda:0") - zeus_monitor.begin_window("benchmark", sync_cuda=False) + zeus_monitor.begin_window("benchmark", sync_execution=False) # Build common parameter dict for all batches params: dict[str, Any] = dict( @@ -208,15 +208,15 @@ def benchmark(args: argparse.Namespace) -> None: params["prompt"] = intermediate.prompts - zeus_monitor.begin_window("batch", sync_cuda=False) + zeus_monitor.begin_window("batch", sync_execution=False) frames = pipeline(**params).frames - batch_measurements = zeus_monitor.end_window("batch", sync_cuda=False) + batch_measurements = zeus_monitor.end_window("batch", sync_execution=False) intermediate.frames = frames intermediate.batch_latency = batch_measurements.time intermediate.batch_energy = batch_measurements.total_energy - measurements = zeus_monitor.end_window("benchmark", sync_cuda=False) + measurements = zeus_monitor.end_window("benchmark", sync_execution=False) peak_memory = torch.cuda.max_memory_allocated(device="cuda:0") results: list[Result] = [] @@ -253,10 +253,10 @@ def benchmark(args: argparse.Namespace) -> None: model=args.model, num_parameters=count_parameters(pipeline), gpu_model=gpu_model, - num_inference_steps=args.num_inference_steps, - num_frames=args.num_frames, power_limit=args.power_limit, batch_size=args.batch_size, + num_inference_steps=args.num_inference_steps, + num_frames=args.num_frames, num_prompts=num_prompts, total_runtime=measurements.time, total_energy=measurements.total_energy, diff --git a/benchmark/diffusion/text-to-video/scripts/benchmark_one_model.py b/benchmark/diffusion/text-to-video/scripts/benchmark_one_model.py index 781892f6c53cff4380a3eec738c8b55cddf3df61..96685157019080a0ac35509645076aa1e925a1b4 100644 --- a/benchmark/diffusion/text-to-video/scripts/benchmark_one_model.py +++ b/benchmark/diffusion/text-to-video/scripts/benchmark_one_model.py @@ -28,44 +28,46 @@ def main(args: argparse.Namespace) -> None: print_and_write(outfile, f"Benchmarking {args.model}\n") print_and_write(outfile, f"Batch sizes: {args.batch_sizes}\n") print_and_write(outfile, f"Power limits: {args.power_limits}\n") + print_and_write(outfile, f"Number of inference steps: {args.num_inference_steps}\n") for batch_size in args.batch_sizes: for power_limit in args.power_limits: - print_and_write(outfile, f"{batch_size=}, {power_limit=}\n", flush=True) - with subprocess.Popen( - args=[ - "docker", "run", - "--gpus", '"device=' + ','.join(args.gpu_ids) + '"', - "--cap-add", "SYS_ADMIN", - "--name", f"leaderboard-t2v-{''.join(args.gpu_ids)}", - "--rm", - "-v", 
"/data/leaderboard/hfcache:/root/.cache/huggingface", - "-v", f"{os.getcwd()}:/workspace/text-to-video", - "mlenergy/leaderboard:diffusion-t2v", - "--result-root", args.result_root, - "--batch-size", batch_size, - "--num-batches", "10", - "--power-limit", power_limit, - "--model", args.model, - "--dataset-path", args.dataset_path, - "--huggingface-token", hf_token, - "--num-inference-steps", args.num_inference_steps, - "--num-frames", args.num_frames, - ], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) as proc: - if proc.stdout: - i = 0 - for line in proc.stdout: - print_and_write(outfile, line, flush=i % 50 == 0) - i += 1 + for num_inference_steps in args.num_inference_steps: + print_and_write(outfile, f"{batch_size=}, {power_limit=}, {num_inference_steps=}\n", flush=True) + with subprocess.Popen( + args=[ + "docker", "run", + "--gpus", '"device=' + ','.join(args.gpu_ids) + '"', + "--cap-add", "SYS_ADMIN", + "--name", f"leaderboard-t2v-{''.join(args.gpu_ids)}", + "--rm", + "-v", "/data/leaderboard/hfcache:/root/.cache/huggingface", + "-v", f"{os.getcwd()}:/workspace/text-to-video", + "mlenergy/leaderboard:diffusion-t2v", + "--result-root", args.result_root, + "--batch-size", batch_size, + "--num-batches", "10", + "--power-limit", power_limit, + "--model", args.model, + "--dataset-path", args.dataset_path, + "--huggingface-token", hf_token, + "--num-inference-steps", num_inference_steps, + "--num-frames", args.num_frames, + ], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) as proc: + if proc.stdout: + i = 0 + for line in proc.stdout: + print_and_write(outfile, line, flush=i % 50 == 0) + i += 1 - # If proc exited with non-zero status, it's probably an OOM. - # Move on to the next batch size. - if proc.returncode != 0: - break + # If proc exited with non-zero status, it's probably an OOM. + # Move on to the next batch size. 
+                if proc.returncode != 0:
+                    break
@@ -76,7 +78,7 @@ if __name__ == "__main__":
     parser.add_argument("--gpu-ids", type=str, nargs="+", help="GPU IDs to use")
     parser.add_argument("--batch-sizes", type=str, nargs="+", default=["8", "4", "2", "1"], help="Batch sizes to benchmark")
     parser.add_argument("--power-limits", type=str, nargs="+", default=["400", "300", "200"], help="Power limits to benchmark")
-    parser.add_argument("--num-inference-steps", type=str, required=True, help="Number of denoising steps")
+    parser.add_argument("--num-inference-steps", type=str, nargs="+", default=["1", "2", "4", "8", "16", "25", "30", "40", "50"], help="Number of denoising steps")
    parser.add_argument("--num-frames", type=str, required=True, help="Number of frames to generate")
     parser.add_argument("--dataset-path", type=str, help="Path to the dataset JSON file.")
     args = parser.parse_args()
diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json b/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json
index f4ef6d2ea49a7b01ddf4b79cea77e55f25c02108..33eb52e6ac37b40577dca425114d6c5603910397 100644
--- a/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json
+++ b/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json
@@ -1,8 +1,8 @@
 {
     "Model": "ali-vilab/i2vgen-xl",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/video (J)": 16348.217100000009,
-    "Batch latency (s)": 44.41898396015167,
+    "Energy/video (J)": 16915.850124999997,
+    "Batch latency (s)": 46.14208295941353,
     "Batch size": 1,
     "Denoising steps": 25,
     "Frames": 16
diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json b/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json
index db07d02b403bc18bf465c9fd832836050b5c90f2..1b42eba83d65638ac413a6b558ec1368c78efe3f 100644
--- a/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json
+++ b/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json
@@ -1,8 +1,8 @@
 {
     "Model": "ali-vilab/i2vgen-xl",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/video (J)": 16091.048200000008,
-    "Batch latency (s)": 85.8618726491928,
+    "Energy/video (J)": 16496.045437499997,
+    "Batch latency (s)": 89.03019031882286,
     "Batch size": 2,
     "Denoising steps": 25,
     "Frames": 16
diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json
index 60ef57cdf7254e8fa5734b926d5af40f3ba92bec..db242dfaa5a18ae46ff277d33d45b802c968af26 100644
--- a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json
+++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json
@@ -1,8 +1,8 @@
 {
     "Model": "stabilityai/stable-video-diffusion-img2vid-xt",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/video (J)": 15346.527300000005,
-    "Batch latency (s)": 42.11920440196991,
+    "Energy/video (J)": 15709.767625000095,
+    "Batch latency (s)": 42.397395104169846,
     "Batch size": 1,
     "Denoising steps": 25,
     "Frames": 25
diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json
new file mode 100644
index 0000000000000000000000000000000000000000..c3aaf60c21ed8ceadc1c2c0ea2ba66460cf739ff
--- /dev/null
+++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json
@@ -0,0 +1,9 @@
+{
+    "Model": "stabilityai/stable-video-diffusion-img2vid-xt",
+    "GPU": "NVIDIA A100-SXM4-40GB",
+    "Energy/video (J)": 15291.016625000047,
+    "Batch latency (s)": 82.90474811196327,
+    "Batch size": 2,
+    "Denoising steps": 25,
+    "Frames": 25
+}
\ No newline at end of file
diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json
new file mode 100644
index 0000000000000000000000000000000000000000..139155f4d45251cf78e379aefa423c47393f03d5
--- /dev/null
+++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json
@@ -0,0 +1,9 @@
+{
+    "Model": "stabilityai/stable-video-diffusion-img2vid-xt",
+    "GPU": "NVIDIA A100-SXM4-40GB",
+    "Energy/video (J)": 14761.389999999976,
+    "Batch latency (s)": 120.65004900523594,
+    "Batch size": 3,
+    "Denoising steps": 25,
+    "Frames": 25
+}
\ No newline at end of file
diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json
index 81599ccb61e007d43e9f4aac97c2d56e10723797..280d06e2ec12389808776e5a245f122145ddcf18 100644
--- a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json
+++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json
@@ -1,8 +1,8 @@
 {
     "Model": "stabilityai/stable-video-diffusion-img2vid",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/video (J)": 8803.383999999985,
-    "Batch latency (s)": 24.10387804508209,
+    "Energy/video (J)": 9066.434124999912,
+    "Batch latency (s)": 24.369865357875824,
     "Batch size": 1,
     "Denoising steps": 25,
     "Frames": 14
diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json
new file mode 100644
index 0000000000000000000000000000000000000000..4a338847ab116961161f1b9ade7ce78174891296
--- /dev/null
+++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json
@@ -0,0 +1,9 @@
+{
+    "Model": "stabilityai/stable-video-diffusion-img2vid",
+    "GPU": "NVIDIA A100-SXM4-40GB",
+    "Energy/video (J)": 8835.22312499996,
+    "Batch latency (s)": 47.65615049004555,
+    "Batch size": 2,
+    "Denoising steps": 25,
+    "Frames": 14
+}
\ No newline at end of file
diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json
new file mode 100644
index 0000000000000000000000000000000000000000..69fe154f614f1f2c56bdca9b9107379a128ebf7d
--- /dev/null
+++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json
@@ -0,0 +1,9 @@
+{
+    "Model": "stabilityai/stable-video-diffusion-img2vid",
+    "GPU": "NVIDIA A100-SXM4-40GB",
+    "Energy/video (J)": 8683.536285714292,
+    "Batch latency (s)": 70.55723374230521,
+    "Batch size": 3,
+    "Denoising steps": 25,
+    "Frames": 14
+}
\ No newline at end of file
diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json b/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json
index 458f226a0f3e52e6cadb75268479bc5ff6ee4d87..24e16e99c9d008d3f875864e53f6a89fc7d25edc 100644
--- a/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json
+++ b/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json
@@ -1,8 +1,8 @@
 {
     "Model": "ali-vilab/i2vgen-xl",
     "GPU": "NVIDIA H100 80GB HBM3",
-    "Energy/video (J)": 14222.658400000026,
-    "Batch latency (s)": 22.950254821777342,
+    "Energy/video (J)": 14867.419125000015,
+    "Batch latency (s)": 23.717748790979385,
     "Batch size": 1,
     "Denoising steps": 25,
     "Frames": 16
diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json b/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json
index b18a013a945093105097bd6f6affb641c1d5f409..182e4da306ecab5c530c3a163fed460c87111469 100644
--- a/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json
+++ b/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json
@@ -1,8 +1,8 @@
 {
     "Model": "ali-vilab/i2vgen-xl",
     "GPU": "NVIDIA H100 80GB HBM3",
-    "Energy/video (J)": 13657.628800000017,
-    "Batch latency (s)": 42.94859471321106,
+    "Energy/video (J)": 14348.508499999996,
+    "Batch latency (s)": 44.71498331427574,
     "Batch size": 2,
     "Denoising steps": 25,
     "Frames": 16
diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json
index d4753953cc184f632dad67dcc450eb87313dea57..c608cac9c7d29e312bc82b121f6544f48f358c51 100644
--- a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json
+++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json
@@ -1,8 +1,8 @@
 {
     "Model": "stabilityai/stable-video-diffusion-img2vid-xt",
     "GPU": "NVIDIA H100 80GB HBM3",
-    "Energy/video (J)": 13366.447699999995,
-    "Batch latency (s)": 20.89660472869873,
+    "Energy/video (J)": 13392.813624999952,
+    "Batch latency (s)": 20.788252592086792,
     "Batch size": 1,
     "Denoising steps": 25,
     "Frames": 25
diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json
new file mode 100644
index 0000000000000000000000000000000000000000..34ffaa49397a0c6ddc23a3eebce32afec8b5c54c
--- /dev/null
+++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json
@@ -0,0 +1,9 @@
+{
+    "Model": "stabilityai/stable-video-diffusion-img2vid-xt",
+    "GPU": "NVIDIA H100 80GB HBM3",
+    "Energy/video (J)": 12901.83275000006,
+    "Batch latency (s)": 39.99498334527016,
+    "Batch size": 2,
+    "Denoising steps": 25,
+    "Frames": 25
+}
\ No newline at end of file
diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json
new file mode 100644
index 0000000000000000000000000000000000000000..7b974a9e2638d5840433708e63fb7db5576e9d66
--- /dev/null
+++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json
@@ -0,0 +1,9 @@
+{
+    "Model": "stabilityai/stable-video-diffusion-img2vid-xt",
+    "GPU": "NVIDIA H100 80GB HBM3",
+    "Energy/video (J)": 12790.552809523862,
+    "Batch latency (s)": 59.380911929266794,
+    "Batch size": 3,
+    "Denoising steps": 25,
+    "Frames": 25
+}
\ No newline at end of file
diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json
index 8c35e8693be2b56cdc77473a043b13856bac6f81..56d76380378074c3a9c8f8d0124296407a37bb21 100644
--- a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json
+++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json
@@ -1,8 +1,8 @@
 {
     "Model": "stabilityai/stable-video-diffusion-img2vid",
     "GPU": "NVIDIA H100 80GB HBM3",
-    "Energy/video (J)": 7550.921200000029,
-    "Batch latency (s)": 12.265265846252442,
+    "Energy/video (J)": 7623.074500000104,
+    "Batch latency (s)": 12.191031396389008,
     "Batch size": 1,
     "Denoising steps": 25,
     "Frames": 14
diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json
new file mode 100644
index 0000000000000000000000000000000000000000..df7125db726096c4e251968a8bd46c1879021f7b
--- /dev/null
+++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json
@@ -0,0 +1,9 @@
+{
+    "Model": "stabilityai/stable-video-diffusion-img2vid",
+    "GPU": "NVIDIA H100 80GB HBM3",
+    "Energy/video (J)": 7416.721437499975,
+    "Batch latency (s)": 23.368041068315506,
+    "Batch size": 2,
+    "Denoising steps": 25,
+    "Frames": 14
+}
\ No newline at end of file
diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json
new file mode 100644
index 0000000000000000000000000000000000000000..863f0b14985d03054cbb4624565e08cea519dcc9
--- /dev/null
+++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json
@@ -0,0 +1,9 @@
+{
+    "Model": "stabilityai/stable-video-diffusion-img2vid",
+    "GPU": "NVIDIA H100 80GB HBM3",
+    "Energy/video (J)": 7354.00133333333,
+    "Batch latency (s)": 34.5100462777274,
+    "Batch size": 3,
+    "Denoising steps": 25,
+    "Frames": 14
+}
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json
index 3d45658bd48cca5286be495eaa29dafcbf5c87f1..a68072aa5226473108a277cd8db04ff1227746ff 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "kandinsky-community/kandinsky-2-2-decoder",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 329.6848999999929,
-    "Batch latency (s)": 1.808762288093567,
+    "Energy/image (J)": 324.06850000005215,
+    "Batch latency (s)": 1.6537675857543945,
     "Batch size": 1,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json
index f2c21cb59415fdb3569b99d6968f899e507af96e..cc971fa7d4e99fa3d7a4050ccabead671224642a 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "kandinsky-community/kandinsky-2-2-decoder",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 174.24531874999812,
-    "Batch latency (s)": 7.439638161659241,
+    "Energy/image (J)": 172.51030000000029,
+    "Batch latency (s)": 7.375234842300415,
     "Batch size": 16,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json
index 3eb9add138b9a22e12444c1969521ceb93945f38..bc08770f86ca3fe6010106d04316cca2eefcd6a2 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "kandinsky-community/kandinsky-2-2-decoder",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 232.40825000000186,
-    "Batch latency (s)": 1.640995717048645,
+    "Energy/image (J)": 230.3378000000026,
+    "Batch latency (s)": 1.5861663103103638,
     "Batch size": 2,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json
new file mode 100644
index 0000000000000000000000000000000000000000..f7233a0bf7693297f4b5abb39069089027be58fe
--- /dev/null
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json
@@ -0,0 +1,8 @@
+{
+    "Model": "kandinsky-community/kandinsky-2-2-decoder",
+    "GPU": "NVIDIA A100-SXM4-40GB",
+    "Energy/image (J)": 163.0797656249997,
+    "Batch latency (s)": 13.998618459701538,
+    "Batch size": 32,
+    "Denoising steps": 25
+}
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json
index 982270484f9b02ef0b757d10c76e5bb6abbac0d8..2d8ce8e8fcfbaf97b6d80052599523456b9c5a25 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "kandinsky-community/kandinsky-2-2-decoder",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 202.8745750000002,
-    "Batch latency (s)": 2.3463359832763673,
+    "Energy/image (J)": 200.16462499999906,
+    "Batch latency (s)": 2.299217462539673,
     "Batch size": 4,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json
index 0c9b9f0d3b95d431780d1d8344bc546f854fee54..37450b5e8dd04ee1b633f3a291ada960461a02cd 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "kandinsky-community/kandinsky-2-2-decoder",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 187.65767500000075,
-    "Batch latency (s)": 4.030062103271485,
+    "Energy/image (J)": 184.9021625000052,
+    "Batch latency (s)": 4.0124232292175295,
     "Batch size": 8,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs1+steps25.json
index 2f24dd0cf90aed2794e48a33e7d93af8955aff92..f04f1d9a28ec3928833820e3740e63af8133a2ea 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs1+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs1+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "kandinsky-community/kandinsky-3",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 914.0325000000187,
-    "Batch latency (s)": 3.1329710721969604,
+    "Energy/image (J)": 930.2532999999821,
+    "Batch latency (s)": 3.0359585523605346,
     "Batch size": 1,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs2+steps25.json
index 64d0878e9117394b69b75d1353b699370cdbb6c6..76aa047e551e6824c493f7c1d8d4ba60a0ed3890 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs2+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs2+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "kandinsky-community/kandinsky-3",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 875.4787999999942,
-    "Batch latency (s)": 5.2747025966644285,
+    "Energy/image (J)": 895.7575500000036,
+    "Batch latency (s)": 5.261959171295166,
     "Batch size": 2,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs1+steps25.json
index f6f9bd54a010b7b310bf5189cbb888dcde9b48ed..0c9ec1ddb68723148d8e53f108efa50e4a786164 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs1+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs1+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "prompthero/openjourney-v4",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 235.712099999981,
-    "Batch latency (s)": 1.0208970069885255,
+    "Energy/image (J)": 227.21699999999254,
+    "Batch latency (s)": 0.9210062503814698,
     "Batch size": 1,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs16+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs16+steps25.json
index aad1d2e7d94098039bc6c267df5d63b31a6115da..30469a413ef3ac5689a6a15778d90860b8babdfe 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs16+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs16+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "prompthero/openjourney-v4",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 157.4185124999989,
-    "Batch latency (s)": 6.579187059402466,
+    "Energy/image (J)": 156.51368749999673,
+    "Batch latency (s)": 6.559858226776123,
     "Batch size": 16,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs2+steps25.json
index 28e86a3658faa30abcb99808eb7c8bef21728f29..ca6fd195d3485719170029a0b6a8efd80d99793c 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs2+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs2+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "prompthero/openjourney-v4",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 196.30995000000112,
-    "Batch latency (s)": 1.1641260623931884,
+    "Energy/image (J)": 188.78500000000932,
+    "Batch latency (s)": 1.1187455892562865,
     "Batch size": 2,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs32+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs32+steps25.json
new file mode 100644
index 0000000000000000000000000000000000000000..6be148d620c6a01830ab05ec0b1d60ac2dfa0451
--- /dev/null
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs32+steps25.json
@@ -0,0 +1,8 @@
+{
+    "Model": "prompthero/openjourney-v4",
+    "GPU": "NVIDIA A100-SXM4-40GB",
+    "Energy/image (J)": 154.23499999999768,
+    "Batch latency (s)": 12.850126147270203,
+    "Batch size": 32,
+    "Denoising steps": 25
+}
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs4+steps25.json
index 4eb8c054a40ba56e929722be9a4e9eb5b03eac8e..03b555864cd46af75d3edd691648ee5a0221a354 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs4+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs4+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "prompthero/openjourney-v4",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 177.43804999999702,
-    "Batch latency (s)": 1.884285831451416,
+    "Energy/image (J)": 175.33082500000017,
+    "Batch latency (s)": 1.8664743423461914,
     "Batch size": 4,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs64+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs64+steps25.json
new file mode 100644
index 0000000000000000000000000000000000000000..ef6d36f5ff848abfbd1d13c8e9f54e450a35ebe9
--- /dev/null
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs64+steps25.json
@@ -0,0 +1,8 @@
+{
+    "Model": "prompthero/openjourney-v4",
+    "GPU": "NVIDIA A100-SXM4-40GB",
+    "Energy/image (J)": 150.57691875000017,
+    "Batch latency (s)": 25.000647592544556,
+    "Batch size": 64,
+    "Denoising steps": 25
+}
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs8+steps25.json
index e101e6efca69cdcd72e9f8109a0be4ee202cb746..b91c18752ebbd15bed71444a20909614c78f573c 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs8+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs8+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "prompthero/openjourney-v4",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 162.92667500000098,
-    "Batch latency (s)": 3.505508875846863,
+    "Energy/image (J)": 163.7534500000067,
+    "Batch latency (s)": 3.423132634162903,
     "Batch size": 8,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs1+steps25.json
index 5021be5df56b6da3d8e9af91c1503817589b5e83..b5beef62f56c91f499b1663e2dac3763c3a40cf7 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs1+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs1+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "segmind/SSD-1B",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 717.2012000000104,
-    "Batch latency (s)": 1.9508831262588502,
+    "Energy/image (J)": 745.7899999999441,
+    "Batch latency (s)": 1.9644724607467652,
     "Batch size": 1,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs2+steps25.json
index 4ea6ea5a54bdb2844cd905929723c0edc32cf0ea..d21b87b639862934912b8a59ad37539779cc874e 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs2+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs2+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "segmind/SSD-1B",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 681.1273499999894,
-    "Batch latency (s)": 3.633535361289978,
+    "Energy/image (J)": 700.4580500000156,
+    "Batch latency (s)": 3.6897377252578734,
     "Batch size": 2,
     "Denoising steps": 25
 }
\ No newline at end of file
diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs4+steps25.json
index c9084a067979c005901f7a14dea1cc8165fbe354..be83817c742939bd1a07c9c0b22fc32b3e612c2c 100644
--- a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs4+steps25.json
+++ b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs4+steps25.json
@@ -1,8 +1,8 @@
 {
     "Model": "segmind/SSD-1B",
     "GPU": "NVIDIA A100-SXM4-40GB",
-    "Energy/image (J)": 672.6853499999968,
-    "Batch latency (s)": 7.193562436103821,
+    "Energy/image (J)": 688.6121250000084,
+ "Batch latency (s)": 7.168970584869385, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs8+steps25.json index 86c82bd1ccde371c891c20dd828b57a35c77a579..a629442d234fc93c479e153756b188bbe42b654d 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 688.7974999999976, - "Batch latency (s)": 14.561952710151672, + "Energy/image (J)": 697.7047875000047, + "Batch latency (s)": 14.703205680847168, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs1+steps25.json index fb90e6d05dd3b213e6ba65fb572a08380ddc26e6..df948b17288d3ddb97c6ea68e6b5de63531f4717 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 372.9794999999925, - "Batch latency (s)": 2.0116413831710815, + "Energy/image (J)": 414.02970000002534, + "Batch latency (s)": 2.0992990016937254, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs16+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs16+steps25.json index 8c4ec6ad6eda4df8ae954df9db19b9eca551816b..402ce40745cafc6662fcc6dad653e04a058ace48 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs16+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 234.4104250000004, - "Batch latency (s)": 9.666603064537048, + "Energy/image (J)": 242.709375, + "Batch latency (s)": 9.941586756706238, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs2+steps25.json index 8dd4789c356459d4b8ef0d39513a3372ad8200fe..95f0f835453d2898057dc8dd6c85db824a7dbe9e 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 289.37170000000623, - "Batch latency (s)": 2.0955519914627074, + "Energy/image (J)": 343.14144999999553, + "Batch latency (s)": 2.6075665235519407, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs4+steps25.json index 8f97b2d79bc74faba25d6c719f62ff7b48090ce3..b6565c81731020d4d9c873c4bb6a108fdd299df5 100644 --- 
a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs4+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 249.84987499999698, - "Batch latency (s)": 2.723399114608765, + "Energy/image (J)": 260.6321250000037, + "Batch latency (s)": 2.6943087577819824, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs8+steps25.json index b5914d7fce59b56bd38af9edc6233fc5dce2e984..1e3b03bd6cf605f4add095f232ba53c508fd6567 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 231.1957374999998, - "Batch latency (s)": 4.824169707298279, + "Energy/image (J)": 239.71523749999469, + "Batch latency (s)": 4.928032088279724, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs1+steps25.json index a3f9bd7a89cae63e225656ecdf0e880eb1a584d7..54b6a6147ed46aaedda4e472f6cae700f6b04fe3 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 437.2461000000127, - "Batch latency (s)": 1.1940542221069337, + "Energy/image (J)": 431.7285000000149, + "Batch latency (s)": 1.1978053092956542, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs16+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs16+steps25.json index 0e2147c3b7b4fd9fb87e2f76ab099012c63d4a55..72501626728d910fc3942665876f7b661e6ad6aa 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs16+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 345.6643937500019, - "Batch latency (s)": 14.331708741188049, + "Energy/image (J)": 349.6556749999989, + "Batch latency (s)": 14.506024074554443, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs2+steps25.json index b22ad0726ff8f2eb953839b29b3f521b9df202ef..cdbff65c9fd8b0feaeb9c587f0f1c5202c58295e 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image 
(J)": 397.6420500000007, - "Batch latency (s)": 2.0922271490097044, + "Energy/image (J)": 397.4403999999631, + "Batch latency (s)": 2.0987526416778564, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs32+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs32+steps25.json new file mode 100644 index 0000000000000000000000000000000000000000..97eb38ff4cbd5324e66be75d01d27e9892a92c56 --- /dev/null +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs32+steps25.json @@ -0,0 +1,8 @@ +{ + "Model": "stabilityai/stable-diffusion-2-1", + "GPU": "NVIDIA A100-SXM4-40GB", + "Energy/image (J)": 344.0007781249995, + "Batch latency (s)": 28.606084370613097, + "Batch size": 32, + "Denoising steps": 25 +} \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs4+steps25.json index 1037a7f86016c4463f43e92bdf752ffc4681fdbe..9b73041e0e49c517e8c106c9f2ba8fc081696045 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs4+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 369.5769999999902, - "Batch latency (s)": 3.833626127243042, + "Energy/image (J)": 370.8419500000076, + "Batch latency (s)": 3.870126795768738, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs8+steps25.json index 58ffc435e08f32dfd945a1dc1363cad58719bbf2..0ca5b1a72c7945b60310b1e5dcf07bc9449af926 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 352.8435999999987, - "Batch latency (s)": 7.322762203216553, + "Energy/image (J)": 357.5101125000045, + "Batch latency (s)": 7.4118963241577145, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json index 0f953f76304fa307054156127da96b5b496854ae..f99e6da6550f800fb4160de6ca96eddbb1ed340d 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1447.3444000000134, - "Batch latency (s)": 3.7704660654067994, + "Energy/image (J)": 1457.9797000000253, + "Batch latency (s)": 3.7812204360961914, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git 
a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json index a9b8b2735c55c5bc24a518bfafe81354fd575658..bc363cc340ec2c11177919516e1b16fa18cbe073 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1398.8784500000068, - "Batch latency (s)": 7.196404767036438, + "Energy/image (J)": 1417.0265999999829, + "Batch latency (s)": 7.296204352378846, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json index 59b084eb0a4b56e02664e1b971bbf2dd2b94520d..738121c7f889d9416ff36a9d821487d3aaf4e0c0 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1340.9431999999913, - "Batch latency (s)": 13.802976179122926, + "Energy/image (J)": 1376.6305249999975, + "Batch latency (s)": 14.180507826805115, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json index 2be9b998b57efff060b4055fd396245063ca4357..c4e504d8978c610fb22885b6e881498cbf6fc3ac 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1340.4551500000002, - "Batch latency (s)": 27.784875440597535, + "Energy/image (J)": 1353.8191374999938, + "Batch latency (s)": 28.03936712741852, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json index dd1047d220ac41adfc34782d57cc79efcf1f35b6..a5f4b228e981ca291f17d97309006600c0f29702 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1064.0938000000083, - "Batch latency (s)": 2.820074677467346, + "Energy/image (J)": 1104.114100000076, + "Batch latency (s)": 2.8582629680633547, "Batch size": 1, "Denoising steps": 25 } \ 
No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json index 2201d304ece2f8aaac62ede754e20ebbb51604e4..c2cca9a1fb9d74447fccd553486143a65df82a18 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 994.3445500000147, - "Batch latency (s)": 5.212948894500732, + "Energy/image (J)": 1023.2370500000194, + "Batch latency (s)": 5.309733629226685, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json index 9defeb3b86ab1f46b36520610f20de611509edb2..626ce8fca820b08bd1ef718c421b39661105cb7b 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1004.3355749999988, - "Batch latency (s)": 10.405498218536376, + "Energy/image (J)": 1028.9778500000016, + "Batch latency (s)": 10.595553398132324, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json index fa520af5eb8c25b6efcc087f60f332cb22308721..6e9c732ca8200614419df079d0d1d74aee928c09 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1010.2810624999984, - "Batch latency (s)": 21.15771155357361, + "Energy/image (J)": 1039.4479500000016, + "Batch latency (s)": 21.538306522369385, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json index 75b4f1968262112a9ff0bee30348b4aed0d891de..c7ea97a178d616188585d138830401d695063aff 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 329.0941999999806, - "Batch latency (s)": 1.3033519506454467, + "Energy/image (J)": 316.22510000001637, + "Batch latency (s)": 1.2899317026138306, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git 
a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json index c016517407df85e2acd4ff28a6137720116a38f6..98ce74a2eab446438405c41948341f5e8d54e368 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 145.14649999999966, - "Batch latency (s)": 3.6811126232147218, + "Energy/image (J)": 145.30115625000326, + "Batch latency (s)": 3.6781134366989137, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json index 69e577ab7005e48a18ecd513102a08c6985e7a3a..82e2c3d44866296c4472a73a969716fc9bd22086 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 222.06345000001602, - "Batch latency (s)": 1.374358630180359, + "Energy/image (J)": 221.32535000001081, + "Batch latency (s)": 1.3821177244186402, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json index 76375f5667db458a701e7c9c52188942af9f236a..a31efe8f51d79695f1c60d62c7b63b17f3aba46c 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 136.49577499999867, - "Batch latency (s)": 6.730837726593018, + "Energy/image (J)": 135.961328125, + "Batch latency (s)": 6.7253422975540165, "Batch size": 32, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json index 3f3fb25aa3911e6ef8a803ed8b9b044539eb4d40..940447b68d113c85245331839356d9925e726694 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 177.44447500000243, - "Batch latency (s)": 1.5876455783843995, + "Energy/image (J)": 173.91712500001304, + "Batch latency (s)": 1.573417329788208, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB 
HBM3/kandinsky-community/kandinsky-2-2-decoder/bs64+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs64+steps25.json index 91ad5561b237d8c7604842c6238cbc57a2af7b3c..d7e4c22d9d03fc654139b8bb650a155bb72c234c 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs64+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs64+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 131.1387625000003, - "Batch latency (s)": 12.868691635131835, + "Energy/image (J)": 130.85997968750016, + "Batch latency (s)": 12.837305545806885, "Batch size": 64, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json index 1eef640025c26148ff227c84c06986f390bf2ef3..4ed393fc51448503f5e14739d7c9d7729dd68c9b 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 155.2100000000035, - "Batch latency (s)": 2.180539679527283, + "Energy/image (J)": 155.19958750000222, + "Batch latency (s)": 2.1782283782958984, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs1+steps25.json index 842085c24a525a55d66d715457b0153b47f1968f..4b74487485fbcf6cea15e59928cc0776483f4ee3 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 860.5385999999941, - "Batch latency (s)": 2.3725571155548097, + "Energy/image (J)": 848.3177000001073, + "Batch latency (s)": 2.351728391647339, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs2+steps25.json index fe422005e3a7348f07da6ba47b9b4562783cc5db..a7133793d7e1f2cd8b85fccdf3730977730a17d3 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 727.0428500000155, - "Batch latency (s)": 2.8992382049560548, + "Energy/image (J)": 716.0031000000424, + "Batch latency (s)": 2.8724076986312865, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB 
HBM3/kandinsky-community/kandinsky-3/bs4+steps25.json index 58835db28d7431884da5330f7dd635a3973208e2..de7637215f1110a86651168051a49b71d029aafe 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 677.2662250000052, - "Batch latency (s)": 4.657700920104981, + "Energy/image (J)": 682.126500000013, + "Batch latency (s)": 4.641835880279541, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs8+steps25.json index f79ab3f504605e1e028e3fbb4c7a239d7862966d..3fcf25b57c5a0d244272dd52e5664054953d9bb2 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 644.7600749999983, - "Batch latency (s)": 8.445084881782531, + "Energy/image (J)": 655.685175000003, + "Batch latency (s)": 8.430445384979247, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs1+steps25.json index 86a5cf87b9f2babb489cb15c3801671703fdc4dd..d4a850341656bde8c23846258a2e44d64e88cdbc 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 243.28739999998362, - "Batch latency (s)": 0.855378270149231, + "Energy/image (J)": 232.24340000003576, + "Batch latency (s)": 0.8665567636489868, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs16+steps25.json index c7d98190068be7dbc0a22dd31ea14bf705fe7e71..1a83837ebe4dafb06c81b741b906ba2d8ce23faf 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 136.89135625000054, - "Batch latency (s)": 3.2747750997543337, + "Energy/image (J)": 135.34282499999972, + "Batch latency (s)": 3.239760994911194, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs2+steps25.json index 25320351f99a0afc3cee6ed4b1c37a419f28efd2..e97f5fa0e5cc2956ffd0972f900ee9f03b2711f9 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB 
HBM3/prompthero/openjourney-v4/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 182.5311500000069, - "Batch latency (s)": 0.9119171619415283, + "Energy/image (J)": 175.78234999999404, + "Batch latency (s)": 0.9105970144271851, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs32+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs32+steps25.json index dbfd64f7e6c5930ee65cbfd821e5569613637860..af8e7955374b3680b80b04361f25d8f4ea560389 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs32+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs32+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 132.28641249999927, - "Batch latency (s)": 6.297622609138489, + "Energy/image (J)": 130.1285124999995, + "Batch latency (s)": 6.225514149665832, "Batch size": 32, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs4+steps25.json index 0622229b527595b84a3e5d37cdd0ca429802d86b..c3a581eb2952fefbc76a2a37cf27c5b03c090254 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 154.80212499999908, - "Batch latency (s)": 1.0733203649520875, + "Energy/image (J)": 148.74832499998155, + "Batch latency (s)": 1.1036246299743653, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs64+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs64+steps25.json index 7e61eae0242c14e4fa3359a5e913cec3a2624ba2..f9b0d730d153c050c5b94aeb28864dcd5ccbcbee 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs64+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs64+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 128.81374062500035, - "Batch latency (s)": 12.13134765625, + "Energy/image (J)": 128.0899343750003, + "Batch latency (s)": 12.070884728431702, "Batch size": 64, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs8+steps25.json index b14b29f0a73ebc6e2bcb24fecb41a19c1d235af3..4367e0953f92b0d9c5d93f880d78713812547148 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 143.28031249999768, - "Batch latency (s)": 1.7443701505661011, + "Energy/image (J)": 139.91437499999302, + "Batch latency (s)": 1.74277982711792, "Batch size": 8, "Denoising steps": 25 } \ No newline at 
end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs1+steps25.json index 7e63d7ff575d3574941ba75a9bee4a779396a316..613b1af6acc43c7141dabc0168f91483fe97cbf5 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 625.1895999999717, - "Batch latency (s)": 1.38781898021698, + "Energy/image (J)": 666.479899999965, + "Batch latency (s)": 1.3885040760040284, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs16+steps25.json index cb6a2bbe1ed48e5d29fff859b2b5b3ba1f30d20c..c12deb74aacb3bc1ff1048944fada03945ed0949 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 497.0412249999994, - "Batch latency (s)": 11.863849401473999, + "Energy/image (J)": 513.199212499999, + "Batch latency (s)": 12.176180934906006, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs2+steps25.json index 28e638029df126f7c9098bb3ef8cb036c01ac25d..91e675a5cf2a6ca5040be77b70ed6e32b13590ea 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 569.9780000000028, - "Batch latency (s)": 1.8244082450866699, + "Energy/image (J)": 592.11455000001, + "Batch latency (s)": 1.8233376026153565, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs4+steps25.json index cae226224431ec0c71f2053f8fe6012ad0c8a0e0..f44e8b91076c2fca118f1216c0c7f7144b8c0896 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 540.4337500000023, - "Batch latency (s)": 3.2643563508987428, + "Energy/image (J)": 544.8429999999935, + "Batch latency (s)": 3.286959099769592, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs8+steps25.json index 033e90aea8d80428a1f876627873cc9c37d84ea6..8bb536f94436ed62501b6f0224111d4086990f63 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 509.72669999999925, - "Batch latency (s)": 
6.086679577827454, + "Energy/image (J)": 522.7006874999963, + "Batch latency (s)": 6.223434543609619, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs1+steps25.json index 2762b1a23150e317c5e13a702d95b3b062d84930..59f162f609205b6a2da5a907934e90960e30a79f 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 432.39010000000707, - "Batch latency (s)": 2.015624976158142, + "Energy/image (J)": 419.5822999998927, + "Batch latency (s)": 1.9746390342712403, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs16+steps25.json index 0ced43458e7141401f4cdf43b6d974c1ab0bc8b4..167a8e9f61e240f8eca313277d923596bec6a08b 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 180.98546874999883, - "Batch latency (s)": 4.272360563278198, + "Energy/image (J)": 179.42289374999237, + "Batch latency (s)": 4.241718673706055, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs2+steps25.json index ecd709457abb2236cbeee30ea6d107b7dd042675..015bae503974324a64caadd93f9780c9cc7aad22 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 305.55374999998605, - "Batch latency (s)": 2.05529043674469, + "Energy/image (J)": 295.792149999924, + "Batch latency (s)": 2.0774401664733886, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs32+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs32+steps25.json index 9aaf26986ca9201ea6454338e839cfe7a20c9c37..611686ff03d8abd36d56fe76d4c9a7bfe02e9a49 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs32+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs32+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 173.50129687500012, - "Batch latency (s)": 8.135975241661072, + "Energy/image (J)": 174.0245281249983, + "Batch latency (s)": 8.14413080215454, "Batch size": 32, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs4+steps25.json index 
5171163e44ccfd3fe30c38b8243d1c1af349a0c0..13bb08ea876135094a866d68842aaeea0ec2a7a3 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 231.15972499999916, - "Batch latency (s)": 2.187738275527954, + "Energy/image (J)": 230.345924999984, + "Batch latency (s)": 2.223876476287842, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs64+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs64+steps25.json index de417967a76973664b373efc1510c3cd588e71e6..c9be1820552a192f98846f3eb4f003ed6c109bce 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs64+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs64+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 167.10275156249992, - "Batch latency (s)": 15.62219078540802, + "Energy/image (J)": 166.73651874999922, + "Batch latency (s)": 15.59785532951355, "Batch size": 64, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs8+steps25.json index a674ed45c4b8f2730b94a5ef6feffa74d6c37164..e7cd7ed460e17f40d0fe14e2120492234a587894 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 201.1591625000001, - "Batch latency (s)": 2.4453672647476195, + "Energy/image (J)": 198.7015374999959, + "Batch latency (s)": 2.459192657470703, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs1+steps25.json index d54e9419b0e80d98b8af11bc960a593f43a559eb..d81b612771eea9eeed9d5f8e10c16493800b9cdb 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 402.25479999999516, - "Batch latency (s)": 0.9360565900802612, + "Energy/image (J)": 386.4609999999404, + "Batch latency (s)": 0.9410791873931885, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs16+steps25.json index 9e9cb85ef1a6c788abe7ddcb790516eeed586b14..23e014db8cdd54f3fb8b0b00b84189f9e001f32a 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", 
"GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 295.6397187499999, - "Batch latency (s)": 7.022916412353515, + "Energy/image (J)": 295.0764937500004, + "Batch latency (s)": 7.02507450580597, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs2+steps25.json index 0742ce9000431476e26c1f3c9e83f65b553f7bd8..ac4f1d96f470317884529655187a96fadb27cdd9 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 347.869849999994, - "Batch latency (s)": 1.1735167980194092, + "Energy/image (J)": 341.5639000000432, + "Batch latency (s)": 1.1783596992492675, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs32+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs32+steps25.json index c6490541d9628e8a5cb58d27c69bbe6a0b371350..9745565ea3404ad2802b561f96be36d136853099 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs32+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs32+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 288.93712499999964, - "Batch latency (s)": 13.71097764968872, + "Energy/image (J)": 289.42614687500173, + "Batch latency (s)": 13.744895315170288, "Batch size": 32, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs4+steps25.json index 4863ae4a01f8500b7eade7e242756d60fc6b4be4..fa741a333f040f4cb7db4724e064e8102146abed 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 328.95322500000475, - "Batch latency (s)": 1.9817272901535035, + "Energy/image (J)": 323.79292500000446, + "Batch latency (s)": 1.9873192310333252, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs64+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs64+steps25.json index e3fa33dcd9734e3cb630f2ed9d9b5889eb8d2e9d..1484244d3641f0af4d991441199c8325e10569cf 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs64+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs64+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 279.46398281250003, - "Batch latency (s)": 26.251275372505187, + "Energy/image (J)": 279.7018828125001, + "Batch latency (s)": 26.282402443885804, "Batch size": 64, 
"Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs8+steps25.json index 36b3361e0361fc31f6efa96e04a2681e5bae6956..5c4eac4daa53360a725120155b075c23e622e469 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 307.12399999999906, - "Batch latency (s)": 3.64911527633667, + "Energy/image (J)": 305.03673749999143, + "Batch latency (s)": 3.63439359664917, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json index 93b2555910cb56013a1c4db98e2f1645b90d2a04..b1aaa2782a7eba883f722315ac8fec57cf2a20cd 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1219.5621000000276, - "Batch latency (s)": 1.8901970863342286, + "Energy/image (J)": 1256.9625999998302, + "Batch latency (s)": 1.9030212879180908, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs16+steps25.json index a52245fa6cb02fb4b08b01fe544a1e1699025d44..c7defb04a50bfe94b460c17b93098dd44d667e33 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1101.8141374999977, - "Batch latency (s)": 26.064258456230164, + "Energy/image (J)": 1115.0813562500057, + "Batch latency (s)": 26.295916223526, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json index b6763f84d48ec8a9c5b5bb639d189ad0f4706275..799d5950c2f8cd6560698e2bc05d8390aa6c36a9 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1164.132700000005, - "Batch latency (s)": 3.4976581573486327, + "Energy/image (J)": 1187.2511500000953, + "Batch latency (s)": 3.544024109840393, "Batch size": 2, 
"Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs32+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs32+steps25.json index 679ff0ef388c3f32217ece10de9b53ceef921060..8d3afbfc68d6f8ff1a34d242502f590b169ee440 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs32+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs32+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1103.1854343750006, - "Batch latency (s)": 52.5412620306015, + "Energy/image (J)": 1111.7095656249962, + "Batch latency (s)": 52.63584921360016, "Batch size": 32, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json index 082b34b0a65ba42b40a12918e486e8a979a13090..3f0838f4d573b61ed1e36d0bb74347bbd2dd3a10 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1124.7332500000018, - "Batch latency (s)": 6.681292104721069, + "Energy/image (J)": 1141.1115500000305, + "Batch latency (s)": 6.750077819824218, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json index 1adfbb11eab76f1a30d10830edc2ead030de6176..6053567796b3684672789d01eb73c3a33c382869 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1105.709275000001, - "Batch latency (s)": 13.087377643585205, + "Energy/image (J)": 1124.4100500000175, + "Batch latency (s)": 13.255334210395812, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json index d15517aa639c5b626f36bcd4b9a94a5a7ec86bd3..52dd4470d2e9873b2d10acbc13703928a4fcf783 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 973.6592999999878, - "Batch latency (s)": 2.2974732398986815, + "Energy/image (J)": 969.3598000001163, + "Batch latency 
(s)": 2.2578482627868652, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs16+steps25.json index f5e9a3d3c7e71077c3531f05e2c59b8d114c255a..ac6bde9041ee06427994aabe2b16c7c6321a13cc 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 733.4240562499996, - "Batch latency (s)": 17.358140754699708, + "Energy/image (J)": 737.6392125000013, + "Batch latency (s)": 17.373131418228148, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json index 38dadd452e6e3da77ce8d6fe68535c229d370122..3d349ec65b3960445241c3d118137e870330b111 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 848.840699999989, - "Batch latency (s)": 2.6611390113830566, + "Energy/image (J)": 868.7413499999791, + "Batch latency (s)": 2.683417248725891, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json index 0491a96817f66895c7dbce8dcd9327abe65cf045..a63a91d90ce6627d404db7eb95c1921a89e27677 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 789.0821250000038, - "Batch latency (s)": 4.744464302062989, + "Energy/image (J)": 794.8234249999747, + "Batch latency (s)": 4.7213153600692745, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json index 0568d56825fb9de42c87771507f29c4e6cdbdca0..979aa4bfb589deb5ca3c83c0e265ffdf26ff961b 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 752.1045125000004, - "Batch latency (s)": 8.926730370521545, + "Energy/image (J)": 756.267812499986, + "Batch latency (s)": 8.936180830001831, "Batch size": 8, "Denoising steps": 25 } \ No 
newline at end of file diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json index 16902b5173b1aa26b6ed93809848e8381082e8b3..e3da32aca1643f8220ea99fec88d494b8b64ae66 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 1168.710700000031, - "Batch latency (s)": 3.279584217071533, + "Energy/video (J)": 1153.5816999999806, + "Batch latency (s)": 3.23746497631073, "Batch size": 1, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json new file mode 100644 index 0000000000000000000000000000000000000000..fdfcf2ca6d87d73d766adaa7afbcad87398426e3 --- /dev/null +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json @@ -0,0 +1,9 @@ +{ + "Model": "ali-vilab/text-to-video-ms-1.7b", + "GPU": "NVIDIA A100-SXM4-40GB", + "Energy/video (J)": 1002.3830562499992, + "Batch latency (s)": 42.21276063919068, + "Batch size": 16, + "Denoising steps": 25, + "Frames": 16 +} \ No newline at end of file diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json index 2d5a958af0059e0758230a9eba4e53f79b17467c..5e15332e119541e30d53c79d02d4907c37d33e37 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 1083.7407000000123, - "Batch latency (s)": 5.75505154132843, + "Energy/video (J)": 1088.2321500000078, + "Batch latency (s)": 5.810182595252991, "Batch size": 2, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json index 07e9c28509f076fd85fb9e746728ae0e1f426dbc..a4636e29a194b6bf0ee795bab62af58531ab36d6 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 1031.8761250000098, - "Batch latency (s)": 10.693570613861084, + "Energy/video (J)": 1039.8423750000075, + "Batch latency (s)": 10.828980302810669, "Batch size": 4, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json index 
9b941b99c36ad68011baeba800abcb96a4065550..175bccd514e0e2fdc78981292844fbf4eb76c7dd 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 995.5903875000018, - "Batch latency (s)": 20.66424689292908, + "Energy/video (J)": 1001.8907250000047, + "Batch latency (s)": 20.911450886726378, "Batch size": 8, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json index d608d5741220dc12491c38115ff285f44363fdf0..0025f2c8e734b23d93fc502017aeb396c0c0a70e 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 3613.7171999999787, - "Batch latency (s)": 9.765414237976074, + "Energy/video (J)": 3808.3875, + "Batch latency (s)": 9.97215178012848, "Batch size": 1, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json index fb8c9adc45ea54dd2795c191095409321fc21032..b196f268b51172a8e792d0855742e379fe2ac8fd 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 3613.7226499999874, - "Batch latency (s)": 19.047373509407045, + "Energy/video (J)": 3714.9077000000048, + "Batch latency (s)": 19.319639086723328, "Batch size": 2, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json index fe48e4bf0790240e018d11c7ab4df4ff5f2da9dc..5ed74a04bca7a4eb4a5d45c09725d06142db9473 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 3579.347100000002, - "Batch latency (s)": 38.19397940635681, + "Energy/video (J)": 3723.060124999983, + "Batch latency (s)": 38.925279235839845, "Batch size": 4, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json index 
a3cb4e1c59ed9d6bc97e368929f2ac2e8f8d5c0e..9ca43d3fd015e4b51c713134d9b5f9a0be832e97 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 972.1845000000205, - "Batch latency (s)": 1.972856879234314, + "Energy/video (J)": 956.181299999915, + "Batch latency (s)": 1.9992478847503663, "Batch size": 1, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json index 9dc988d5a358bdbac42efcf8dd86becd5c59936d..a564b92a716bd52c7dc6a33d4632487e68eb8d07 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 816.0705249999999, - "Batch latency (s)": 19.527364635467528, + "Energy/video (J)": 812.8134187500109, + "Batch latency (s)": 19.53088092803955, "Batch size": 16, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json index 614a330e53b83d70741892127e12180c3906d148..07543b32965303fe72fa01a25845e7777c5eb8ce 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 889.6341500000096, - "Batch latency (s)": 3.0206708192825316, + "Energy/video (J)": 894.7264999999664, + "Batch latency (s)": 3.0091302156448365, "Batch size": 2, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json index 7e4cdb51c567fd5b43504405689f1231fdcfafd3..38848c768e6176b2aee01ba3720c5c6dae24dded 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 864.8537000000011, - "Batch latency (s)": 5.358541631698609, + "Energy/video (J)": 858.7707249999978, + "Batch latency (s)": 5.359495830535889, "Batch size": 4, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json index ecda3cc401f751cbfd68cc67870488e83f0e9a80..a29c909bf3a315b096d9e5d5e53da7cfa295073b 100644 --- a/data/diffusion/text-to-video/H100 80GB 
HBM3/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 825.4678125000034, - "Batch latency (s)": 10.00869529247284, + "Energy/video (J)": 824.5901250000112, + "Batch latency (s)": 9.995107746124267, "Batch size": 8, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json index 3e44f5a0d9a3de4bdb250805bb551947d3cc409d..f8c65b90d6f45878361664860ea323cefde5979b 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 3202.3456999999935, - "Batch latency (s)": 5.03039321899414, + "Energy/video (J)": 3220.4186000000686, + "Batch latency (s)": 5.014125680923462, "Batch size": 1, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json index c73e26de892bd347fc2f0ce9806540e24809c179..d33b8512650df03e27ec8d4643bdebfc407f8bd4 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 3080.158299999987, - "Batch latency (s)": 9.498830604553223, + "Energy/video (J)": 3143.508899999969, + "Batch latency (s)": 9.543243718147277, "Batch size": 2, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json index e8f6bc6a72bb1299eddb42f3fe312df9e4d42369..b43f88bb4d5165811ac75eeeac288e1a31413b88 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 2999.9867499999937, - "Batch latency (s)": 18.28913300037384, + "Energy/video (J)": 3071.656475000037, + "Batch latency (s)": 18.417469120025636, "Batch size": 4, "Denoising steps": 25, "Frames": 16