Jae-Won Chung committed
Commit 31b5924
Parent(s): 9fd9223

Update benchmark.py

Files changed: scripts/benchmark.py (+38, -38)
scripts/benchmark.py
CHANGED
@@ -40,12 +40,36 @@ SYSTEM_PROMPTS = {
     ),
 }
 
+class CustomDataset(Dataset):
+    def __init__(self, data):
+        self.data = data
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, index):
+        sample = self.data[index]
+        return sample["conversations"][0]["value"]
+
+
+def dataloader(input_file: str, batch_size: int) -> Generator[tuple[bool, list[str]], None, None]:
+    """Yields a tuple of whether this is a warmup run and the input prompt."""
+    for _ in range(3):
+        yield True, ["Say something long and random. I don't care about the content." for _ in range (batch_size)]
+    data = json.load(open(input_file, "r"))
+    custom_dataset = CustomDataset(data)
+    data_loader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=False)
+    for prompt in data_loader:
+        yield False, prompt
+
+
 @dataclass
 class Output:
     response_length: int
     input: str
     output: str
 
+
 @torch.inference_mode()
 def run_inference(
     model,
@@ -218,9 +242,6 @@ def run_inference(
 
     return result
 
-def write_error_to_file(filename, error_message):
-    with open(filename, 'a') as file:
-        file.write(error_message + '\n')
 
 def main(
     model_path: str,
@@ -232,7 +253,7 @@ def main(
     temperature: float = 0.7,
     repitition_penalty: float = 1.0,
     max_new_tokens: int = 512,
-
+    batch_size: int = 1,
 ) -> None:
     """Run benchmarking for one model on the entire input file.
 
@@ -262,8 +283,8 @@ def main(
     model_path = model_path[:-1]
     model_name_cleaned = "--".join(model_path.split("/")[-2:])
     output_dir = f"{output_dir}/{task}/{model_name_cleaned}"
-    output_csv_path = f"{output_dir}/benchmark_batch_{
-    config_json_path = f"{output_dir}/
+    output_csv_path = f"{output_dir}/benchmark_batch_{batch_size}.json"
+    config_json_path = f"{output_dir}/config_batch_{batch_size}.json"
     table = Table(title="Benchmark")
     table.add_column("Configuration")
     table.add_column("Value")
@@ -341,45 +362,23 @@ def main(
             "temperature": temperature,
             "repitition_penalty": repitition_penalty,
             "max_new_tokens": max_new_tokens,
-            "batch_size":
+            "batch_size": batch_size,
         },
         config_json,
         indent=4,
     )
     config_json.write("\n")
 
-    class CustomDataset(Dataset):
-        def __init__(self, data):
-            self.data = data
-
-        def __len__(self):
-            return len(self.data)
-
-        def __getitem__(self, index):
-            sample = self.data[index]
-            return sample["conversations"][0]["value"]
-
-
-    def dataloader(input_file: str, batch_size: batch) -> Generator[tuple[bool, str], None, None]:
-        """Yields a tuple of whether this is a warmup run and the input prompt."""
-        for _ in range(3):
-            yield True, ["Say something long and random. I don't care about the content." for _ in range (batch)]
-        data = json.load(open(input_file, "r"))
-        custom_dataset = CustomDataset(data)
-        data_loader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=False)
-        for prompt in data_loader:
-            yield False, prompt
-
     # Warm up the GPU with some random prompts.
     # Forward through all the prompts.
     is_first = True
     convs = []
     prompts = []
-    data_iter = iter(dataloader(input_file,
+    data_iter = iter(dataloader(input_file, batch_size))
 
     for is_warmup, input_prompts in data_iter:
         # Construct the input prompt.
-        for i in range(
+        for i in range(batch_size):
         conv = copy.deepcopy(conv_base)
         conv.append_message(conv.roles[0], input_prompts[i])
         conv.append_message(conv.roles[1], "")
@@ -404,18 +403,19 @@ def main(
         if results:
             # Record numbers.
             if not is_warmup:
-
-                latency = measurements.time
-                throughput =
-                energy = measurements.total_energy
+                total_num_tokens = sum([result.response_length for result in results])  # total number of tokens
+                latency = measurements.time  # seconds, identical for all requests
+                throughput = total_num_tokens / latency  # tokens per second
+                energy = measurements.total_energy  # Joules, total across all requests
+                # Fields should be interpreted as per-request
             output = {
                 "model": model_name_cleaned,
                 "throughput": throughput,
-                "response_length":
+                "response_length": total_num_tokens / batch_size,
                 "latency": latency,
-                "energy": energy,
+                "energy": energy / batch_size,
                 "input": [prompt.strip() for prompt in prompts],
-                "output": [
+                "output": [result.output.strip() for result in results],
             }
             output_str = json.dumps(output, indent=4)
             if not is_warmup:
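The commit moves the batching helpers to module level: CustomDataset wraps the ShareGPT-style JSON and returns the first human turn of each conversation, and dataloader yields three warmup batches of a dummy prompt before streaming real batches through a PyTorch DataLoader. The sketch below reproduces that pattern in isolation; the names (PromptDataset, batches), the fake input file, and the toy data are made up for illustration and are not part of the commit.

# Hypothetical, self-contained sketch of the warmup-then-data batching pattern.
# Names (PromptDataset, batches) and the fake JSON file are illustrative only.
import json
from typing import Generator

from torch.utils.data import DataLoader, Dataset


class PromptDataset(Dataset):
    """Returns the first human turn of each ShareGPT-style conversation."""

    def __init__(self, data: list[dict]) -> None:
        self.data = data

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, index: int) -> str:
        return self.data[index]["conversations"][0]["value"]


def batches(input_file: str, batch_size: int) -> Generator[tuple[bool, list[str]], None, None]:
    """Yield (is_warmup, prompts): three dummy warmup batches, then real ones."""
    for _ in range(3):
        yield True, ["Say something long and random."] * batch_size
    with open(input_file) as f:
        data = json.load(f)
    loader = DataLoader(PromptDataset(data), batch_size=batch_size, shuffle=False)
    for prompts in loader:
        # The default collate function turns a batch of strings into a list of strings.
        yield False, list(prompts)


if __name__ == "__main__":
    # Tiny fake input so the sketch runs end to end.
    fake = [{"conversations": [{"value": f"prompt {i}"}]} for i in range(4)]
    with open("fake_sharegpt.json", "w") as f:
        json.dump(fake, f)
    for is_warmup, prompts in batches("fake_sharegpt.json", batch_size=2):
        print(is_warmup, prompts)

One consequence of this design is that DataLoader's default drop_last=False keeps a final partial batch when the dataset length is not a multiple of batch_size, while the benchmark loop iterates for i in range(batch_size) over each yielded batch, so it implicitly assumes full batches.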
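In the last hunk, the recorded metrics are aggregated over the whole batch and then normalized: throughput is total generated tokens divided by the batch's wall-clock latency, while response_length and energy are divided by batch_size to report per-request values. A small worked example of that arithmetic, with made-up numbers:

# Made-up numbers illustrating the per-request normalization used in the diff above.
batch_size = 4
response_lengths = [200, 180, 220, 150]  # tokens generated per request in one batch
latency = 12.5                           # seconds, measured once for the whole batch
total_energy = 900.0                     # Joules, measured once for the whole batch

total_num_tokens = sum(response_lengths)                      # 750 tokens
throughput = total_num_tokens / latency                       # 60.0 tokens/s (batch-level)
response_length_per_request = total_num_tokens / batch_size   # 187.5 tokens
energy_per_request = total_energy / batch_size                # 225.0 J

print(throughput, response_length_per_request, energy_per_request)

Dividing by batch_size reports the mean over the batch, which matches the comment added in the diff that the fields should be interpreted as per-request.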
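The new for i in range(batch_size) loop builds one conversation per batch element by deep-copying conv_base and appending the user turn plus an empty assistant slot. Below is a hedged sketch of the same prompt construction using FastChat's conversation templates; the template name and the prompts are assumptions for illustration, not taken from this commit.

# Illustrative only: the template name and prompts are assumptions.
import copy

from fastchat.conversation import get_conv_template

conv_base = get_conv_template("vicuna_v1.1")
input_prompts = ["What is a watt?", "Explain joules briefly."]
batch_size = len(input_prompts)

prompts = []
for i in range(batch_size):
    conv = copy.deepcopy(conv_base)
    conv.append_message(conv.roles[0], input_prompts[i])  # user turn
    conv.append_message(conv.roles[1], "")                # empty slot for the model's reply
    prompts.append(conv.get_prompt())

print(prompts[0])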