Spaces:
Running
on
Zero
Running
on
Zero
File size: 7,129 Bytes
e368cec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
import fire
import time
import json
from collections import defaultdict
from .basic_stats import get_log_files, NUM_SERVERS, LOG_ROOT_DIR
from .utils import detect_language, get_time_stamp_from_date, get_input_image_path, load_image_from_path
from tqdm import tqdm
# Record types kept by read_file(); log rows whose "type" is not listed here
# are dropped while loading.
VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote", "chat"]
def remove_html(raw):
    """Strip the header decoration from a logged model label.

    Three shapes are recognised:

    * a label starting with ``<h3>``: the text after the first ``": "`` is
      returned, minus a trailing span as long as ``"</h3>\\n"``;
    * a label starting with ``"### Model A: "`` or ``"### Model B: "``:
      the 13-character prefix is dropped;
    * anything else is returned unchanged.
    """
    markdown_prefixes = ("### Model A: ", "### Model B: ")

    if raw.startswith("<h3>"):
        name_start = raw.find(": ") + 2
        name_stop = -len("</h3>\n")
        return raw[name_start:name_stop]

    if raw.startswith(markdown_prefixes):
        # Both markdown prefixes are exactly 13 characters long.
        return raw[13:]

    return raw
def read_file(filename):
    """Load the vote and chat records from one JSON-lines log file.

    Every non-blank line of ``filename`` is parsed as a JSON object, and only
    rows whose ``"type"`` is listed in ``VOTES`` are kept.

    A missing file is retried up to 5 times with a 2 second pause after each
    failed attempt. If it never shows up, an empty list is returned.

    Args:
        filename: Path of the log file to read.

    Returns:
        The matching rows (parsed JSON objects) in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a parsed row has no ``"type"`` key.
    """
    for _ in range(5):
        try:
            # The context manager closes the handle as soon as the file has
            # been consumed instead of leaving it to the garbage collector.
            with open(filename) as log_file:
                rows = []
                for line in log_file:
                    if not line.strip():
                        # A blank line carries no record; skip it rather than
                        # failing inside json.loads.
                        continue
                    row = json.loads(line)
                    if row["type"] in VOTES:
                        rows.append(row)
                return rows
        except FileNotFoundError:
            time.sleep(2)
    return []
def read_file_parallel(log_files, num_threads=16):
    """Read many log files with a worker pool and merge their rows.

    Args:
        log_files: Sequence of log file paths; each one is passed to
            ``read_file``.
        num_threads: Size of the ``multiprocessing.Pool`` used for reading.

    Returns:
        One flat list holding the rows of every file, in the order of
        ``log_files``.
    """
    from multiprocessing import Pool

    with Pool(num_threads) as pool:
        # imap yields results in input order, so the progress bar advances
        # file by file and the merged output keeps the order of log_files.
        progress = tqdm(pool.imap(read_file, log_files), total=len(log_files))
        rows_per_file = list(progress)

    return [row for rows in rows_per_file for row in rows]
def num_tokens(s:str):
    """Roughly estimate the token count of ``s`` as a quarter of its length.

    ``None`` counts as ``0``; any other value yields ``len(s) / 4``, which is
    a float.
    """
    return 0 if s is None else len(s) / 4
def _input_image_size(tstamp, conv_id):
    """Return ``.size`` of a conversation's input image, or None if none loads."""
    image = load_image_from_path(get_input_image_path(tstamp, conv_id))
    return None if image is None else image.size


def _record_token_counts(state, model, input_counts, output_counts):
    """Append the token estimate of each message in ``state`` under ``model``.

    Messages are read from ``state["offset"]`` onwards: every second message
    starting there goes to ``input_counts``, the ones in between go to
    ``output_counts``.
    """
    messages = state["messages"]
    offset = state["offset"]
    # Plain append loops on purpose: a model key is only created once it has
    # at least one message, so the averages in main() never divide by zero.
    for message in messages[offset::2]:
        input_counts[model].append(num_tokens(message[1]))
    for message in messages[offset + 1 :: 2]:
        output_counts[model].append(num_tokens(message[1]))


def _resolve_battle_models(row):
    """Return the two model names of a vote row, or None if the row is skipped.

    A row is skipped when either logged model is None, when exactly one of the
    two public names is empty, or when a non-anonymous row's public names
    disagree with the names stored in its states.
    """
    if row["models"][0] is None or row["models"][1] is None:
        return None

    models_public = [remove_html(row["models"][0]), remove_html(row["models"][1])]
    if "model_name" in row["states"][0]:
        models_hidden = [
            row["states"][0]["model_name"],
            row["states"][1]["model_name"],
        ]
        if models_hidden[0] is None:
            models_hidden = models_public
    else:
        models_hidden = models_public

    # Exactly one empty public name means the row is inconsistent.
    if (models_public[0] == "") != (models_public[1] == ""):
        return None

    if models_public[0] == "" or models_public[0] == "Model A":
        # Public names are placeholders; the names come from the states.
        return models_hidden

    if models_public != models_hidden:
        return None
    return models_public


def _gpt4v_image_tokens(size):
    """Estimate the tokens charged for one image of the given ``size``.

    Each 512-pixel tile costs 170 tokens, plus a flat 85 tokens per image.
    Tiles are counted with ceiling division, so a side that is an exact
    multiple of 512 is not charged an extra tile. The logged size is used
    as-is; no rescaling is modelled.
    """
    tiles_first = -(-size[0] // 512)
    tiles_second = -(-size[1] // 512)
    return tiles_first * tiles_second * 170 + 85


def main(
):
    """Print usage statistics and a GPT-4V cost estimate from the logs.

    Reads every file returned by ``get_log_files()`` and prints, as JSON:

    * the number of rows per record type,
    * the number of requests per model,
    * the average estimated input and output tokens per message, per model,
    * the average input image size per model (over requests that had an
      image),
    * the estimated USD cost of the ``gpt-4-vision-preview`` traffic, split
      into input text, output text and images.
    """
    log_files = get_log_files()
    data = read_file_parallel(log_files)

    all_model_counts = defaultdict(int)
    all_model_input_tokens_counts = defaultdict(list)
    all_model_output_tokens_counts = defaultdict(list)
    all_model_image_sizes = defaultdict(list)
    chat_battle_counts = defaultdict(int)

    for row in tqdm(data, desc="counting"):
        chat_battle_counts[row["type"]] += 1

        if row["type"] == "chat":
            model = row["model"]
            all_model_counts[model] += 1
            state = row["state"]
            # Load the image once and reuse it for the size lookup.
            all_model_image_sizes[model].append(
                _input_image_size(row["tstamp"], state["conv_id"])
            )
            try:
                _record_token_counts(
                    state,
                    model,
                    all_model_input_tokens_counts,
                    all_model_output_tokens_counts,
                )
            except Exception:
                # Show the offending record before propagating the error.
                print(row)
                raise
            continue

        models = _resolve_battle_models(row)
        if models is None:
            continue

        all_model_counts[models[0]] += 1
        all_model_counts[models[1]] += 1
        tstamp = row["tstamp"]
        for side in (0, 1):
            state = row["states"][side]
            all_model_image_sizes[models[side]].append(
                _input_image_size(tstamp, state["conv_id"])
            )
        for side in (0, 1):
            _record_token_counts(
                row["states"][side],
                models[side],
                all_model_input_tokens_counts,
                all_model_output_tokens_counts,
            )

    print("### Chat battle counts (requests)")
    print(json.dumps(chat_battle_counts, indent=4))

    print("### Model counts (requests)")
    print(json.dumps(all_model_counts, indent=4))

    print("### Model Avg input tokens counts (tokens)")
    average_input_tokens_counts = {
        model: sum(counts) / len(counts)
        for model, counts in all_model_input_tokens_counts.items()
    }
    print(json.dumps(average_input_tokens_counts, indent=4))

    print("### Model AVg output tokens counts (tokens)")
    average_output_tokens_counts = {
        model: sum(counts) / len(counts)
        for model, counts in all_model_output_tokens_counts.items()
    }
    print(json.dumps(average_output_tokens_counts, indent=4))

    print("### Model Avg image sizes (height, width)")
    # NOTE(review): if load_image_from_path returns PIL images, ``.size`` is
    # (width, height), so the label above may be swapped. Confirm.
    average_image_sizes = {}
    for model, sizes in all_model_image_sizes.items():
        # Average over the requests that actually carried an image; requests
        # without one must not drag the mean towards zero.
        present = [size for size in sizes if size is not None]
        if present:
            avg_height = sum(size[0] for size in present) / len(present)
            avg_width = sum(size[1] for size in present) / len(present)
        else:
            avg_height = avg_width = 0.0
        average_image_sizes[model] = (avg_height, avg_width)
    print(json.dumps(average_image_sizes, indent=4))

    print("### GPT-4V estimated cost (USD)")
    gpt_4v_name = "gpt-4-vision-preview"
    gpt_4v_cost = {}
    # Rates applied: 0.01 USD per 1000 input tokens, 0.03 per 1000 output tokens.
    gpt_4v_cost['input'] = sum(all_model_input_tokens_counts[gpt_4v_name]) / 1000 * 0.01
    gpt_4v_cost['output'] = sum(all_model_output_tokens_counts[gpt_4v_name]) / 1000 * 0.03
    # Image tokens are billed at the input rate.
    gpt_4v_cost['image'] = sum(
        _gpt4v_image_tokens(size) / 1000 * 0.01
        for size in all_model_image_sizes[gpt_4v_name]
        if size is not None
    )
    print(json.dumps(gpt_4v_cost, indent=4))
if __name__ == "__main__":
    # Hand ``main`` to python-fire so the module can be run as a script.
    fire.Fire(main)