File size: 1,925 Bytes
f71c233
1
{"shakespeare_char": {"means": {"final_train_loss_mean": 0.8107348283131918, "best_val_loss_mean": 1.469569722811381, "total_train_time_mean": 99.789883852005, "avg_inference_tokens_per_second_mean": 617.3886088455373}, "stderrs": {"final_train_loss_stderr": 0.0034371159910373005, "best_val_loss_stderr": 0.0029907013350700465, "total_train_time_stderr": 0.6856674925383229, "avg_inference_tokens_per_second_stderr": 4.480879055062257}, "final_info_dict": {"final_train_loss": [0.8153533935546875, 0.7964469194412231, 0.8204041719436646], "best_val_loss": [1.4731581211090088, 1.4783154726028442, 1.45723557472229], "total_train_time": [97.26092314720154, 99.80931115150452, 102.29941725730896], "avg_inference_tokens_per_second": [634.8615557273264, 602.1653151988776, 615.1389556104078]}}, "enwik8": {"means": {"final_train_loss_mean": 0.9349523782730103, "best_val_loss_mean": 1.0040355920791626, "total_train_time_mean": 1212.1419475078583, "avg_inference_tokens_per_second_mean": 651.9546315089241}, "stderrs": {"final_train_loss_stderr": 0.0, "best_val_loss_stderr": 0.0, "total_train_time_stderr": 0.0, "avg_inference_tokens_per_second_stderr": 0.0}, "final_info_dict": {"final_train_loss": [0.9349523782730103], "best_val_loss": [1.0040355920791626], "total_train_time": [1212.1419475078583], "avg_inference_tokens_per_second": [651.9546315089241]}}, "text8": {"means": {"final_train_loss_mean": 0.9981321692466736, "best_val_loss_mean": 0.9795330762863159, "total_train_time_mean": 1213.3201868534088, "avg_inference_tokens_per_second_mean": 575.7515195836003}, "stderrs": {"final_train_loss_stderr": 0.0, "best_val_loss_stderr": 0.0, "total_train_time_stderr": 0.0, "avg_inference_tokens_per_second_stderr": 0.0}, "final_info_dict": {"final_train_loss": [0.9981321692466736], "best_val_loss": [0.9795330762863159], "total_train_time": [1213.3201868534088], "avg_inference_tokens_per_second": [575.7515195836003]}}}