File size: 1,920 Bytes
f71c233 |
1 |
{"shakespeare_char": {"means": {"final_train_loss_mean": 0.8070969978968302, "best_val_loss_mean": 1.4605447848637898, "total_train_time_mean": 76.28451323509216, "avg_inference_tokens_per_second_mean": 682.249943875929}, "stderrs": {"final_train_loss_stderr": 0.0022763731268803317, "best_val_loss_stderr": 0.0011012520313362566, "total_train_time_stderr": 0.46271068920474306, "avg_inference_tokens_per_second_stderr": 2.0192100707045397}, "final_info_dict": {"final_train_loss": [0.815872311592102, 0.7992163896560669, 0.8062022924423218], "best_val_loss": [1.4568231105804443, 1.4648518562316895, 1.4599593877792358], "total_train_time": [78.24760484695435, 75.29463386535645, 75.3113009929657], "avg_inference_tokens_per_second": [673.7276235128363, 687.2659996805114, 685.7562084344394]}}, "enwik8": {"means": {"final_train_loss_mean": 0.9162352681159973, "best_val_loss_mean": 0.994733452796936, "total_train_time_mean": 800.9844655990601, "avg_inference_tokens_per_second_mean": 666.6401679692085}, "stderrs": {"final_train_loss_stderr": 0.0, "best_val_loss_stderr": 0.0, "total_train_time_stderr": 0.0, "avg_inference_tokens_per_second_stderr": 0.0}, "final_info_dict": {"final_train_loss": [0.9162352681159973], "best_val_loss": [0.994733452796936], "total_train_time": [800.9844655990601], "avg_inference_tokens_per_second": [666.6401679692085]}}, "text8": {"means": {"final_train_loss_mean": 0.9799667596817017, "best_val_loss_mean": 0.960930585861206, "total_train_time_mean": 796.8736915588379, "avg_inference_tokens_per_second_mean": 681.5641643940587}, "stderrs": {"final_train_loss_stderr": 0.0, "best_val_loss_stderr": 0.0, "total_train_time_stderr": 0.0, "avg_inference_tokens_per_second_stderr": 0.0}, "final_info_dict": {"final_train_loss": [0.9799667596817017], "best_val_loss": [0.960930585861206], "total_train_time": [796.8736915588379], "avg_inference_tokens_per_second": [681.5641643940587]}}} |