{"shakespeare_char": {"means": {"final_train_loss_mean": 1.3304622968037922, "best_val_loss_mean": 1.4966087341308594, "total_train_time_mean": 104.24611830711365, "avg_inference_tokens_per_second_mean": 402.23806255735764, "style_consistency_scores": {"mean_consistency": 0.9666666666666668, "std_consistency": 0.06788635809607159}}, "stderrs": {"final_train_loss_stderr": 0.010637172321278423, "best_val_loss_stderr": 0.0014393780238156743, "total_train_time_stderr": 0.5304034805574575, "avg_inference_tokens_per_second_stderr": 0.54141608696792, "style_consistency_scores": {"mean_consistency": 0.01200137166371825, "std_consistency": 0.021331109488662167}}, "final_info_dict": {"final_train_loss": [1.33839750289917, 1.288020133972168, 1.364969253540039], "best_val_loss": [1.5009182691574097, 1.4907069206237793, 1.4982010126113892], "total_train_time": [106.42000102996826, 102.655588388443, 103.66276550292969], "avg_inference_tokens_per_second": [403.7661215348543, 402.95931096871493, 399.98875516850364], "style_consistency_scores": [{"mean_consistency": 0.9833333333333334, "std_consistency": 0.04999999999999999}, {"mean_consistency": 1.0, "std_consistency": 0.0}, {"mean_consistency": 0.9166666666666667, "std_consistency": 0.1536590742882148}]}}, "enwik8": {"means": {"final_train_loss_mean": 1.0843100547790527, "best_val_loss_mean": 0.9584192037582397, "total_train_time_mean": 1198.6353631019592, "avg_inference_tokens_per_second_mean": 400.9799186059553, "style_consistency_scores": {"mean_consistency": 1.0, "std_consistency": 0.0}}, "stderrs": {"final_train_loss_stderr": 0.0, "best_val_loss_stderr": 0.0, "total_train_time_stderr": 0.0, "avg_inference_tokens_per_second_stderr": 0.0, "style_consistency_scores": {"mean_consistency": 0.0, "std_consistency": 0.0}}, "final_info_dict": {"final_train_loss": [1.0843100547790527], "best_val_loss": [0.9584192037582397], "total_train_time": [1198.6353631019592], "avg_inference_tokens_per_second": [400.9799186059553], "style_consistency_scores": [{"mean_consistency": 1.0, "std_consistency": 0.0}]}}, "text8": {"means": {"final_train_loss_mean": 1.107680320739746, "best_val_loss_mean": 0.9144911170005798, "total_train_time_mean": 1191.0737359523773, "avg_inference_tokens_per_second_mean": 399.1246811178914, "style_consistency_scores": {"mean_consistency": 1.0, "std_consistency": 0.0}}, "stderrs": {"final_train_loss_stderr": 0.0, "best_val_loss_stderr": 0.0, "total_train_time_stderr": 0.0, "avg_inference_tokens_per_second_stderr": 0.0, "style_consistency_scores": {"mean_consistency": 0.0, "std_consistency": 0.0}}, "final_info_dict": {"final_train_loss": [1.107680320739746], "best_val_loss": [0.9144911170005798], "total_train_time": [1191.0737359523773], "avg_inference_tokens_per_second": [399.1246811178914], "style_consistency_scores": [{"mean_consistency": 1.0, "std_consistency": 0.0}]}}} |