{ "best_metric": 0.5696164965629578, "best_model_checkpoint": "/kaggle/output/checkpoint-100", "epoch": 1.6528925619834711, "eval_steps": 50, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 0.7246, "step": 1 }, { "epoch": 0.1, "learning_rate": 7.333333333333332e-05, "loss": 0.5982, "step": 50 }, { "epoch": 0.1, "eval_accuracy": 0.75, "eval_loss": 0.6296697854995728, "eval_runtime": 30.5612, "eval_samples_per_second": 38.742, "eval_steps_per_second": 19.371, "step": 50 }, { "epoch": 0.21, "learning_rate": 0.00015, "loss": 0.5505, "step": 100 }, { "epoch": 0.21, "eval_accuracy": 0.75, "eval_loss": 0.5696164965629578, "eval_runtime": 30.5816, "eval_samples_per_second": 38.716, "eval_steps_per_second": 19.358, "step": 100 }, { "epoch": 0.31, "learning_rate": 0.0002333333333333333, "loss": 0.5838, "step": 150 }, { "epoch": 0.31, "eval_accuracy": 0.75, "eval_loss": 0.562861442565918, "eval_runtime": 31.0771, "eval_samples_per_second": 38.099, "eval_steps_per_second": 19.049, "step": 150 }, { "epoch": 0.41, "learning_rate": 0.00025333333333333333, "loss": 0.5925, "step": 200 }, { "epoch": 0.41, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.6023, "eval_samples_per_second": 38.69, "eval_steps_per_second": 19.345, "step": 200 }, { "epoch": 0.52, "learning_rate": 0.00025333333333333333, "loss": 0.7003, "step": 250 }, { "epoch": 0.52, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.7477, "eval_samples_per_second": 38.507, "eval_steps_per_second": 19.253, "step": 250 }, { "epoch": 0.62, "learning_rate": 0.00025333333333333333, "loss": 0.606, "step": 300 }, { "epoch": 0.62, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.6306, "eval_samples_per_second": 38.654, "eval_steps_per_second": 19.327, "step": 300 }, { "epoch": 0.72, "learning_rate": 0.0002985365853658536, "loss": 0.6744, "step": 350 }, { "epoch": 0.72, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.5975, "eval_samples_per_second": 38.696, "eval_steps_per_second": 19.348, "step": 350 }, { "epoch": 0.83, "learning_rate": 0.0002802439024390244, "loss": 0.6448, "step": 400 }, { "epoch": 0.83, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.6461, "eval_samples_per_second": 38.635, "eval_steps_per_second": 19.317, "step": 400 }, { "epoch": 0.93, "learning_rate": 0.0002619512195121951, "loss": 0.7365, "step": 450 }, { "epoch": 0.93, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.713, "eval_samples_per_second": 38.55, "eval_steps_per_second": 19.275, "step": 450 }, { "epoch": 1.03, "learning_rate": 0.00024365853658536585, "loss": 0.6083, "step": 500 }, { "epoch": 1.03, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.6783, "eval_samples_per_second": 38.594, "eval_steps_per_second": 19.297, "step": 500 }, { "epoch": 1.14, "learning_rate": 0.00022536585365853657, "loss": 0.6217, "step": 550 }, { "epoch": 1.14, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.5897, "eval_samples_per_second": 38.706, "eval_steps_per_second": 19.353, "step": 550 }, { "epoch": 1.24, "learning_rate": 0.00020707317073170728, "loss": 0.642, "step": 600 }, { "epoch": 1.24, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.6541, "eval_samples_per_second": 38.624, "eval_steps_per_second": 19.312, "step": 600 }, { "epoch": 1.34, "learning_rate": 0.00018878048780487803, "loss": 0.6433, "step": 650 }, { "epoch": 1.34, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.5944, "eval_samples_per_second": 38.7, "eval_steps_per_second": 19.35, "step": 650 }, { "epoch": 1.45, "learning_rate": 0.00017048780487804874, "loss": 0.7497, "step": 700 }, { "epoch": 1.45, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 31.0188, "eval_samples_per_second": 38.17, "eval_steps_per_second": 19.085, "step": 700 }, { "epoch": 1.55, "learning_rate": 0.0001521951219512195, "loss": 0.6385, "step": 750 }, { "epoch": 1.55, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.6101, "eval_samples_per_second": 38.68, "eval_steps_per_second": 19.34, "step": 750 }, { "epoch": 1.65, "learning_rate": 0.00013390243902439023, "loss": 0.6581, "step": 800 }, { "epoch": 1.65, "eval_accuracy": 0.75, "eval_loss": 0.5931239724159241, "eval_runtime": 30.6994, "eval_samples_per_second": 38.568, "eval_steps_per_second": 19.284, "step": 800 } ], "logging_steps": 50, "max_steps": 1000, "num_train_epochs": 3, "save_steps": 100, "total_flos": 2789684993326080.0, "trial_name": null, "trial_params": null }