{ "best_metric": 0.7575757575757576, "best_model_checkpoint": "dit-base-rvlcdip-finetuned-grp-actual/checkpoint-93", "epoch": 6.72, "eval_steps": 500, "global_step": 126, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.53, "learning_rate": 3.846153846153846e-05, "loss": 2.3577, "step": 10 }, { "epoch": 0.96, "eval_accuracy": 0.5113636363636364, "eval_loss": 2.086306571960449, "eval_runtime": 240.3077, "eval_samples_per_second": 1.099, "eval_steps_per_second": 0.037, "step": 18 }, { "epoch": 1.07, "learning_rate": 4.690265486725664e-05, "loss": 2.2163, "step": 20 }, { "epoch": 1.6, "learning_rate": 4.247787610619469e-05, "loss": 2.0601, "step": 30 }, { "epoch": 1.97, "eval_accuracy": 0.6477272727272727, "eval_loss": 1.8153679370880127, "eval_runtime": 14.6331, "eval_samples_per_second": 18.041, "eval_steps_per_second": 0.615, "step": 37 }, { "epoch": 2.13, "learning_rate": 3.8053097345132744e-05, "loss": 1.943, "step": 40 }, { "epoch": 2.67, "learning_rate": 3.3628318584070804e-05, "loss": 1.8068, "step": 50 }, { "epoch": 2.99, "eval_accuracy": 0.6704545454545454, "eval_loss": 1.5881296396255493, "eval_runtime": 14.8318, "eval_samples_per_second": 17.8, "eval_steps_per_second": 0.607, "step": 56 }, { "epoch": 3.2, "learning_rate": 2.9203539823008852e-05, "loss": 1.66, "step": 60 }, { "epoch": 3.73, "learning_rate": 2.4778761061946905e-05, "loss": 1.5953, "step": 70 }, { "epoch": 4.0, "eval_accuracy": 0.7159090909090909, "eval_loss": 1.4111517667770386, "eval_runtime": 14.6707, "eval_samples_per_second": 17.995, "eval_steps_per_second": 0.613, "step": 75 }, { "epoch": 4.27, "learning_rate": 2.0353982300884957e-05, "loss": 1.4929, "step": 80 }, { "epoch": 4.8, "learning_rate": 1.592920353982301e-05, "loss": 1.4304, "step": 90 }, { "epoch": 4.96, "eval_accuracy": 0.7575757575757576, "eval_loss": 1.3033273220062256, "eval_runtime": 14.9724, "eval_samples_per_second": 17.632, "eval_steps_per_second": 0.601, "step": 93 }, { "epoch": 5.33, "learning_rate": 1.1504424778761062e-05, "loss": 1.3606, "step": 100 }, { "epoch": 5.87, "learning_rate": 7.079646017699115e-06, "loss": 1.3458, "step": 110 }, { "epoch": 5.97, "eval_accuracy": 0.75, "eval_loss": 1.2400753498077393, "eval_runtime": 14.8483, "eval_samples_per_second": 17.78, "eval_steps_per_second": 0.606, "step": 112 }, { "epoch": 6.4, "learning_rate": 2.6548672566371683e-06, "loss": 1.3523, "step": 120 }, { "epoch": 6.72, "eval_accuracy": 0.7575757575757576, "eval_loss": 1.2240339517593384, "eval_runtime": 14.9425, "eval_samples_per_second": 17.668, "eval_steps_per_second": 0.602, "step": 126 }, { "epoch": 6.72, "step": 126, "total_flos": 1.2388649195611423e+18, "train_loss": 1.7003454405163962, "train_runtime": 3667.1764, "train_samples_per_second": 4.535, "train_steps_per_second": 0.034 } ], "logging_steps": 10, "max_steps": 126, "num_train_epochs": 7, "save_steps": 500, "total_flos": 1.2388649195611423e+18, "trial_name": null, "trial_params": null }