{ "best_metric": 0.4772830307483673, "best_model_checkpoint": "Phi-3.5-mini-instruct_text_to_sql\\checkpoint-500", "epoch": 0.7451564828614009, "eval_steps": 50, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07451564828614009, "grad_norm": 0.05202275142073631, "learning_rate": 0.0002, "loss": 0.8765, "step": 50 }, { "epoch": 0.07451564828614009, "eval_loss": 0.5696993470191956, "eval_runtime": 27.4333, "eval_samples_per_second": 4.593, "eval_steps_per_second": 0.583, "step": 50 }, { "epoch": 0.14903129657228018, "grad_norm": 0.038158852607011795, "learning_rate": 0.0001996800092633612, "loss": 0.5364, "step": 100 }, { "epoch": 0.14903129657228018, "eval_loss": 0.5185158848762512, "eval_runtime": 26.2689, "eval_samples_per_second": 4.797, "eval_steps_per_second": 0.609, "step": 100 }, { "epoch": 0.22354694485842028, "grad_norm": 0.12953701615333557, "learning_rate": 0.00019872208493487546, "loss": 0.5099, "step": 150 }, { "epoch": 0.22354694485842028, "eval_loss": 0.5075405240058899, "eval_runtime": 26.9551, "eval_samples_per_second": 4.674, "eval_steps_per_second": 0.594, "step": 150 }, { "epoch": 0.29806259314456035, "grad_norm": 0.036496683955192566, "learning_rate": 0.0001971323575527731, "loss": 0.5012, "step": 200 }, { "epoch": 0.29806259314456035, "eval_loss": 0.49924516677856445, "eval_runtime": 26.2588, "eval_samples_per_second": 4.798, "eval_steps_per_second": 0.609, "step": 200 }, { "epoch": 0.37257824143070045, "grad_norm": 0.047366924583911896, "learning_rate": 0.0001949210010777752, "loss": 0.4915, "step": 250 }, { "epoch": 0.37257824143070045, "eval_loss": 0.49416425824165344, "eval_runtime": 26.2568, "eval_samples_per_second": 4.799, "eval_steps_per_second": 0.609, "step": 250 }, { "epoch": 0.44709388971684055, "grad_norm": 0.035322971642017365, "learning_rate": 0.00019210216778162994, "loss": 0.4908, "step": 300 }, { "epoch": 0.44709388971684055, "eval_loss": 0.48849040269851685, "eval_runtime": 26.2674, "eval_samples_per_second": 4.797, "eval_steps_per_second": 0.609, "step": 300 }, { "epoch": 0.5216095380029806, "grad_norm": 0.04145614430308342, "learning_rate": 0.0001886938976751951, "loss": 0.486, "step": 350 }, { "epoch": 0.5216095380029806, "eval_loss": 0.48535990715026855, "eval_runtime": 26.2577, "eval_samples_per_second": 4.799, "eval_steps_per_second": 0.609, "step": 350 }, { "epoch": 0.5961251862891207, "grad_norm": 0.04975809529423714, "learning_rate": 0.00018471800305571129, "loss": 0.4867, "step": 400 }, { "epoch": 0.5961251862891207, "eval_loss": 0.4834245443344116, "eval_runtime": 26.1969, "eval_samples_per_second": 4.81, "eval_steps_per_second": 0.611, "step": 400 }, { "epoch": 0.6706408345752608, "grad_norm": 0.05116498842835426, "learning_rate": 0.00018019992891214008, "loss": 0.4792, "step": 450 }, { "epoch": 0.6706408345752608, "eval_loss": 0.4825398325920105, "eval_runtime": 26.1757, "eval_samples_per_second": 4.814, "eval_steps_per_second": 0.611, "step": 450 }, { "epoch": 0.7451564828614009, "grad_norm": 0.039921361953020096, "learning_rate": 0.00017516859008194938, "loss": 0.4781, "step": 500 }, { "epoch": 0.7451564828614009, "eval_loss": 0.4772830307483673, "eval_runtime": 26.1948, "eval_samples_per_second": 4.81, "eval_steps_per_second": 0.611, "step": 500 } ], "logging_steps": 50, "max_steps": 2013, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.90386203000832e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }