{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.03240440699935191,
  "eval_steps": 500,
  "global_step": 25,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0012961762799740765,
      "grad_norm": 6.410052299499512,
      "learning_rate": 0.0001,
      "loss": 1.6378,
      "step": 1
    },
    {
      "epoch": 0.002592352559948153,
      "grad_norm": 5.500060081481934,
      "learning_rate": 0.0002,
      "loss": 1.4793,
      "step": 2
    },
    {
      "epoch": 0.0038885288399222295,
      "grad_norm": 4.095714569091797,
      "learning_rate": 0.00019978589232386035,
      "loss": 0.988,
      "step": 3
    },
    {
      "epoch": 0.005184705119896306,
      "grad_norm": 2.0647408962249756,
      "learning_rate": 0.00019914448613738106,
      "loss": 0.4445,
      "step": 4
    },
    {
      "epoch": 0.0064808813998703824,
      "grad_norm": 1.4352781772613525,
      "learning_rate": 0.00019807852804032305,
      "loss": 0.2778,
      "step": 5
    },
    {
      "epoch": 0.007777057679844459,
      "grad_norm": 2.030522108078003,
      "learning_rate": 0.00019659258262890683,
      "loss": 0.1893,
      "step": 6
    },
    {
      "epoch": 0.009073233959818535,
      "grad_norm": 1.0315437316894531,
      "learning_rate": 0.0001946930129495106,
      "loss": 0.0831,
      "step": 7
    },
    {
      "epoch": 0.010369410239792612,
      "grad_norm": 1.5501576662063599,
      "learning_rate": 0.0001923879532511287,
      "loss": 0.0533,
      "step": 8
    },
    {
      "epoch": 0.011665586519766688,
      "grad_norm": 0.883230984210968,
      "learning_rate": 0.00018968727415326884,
      "loss": 0.0376,
      "step": 9
    },
    {
      "epoch": 0.012961762799740765,
      "grad_norm": 1.0092142820358276,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.0378,
      "step": 10
    },
    {
      "epoch": 0.014257939079714841,
      "grad_norm": 1.1502794027328491,
      "learning_rate": 0.00018314696123025454,
      "loss": 0.0822,
      "step": 11
    },
    {
      "epoch": 0.015554115359688918,
      "grad_norm": 0.4328291714191437,
      "learning_rate": 0.00017933533402912354,
      "loss": 0.0046,
      "step": 12
    },
    {
      "epoch": 0.016850291639662993,
      "grad_norm": 0.6591416001319885,
      "learning_rate": 0.00017518398074789775,
      "loss": 0.0069,
      "step": 13
    },
    {
      "epoch": 0.01814646791963707,
      "grad_norm": 0.8374271392822266,
      "learning_rate": 0.00017071067811865476,
      "loss": 0.0054,
      "step": 14
    },
    {
      "epoch": 0.019442644199611146,
      "grad_norm": 0.335828959941864,
      "learning_rate": 0.00016593458151000688,
      "loss": 0.0071,
      "step": 15
    },
    {
      "epoch": 0.020738820479585224,
      "grad_norm": 0.4225008189678192,
      "learning_rate": 0.00016087614290087208,
      "loss": 0.0046,
      "step": 16
    },
    {
      "epoch": 0.0220349967595593,
      "grad_norm": 0.8661593198776245,
      "learning_rate": 0.00015555702330196023,
      "loss": 0.0469,
      "step": 17
    },
    {
      "epoch": 0.023331173039533377,
      "grad_norm": 0.4766126871109009,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.0093,
      "step": 18
    },
    {
      "epoch": 0.02462734931950745,
      "grad_norm": 0.6881564855575562,
      "learning_rate": 0.00014422886902190014,
      "loss": 0.0135,
      "step": 19
    },
    {
      "epoch": 0.02592352559948153,
      "grad_norm": 0.6868959665298462,
      "learning_rate": 0.000138268343236509,
      "loss": 0.0049,
      "step": 20
    },
    {
      "epoch": 0.027219701879455604,
      "grad_norm": 0.38503697514533997,
      "learning_rate": 0.00013214394653031616,
      "loss": 0.0097,
      "step": 21
    },
    {
      "epoch": 0.028515878159429683,
      "grad_norm": 1.0754024982452393,
      "learning_rate": 0.00012588190451025207,
      "loss": 0.0203,
      "step": 22
    },
    {
      "epoch": 0.029812054439403757,
      "grad_norm": 0.6338533163070679,
      "learning_rate": 0.00011950903220161285,
      "loss": 0.0152,
      "step": 23
    },
    {
      "epoch": 0.031108230719377836,
      "grad_norm": 0.6074821352958679,
      "learning_rate": 0.00011305261922200519,
      "loss": 0.0099,
      "step": 24
    },
    {
      "epoch": 0.03240440699935191,
      "grad_norm": 0.687092125415802,
      "learning_rate": 0.00010654031292301432,
      "loss": 0.016,
      "step": 25
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.83447180771328e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}