|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 25, |
|
"global_step": 52, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/generated": -2.788468599319458, |
|
"logits/real": -2.8911099433898926, |
|
"logps/generated": -226.66921997070312, |
|
"logps/real": -283.6243896484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.5652173913043473e-07, |
|
"logits/generated": -2.8515138626098633, |
|
"logits/real": -2.8768396377563477, |
|
"logps/generated": -354.09619140625, |
|
"logps/real": -350.52911376953125, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.7569444179534912, |
|
"rewards/generated": -0.21337264776229858, |
|
"rewards/margins": 0.5563015341758728, |
|
"rewards/real": 0.3429288864135742, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.478260869565217e-07, |
|
"logits/generated": -2.792628049850464, |
|
"logits/real": -2.7778868675231934, |
|
"logps/generated": -351.04638671875, |
|
"logps/real": -327.13482666015625, |
|
"loss": 0.3011, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/generated": -0.7889599800109863, |
|
"rewards/margins": 1.546514868736267, |
|
"rewards/real": 0.7575550675392151, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/generated": -2.764375686645508, |
|
"eval_logits/real": -2.7640507221221924, |
|
"eval_logps/generated": -310.69891357421875, |
|
"eval_logps/real": -306.61572265625, |
|
"eval_loss": 0.24416939914226532, |
|
"eval_rewards/accuracies": 0.9791666865348816, |
|
"eval_rewards/generated": -0.9850902557373047, |
|
"eval_rewards/margins": 2.14570689201355, |
|
"eval_rewards/real": 1.1606166362762451, |
|
"eval_runtime": 27.6861, |
|
"eval_samples_per_second": 6.646, |
|
"eval_steps_per_second": 0.217, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.391304347826087e-07, |
|
"logits/generated": -2.7530007362365723, |
|
"logits/real": -2.734692096710205, |
|
"logps/generated": -310.22607421875, |
|
"logps/real": -306.02044677734375, |
|
"loss": 0.1788, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/generated": -1.791497826576233, |
|
"rewards/margins": 3.7750840187072754, |
|
"rewards/real": 1.9835857152938843, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.3043478260869563e-07, |
|
"logits/generated": -2.7655322551727295, |
|
"logits/real": -2.776773691177368, |
|
"logps/generated": -358.19403076171875, |
|
"logps/real": -309.92767333984375, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -3.1963627338409424, |
|
"rewards/margins": 6.872523307800293, |
|
"rewards/real": 3.6761608123779297, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.1739130434782606e-08, |
|
"logits/generated": -2.7564454078674316, |
|
"logits/real": -2.7757363319396973, |
|
"logps/generated": -357.3354797363281, |
|
"logps/real": -296.8515930175781, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.9351892471313477, |
|
"rewards/margins": 6.2575507164001465, |
|
"rewards/real": 3.322361707687378, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_logits/generated": -2.7557647228240967, |
|
"eval_logits/real": -2.7546520233154297, |
|
"eval_logps/generated": -309.8145446777344, |
|
"eval_logps/real": -304.967041015625, |
|
"eval_loss": 0.23592980206012726, |
|
"eval_rewards/accuracies": 0.9791666865348816, |
|
"eval_rewards/generated": -0.8966498374938965, |
|
"eval_rewards/margins": 2.2221336364746094, |
|
"eval_rewards/real": 1.3254839181900024, |
|
"eval_runtime": 27.8272, |
|
"eval_samples_per_second": 6.612, |
|
"eval_steps_per_second": 0.216, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 52, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2113667087486157, |
|
"train_runtime": 1162.1581, |
|
"train_samples_per_second": 2.836, |
|
"train_steps_per_second": 0.045 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 52, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|