{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 80,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 6.25e-08,
      "logits/chosen": -2.897486925125122,
      "logits/rejected": -2.9312877655029297,
      "logps/chosen": -227.6373291015625,
      "logps/pi_response": -109.72225952148438,
      "logps/ref_response": -109.72225952148438,
      "logps/rejected": -190.0130615234375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.990486745229364e-07,
      "logits/chosen": -2.757150411605835,
      "logits/rejected": -2.7352075576782227,
      "logps/chosen": -220.00762939453125,
      "logps/pi_response": -118.12198638916016,
      "logps/ref_response": -117.70235443115234,
      "logps/rejected": -177.66741943359375,
      "loss": 0.6921,
      "rewards/accuracies": 0.4722222089767456,
      "rewards/chosen": 0.0035162584390491247,
      "rewards/margins": 0.005680772475898266,
      "rewards/rejected": -0.0021645138040184975,
      "step": 10
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.6650635094610966e-07,
      "logits/chosen": -2.7867398262023926,
      "logits/rejected": -2.737657308578491,
      "logps/chosen": -214.31796264648438,
      "logps/pi_response": -134.75608825683594,
      "logps/ref_response": -125.65106201171875,
      "logps/rejected": -227.33175659179688,
      "loss": 0.6816,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": -0.046200741082429886,
      "rewards/margins": 0.019284024834632874,
      "rewards/rejected": -0.06548477709293365,
      "step": 20
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.933941090877615e-07,
      "logits/chosen": -2.6279125213623047,
      "logits/rejected": -2.613940715789795,
      "logps/chosen": -222.9117431640625,
      "logps/pi_response": -128.2218780517578,
      "logps/ref_response": -110.140380859375,
      "logps/rejected": -215.9916229248047,
      "loss": 0.663,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.08697148412466049,
      "rewards/margins": 0.11129184067249298,
      "rewards/rejected": -0.19826331734657288,
      "step": 30
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.934120444167326e-07,
      "logits/chosen": -2.6441240310668945,
      "logits/rejected": -2.65305757522583,
      "logps/chosen": -238.2937469482422,
      "logps/pi_response": -141.17239379882812,
      "logps/ref_response": -110.28767395019531,
      "logps/rejected": -214.75643920898438,
      "loss": 0.646,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.17858782410621643,
      "rewards/margins": 0.15444216132164001,
      "rewards/rejected": -0.33302998542785645,
      "step": 40
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8529523872436977e-07,
      "logits/chosen": -2.576162099838257,
      "logits/rejected": -2.580824375152588,
      "logps/chosen": -219.0809783935547,
      "logps/pi_response": -139.94015502929688,
      "logps/ref_response": -102.89493560791016,
      "logps/rejected": -216.08120727539062,
      "loss": 0.6323,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.18389426171779633,
      "rewards/margins": 0.17146775126457214,
      "rewards/rejected": -0.3553619980812073,
      "step": 50
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.930309757836516e-08,
      "logits/chosen": -2.601134777069092,
      "logits/rejected": -2.5447657108306885,
      "logps/chosen": -250.886962890625,
      "logps/pi_response": -163.75860595703125,
      "logps/ref_response": -112.12379455566406,
      "logps/rejected": -244.28335571289062,
      "loss": 0.6301,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.3884199261665344,
      "rewards/margins": 0.17365404963493347,
      "rewards/rejected": -0.5620739459991455,
      "step": 60
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.3423053240837514e-08,
      "logits/chosen": -2.5715625286102295,
      "logits/rejected": -2.552263021469116,
      "logps/chosen": -254.3754425048828,
      "logps/pi_response": -162.65504455566406,
      "logps/ref_response": -97.93321228027344,
      "logps/rejected": -240.8832550048828,
      "loss": 0.6143,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.44511038064956665,
      "rewards/margins": 0.2850010395050049,
      "rewards/rejected": -0.7301114201545715,
      "step": 70
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0,
      "logits/chosen": -2.6229751110076904,
      "logits/rejected": -2.600135564804077,
      "logps/chosen": -251.10275268554688,
      "logps/pi_response": -162.8291778564453,
      "logps/ref_response": -112.6815185546875,
      "logps/rejected": -245.9313507080078,
      "loss": 0.6164,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.35662582516670227,
      "rewards/margins": 0.25557661056518555,
      "rewards/rejected": -0.6122024655342102,
      "step": 80
    },
    {
      "epoch": 1.0,
      "step": 80,
      "total_flos": 0.0,
      "train_loss": 0.6470192432403564,
      "train_runtime": 2118.6618,
      "train_samples_per_second": 4.809,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 10,
  "max_steps": 80,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}