|
{ |
|
"best_metric": 0.09643030911684036, |
|
"best_model_checkpoint": "./StableLM-WI-DPO/checkpoint-45", |
|
"epoch": 0.2, |
|
"eval_steps": 45, |
|
"global_step": 45, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022222222222222223, |
|
"grad_norm": 4.194765090942383, |
|
"learning_rate": 1.0869565217391305e-05, |
|
"logits/chosen": 1.6977390050888062, |
|
"logits/rejected": 1.6941993236541748, |
|
"logps/chosen": -87.9476318359375, |
|
"logps/rejected": -238.43380737304688, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.0017920683603733778, |
|
"rewards/margins": 0.06744769960641861, |
|
"rewards/rejected": -0.06565563380718231, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.044444444444444446, |
|
"grad_norm": 1.8380119800567627, |
|
"learning_rate": 2.173913043478261e-05, |
|
"logits/chosen": 1.9384359121322632, |
|
"logits/rejected": 1.8992607593536377, |
|
"logps/chosen": -104.84549713134766, |
|
"logps/rejected": -238.51876831054688, |
|
"loss": 0.6276, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0692732185125351, |
|
"rewards/margins": 0.14073553681373596, |
|
"rewards/rejected": -0.21000878512859344, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06666666666666667, |
|
"grad_norm": 2.176408290863037, |
|
"learning_rate": 3.260869565217392e-05, |
|
"logits/chosen": 1.7943300008773804, |
|
"logits/rejected": 1.785248041152954, |
|
"logps/chosen": -135.22303771972656, |
|
"logps/rejected": -216.9629364013672, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08426995575428009, |
|
"rewards/margins": 0.3454127907752991, |
|
"rewards/rejected": -0.42968273162841797, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08888888888888889, |
|
"grad_norm": 1.9848376512527466, |
|
"learning_rate": 4.347826086956522e-05, |
|
"logits/chosen": 1.613822340965271, |
|
"logits/rejected": 1.584276795387268, |
|
"logps/chosen": -82.66014099121094, |
|
"logps/rejected": -186.2796173095703, |
|
"loss": 0.4525, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11625717580318451, |
|
"rewards/margins": 0.613094687461853, |
|
"rewards/rejected": -0.7293518781661987, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 1.4978930950164795, |
|
"learning_rate": 4.9997293511641216e-05, |
|
"logits/chosen": 1.6724803447723389, |
|
"logits/rejected": 1.6774917840957642, |
|
"logps/chosen": -121.98885345458984, |
|
"logps/rejected": -262.0728759765625, |
|
"loss": 0.217, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4427986741065979, |
|
"rewards/margins": 1.5746344327926636, |
|
"rewards/rejected": -2.0174331665039062, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"grad_norm": 0.20637647807598114, |
|
"learning_rate": 4.9966852247120764e-05, |
|
"logits/chosen": 1.5546514987945557, |
|
"logits/rejected": 1.556323528289795, |
|
"logps/chosen": -102.07676696777344, |
|
"logps/rejected": -206.6514434814453, |
|
"loss": 0.2027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9394552111625671, |
|
"rewards/margins": 2.5691986083984375, |
|
"rewards/rejected": -3.5086536407470703, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15555555555555556, |
|
"grad_norm": 7.478163719177246, |
|
"learning_rate": 4.9902627935540205e-05, |
|
"logits/chosen": 1.541826844215393, |
|
"logits/rejected": 1.5505328178405762, |
|
"logps/chosen": -151.88739013671875, |
|
"logps/rejected": -329.95953369140625, |
|
"loss": 0.0533, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.0381178855895996, |
|
"rewards/margins": 4.932946681976318, |
|
"rewards/rejected": -7.971064567565918, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 7.0293235694407485e-06, |
|
"learning_rate": 4.980470747984265e-05, |
|
"logits/chosen": 1.455780029296875, |
|
"logits/rejected": 1.4680273532867432, |
|
"logps/chosen": -117.02046203613281, |
|
"logps/rejected": -331.1054992675781, |
|
"loss": 0.0518, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2030303478240967, |
|
"rewards/margins": 7.786482334136963, |
|
"rewards/rejected": -9.989511489868164, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.7971689105033875, |
|
"learning_rate": 4.9673223377762715e-05, |
|
"logits/chosen": 1.3849093914031982, |
|
"logits/rejected": 1.408332109451294, |
|
"logps/chosen": -101.07164001464844, |
|
"logps/rejected": -294.77874755859375, |
|
"loss": 0.0546, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.74613356590271, |
|
"rewards/margins": 8.281633377075195, |
|
"rewards/rejected": -10.027766227722168, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_logits/chosen": 1.615716576576233, |
|
"eval_logits/rejected": 1.645801305770874, |
|
"eval_logps/chosen": -124.92742156982422, |
|
"eval_logps/rejected": -353.66717529296875, |
|
"eval_loss": 0.09643030911684036, |
|
"eval_rewards/accuracies": 0.9642857313156128, |
|
"eval_rewards/chosen": -2.8166019916534424, |
|
"eval_rewards/margins": 10.488396644592285, |
|
"eval_rewards/rejected": -13.305000305175781, |
|
"eval_runtime": 26.191, |
|
"eval_samples_per_second": 1.909, |
|
"eval_steps_per_second": 0.267, |
|
"step": 45 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 45, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|