|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 274, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18248175182481752, |
|
"grad_norm": 0.11348239332437515, |
|
"learning_rate": 9.803954791481238e-07, |
|
"logits/chosen": 0.7525291442871094, |
|
"logits/rejected": 0.862596869468689, |
|
"logps/chosen": -57.87046432495117, |
|
"logps/rejected": -91.25581359863281, |
|
"loss": 0.042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.735088586807251, |
|
"rewards/margins": 5.059007167816162, |
|
"rewards/rejected": -1.3239188194274902, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36496350364963503, |
|
"grad_norm": 2.2142693996429443, |
|
"learning_rate": 8.03112705483319e-07, |
|
"logits/chosen": 0.7221750020980835, |
|
"logits/rejected": 0.8843123912811279, |
|
"logps/chosen": -54.99198913574219, |
|
"logps/rejected": -95.34280395507812, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.793600559234619, |
|
"rewards/margins": 5.281071186065674, |
|
"rewards/rejected": -1.4874707460403442, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5474452554744526, |
|
"grad_norm": 1.848400354385376, |
|
"learning_rate": 5.063851773579869e-07, |
|
"logits/chosen": 0.7279171943664551, |
|
"logits/rejected": 0.8646959662437439, |
|
"logps/chosen": -55.23713684082031, |
|
"logps/rejected": -100.29174041748047, |
|
"loss": 0.031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.7684295177459717, |
|
"rewards/margins": 5.4548797607421875, |
|
"rewards/rejected": -1.6864502429962158, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7299270072992701, |
|
"grad_norm": 0.9244675040245056, |
|
"learning_rate": 2.071415028359026e-07, |
|
"logits/chosen": 0.7067604064941406, |
|
"logits/rejected": 0.8388283252716064, |
|
"logps/chosen": -50.11094284057617, |
|
"logps/rejected": -98.2806625366211, |
|
"loss": 0.0274, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.066249847412109, |
|
"rewards/margins": 5.748508930206299, |
|
"rewards/rejected": -1.6822593212127686, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9124087591240876, |
|
"grad_norm": 0.7083641886711121, |
|
"learning_rate": 2.3301803972534728e-08, |
|
"logits/chosen": 0.7070822715759277, |
|
"logits/rejected": 0.857603132724762, |
|
"logps/chosen": -51.985496520996094, |
|
"logps/rejected": -93.11666870117188, |
|
"loss": 0.0305, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.9429595470428467, |
|
"rewards/margins": 5.623107433319092, |
|
"rewards/rejected": -1.6801483631134033, |
|
"step": 250 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 274, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|