|
{ |
|
"epoch": 1.0, |
|
"eval_count/fg_chosen": 30.183246612548828, |
|
"eval_count/fg_rejected": 6.92391300201416, |
|
"eval_fg_kl": NaN, |
|
"eval_fg_logps/policy_KL": -14.863760948181152, |
|
"eval_fg_logps/policy_chosen": -6.796307563781738, |
|
"eval_fg_logps/policy_rejected": -8.7196044921875, |
|
"eval_fg_logps/reference_KL": -11.47359848022461, |
|
"eval_fg_logps/reference_chosen": -6.041894912719727, |
|
"eval_fg_logps/reference_rejected": -7.58065938949585, |
|
"eval_fg_loss": 0.7616299986839294, |
|
"eval_fg_rewards/chosen_sum": -1.7056890726089478, |
|
"eval_fg_rewards/rejected_sum": -0.9767768979072571, |
|
"eval_kl": 0.01342250220477581, |
|
"eval_logps/chosen": -337.8388459335624, |
|
"eval_logps/rejected": -409.12525347624563, |
|
"eval_loss": 0.4091770052909851, |
|
"eval_rewards/chosen": 1.55619121608319, |
|
"eval_rewards/margins": 4.119737088494111, |
|
"eval_rewards/rejected": -2.5635458724109212, |
|
"eval_runtime": 463.0413, |
|
"eval_samples": 1733, |
|
"eval_samples_per_second": 3.743, |
|
"eval_steps_per_second": 0.937, |
|
"total_flos": 0.0, |
|
"train_loss": 0.45996271347388246, |
|
"train_runtime": 8430.3956, |
|
"train_samples": 15597, |
|
"train_samples_per_second": 1.85, |
|
"train_steps_per_second": 0.116 |
|
} |