cira-7b-dpo-lora-merge / all_results.json
David-Xu's picture
Training in progress, step 900
89bd182 verified
raw
history blame contribute delete
741 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -1.030522108078003,
"eval_logits/rejected": -1.1812418699264526,
"eval_logps/chosen": -795.1126098632812,
"eval_logps/rejected": -652.2422485351562,
"eval_loss": 0.6183284521102905,
"eval_rewards/accuracies": 0.6783919334411621,
"eval_rewards/chosen": 0.5534913539886475,
"eval_rewards/margins": 0.214975506067276,
"eval_rewards/rejected": 0.33851587772369385,
"eval_runtime": 181.6928,
"eval_samples": 398,
"eval_samples_per_second": 2.191,
"eval_steps_per_second": 2.191,
"train_loss": 0.06080360662445443,
"train_runtime": 395.6281,
"train_samples": 3588,
"train_samples_per_second": 9.069,
"train_steps_per_second": 2.267
}