{ "epoch": 2.9984268484530676, "eval_log_odds_chosen": 1.220078706741333, "eval_log_odds_ratio": -1.047989010810852, "eval_logits/chosen": -2.849764585494995, "eval_logits/rejected": -2.8669533729553223, "eval_logps/chosen": -2.7812891006469727, "eval_logps/rejected": -3.8601787090301514, "eval_loss": 1.3770909309387207, "eval_nll_loss": 1.3531930446624756, "eval_rewards/accuracies": 0.6527777910232544, "eval_rewards/chosen": -0.13906444609165192, "eval_rewards/margins": 0.05394447594881058, "eval_rewards/rejected": -0.1930089294910431, "eval_runtime": 135.9849, "eval_samples": 1994, "eval_samples_per_second": 14.663, "eval_steps_per_second": 0.463, "total_flos": 0.0, "train_loss": 0.32389816019492534, "train_runtime": 62235.4926, "train_samples": 61005, "train_samples_per_second": 2.941, "train_steps_per_second": 0.046 }