dpo_qlora_hh / eval_trajectory.jsonl
abaheti95's picture
Upload 8 files
e25af87
raw
history blame contribute delete
No virus
542 Bytes
{"step": 0, "avg_reward": 0.4714483038156426}
{"step": 7200, "avg_reward": 0.5663666518924791}
{"step": 14400, "avg_reward": 0.569705726823096}
{"step": 21600, "avg_reward": 0.5740199956743579}
{"step": 28800, "avg_reward": 0.573887342033309}
{"step": 36000, "avg_reward": 0.6045272605834595}
{"step": 43200, "avg_reward": 0.5405997881724034}
{"step": 50400, "avg_reward": 0.502069472695335}
{"step": 57600, "avg_reward": 0.6652014100092596}
{"step": 64800, "avg_reward": 0.5776967554685792}
{"step": 72000, "avg_reward": 0.6279109552519263}