Training in progress, step 400, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 83945296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cf04fb89d8ab3e3f8dbe70117278b7eb9197385d8313cabcc8300268cf28a2f
|
3 |
size 83945296
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 42546196
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a421a299ad6a3eae913817fb92227d46b3871f84045ac02a9c0ccd6f0ee6ce58
|
3 |
size 42546196
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d38fd17de7c489936706f3143d8f4e16cd16a35673d1c10a9410665266a51b56
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d523f99908891a364ebfcdaaec71fd2a09caeab25c8c6a44dbc4b9707d82c8ed
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2257,13 +2257,163 @@
|
|
2257 |
"learning_rate": 5.161290322580645e-05,
|
2258 |
"loss": 1.6664,
|
2259 |
"step": 375
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2260 |
}
|
2261 |
],
|
2262 |
"logging_steps": 1,
|
2263 |
"max_steps": 501,
|
2264 |
"num_train_epochs": 1,
|
2265 |
"save_steps": 25,
|
2266 |
-
"total_flos": 2.
|
2267 |
"trial_name": null,
|
2268 |
"trial_params": null
|
2269 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.07562151432082427,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2257 |
"learning_rate": 5.161290322580645e-05,
|
2258 |
"loss": 1.6664,
|
2259 |
"step": 375
|
2260 |
+
},
|
2261 |
+
{
|
2262 |
+
"epoch": 0.07,
|
2263 |
+
"learning_rate": 5.120967741935484e-05,
|
2264 |
+
"loss": 1.6375,
|
2265 |
+
"step": 376
|
2266 |
+
},
|
2267 |
+
{
|
2268 |
+
"epoch": 0.07,
|
2269 |
+
"learning_rate": 5.080645161290323e-05,
|
2270 |
+
"loss": 1.5933,
|
2271 |
+
"step": 377
|
2272 |
+
},
|
2273 |
+
{
|
2274 |
+
"epoch": 0.07,
|
2275 |
+
"learning_rate": 5.040322580645161e-05,
|
2276 |
+
"loss": 1.6126,
|
2277 |
+
"step": 378
|
2278 |
+
},
|
2279 |
+
{
|
2280 |
+
"epoch": 0.07,
|
2281 |
+
"learning_rate": 5e-05,
|
2282 |
+
"loss": 1.645,
|
2283 |
+
"step": 379
|
2284 |
+
},
|
2285 |
+
{
|
2286 |
+
"epoch": 0.07,
|
2287 |
+
"learning_rate": 4.959677419354839e-05,
|
2288 |
+
"loss": 1.5649,
|
2289 |
+
"step": 380
|
2290 |
+
},
|
2291 |
+
{
|
2292 |
+
"epoch": 0.07,
|
2293 |
+
"learning_rate": 4.9193548387096775e-05,
|
2294 |
+
"loss": 1.5601,
|
2295 |
+
"step": 381
|
2296 |
+
},
|
2297 |
+
{
|
2298 |
+
"epoch": 0.07,
|
2299 |
+
"learning_rate": 4.8790322580645164e-05,
|
2300 |
+
"loss": 1.6028,
|
2301 |
+
"step": 382
|
2302 |
+
},
|
2303 |
+
{
|
2304 |
+
"epoch": 0.07,
|
2305 |
+
"learning_rate": 4.8387096774193554e-05,
|
2306 |
+
"loss": 1.675,
|
2307 |
+
"step": 383
|
2308 |
+
},
|
2309 |
+
{
|
2310 |
+
"epoch": 0.07,
|
2311 |
+
"learning_rate": 4.7983870967741937e-05,
|
2312 |
+
"loss": 1.5058,
|
2313 |
+
"step": 384
|
2314 |
+
},
|
2315 |
+
{
|
2316 |
+
"epoch": 0.07,
|
2317 |
+
"learning_rate": 4.7580645161290326e-05,
|
2318 |
+
"loss": 1.7313,
|
2319 |
+
"step": 385
|
2320 |
+
},
|
2321 |
+
{
|
2322 |
+
"epoch": 0.07,
|
2323 |
+
"learning_rate": 4.7177419354838716e-05,
|
2324 |
+
"loss": 1.609,
|
2325 |
+
"step": 386
|
2326 |
+
},
|
2327 |
+
{
|
2328 |
+
"epoch": 0.07,
|
2329 |
+
"learning_rate": 4.67741935483871e-05,
|
2330 |
+
"loss": 1.5993,
|
2331 |
+
"step": 387
|
2332 |
+
},
|
2333 |
+
{
|
2334 |
+
"epoch": 0.07,
|
2335 |
+
"learning_rate": 4.637096774193548e-05,
|
2336 |
+
"loss": 1.7103,
|
2337 |
+
"step": 388
|
2338 |
+
},
|
2339 |
+
{
|
2340 |
+
"epoch": 0.07,
|
2341 |
+
"learning_rate": 4.596774193548387e-05,
|
2342 |
+
"loss": 1.6556,
|
2343 |
+
"step": 389
|
2344 |
+
},
|
2345 |
+
{
|
2346 |
+
"epoch": 0.07,
|
2347 |
+
"learning_rate": 4.556451612903226e-05,
|
2348 |
+
"loss": 1.6981,
|
2349 |
+
"step": 390
|
2350 |
+
},
|
2351 |
+
{
|
2352 |
+
"epoch": 0.07,
|
2353 |
+
"learning_rate": 4.516129032258064e-05,
|
2354 |
+
"loss": 1.6413,
|
2355 |
+
"step": 391
|
2356 |
+
},
|
2357 |
+
{
|
2358 |
+
"epoch": 0.07,
|
2359 |
+
"learning_rate": 4.475806451612903e-05,
|
2360 |
+
"loss": 1.5337,
|
2361 |
+
"step": 392
|
2362 |
+
},
|
2363 |
+
{
|
2364 |
+
"epoch": 0.07,
|
2365 |
+
"learning_rate": 4.435483870967742e-05,
|
2366 |
+
"loss": 1.7622,
|
2367 |
+
"step": 393
|
2368 |
+
},
|
2369 |
+
{
|
2370 |
+
"epoch": 0.07,
|
2371 |
+
"learning_rate": 4.395161290322581e-05,
|
2372 |
+
"loss": 1.6373,
|
2373 |
+
"step": 394
|
2374 |
+
},
|
2375 |
+
{
|
2376 |
+
"epoch": 0.07,
|
2377 |
+
"learning_rate": 4.3548387096774194e-05,
|
2378 |
+
"loss": 1.7476,
|
2379 |
+
"step": 395
|
2380 |
+
},
|
2381 |
+
{
|
2382 |
+
"epoch": 0.07,
|
2383 |
+
"learning_rate": 4.3145161290322584e-05,
|
2384 |
+
"loss": 1.7093,
|
2385 |
+
"step": 396
|
2386 |
+
},
|
2387 |
+
{
|
2388 |
+
"epoch": 0.08,
|
2389 |
+
"learning_rate": 4.2741935483870973e-05,
|
2390 |
+
"loss": 1.225,
|
2391 |
+
"step": 397
|
2392 |
+
},
|
2393 |
+
{
|
2394 |
+
"epoch": 0.08,
|
2395 |
+
"learning_rate": 4.2338709677419356e-05,
|
2396 |
+
"loss": 1.8272,
|
2397 |
+
"step": 398
|
2398 |
+
},
|
2399 |
+
{
|
2400 |
+
"epoch": 0.08,
|
2401 |
+
"learning_rate": 4.1935483870967746e-05,
|
2402 |
+
"loss": 1.6983,
|
2403 |
+
"step": 399
|
2404 |
+
},
|
2405 |
+
{
|
2406 |
+
"epoch": 0.08,
|
2407 |
+
"learning_rate": 4.1532258064516135e-05,
|
2408 |
+
"loss": 1.5777,
|
2409 |
+
"step": 400
|
2410 |
}
|
2411 |
],
|
2412 |
"logging_steps": 1,
|
2413 |
"max_steps": 501,
|
2414 |
"num_train_epochs": 1,
|
2415 |
"save_steps": 25,
|
2416 |
+
"total_flos": 2.4726679032889344e+16,
|
2417 |
"trial_name": null,
|
2418 |
"trial_params": null
|
2419 |
}
|