Nadav commited on
Commit
780dbc7
1 Parent(s): 7ca2b44

Training in progress, step 190000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feb517a7d240e3e73a30347675f0925c48b4c67d49c744f7c61da80c7cbb5907
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdf19b8ac1ef38105671f7e3ed466178582ff690b2e4244bda799fabd849a44a
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41f052da26766284555376f7008fbf54434a0f94e18219790a1add696aa730b3
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34cdc450f36015ade18e6cf8d347ba98eed346e9fca052902560b578799df39f
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93a801e054b84a74c1aa696aa3eb20a01eba526d71ba963d6ff62baa4215cff4
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e2a05b33f27bc07e845ac5ddf394e12d5aa9e01cff4a464ac84d19c70049e32
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c34d541836f44843b83614838d1ed970e46818ac9660cad2b368bbd3750bc9f
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09ac2f6b5dab3c2f241653e24158eb8e5933ba687a501a1fa916c1b82a746b90
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9a56b0f434ebff426094d923d9ec43ec4d1dfb4624425a04e541b139922a1e0
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2639f53746eae2335e88ed02acce8977dcbe3ece7ab8cbccde5b25d715fd5406
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.15,
5
- "global_step": 180000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2326,11 +2326,131 @@
2326
  "learning_rate": 1.2236361151893606e-05,
2327
  "loss": 0.3715,
2328
  "step": 180000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2329
  }
2330
  ],
2331
  "max_steps": 200000,
2332
  "num_train_epochs": 9223372036854775807,
2333
- "total_flos": 4.238875212565709e+21,
2334
  "trial_name": null,
2335
  "trial_params": null
2336
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2,
5
+ "global_step": 190000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2326
  "learning_rate": 1.2236361151893606e-05,
2327
  "loss": 0.3715,
2328
  "step": 180000
2329
+ },
2330
+ {
2331
+ "epoch": 0.15,
2332
+ "learning_rate": 1.2127172828691041e-05,
2333
+ "loss": 0.373,
2334
+ "step": 180500
2335
+ },
2336
+ {
2337
+ "epoch": 0.15,
2338
+ "learning_rate": 1.2020863570515961e-05,
2339
+ "loss": 0.3743,
2340
+ "step": 181000
2341
+ },
2342
+ {
2343
+ "epoch": 0.16,
2344
+ "learning_rate": 1.1917013848007706e-05,
2345
+ "loss": 0.3745,
2346
+ "step": 181500
2347
+ },
2348
+ {
2349
+ "epoch": 0.16,
2350
+ "learning_rate": 1.1815845782614282e-05,
2351
+ "loss": 0.3729,
2352
+ "step": 182000
2353
+ },
2354
+ {
2355
+ "epoch": 0.16,
2356
+ "learning_rate": 1.1717365614858533e-05,
2357
+ "loss": 0.373,
2358
+ "step": 182500
2359
+ },
2360
+ {
2361
+ "epoch": 0.17,
2362
+ "learning_rate": 1.162157941946108e-05,
2363
+ "loss": 0.3733,
2364
+ "step": 183000
2365
+ },
2366
+ {
2367
+ "epoch": 0.17,
2368
+ "learning_rate": 1.1528493104965648e-05,
2369
+ "loss": 0.3736,
2370
+ "step": 183500
2371
+ },
2372
+ {
2373
+ "epoch": 0.17,
2374
+ "learning_rate": 1.1438112413374588e-05,
2375
+ "loss": 0.3744,
2376
+ "step": 184000
2377
+ },
2378
+ {
2379
+ "epoch": 0.17,
2380
+ "learning_rate": 1.1350442919794692e-05,
2381
+ "loss": 0.3743,
2382
+ "step": 184500
2383
+ },
2384
+ {
2385
+ "epoch": 0.17,
2386
+ "learning_rate": 1.126582442516417e-05,
2387
+ "loss": 0.3733,
2388
+ "step": 185000
2389
+ },
2390
+ {
2391
+ "epoch": 0.18,
2392
+ "learning_rate": 1.1183582486034581e-05,
2393
+ "loss": 0.3739,
2394
+ "step": 185500
2395
+ },
2396
+ {
2397
+ "epoch": 0.18,
2398
+ "learning_rate": 1.1104067445521018e-05,
2399
+ "loss": 0.3735,
2400
+ "step": 186000
2401
+ },
2402
+ {
2403
+ "epoch": 0.18,
2404
+ "learning_rate": 1.102728420848572e-05,
2405
+ "loss": 0.3732,
2406
+ "step": 186500
2407
+ },
2408
+ {
2409
+ "epoch": 0.18,
2410
+ "learning_rate": 1.0953237511280449e-05,
2411
+ "loss": 0.3731,
2412
+ "step": 187000
2413
+ },
2414
+ {
2415
+ "epoch": 0.19,
2416
+ "learning_rate": 1.0882071794057046e-05,
2417
+ "loss": 0.3732,
2418
+ "step": 187500
2419
+ },
2420
+ {
2421
+ "epoch": 0.19,
2422
+ "learning_rate": 1.0813506214785774e-05,
2423
+ "loss": 0.3732,
2424
+ "step": 188000
2425
+ },
2426
+ {
2427
+ "epoch": 0.19,
2428
+ "learning_rate": 1.0747690362178142e-05,
2429
+ "loss": 0.3732,
2430
+ "step": 188500
2431
+ },
2432
+ {
2433
+ "epoch": 0.2,
2434
+ "learning_rate": 1.0684628296065977e-05,
2435
+ "loss": 0.373,
2436
+ "step": 189000
2437
+ },
2438
+ {
2439
+ "epoch": 0.2,
2440
+ "learning_rate": 1.062444176053193e-05,
2441
+ "loss": 0.3728,
2442
+ "step": 189500
2443
+ },
2444
+ {
2445
+ "epoch": 0.2,
2446
+ "learning_rate": 1.0566893240808188e-05,
2447
+ "loss": 0.3743,
2448
+ "step": 190000
2449
  }
2450
  ],
2451
  "max_steps": 200000,
2452
  "num_train_epochs": 9223372036854775807,
2453
+ "total_flos": 4.4743682799304704e+21,
2454
  "trial_name": null,
2455
  "trial_params": null
2456
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41f052da26766284555376f7008fbf54434a0f94e18219790a1add696aa730b3
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34cdc450f36015ade18e6cf8d347ba98eed346e9fca052902560b578799df39f
3
  size 449471589