RoyJoy committed (verified)
Commit 503cd8f
1 parent: 38d220a

Training in progress, step 100, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:a3b7af4d57506f325c1d22f3c8a40e77d790324c23ed41c8ed9682ce8f8e167c
+ oid sha256:a499d923fc8b0362c81987e3172fbcd093b0effe5bdc768e70faf0811be15f5e
 size 50358592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:f7ad1c1da17d7e2a91d1e54810eac5e8be3e343789d4516ee50cf881367fd943
+ oid sha256:fc86c2c3b62f1125d16831eb4312c9bd1507cc6ded424140a435b9659edc365e
 size 100824826
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:dd61fceb2c896617b2d1e57a02a4d9fc6e1be354a1761a795a1dc967c6f384c9
+ oid sha256:3603ae0f461c0309918a469bb88361702e8fe7031d469296ef29915e59cd15f4
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:b5e03c4b3bca09b3ba06778c4520d28f0580ee20f08105c5b342aef9f6fd3b5e
+ oid sha256:95019e029f304009516750a4bbe05ba42bcbfeab090e08f3a47061c7683127e4
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:df9e6a7b452ba404eb513c746d0e7064effeffd8c6ac44bd27874cc62f0cc04d
+ oid sha256:c93ea150dedd152785349606801d73a50b174319e11bd7bc4c752090cefb4196
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:e7c6f41a02863890f07c335f550c3f39e132adeb06392e15ddbb77d7e38897d5
+ oid sha256:f5e3858bb07bc0e1a65c01d5084480d194b61020c06fc22f6fa0708b202f0e34
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:ef130dbe5559202aa005996d3d525a5e65bfb573d0d5648d2112c86f14c82e15
+ oid sha256:ba08b4a1855b48cf82b864725b4104527747dc0aacfb5a9d0a509cb25e565a06
 size 1064
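
The pointer updates above all follow the Git LFS spec v1 layout: a version line, an oid sha256 digest of the stored object, and its size in bytes. As a minimal sketch (not part of this commit), a locally downloaded file can be checked against the new oid with Python's hashlib; the path and expected values below are copied from the adapter_model.safetensors pointer and are purely illustrative:

import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its hex-encoded SHA-256 digest."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Values copied from the pointer diff above; the local path is assumed.
expected_oid = "a499d923fc8b0362c81987e3172fbcd093b0effe5bdc768e70faf0811be15f5e"
expected_size = 50358592
local_file = Path("last-checkpoint/adapter_model.safetensors")

assert local_file.stat().st_size == expected_size, "size mismatch"
assert sha256_of(local_file) == expected_oid, "sha256 mismatch"
print("local file matches the LFS pointer")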
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
- "best_metric": 0.7224195599555969,
- "best_model_checkpoint": "miner_id_24/checkpoint-50",
- "epoch": 1.2950687146321747,
+ "best_metric": 0.6968957781791687,
+ "best_model_checkpoint": "miner_id_24/checkpoint-100",
+ "epoch": 2.5901374292643493,
 "eval_steps": 25,
- "global_step": 50,
+ "global_step": 100,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -381,6 +381,372 @@
 "eval_samples_per_second": 79.667,
 "eval_steps_per_second": 3.187,
 "step": 50
384
+ },
385
+ {
386
+ "epoch": 1.3209377526273243,
387
+ "grad_norm": 15.50403118133545,
388
+ "learning_rate": 0.0002022302453151598,
389
+ "loss": 22.3089,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 1.3468067906224737,
394
+ "grad_norm": 24.226417541503906,
395
+ "learning_rate": 0.0001980404780794256,
396
+ "loss": 23.1566,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 1.3726758286176233,
401
+ "grad_norm": 20.176647186279297,
402
+ "learning_rate": 0.00019380851559554636,
403
+ "loss": 22.929,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 1.3985448666127729,
408
+ "grad_norm": 15.459396362304688,
409
+ "learning_rate": 0.00018953807491036011,
410
+ "loss": 22.6978,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 1.4244139046079223,
415
+ "grad_norm": 14.14704418182373,
416
+ "learning_rate": 0.00018523290686714756,
417
+ "loss": 22.7141,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 1.450282942603072,
422
+ "grad_norm": 7.990035533905029,
423
+ "learning_rate": 0.00018089679281116472,
424
+ "loss": 23.1633,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 1.4761519805982215,
429
+ "grad_norm": 3.211017608642578,
430
+ "learning_rate": 0.00017653354126838592,
431
+ "loss": 22.3353,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 1.502021018593371,
436
+ "grad_norm": 18.740083694458008,
437
+ "learning_rate": 0.00017214698460037218,
438
+ "loss": 23.5309,
439
+ "step": 58
440
+ },
441
+ {
442
+ "epoch": 1.5278900565885207,
443
+ "grad_norm": 16.11014175415039,
444
+ "learning_rate": 0.00016774097563820485,
445
+ "loss": 22.8019,
446
+ "step": 59
447
+ },
448
+ {
449
+ "epoch": 1.5537590945836701,
450
+ "grad_norm": 26.232606887817383,
451
+ "learning_rate": 0.00016331938429844022,
452
+ "loss": 23.5109,
453
+ "step": 60
454
+ },
455
+ {
456
+ "epoch": 1.5796281325788197,
457
+ "grad_norm": 16.256412506103516,
458
+ "learning_rate": 0.00015888609418405713,
459
+ "loss": 22.8009,
460
+ "step": 61
461
+ },
462
+ {
463
+ "epoch": 1.6054971705739693,
464
+ "grad_norm": 11.629958152770996,
465
+ "learning_rate": 0.00015444499917338395,
466
+ "loss": 22.3203,
467
+ "step": 62
468
+ },
469
+ {
470
+ "epoch": 1.6313662085691187,
471
+ "grad_norm": 11.147138595581055,
472
+ "learning_rate": 0.00015,
473
+ "loss": 22.4757,
474
+ "step": 63
475
+ },
476
+ {
477
+ "epoch": 1.6572352465642683,
478
+ "grad_norm": 5.99025297164917,
479
+ "learning_rate": 0.00014555500082661602,
480
+ "loss": 22.2444,
481
+ "step": 64
482
+ },
483
+ {
484
+ "epoch": 1.683104284559418,
485
+ "grad_norm": 11.468669891357422,
486
+ "learning_rate": 0.00014111390581594284,
487
+ "loss": 22.2462,
488
+ "step": 65
489
+ },
490
+ {
491
+ "epoch": 1.7089733225545674,
492
+ "grad_norm": 14.979022979736328,
493
+ "learning_rate": 0.00013668061570155978,
494
+ "loss": 21.7589,
495
+ "step": 66
496
+ },
497
+ {
498
+ "epoch": 1.7348423605497172,
499
+ "grad_norm": 12.94080924987793,
500
+ "learning_rate": 0.00013225902436179513,
501
+ "loss": 22.4269,
502
+ "step": 67
503
+ },
504
+ {
505
+ "epoch": 1.7607113985448666,
506
+ "grad_norm": 11.411182403564453,
507
+ "learning_rate": 0.00012785301539962782,
508
+ "loss": 21.7354,
509
+ "step": 68
510
+ },
511
+ {
512
+ "epoch": 1.7865804365400162,
513
+ "grad_norm": 27.090801239013672,
514
+ "learning_rate": 0.00012346645873161408,
515
+ "loss": 23.5318,
516
+ "step": 69
517
+ },
518
+ {
519
+ "epoch": 1.8124494745351658,
520
+ "grad_norm": 17.46219825744629,
521
+ "learning_rate": 0.00011910320718883525,
522
+ "loss": 22.8003,
523
+ "step": 70
524
+ },
525
+ {
526
+ "epoch": 1.8383185125303152,
527
+ "grad_norm": 17.276792526245117,
528
+ "learning_rate": 0.00011476709313285244,
529
+ "loss": 22.7198,
530
+ "step": 71
531
+ },
532
+ {
533
+ "epoch": 1.8641875505254648,
534
+ "grad_norm": 13.101729393005371,
535
+ "learning_rate": 0.00011046192508963989,
536
+ "loss": 22.2413,
537
+ "step": 72
538
+ },
539
+ {
540
+ "epoch": 1.8900565885206144,
541
+ "grad_norm": 10.330924987792969,
542
+ "learning_rate": 0.00010619148440445364,
543
+ "loss": 21.9412,
544
+ "step": 73
545
+ },
546
+ {
547
+ "epoch": 1.9159256265157638,
548
+ "grad_norm": 16.028894424438477,
549
+ "learning_rate": 0.00010195952192057438,
550
+ "loss": 22.5098,
551
+ "step": 74
552
+ },
553
+ {
554
+ "epoch": 1.9417946645109136,
555
+ "grad_norm": 8.1192626953125,
556
+ "learning_rate": 9.776975468484019e-05,
557
+ "loss": 22.1182,
558
+ "step": 75
559
+ },
560
+ {
561
+ "epoch": 1.9417946645109136,
562
+ "eval_loss": 0.7175214886665344,
563
+ "eval_runtime": 0.6276,
564
+ "eval_samples_per_second": 79.669,
565
+ "eval_steps_per_second": 3.187,
566
+ "step": 75
567
+ },
568
+ {
569
+ "epoch": 1.967663702506063,
570
+ "grad_norm": 11.423409461975098,
571
+ "learning_rate": 9.36258626828643e-05,
572
+ "loss": 22.3389,
573
+ "step": 76
574
+ },
575
+ {
576
+ "epoch": 1.9935327405012127,
577
+ "grad_norm": 12.934334754943848,
578
+ "learning_rate": 8.953148560680418e-05,
579
+ "loss": 22.7501,
580
+ "step": 77
581
+ },
582
+ {
583
+ "epoch": 2.021018593371059,
584
+ "grad_norm": 22.10219383239746,
585
+ "learning_rate": 8.549021965852197e-05,
586
+ "loss": 23.1807,
587
+ "step": 78
588
+ },
589
+ {
590
+ "epoch": 2.0468876313662085,
591
+ "grad_norm": 15.90378475189209,
592
+ "learning_rate": 8.150561439094303e-05,
593
+ "loss": 22.5372,
594
+ "step": 79
595
+ },
596
+ {
597
+ "epoch": 2.072756669361358,
598
+ "grad_norm": 10.656487464904785,
599
+ "learning_rate": 7.758116959038828e-05,
600
+ "loss": 22.1827,
601
+ "step": 80
602
+ },
603
+ {
604
+ "epoch": 2.0986257073565078,
605
+ "grad_norm": 22.766876220703125,
606
+ "learning_rate": 7.372033220261696e-05,
607
+ "loss": 22.6163,
608
+ "step": 81
609
+ },
610
+ {
611
+ "epoch": 2.124494745351657,
612
+ "grad_norm": 11.259724617004395,
613
+ "learning_rate": 6.992649330528145e-05,
614
+ "loss": 22.0147,
615
+ "step": 82
616
+ },
617
+ {
618
+ "epoch": 2.1503637833468066,
619
+ "grad_norm": 12.66515827178955,
620
+ "learning_rate": 6.620298512945214e-05,
621
+ "loss": 21.9512,
622
+ "step": 83
623
+ },
624
+ {
625
+ "epoch": 2.1762328213419564,
626
+ "grad_norm": 5.229973793029785,
627
+ "learning_rate": 6.255307813282921e-05,
628
+ "loss": 22.17,
629
+ "step": 84
630
+ },
631
+ {
632
+ "epoch": 2.202101859337106,
633
+ "grad_norm": 6.952908992767334,
634
+ "learning_rate": 5.897997812721103e-05,
635
+ "loss": 22.418,
636
+ "step": 85
637
+ },
638
+ {
639
+ "epoch": 2.2279708973322556,
640
+ "grad_norm": 9.438949584960938,
641
+ "learning_rate": 5.5486823462743344e-05,
642
+ "loss": 22.334,
643
+ "step": 86
644
+ },
645
+ {
646
+ "epoch": 2.253839935327405,
647
+ "grad_norm": 13.546004295349121,
648
+ "learning_rate": 5.2076682271421774e-05,
649
+ "loss": 22.3634,
650
+ "step": 87
651
+ },
652
+ {
653
+ "epoch": 2.2797089733225544,
654
+ "grad_norm": 14.096308708190918,
655
+ "learning_rate": 4.8752549772268444e-05,
656
+ "loss": 22.6631,
657
+ "step": 88
658
+ },
659
+ {
660
+ "epoch": 2.3055780113177042,
661
+ "grad_norm": 18.847871780395508,
662
+ "learning_rate": 4.551734564055049e-05,
663
+ "loss": 22.0801,
664
+ "step": 89
665
+ },
666
+ {
667
+ "epoch": 2.3314470493128536,
668
+ "grad_norm": 7.903066635131836,
669
+ "learning_rate": 4.2373911443350286e-05,
670
+ "loss": 22.043,
671
+ "step": 90
672
+ },
673
+ {
674
+ "epoch": 2.3573160873080035,
675
+ "grad_norm": 16.976978302001953,
676
+ "learning_rate": 3.932500814374089e-05,
677
+ "loss": 22.2002,
678
+ "step": 91
679
+ },
680
+ {
681
+ "epoch": 2.383185125303153,
682
+ "grad_norm": 11.1248140335083,
683
+ "learning_rate": 3.637331367575698e-05,
684
+ "loss": 22.1329,
685
+ "step": 92
686
+ },
687
+ {
688
+ "epoch": 2.4090541632983022,
689
+ "grad_norm": 5.761756896972656,
690
+ "learning_rate": 3.352142059229365e-05,
691
+ "loss": 22.0856,
692
+ "step": 93
693
+ },
694
+ {
695
+ "epoch": 2.434923201293452,
696
+ "grad_norm": 12.847921371459961,
697
+ "learning_rate": 3.077183378799699e-05,
698
+ "loss": 22.0646,
699
+ "step": 94
700
+ },
701
+ {
702
+ "epoch": 2.4607922392886015,
703
+ "grad_norm": 9.289769172668457,
704
+ "learning_rate": 2.81269682991478e-05,
705
+ "loss": 21.8848,
706
+ "step": 95
707
+ },
708
+ {
709
+ "epoch": 2.486661277283751,
710
+ "grad_norm": 13.644316673278809,
711
+ "learning_rate": 2.5589147182469732e-05,
712
+ "loss": 23.1436,
713
+ "step": 96
714
+ },
715
+ {
716
+ "epoch": 2.5125303152789007,
717
+ "grad_norm": 16.434682846069336,
718
+ "learning_rate": 2.316059947472607e-05,
719
+ "loss": 22.212,
720
+ "step": 97
721
+ },
722
+ {
723
+ "epoch": 2.53839935327405,
724
+ "grad_norm": 6.969300270080566,
725
+ "learning_rate": 2.0843458234896666e-05,
726
+ "loss": 22.2793,
727
+ "step": 98
728
+ },
729
+ {
730
+ "epoch": 2.5642683912691995,
731
+ "grad_norm": 21.42749786376953,
732
+ "learning_rate": 1.8639758670654486e-05,
733
+ "loss": 22.3692,
734
+ "step": 99
735
+ },
736
+ {
737
+ "epoch": 2.5901374292643493,
738
+ "grad_norm": 13.674956321716309,
739
+ "learning_rate": 1.6551436350787918e-05,
740
+ "loss": 22.2481,
741
+ "step": 100
742
+ },
743
+ {
744
+ "epoch": 2.5901374292643493,
745
+ "eval_loss": 0.6968957781791687,
746
+ "eval_runtime": 0.6272,
747
+ "eval_samples_per_second": 79.714,
748
+ "eval_steps_per_second": 3.189,
749
+ "step": 100
 }
 ],
 "logging_steps": 1,
@@ -409,7 +775,7 @@
 "attributes": {}
 }
 },
- "total_flos": 5.10201547456512e+16,
+ "total_flos": 1.020403094913024e+17,
 "train_batch_size": 1,
 "trial_name": null,
 "trial_params": null