Training in progress, step 500, checkpoint

Browse files

Files changed (10) hide show

last-checkpoint/config.json +1 -1
last-checkpoint/model.safetensors +2 -2
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/tokenizer.json +0 -0
last-checkpoint/tokenizer_config.json +4 -4
last-checkpoint/trainer_state.json +5 -649
last-checkpoint/training_args.bin +1 -1
last-checkpoint/vocab.txt +0 -0

last-checkpoint/config.json CHANGED Viewed

@@ -27,5 +27,5 @@
   "transformers_version": "4.41.2",
   "type_vocab_size": 2,
   "use_cache": true,
-  "vocab_size": 105879
 }

   "transformers_version": "4.41.2",
   "type_vocab_size": 2,
   "use_cache": true,
+  "vocab_size": 176008
 }

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1227fc9d063afe8a916525265451a011ad282688af85f24698698f65acebf31
-size 669879044

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac2d545e95d53dbd847e3dfb313fc59d64125bf026261ddc71edbe677f2edb81
+size 885595848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d70e5e168a8b362f4912c842ebf94d0b361157d833012e71f64d87a5a60fcb56
-size 1339879610

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbc5e79a5f32a4ee4d255a20aeadc3cfbad6eb4cc0d92611370c0dfe67fc9dd5
+size 1771313210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:120972ded6aee57bd52e0678e4bc85f1cc274f4a37aa9bd80c07bf52890268c4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd920835b7e1d3f942ead294d5d63bc784d9e6ec27e5553642cbea5f2c9e77c4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37a3a9153cdef7e42b212fa871ec5e0fa2010d69e2cd26dedc7993b39ac3e58
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a5252eff43b3c3312cd40cbc7cbc932038114747237577193618542ca149ff6e
 size 1064

last-checkpoint/tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

last-checkpoint/tokenizer_config.json CHANGED Viewed

@@ -8,7 +8,7 @@
       "single_word": false,
       "special": true
     },
-    "100": {
       "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
@@ -16,7 +16,7 @@
       "single_word": false,
       "special": true
     },
-    "101": {
       "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
@@ -24,7 +24,7 @@
       "single_word": false,
       "special": true
     },
-    "102": {
       "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
@@ -32,7 +32,7 @@
       "single_word": false,
       "special": true
     },
-    "103": {
       "content": "[MASK]",
       "lstrip": false,
       "normalized": false,

       "single_word": false,
       "special": true
     },
+    "1": {
       "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "2": {
       "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "3": {
       "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "4": {
       "content": "[MASK]",
       "lstrip": false,
       "normalized": false,

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,663 +1,19 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.352412529447692,
   "eval_steps": 500,
-  "global_step": 46500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.014542070209114969,
-      "grad_norm": 14.519265174865723,
       "learning_rate": 1.9903052865272566e-05,
-      "loss": 0.9249,
       "step": 500
-    },
-    {
-      "epoch": 0.029084140418229938,
-      "grad_norm": 16.82585906982422,
-      "learning_rate": 1.9806105730545135e-05,
-      "loss": 0.7196,
-      "step": 1000
-    },
-    {
-      "epoch": 0.04362621062734491,
-      "grad_norm": 7.649247646331787,
-      "learning_rate": 1.9709158595817703e-05,
-      "loss": 0.6657,
-      "step": 1500
-    },
-    {
-      "epoch": 0.058168280836459876,
-      "grad_norm": 15.57618522644043,
-      "learning_rate": 1.961221146109027e-05,
-      "loss": 0.6333,
-      "step": 2000
-    },
-    {
-      "epoch": 0.07271035104557484,
-      "grad_norm": 8.144426345825195,
-      "learning_rate": 1.9515264326362836e-05,
-      "loss": 0.6274,
-      "step": 2500
-    },
-    {
-      "epoch": 0.08725242125468982,
-      "grad_norm": 5.530069828033447,
-      "learning_rate": 1.9418317191635404e-05,
-      "loss": 0.5996,
-      "step": 3000
-    },
-    {
-      "epoch": 0.10179449146380479,
-      "grad_norm": 17.099382400512695,
-      "learning_rate": 1.932137005690797e-05,
-      "loss": 0.5851,
-      "step": 3500
-    },
-    {
-      "epoch": 0.11633656167291975,
-      "grad_norm": 2.200979709625244,
-      "learning_rate": 1.9224422922180537e-05,
-      "loss": 0.6288,
-      "step": 4000
-    },
-    {
-      "epoch": 0.13087863188203472,
-      "grad_norm": 13.900425910949707,
-      "learning_rate": 1.91274757874531e-05,
-      "loss": 0.6023,
-      "step": 4500
-    },
-    {
-      "epoch": 0.14542070209114968,
-      "grad_norm": 2.3364081382751465,
-      "learning_rate": 1.903052865272567e-05,
-      "loss": 0.5997,
-      "step": 5000
-    },
-    {
-      "epoch": 0.15996277230026468,
-      "grad_norm": 60.922637939453125,
-      "learning_rate": 1.8933581517998238e-05,
-      "loss": 0.5695,
-      "step": 5500
-    },
-    {
-      "epoch": 0.17450484250937964,
-      "grad_norm": 5.009929656982422,
-      "learning_rate": 1.8836634383270806e-05,
-      "loss": 0.5434,
-      "step": 6000
-    },
-    {
-      "epoch": 0.1890469127184946,
-      "grad_norm": 10.972416877746582,
-      "learning_rate": 1.873968724854337e-05,
-      "loss": 0.5832,
-      "step": 6500
-    },
-    {
-      "epoch": 0.20358898292760957,
-      "grad_norm": 3.8107187747955322,
-      "learning_rate": 1.864274011381594e-05,
-      "loss": 0.5418,
-      "step": 7000
-    },
-    {
-      "epoch": 0.21813105313672454,
-      "grad_norm": 1.1021103858947754,
-      "learning_rate": 1.8545792979088504e-05,
-      "loss": 0.5869,
-      "step": 7500
-    },
-    {
-      "epoch": 0.2326731233458395,
-      "grad_norm": 4.626884460449219,
-      "learning_rate": 1.844884584436107e-05,
-      "loss": 0.5532,
-      "step": 8000
-    },
-    {
-      "epoch": 0.24721519355495447,
-      "grad_norm": 2.25512957572937,
-      "learning_rate": 1.8351898709633636e-05,
-      "loss": 0.5484,
-      "step": 8500
-    },
-    {
-      "epoch": 0.26175726376406944,
-      "grad_norm": 10.878036499023438,
-      "learning_rate": 1.8254951574906205e-05,
-      "loss": 0.517,
-      "step": 9000
-    },
-    {
-      "epoch": 0.2762993339731844,
-      "grad_norm": 0.1631862372159958,
-      "learning_rate": 1.8158004440178773e-05,
-      "loss": 0.5495,
-      "step": 9500
-    },
-    {
-      "epoch": 0.29084140418229937,
-      "grad_norm": 4.393772125244141,
-      "learning_rate": 1.806105730545134e-05,
-      "loss": 0.5365,
-      "step": 10000
-    },
-    {
-      "epoch": 0.3053834743914144,
-      "grad_norm": 3.9716999530792236,
-      "learning_rate": 1.7964110170723906e-05,
-      "loss": 0.5463,
-      "step": 10500
-    },
-    {
-      "epoch": 0.31992554460052935,
-      "grad_norm": 16.515634536743164,
-      "learning_rate": 1.7867163035996474e-05,
-      "loss": 0.5367,
-      "step": 11000
-    },
-    {
-      "epoch": 0.3344676148096443,
-      "grad_norm": 8.901307106018066,
-      "learning_rate": 1.777021590126904e-05,
-      "loss": 0.5279,
-      "step": 11500
-    },
-    {
-      "epoch": 0.3490096850187593,
-      "grad_norm": 5.7714524269104,
-      "learning_rate": 1.7673268766541607e-05,
-      "loss": 0.5694,
-      "step": 12000
-    },
-    {
-      "epoch": 0.36355175522787425,
-      "grad_norm": 14.238178253173828,
-      "learning_rate": 1.757632163181417e-05,
-      "loss": 0.5453,
-      "step": 12500
-    },
-    {
-      "epoch": 0.3780938254369892,
-      "grad_norm": 2.3930513858795166,
-      "learning_rate": 1.747937449708674e-05,
-      "loss": 0.5055,
-      "step": 13000
-    },
-    {
-      "epoch": 0.3926358956461042,
-      "grad_norm": 0.9438181519508362,
-      "learning_rate": 1.7382427362359308e-05,
-      "loss": 0.5561,
-      "step": 13500
-    },
-    {
-      "epoch": 0.40717796585521915,
-      "grad_norm": 19.889507293701172,
-      "learning_rate": 1.7285480227631876e-05,
-      "loss": 0.5267,
-      "step": 14000
-    },
-    {
-      "epoch": 0.4217200360643341,
-      "grad_norm": 9.12895393371582,
-      "learning_rate": 1.718853309290444e-05,
-      "loss": 0.5353,
-      "step": 14500
-    },
-    {
-      "epoch": 0.4362621062734491,
-      "grad_norm": 6.552937030792236,
-      "learning_rate": 1.709158595817701e-05,
-      "loss": 0.5231,
-      "step": 15000
-    },
-    {
-      "epoch": 0.45080417648256405,
-      "grad_norm": 6.750959396362305,
-      "learning_rate": 1.6994638823449574e-05,
-      "loss": 0.4972,
-      "step": 15500
-    },
-    {
-      "epoch": 0.465346246691679,
-      "grad_norm": 10.956033706665039,
-      "learning_rate": 1.689769168872214e-05,
-      "loss": 0.4949,
-      "step": 16000
-    },
-    {
-      "epoch": 0.479888316900794,
-      "grad_norm": 3.459519863128662,
-      "learning_rate": 1.6800744553994706e-05,
-      "loss": 0.5219,
-      "step": 16500
-    },
-    {
-      "epoch": 0.49443038710990894,
-      "grad_norm": 7.870626926422119,
-      "learning_rate": 1.6703797419267275e-05,
-      "loss": 0.5049,
-      "step": 17000
-    },
-    {
-      "epoch": 0.508972457319024,
-      "grad_norm": 9.513204574584961,
-      "learning_rate": 1.6606850284539843e-05,
-      "loss": 0.5365,
-      "step": 17500
-    },
-    {
-      "epoch": 0.5235145275281389,
-      "grad_norm": 4.7530951499938965,
-      "learning_rate": 1.650990314981241e-05,
-      "loss": 0.4979,
-      "step": 18000
-    },
-    {
-      "epoch": 0.5380565977372539,
-      "grad_norm": 4.865274906158447,
-      "learning_rate": 1.6412956015084976e-05,
-      "loss": 0.5273,
-      "step": 18500
-    },
-    {
-      "epoch": 0.5525986679463688,
-      "grad_norm": 2.539562940597534,
-      "learning_rate": 1.6316008880357544e-05,
-      "loss": 0.5029,
-      "step": 19000
-    },
-    {
-      "epoch": 0.5671407381554838,
-      "grad_norm": 2.071009874343872,
-      "learning_rate": 1.621906174563011e-05,
-      "loss": 0.5166,
-      "step": 19500
-    },
-    {
-      "epoch": 0.5816828083645987,
-      "grad_norm": 7.213927268981934,
-      "learning_rate": 1.6122114610902677e-05,
-      "loss": 0.5628,
-      "step": 20000
-    },
-    {
-      "epoch": 0.5962248785737138,
-      "grad_norm": 19.586095809936523,
-      "learning_rate": 1.602516747617524e-05,
-      "loss": 0.5259,
-      "step": 20500
-    },
-    {
-      "epoch": 0.6107669487828288,
-      "grad_norm": 2.6470067501068115,
-      "learning_rate": 1.592822034144781e-05,
-      "loss": 0.5044,
-      "step": 21000
-    },
-    {
-      "epoch": 0.6253090189919437,
-      "grad_norm": 8.12119197845459,
-      "learning_rate": 1.5831273206720378e-05,
-      "loss": 0.4909,
-      "step": 21500
-    },
-    {
-      "epoch": 0.6398510892010587,
-      "grad_norm": 11.704862594604492,
-      "learning_rate": 1.5734326071992943e-05,
-      "loss": 0.5103,
-      "step": 22000
-    },
-    {
-      "epoch": 0.6543931594101736,
-      "grad_norm": 5.466031551361084,
-      "learning_rate": 1.563737893726551e-05,
-      "loss": 0.5097,
-      "step": 22500
-    },
-    {
-      "epoch": 0.6689352296192886,
-      "grad_norm": 1.2860121726989746,
-      "learning_rate": 1.5540431802538075e-05,
-      "loss": 0.4858,
-      "step": 23000
-    },
-    {
-      "epoch": 0.6834772998284036,
-      "grad_norm": 5.133608341217041,
-      "learning_rate": 1.5443484667810644e-05,
-      "loss": 0.4814,
-      "step": 23500
-    },
-    {
-      "epoch": 0.6980193700375186,
-      "grad_norm": 10.405769348144531,
-      "learning_rate": 1.5346537533083212e-05,
-      "loss": 0.4985,
-      "step": 24000
-    },
-    {
-      "epoch": 0.7125614402466335,
-      "grad_norm": 3.4990031719207764,
-      "learning_rate": 1.5249590398355778e-05,
-      "loss": 0.493,
-      "step": 24500
-    },
-    {
-      "epoch": 0.7271035104557485,
-      "grad_norm": 5.292512893676758,
-      "learning_rate": 1.5152643263628345e-05,
-      "loss": 0.4807,
-      "step": 25000
-    },
-    {
-      "epoch": 0.7416455806648634,
-      "grad_norm": 17.39272117614746,
-      "learning_rate": 1.5055696128900911e-05,
-      "loss": 0.4966,
-      "step": 25500
-    },
-    {
-      "epoch": 0.7561876508739784,
-      "grad_norm": 18.466636657714844,
-      "learning_rate": 1.4958748994173478e-05,
-      "loss": 0.4851,
-      "step": 26000
-    },
-    {
-      "epoch": 0.7707297210830933,
-      "grad_norm": 2.161870241165161,
-      "learning_rate": 1.4861801859446046e-05,
-      "loss": 0.4922,
-      "step": 26500
-    },
-    {
-      "epoch": 0.7852717912922084,
-      "grad_norm": 1.0825892686843872,
-      "learning_rate": 1.4764854724718612e-05,
-      "loss": 0.4987,
-      "step": 27000
-    },
-    {
-      "epoch": 0.7998138615013233,
-      "grad_norm": 11.1233491897583,
-      "learning_rate": 1.4667907589991179e-05,
-      "loss": 0.4728,
-      "step": 27500
-    },
-    {
-      "epoch": 0.8143559317104383,
-      "grad_norm": 8.120223045349121,
-      "learning_rate": 1.4570960455263745e-05,
-      "loss": 0.5043,
-      "step": 28000
-    },
-    {
-      "epoch": 0.8288980019195533,
-      "grad_norm": 6.8163933753967285,
-      "learning_rate": 1.4474013320536313e-05,
-      "loss": 0.5224,
-      "step": 28500
-    },
-    {
-      "epoch": 0.8434400721286682,
-      "grad_norm": 15.210949897766113,
-      "learning_rate": 1.437706618580888e-05,
-      "loss": 0.4532,
-      "step": 29000
-    },
-    {
-      "epoch": 0.8579821423377832,
-      "grad_norm": 3.8793272972106934,
-      "learning_rate": 1.4280119051081446e-05,
-      "loss": 0.4864,
-      "step": 29500
-    },
-    {
-      "epoch": 0.8725242125468982,
-      "grad_norm": 8.277094841003418,
-      "learning_rate": 1.4183171916354013e-05,
-      "loss": 0.5122,
-      "step": 30000
-    },
-    {
-      "epoch": 0.8870662827560132,
-      "grad_norm": 6.275518417358398,
-      "learning_rate": 1.408622478162658e-05,
-      "loss": 0.49,
-      "step": 30500
-    },
-    {
-      "epoch": 0.9016083529651281,
-      "grad_norm": 8.79964542388916,
-      "learning_rate": 1.3989277646899147e-05,
-      "loss": 0.4991,
-      "step": 31000
-    },
-    {
-      "epoch": 0.9161504231742431,
-      "grad_norm": 23.397424697875977,
-      "learning_rate": 1.3892330512171715e-05,
-      "loss": 0.4966,
-      "step": 31500
-    },
-    {
-      "epoch": 0.930692493383358,
-      "grad_norm": 9.809805870056152,
-      "learning_rate": 1.379538337744428e-05,
-      "loss": 0.5091,
-      "step": 32000
-    },
-    {
-      "epoch": 0.945234563592473,
-      "grad_norm": 1.723449468612671,
-      "learning_rate": 1.3698436242716848e-05,
-      "loss": 0.4963,
-      "step": 32500
-    },
-    {
-      "epoch": 0.959776633801588,
-      "grad_norm": 9.49936580657959,
-      "learning_rate": 1.3601489107989415e-05,
-      "loss": 0.4739,
-      "step": 33000
-    },
-    {
-      "epoch": 0.974318704010703,
-      "grad_norm": 5.465103626251221,
-      "learning_rate": 1.3504541973261983e-05,
-      "loss": 0.4947,
-      "step": 33500
-    },
-    {
-      "epoch": 0.9888607742198179,
-      "grad_norm": 5.015740871429443,
-      "learning_rate": 1.3407594838534548e-05,
-      "loss": 0.4692,
-      "step": 34000
-    },
-    {
-      "epoch": 1.003402844428933,
-      "grad_norm": 11.569725036621094,
-      "learning_rate": 1.3310647703807116e-05,
-      "loss": 0.4667,
-      "step": 34500
-    },
-    {
-      "epoch": 1.017944914638048,
-      "grad_norm": 6.356573581695557,
-      "learning_rate": 1.3213700569079682e-05,
-      "loss": 0.4682,
-      "step": 35000
-    },
-    {
-      "epoch": 1.032486984847163,
-      "grad_norm": 5.768576622009277,
-      "learning_rate": 1.311675343435225e-05,
-      "loss": 0.486,
-      "step": 35500
-    },
-    {
-      "epoch": 1.0470290550562777,
-      "grad_norm": 5.155892372131348,
-      "learning_rate": 1.3019806299624815e-05,
-      "loss": 0.4968,
-      "step": 36000
-    },
-    {
-      "epoch": 1.0615711252653928,
-      "grad_norm": 3.3541529178619385,
-      "learning_rate": 1.2922859164897383e-05,
-      "loss": 0.4944,
-      "step": 36500
-    },
-    {
-      "epoch": 1.0761131954745078,
-      "grad_norm": 8.195282936096191,
-      "learning_rate": 1.282591203016995e-05,
-      "loss": 0.4467,
-      "step": 37000
-    },
-    {
-      "epoch": 1.0906552656836228,
-      "grad_norm": 3.4823594093322754,
-      "learning_rate": 1.2728964895442518e-05,
-      "loss": 0.4544,
-      "step": 37500
-    },
-    {
-      "epoch": 1.1051973358927376,
-      "grad_norm": 2.6191506385803223,
-      "learning_rate": 1.2632017760715083e-05,
-      "loss": 0.4807,
-      "step": 38000
-    },
-    {
-      "epoch": 1.1197394061018526,
-      "grad_norm": 3.6867098808288574,
-      "learning_rate": 1.253507062598765e-05,
-      "loss": 0.5017,
-      "step": 38500
-    },
-    {
-      "epoch": 1.1342814763109677,
-      "grad_norm": 4.41229772567749,
-      "learning_rate": 1.2438123491260217e-05,
-      "loss": 0.4981,
-      "step": 39000
-    },
-    {
-      "epoch": 1.1488235465200827,
-      "grad_norm": 6.181690692901611,
-      "learning_rate": 1.2341176356532785e-05,
-      "loss": 0.4632,
-      "step": 39500
-    },
-    {
-      "epoch": 1.1633656167291975,
-      "grad_norm": 1.7811199426651,
-      "learning_rate": 1.224422922180535e-05,
-      "loss": 0.4935,
-      "step": 40000
-    },
-    {
-      "epoch": 1.1779076869383125,
-      "grad_norm": 7.902093410491943,
-      "learning_rate": 1.2147282087077918e-05,
-      "loss": 0.467,
-      "step": 40500
-    },
-    {
-      "epoch": 1.1924497571474275,
-      "grad_norm": 7.348107814788818,
-      "learning_rate": 1.2050334952350485e-05,
-      "loss": 0.4732,
-      "step": 41000
-    },
-    {
-      "epoch": 1.2069918273565425,
-      "grad_norm": 1.9056262969970703,
-      "learning_rate": 1.1953387817623053e-05,
-      "loss": 0.4689,
-      "step": 41500
-    },
-    {
-      "epoch": 1.2215338975656573,
-      "grad_norm": 3.8321168422698975,
-      "learning_rate": 1.1856440682895618e-05,
-      "loss": 0.4957,
-      "step": 42000
-    },
-    {
-      "epoch": 1.2360759677747724,
-      "grad_norm": 0.5265329480171204,
-      "learning_rate": 1.1759493548168184e-05,
-      "loss": 0.4738,
-      "step": 42500
-    },
-    {
-      "epoch": 1.2506180379838874,
-      "grad_norm": 7.372343063354492,
-      "learning_rate": 1.1662546413440752e-05,
-      "loss": 0.485,
-      "step": 43000
-    },
-    {
-      "epoch": 1.2651601081930024,
-      "grad_norm": 2.4991230964660645,
-      "learning_rate": 1.1565599278713317e-05,
-      "loss": 0.4497,
-      "step": 43500
-    },
-    {
-      "epoch": 1.2797021784021174,
-      "grad_norm": 11.783917427062988,
-      "learning_rate": 1.1468652143985885e-05,
-      "loss": 0.5148,
-      "step": 44000
-    },
-    {
-      "epoch": 1.2942442486112322,
-      "grad_norm": 12.756231307983398,
-      "learning_rate": 1.1371705009258452e-05,
-      "loss": 0.4676,
-      "step": 44500
-    },
-    {
-      "epoch": 1.3087863188203472,
-      "grad_norm": 4.692300319671631,
-      "learning_rate": 1.127475787453102e-05,
-      "loss": 0.4647,
-      "step": 45000
-    },
-    {
-      "epoch": 1.3233283890294623,
-      "grad_norm": 7.03782844543457,
-      "learning_rate": 1.1177810739803584e-05,
-      "loss": 0.4572,
-      "step": 45500
-    },
-    {
-      "epoch": 1.3378704592385773,
-      "grad_norm": 3.1515519618988037,
-      "learning_rate": 1.1080863605076153e-05,
-      "loss": 0.5162,
-      "step": 46000
-    },
-    {
-      "epoch": 1.352412529447692,
-      "grad_norm": 13.067205429077148,
-      "learning_rate": 1.0983916470348719e-05,
-      "loss": 0.5158,
-      "step": 46500
     }
   ],
   "logging_steps": 500,
@@ -677,7 +33,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.416368314067739e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.014542070209114969,
   "eval_steps": 500,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.014542070209114969,
+      "grad_norm": 12.663599014282227,
       "learning_rate": 1.9903052865272566e-05,
+      "loss": 7.4704,
       "step": 500
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 268694085430272.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1cde46356f72f91161f863cec40376c04f3c90f38cae9b314f1588079bbc3ffe
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:1cf779800eed062a2e03f99378369c4384c63e953f13f7419f13d9c53267c737
 size 5176

last-checkpoint/vocab.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff