huggingartists

Browse files

Files changed (12) hide show

README.md +4 -4
config.json +4 -2
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +2 -2
pytorch_model.bin +2 -2
rng_state.pth +1 -1
scheduler.pt +1 -1
tokenizer.json +0 -0
tokenizer_config.json +1 -1
trainer_state.json +85 -85
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -14,7 +14,7 @@ widget:
 <div class="inline-flex flex-col" style="line-height: 1.5;">
     <div class="flex">
         <div
-			style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url(&#39;https://images.genius.com/73f52f6c73859a68ab961ca797e7b848.725x725x1.jpg&#39;)">
         </div>
     </div>
     <div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/lil-uzi-vert")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2ndgi0dn/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Lil Uzi Vert's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/9x6wbf6e) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/9x6wbf6e/artifacts) is logged and versioned.
 ## How to use

 <div class="inline-flex flex-col" style="line-height: 1.5;">
     <div class="flex">
         <div
+			style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url(&#39;https://images.genius.com/3619e57354afa7dd5e65b9c261982ccc.640x640x1.jpg&#39;)">
         </div>
     </div>
     <div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
 dataset = load_dataset("huggingartists/lil-uzi-vert")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/14mmkidw/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Lil Uzi Vert's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3s5iqd7v) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3s5iqd7v/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -18,7 +18,9 @@
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
   "resid_pdrop": 0.1,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.9.2",
   "use_cache": true,
   "vocab_size": 50257
 }

 {
+  "_name_or_path": "lil-uzi-vert",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.18.0",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 3.0165445804595947, "eval_runtime": 8.1174, "eval_samples_per_second": 37.45, "eval_steps_per_second": 4.681, "epoch": 1.0}


1	+ {"eval_loss": 2.9723432064056396, "eval_runtime": 14.485, "eval_samples_per_second": 20.918, "eval_steps_per_second": 2.623, "epoch": 4.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9f834e2dab17d6b22f3e5622e6ae587c757af1be1061e4ecc23e0724ae0b559
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:d911797eecf7771d7a8b11419a5eef76e749a76a836de6f2e223bc6792b68bba
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42921bfbd1f7da4499f0e2f798f9137b868407cced67116b7819ce354a30349b
-size 995603825

 version https://git-lfs.github.com/spec/v1
+oid sha256:cbe62339215bdc95a3540d463c2e486a400ea24b419ccc127a77e543a5fe7868
+size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e18a4078f8a9380fdeeb65a2e0038b5134dea4b4c76336a114ed08cea4d5ffa2
-size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:89fc126be56c4a03afeb33e2198d78baec72d354c2ca80b7bd837d0e24fab3b1
+size 510396521

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3498d3c2707018f535ef83533e9dc2d6c4d547651de1f61798ba34c842d4d7f0
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:6331c41c620073da4a8b80d7b70f57f60aee54b5fffdf4709f2dc2f5f8b461c3
 size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbcce13fecbcb2be23ea34670a937a2c27e28ac198853dd41e256aa3ed115744
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:98fc3a88a1d7a12051327bb8cbfa8f3cf51243baa258da149a22d78cec439dec
 size 623

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}


1	+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/lil-uzi-vert", "tokenizer_class": "GPT2Tokenizer"}

trainer_state.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "best_metric": 3.0165445804595947,
   "best_model_checkpoint": "output/lil-uzi-vert/checkpoint-218",
   "epoch": 1.0,
   "global_step": 218,
@@ -10,273 +10,273 @@
     {
       "epoch": 0.02,
       "learning_rate": 0.0001370219946819302,
-      "loss": 4.0012,
       "step": 5
     },
     {
       "epoch": 0.05,
-      "learning_rate": 0.0001364889025146963,
-      "loss": 3.8869,
       "step": 10
     },
     {
       "epoch": 0.07,
       "learning_rate": 0.00013560349006508517,
-      "loss": 3.6034,
       "step": 15
     },
     {
       "epoch": 0.09,
       "learning_rate": 0.0001343703523221556,
-      "loss": 3.5643,
       "step": 20
     },
     {
       "epoch": 0.11,
       "learning_rate": 0.00013279588885081002,
-      "loss": 3.3069,
       "step": 25
     },
     {
       "epoch": 0.14,
-      "learning_rate": 0.0001308882705802323,
-      "loss": 3.431,
       "step": 30
     },
     {
       "epoch": 0.16,
       "learning_rate": 0.00012865739739954807,
-      "loss": 3.5848,
       "step": 35
     },
     {
       "epoch": 0.18,
-      "learning_rate": 0.00012611484678077197,
-      "loss": 3.4069,
       "step": 40
     },
     {
       "epoch": 0.21,
-      "learning_rate": 0.00012327381369567084,
-      "loss": 3.3532,
       "step": 45
     },
     {
       "epoch": 0.23,
-      "learning_rate": 0.00012014904213835435,
-      "loss": 3.4164,
       "step": 50
     },
     {
       "epoch": 0.25,
-      "learning_rate": 0.00011675674860896696,
-      "loss": 3.5117,
       "step": 55
     },
     {
       "epoch": 0.28,
-      "learning_rate": 0.00011311453795557528,
-      "loss": 3.2964,
       "step": 60
     },
     {
       "epoch": 0.3,
-      "learning_rate": 0.00010924131201100248,
-      "loss": 3.2104,
       "step": 65
     },
     {
       "epoch": 0.32,
-      "learning_rate": 0.00010515717149875347,
-      "loss": 3.0675,
       "step": 70
     },
     {
       "epoch": 0.34,
-      "learning_rate": 0.00010088331171710597,
-      "loss": 3.4575,
       "step": 75
     },
     {
       "epoch": 0.37,
       "learning_rate": 9.644191254273106e-05,
-      "loss": 3.1265,
       "step": 80
     },
     {
       "epoch": 0.39,
-      "learning_rate": 9.185602332468734e-05,
-      "loss": 3.3004,
       "step": 85
     },
     {
       "epoch": 0.41,
-      "learning_rate": 8.714944326614944e-05,
-      "loss": 3.2766,
       "step": 90
     },
     {
       "epoch": 0.44,
-      "learning_rate": 8.234659791464919e-05,
-      "loss": 3.1492,
       "step": 95
     },
     {
       "epoch": 0.46,
-      "learning_rate": 7.747241240180272e-05,
-      "loss": 3.3842,
       "step": 100
     },
     {
       "epoch": 0.48,
-      "learning_rate": 7.255218209036649e-05,
-      "loss": 3.3591,
       "step": 105
     },
     {
       "epoch": 0.5,
-      "learning_rate": 6.76114412999196e-05,
-      "loss": 3.1375,
       "step": 110
     },
     {
       "epoch": 0.53,
-      "learning_rate": 6.267583079244174e-05,
-      "loss": 3.2619,
       "step": 115
     },
     {
       "epoch": 0.55,
-      "learning_rate": 5.777096470549132e-05,
-      "loss": 3.1689,
       "step": 120
     },
     {
       "epoch": 0.57,
       "learning_rate": 5.2922297623555134e-05,
-      "loss": 3.264,
       "step": 125
     },
     {
       "epoch": 0.6,
-      "learning_rate": 4.815499247742428e-05,
-      "loss": 3.0831,
       "step": 130
     },
     {
       "epoch": 0.62,
-      "learning_rate": 4.349378995715337e-05,
-      "loss": 3.1357,
       "step": 135
     },
     {
       "epoch": 0.64,
-      "learning_rate": 3.896288011630533e-05,
-      "loss": 3.164,
       "step": 140
     },
     {
       "epoch": 0.67,
-      "learning_rate": 3.458577683381209e-05,
-      "loss": 3.2134,
       "step": 145
     },
     {
       "epoch": 0.69,
-      "learning_rate": 3.0385195784951376e-05,
-      "loss": 3.1053,
       "step": 150
     },
     {
       "epoch": 0.71,
-      "learning_rate": 2.63829365547284e-05,
-      "loss": 3.099,
       "step": 155
     },
     {
       "epoch": 0.73,
-      "learning_rate": 2.2599769505454377e-05,
-      "loss": 3.1365,
       "step": 160
     },
     {
       "epoch": 0.76,
-      "learning_rate": 1.905532798564004e-05,
-      "loss": 3.1402,
       "step": 165
     },
     {
       "epoch": 0.78,
-      "learning_rate": 1.5768006439603532e-05,
-      "loss": 3.1203,
       "step": 170
     },
     {
       "epoch": 0.8,
-      "learning_rate": 1.2754864946569404e-05,
-      "loss": 3.2424,
       "step": 175
     },
     {
       "epoch": 0.83,
-      "learning_rate": 1.0031540684667541e-05,
-      "loss": 3.2244,
       "step": 180
     },
     {
       "epoch": 0.85,
-      "learning_rate": 7.612166779304597e-06,
-      "loss": 3.2007,
       "step": 185
     },
     {
       "epoch": 0.87,
       "learning_rate": 5.5092989570564855e-06,
-      "loss": 3.0923,
       "step": 190
     },
     {
       "epoch": 0.89,
-      "learning_rate": 3.7338503857237188e-06,
-      "loss": 3.0841,
       "step": 195
     },
     {
       "epoch": 0.92,
-      "learning_rate": 2.295035038707367e-06,
-      "loss": 3.1703,
       "step": 200
     },
     {
       "epoch": 0.94,
-      "learning_rate": 1.2003198776252066e-06,
-      "loss": 3.2195,
       "step": 205
     },
     {
       "epoch": 0.96,
-      "learning_rate": 4.5538610132401196e-07,
-      "loss": 3.0425,
       "step": 210
     },
     {
       "epoch": 0.99,
       "learning_rate": 6.409966239244377e-08,
-      "loss": 3.1853,
       "step": 215
     },
     {
       "epoch": 1.0,
-      "eval_loss": 3.0165445804595947,
-      "eval_runtime": 8.1641,
-      "eval_samples_per_second": 37.236,
-      "eval_steps_per_second": 4.655,
       "step": 218
     }
   ],
-  "max_steps": 218,
-  "num_train_epochs": 1,
-  "total_flos": 227454713856000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 2.9723432064056396,
   "best_model_checkpoint": "output/lil-uzi-vert/checkpoint-218",
   "epoch": 1.0,
   "global_step": 218,
     {
       "epoch": 0.02,
       "learning_rate": 0.0001370219946819302,
+      "loss": 2.8388,
       "step": 5
     },
     {
       "epoch": 0.05,
+      "learning_rate": 0.00013648890251469632,
+      "loss": 2.9,
       "step": 10
     },
     {
       "epoch": 0.07,
       "learning_rate": 0.00013560349006508517,
+      "loss": 2.895,
       "step": 15
     },
     {
       "epoch": 0.09,
       "learning_rate": 0.0001343703523221556,
+      "loss": 2.8357,
       "step": 20
     },
     {
       "epoch": 0.11,
       "learning_rate": 0.00013279588885081002,
+      "loss": 2.8199,
       "step": 25
     },
     {
       "epoch": 0.14,
+      "learning_rate": 0.00013088827058023233,
+      "loss": 2.7435,
       "step": 30
     },
     {
       "epoch": 0.16,
       "learning_rate": 0.00012865739739954807,
+      "loss": 2.8855,
       "step": 35
     },
     {
       "epoch": 0.18,
+      "learning_rate": 0.000126114846780772,
+      "loss": 2.7549,
       "step": 40
     },
     {
       "epoch": 0.21,
+      "learning_rate": 0.00012327381369567087,
+      "loss": 2.84,
       "step": 45
     },
     {
       "epoch": 0.23,
+      "learning_rate": 0.00012014904213835432,
+      "loss": 2.8624,
       "step": 50
     },
     {
       "epoch": 0.25,
+      "learning_rate": 0.00011675674860896702,
+      "loss": 2.8777,
       "step": 55
     },
     {
       "epoch": 0.28,
+      "learning_rate": 0.00011311453795557527,
+      "loss": 2.9297,
       "step": 60
     },
     {
       "epoch": 0.3,
+      "learning_rate": 0.00010924131201100249,
+      "loss": 2.567,
       "step": 65
     },
     {
       "epoch": 0.32,
+      "learning_rate": 0.00010515717149875348,
+      "loss": 2.8801,
       "step": 70
     },
     {
       "epoch": 0.34,
+      "learning_rate": 0.00010088331171710603,
+      "loss": 2.9652,
       "step": 75
     },
     {
       "epoch": 0.37,
       "learning_rate": 9.644191254273106e-05,
+      "loss": 2.6114,
       "step": 80
     },
     {
       "epoch": 0.39,
+      "learning_rate": 9.185602332468731e-05,
+      "loss": 2.7817,
       "step": 85
     },
     {
       "epoch": 0.41,
+      "learning_rate": 8.714944326614947e-05,
+      "loss": 2.8251,
       "step": 90
     },
     {
       "epoch": 0.44,
+      "learning_rate": 8.234659791464915e-05,
+      "loss": 2.8489,
       "step": 95
     },
     {
       "epoch": 0.46,
+      "learning_rate": 7.747241240180279e-05,
+      "loss": 2.7123,
       "step": 100
     },
     {
       "epoch": 0.48,
+      "learning_rate": 7.255218209036647e-05,
+      "loss": 2.6698,
       "step": 105
     },
     {
       "epoch": 0.5,
+      "learning_rate": 6.761144129991962e-05,
+      "loss": 2.7934,
       "step": 110
     },
     {
       "epoch": 0.53,
+      "learning_rate": 6.267583079244176e-05,
+      "loss": 2.8273,
       "step": 115
     },
     {
       "epoch": 0.55,
+      "learning_rate": 5.777096470549139e-05,
+      "loss": 2.8665,
       "step": 120
     },
     {
       "epoch": 0.57,
       "learning_rate": 5.2922297623555134e-05,
+      "loss": 2.672,
       "step": 125
     },
     {
       "epoch": 0.6,
+      "learning_rate": 4.815499247742421e-05,
+      "loss": 2.6851,
       "step": 130
     },
     {
       "epoch": 0.62,
+      "learning_rate": 4.3493789957153346e-05,
+      "loss": 2.6811,
       "step": 135
     },
     {
       "epoch": 0.64,
+      "learning_rate": 3.8962880116305346e-05,
+      "loss": 2.8504,
       "step": 140
     },
     {
       "epoch": 0.67,
+      "learning_rate": 3.458577683381216e-05,
+      "loss": 2.7296,
       "step": 145
     },
     {
       "epoch": 0.69,
+      "learning_rate": 3.0385195784951363e-05,
+      "loss": 2.9651,
       "step": 150
     },
     {
       "epoch": 0.71,
+      "learning_rate": 2.638293655472844e-05,
+      "loss": 2.8441,
       "step": 155
     },
     {
       "epoch": 0.73,
+      "learning_rate": 2.259976950545437e-05,
+      "loss": 2.7186,
       "step": 160
     },
     {
       "epoch": 0.76,
+      "learning_rate": 1.9055327985640047e-05,
+      "loss": 2.8246,
       "step": 165
     },
     {
       "epoch": 0.78,
+      "learning_rate": 1.5768006439603586e-05,
+      "loss": 2.5588,
       "step": 170
     },
     {
       "epoch": 0.8,
+      "learning_rate": 1.2754864946569397e-05,
+      "loss": 2.6992,
       "step": 175
     },
     {
       "epoch": 0.83,
+      "learning_rate": 1.0031540684667565e-05,
+      "loss": 2.6534,
       "step": 180
     },
     {
       "epoch": 0.85,
+      "learning_rate": 7.612166779304589e-06,
+      "loss": 2.7594,
       "step": 185
     },
     {
       "epoch": 0.87,
       "learning_rate": 5.5092989570564855e-06,
+      "loss": 2.5534,
       "step": 190
     },
     {
       "epoch": 0.89,
+      "learning_rate": 3.733850385723696e-06,
+      "loss": 2.955,
       "step": 195
     },
     {
       "epoch": 0.92,
+      "learning_rate": 2.2950350387073973e-06,
+      "loss": 2.8938,
       "step": 200
     },
     {
       "epoch": 0.94,
+      "learning_rate": 1.2003198776252143e-06,
+      "loss": 2.8361,
       "step": 205
     },
     {
       "epoch": 0.96,
+      "learning_rate": 4.553861013240044e-07,
+      "loss": 2.8043,
       "step": 210
     },
     {
       "epoch": 0.99,
       "learning_rate": 6.409966239244377e-08,
+      "loss": 2.8706,
       "step": 215
     },
     {
       "epoch": 1.0,
+      "eval_loss": 2.9723432064056396,
+      "eval_runtime": 14.4988,
+      "eval_samples_per_second": 20.898,
+      "eval_steps_per_second": 2.621,
       "step": 218
     }
   ],
+  "max_steps": 872,
+  "num_train_epochs": 4,
+  "total_flos": 227585359872000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32eed4e7cdcf3d9611cb0954dec030a5a9f6c10f458a381c5e5a079e5eb26127
-size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:6250b59e3427cb656ce0093ddac0f35af8103202480e8a9ad2e9ef9cf1f8b845
+size 3055