diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..073bf2998e05ca671fc780f1020c2f71821b211e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-120/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-140/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-160/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-180/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-189/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-20/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-40/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-60/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bae5b9a6959a2c23543530a10740fa7564eea43f --- /dev/null +++ b/README.md @@ -0,0 +1,103 @@ +--- +base_model: google/gemma-2b +library_name: peft +license: gemma +metrics: +- accuracy +tags: +- trl +- reward-trainer +- generated_from_trainer +model-index: +- name: reward_modeling + results: [] +--- + + + +[Visualize in Weights & Biases](https://wandb.ai/quirky_lats_at_mats/huggingface/runs/k92pr3b1) +# reward_modeling + +This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on an unknown dataset. +It achieves the following results on the evaluation set: +- Loss: 0.4036 +- Accuracy: 0.8058 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 16 +- eval_batch_size: 8 +- seed: 42 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: linear +- num_epochs: 3.0 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Accuracy | +|:-------------:|:------:|:----:|:---------------:|:--------:| +| 0.9241 | 0.0787 | 5 | 0.6996 | 0.5678 | +| 0.7708 | 0.1575 | 10 | 0.6284 | 0.6660 | +| 0.7875 | 0.2362 | 15 | 0.5749 | 0.7244 | +| 0.6575 | 0.3150 | 20 | 0.5360 | 0.7390 | +| 0.6802 | 0.3937 | 25 | 0.5087 | 0.7432 | +| 0.3982 | 0.4724 | 30 | 0.4890 | 0.7578 | +| 0.4555 | 0.5512 | 35 | 0.4775 | 0.7599 | +| 0.8838 | 0.6299 | 40 | 0.4683 | 0.7662 | +| 0.4692 | 0.7087 | 45 | 0.4611 | 0.7662 | +| 0.5455 | 0.7874 | 50 | 0.4531 | 0.7620 | +| 0.5696 | 0.8661 | 55 | 0.4459 | 0.7662 | +| 0.7453 | 0.9449 | 60 | 0.4414 | 0.7766 | +| 0.5369 | 1.0236 | 65 | 0.4371 | 0.7829 | +| 0.3994 | 1.1024 | 70 | 0.4334 | 0.7850 | +| 0.4235 | 1.1811 | 75 | 0.4298 | 0.7912 | +| 0.4811 | 1.2598 | 80 | 0.4266 | 0.7912 | +| 0.5072 | 1.3386 | 85 | 0.4253 | 0.7912 | +| 0.4405 | 1.4173 | 90 | 0.4228 | 0.7850 | +| 0.5349 | 1.4961 | 95 | 0.4196 | 0.7871 | +| 0.3342 | 1.5748 | 100 | 0.4170 | 0.7829 | +| 0.5271 | 1.6535 | 105 | 0.4149 | 0.7933 | +| 0.3463 | 1.7323 | 110 | 0.4136 | 0.7975 | +| 0.4867 | 1.8110 | 115 | 0.4128 | 0.7996 | +| 0.3221 | 1.8898 | 120 | 0.4125 | 0.7996 | +| 0.3542 | 1.9685 | 125 | 0.4116 | 0.7996 | +| 0.5465 | 2.0472 | 130 | 0.4107 | 0.7996 | +| 0.3427 | 2.1260 | 135 | 0.4101 | 0.7996 | +| 0.4787 | 2.2047 | 140 | 0.4087 | 0.8038 | +| 0.4229 | 2.2835 | 145 | 0.4073 | 0.8017 | +| 0.4514 | 2.3622 | 150 | 0.4063 | 0.8038 | +| 0.5116 | 2.4409 | 155 | 0.4051 | 0.8038 | +| 0.3234 | 2.5197 | 160 | 0.4045 | 0.8058 | +| 0.3993 | 2.5984 | 165 | 0.4040 | 0.8058 | +| 0.3264 | 2.6772 | 170 | 0.4037 | 0.8058 | +| 0.3316 | 2.7559 | 175 | 0.4035 | 0.8038 | +| 0.4855 | 2.8346 | 180 | 0.4035 | 0.8038 | +| 0.536 | 2.9134 | 185 | 0.4036 | 0.8058 | + + +### Framework versions + +- PEFT 0.11.1 +- Transformers 4.42.3 +- Pytorch 2.3.1+cu121 +- Datasets 2.20.0 +- Tokenizers 0.19.1 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e788c33bde47908737d80fe730414dabf5e14ea --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj" + ], + "task_type": "SEQ_CLS", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4ca4edbf83a134d5a78b4d59470a3d59c4c1b58 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f43bfb892b5ea91afa7f86c8bada7be90677e5c70b940a97a654b8d9d8033873 +size 57249936 diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a53ea425933059074716d64973f94edb73a8eb80 --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e788c33bde47908737d80fe730414dabf5e14ea --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj" + ], + "task_type": "SEQ_CLS", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ab7e54cac8e96632f1e1352710c63447a0b4e0f --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8647c3fa8b099084537a1e1d18cb1527b1670a5457e61a01a032fe9dc0b9a8d +size 57249936 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..76adb5c705bc411c27d3fd42d871cea738e0e5ee --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:321b5e10fb87b7576cee78fc1d54fa78d9bfd4eb030b49146655b6a3407c3732 +size 114624506 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8df86bbc79a2ed932ad48d709145c07e1aa11b0f --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35f6da814e1b18dd8e923fecfdf10418b1822dcbda27bb0cdfe3a57c836b108a +size 14244 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4c5f1e660d96fc61e32a5da0e087c68bd6b6bcc --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584c2aaad272b9d62eaf42244490f5c8de0a7bde020519aa2bbd69b297976670 +size 1064 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-100/tokenizer.json b/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..50e79ef5e39127ac3280c4f578d33786f5afbb5c --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1,1756 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1b0140f792b1a787d07951411e763953e4a62780 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,913 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.574803149606299, + "eval_steps": 5, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015748031496062992, + "grad_norm": 5.510926246643066, + "learning_rate": 1.9894179894179895e-05, + "loss": 0.9249, + "step": 1 + }, + { + "epoch": 0.031496062992125984, + "grad_norm": 8.61505126953125, + "learning_rate": 1.978835978835979e-05, + "loss": 0.8445, + "step": 2 + }, + { + "epoch": 0.047244094488188976, + "grad_norm": 7.036591529846191, + "learning_rate": 1.9682539682539684e-05, + "loss": 0.9654, + "step": 3 + }, + { + "epoch": 0.06299212598425197, + "grad_norm": 5.803933143615723, + "learning_rate": 1.9576719576719577e-05, + "loss": 0.9276, + "step": 4 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 5.716428756713867, + "learning_rate": 1.947089947089947e-05, + "loss": 0.9241, + "step": 5 + }, + { + "epoch": 0.07874015748031496, + "eval_accuracy": 0.5678496868475992, + "eval_loss": 0.6996241807937622, + "eval_runtime": 108.7291, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 5 + }, + { + "epoch": 0.09448818897637795, + "grad_norm": 7.55866813659668, + "learning_rate": 1.9365079365079367e-05, + "loss": 0.9947, + "step": 6 + }, + { + "epoch": 0.11023622047244094, + "grad_norm": 6.801171779632568, + "learning_rate": 1.925925925925926e-05, + "loss": 0.972, + "step": 7 + }, + { + "epoch": 0.12598425196850394, + "grad_norm": 4.845946311950684, + "learning_rate": 1.9153439153439156e-05, + "loss": 0.6478, + "step": 8 + }, + { + "epoch": 0.14173228346456693, + "grad_norm": 10.487945556640625, + "learning_rate": 1.904761904761905e-05, + "loss": 0.8597, + "step": 9 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 5.452786445617676, + "learning_rate": 1.8941798941798943e-05, + "loss": 0.7708, + "step": 10 + }, + { + "epoch": 0.15748031496062992, + "eval_accuracy": 0.6659707724425887, + "eval_loss": 0.6283570528030396, + "eval_runtime": 108.7155, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 10 + }, + { + "epoch": 0.1732283464566929, + "grad_norm": 4.522532939910889, + "learning_rate": 1.8835978835978836e-05, + "loss": 0.6848, + "step": 11 + }, + { + "epoch": 0.1889763779527559, + "grad_norm": 6.4987688064575195, + "learning_rate": 1.8730158730158732e-05, + "loss": 0.6644, + "step": 12 + }, + { + "epoch": 0.2047244094488189, + "grad_norm": 4.2297682762146, + "learning_rate": 1.8624338624338625e-05, + "loss": 0.7227, + "step": 13 + }, + { + "epoch": 0.2204724409448819, + "grad_norm": 6.5658063888549805, + "learning_rate": 1.851851851851852e-05, + "loss": 0.6991, + "step": 14 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 6.549685001373291, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.7875, + "step": 15 + }, + { + "epoch": 0.23622047244094488, + "eval_accuracy": 0.7244258872651357, + "eval_loss": 0.5749094486236572, + "eval_runtime": 108.7121, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 15 + }, + { + "epoch": 0.25196850393700787, + "grad_norm": 3.6349198818206787, + "learning_rate": 1.8306878306878308e-05, + "loss": 0.5732, + "step": 16 + }, + { + "epoch": 0.2677165354330709, + "grad_norm": 4.741979598999023, + "learning_rate": 1.82010582010582e-05, + "loss": 0.5774, + "step": 17 + }, + { + "epoch": 0.28346456692913385, + "grad_norm": 4.751223087310791, + "learning_rate": 1.8095238095238097e-05, + "loss": 0.5738, + "step": 18 + }, + { + "epoch": 0.2992125984251969, + "grad_norm": 5.214819431304932, + "learning_rate": 1.798941798941799e-05, + "loss": 0.7182, + "step": 19 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 5.566962718963623, + "learning_rate": 1.7883597883597884e-05, + "loss": 0.6575, + "step": 20 + }, + { + "epoch": 0.31496062992125984, + "eval_accuracy": 0.7390396659707724, + "eval_loss": 0.5360159873962402, + "eval_runtime": 108.7252, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 20 + }, + { + "epoch": 0.33070866141732286, + "grad_norm": 4.060683727264404, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.5976, + "step": 21 + }, + { + "epoch": 0.3464566929133858, + "grad_norm": 5.9868621826171875, + "learning_rate": 1.7671957671957673e-05, + "loss": 0.7734, + "step": 22 + }, + { + "epoch": 0.36220472440944884, + "grad_norm": 3.4295496940612793, + "learning_rate": 1.7566137566137566e-05, + "loss": 0.5543, + "step": 23 + }, + { + "epoch": 0.3779527559055118, + "grad_norm": 4.587719917297363, + "learning_rate": 1.7460317460317463e-05, + "loss": 0.6497, + "step": 24 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 4.520890235900879, + "learning_rate": 1.7354497354497356e-05, + "loss": 0.6802, + "step": 25 + }, + { + "epoch": 0.3937007874015748, + "eval_accuracy": 0.7432150313152401, + "eval_loss": 0.5086582899093628, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 25 + }, + { + "epoch": 0.4094488188976378, + "grad_norm": 3.653116464614868, + "learning_rate": 1.724867724867725e-05, + "loss": 0.4888, + "step": 26 + }, + { + "epoch": 0.4251968503937008, + "grad_norm": 4.042315483093262, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6004, + "step": 27 + }, + { + "epoch": 0.4409448818897638, + "grad_norm": 5.317520618438721, + "learning_rate": 1.7037037037037038e-05, + "loss": 0.6253, + "step": 28 + }, + { + "epoch": 0.4566929133858268, + "grad_norm": 3.8642020225524902, + "learning_rate": 1.693121693121693e-05, + "loss": 0.5778, + "step": 29 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 2.3941361904144287, + "learning_rate": 1.6825396825396828e-05, + "loss": 0.3982, + "step": 30 + }, + { + "epoch": 0.47244094488188976, + "eval_accuracy": 0.7578288100208769, + "eval_loss": 0.4889708459377289, + "eval_runtime": 108.7183, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 30 + }, + { + "epoch": 0.4881889763779528, + "grad_norm": 4.1248650550842285, + "learning_rate": 1.671957671957672e-05, + "loss": 0.777, + "step": 31 + }, + { + "epoch": 0.5039370078740157, + "grad_norm": 3.369483470916748, + "learning_rate": 1.6613756613756614e-05, + "loss": 0.5675, + "step": 32 + }, + { + "epoch": 0.5196850393700787, + "grad_norm": 3.8457119464874268, + "learning_rate": 1.6507936507936507e-05, + "loss": 0.6227, + "step": 33 + }, + { + "epoch": 0.5354330708661418, + "grad_norm": 4.809354782104492, + "learning_rate": 1.6402116402116404e-05, + "loss": 0.7111, + "step": 34 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 2.84769868850708, + "learning_rate": 1.6296296296296297e-05, + "loss": 0.4555, + "step": 35 + }, + { + "epoch": 0.5511811023622047, + "eval_accuracy": 0.7599164926931107, + "eval_loss": 0.4774630665779114, + "eval_runtime": 108.7145, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 35 + }, + { + "epoch": 0.5669291338582677, + "grad_norm": 4.496406555175781, + "learning_rate": 1.6190476190476193e-05, + "loss": 0.6703, + "step": 36 + }, + { + "epoch": 0.5826771653543307, + "grad_norm": 5.721245288848877, + "learning_rate": 1.6084656084656086e-05, + "loss": 0.7066, + "step": 37 + }, + { + "epoch": 0.5984251968503937, + "grad_norm": 4.494580268859863, + "learning_rate": 1.597883597883598e-05, + "loss": 0.4907, + "step": 38 + }, + { + "epoch": 0.6141732283464567, + "grad_norm": 2.8905560970306396, + "learning_rate": 1.5873015873015872e-05, + "loss": 0.5501, + "step": 39 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 9.776362419128418, + "learning_rate": 1.576719576719577e-05, + "loss": 0.8838, + "step": 40 + }, + { + "epoch": 0.6299212598425197, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.46829721331596375, + "eval_runtime": 108.7189, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 40 + }, + { + "epoch": 0.6456692913385826, + "grad_norm": 3.8481881618499756, + "learning_rate": 1.5661375661375662e-05, + "loss": 0.5309, + "step": 41 + }, + { + "epoch": 0.6614173228346457, + "grad_norm": 6.0327839851379395, + "learning_rate": 1.555555555555556e-05, + "loss": 0.6414, + "step": 42 + }, + { + "epoch": 0.6771653543307087, + "grad_norm": 4.993657112121582, + "learning_rate": 1.544973544973545e-05, + "loss": 0.5727, + "step": 43 + }, + { + "epoch": 0.6929133858267716, + "grad_norm": 4.3265252113342285, + "learning_rate": 1.5343915343915344e-05, + "loss": 0.4913, + "step": 44 + }, + { + "epoch": 0.7086614173228346, + "grad_norm": 3.6012353897094727, + "learning_rate": 1.523809523809524e-05, + "loss": 0.4692, + "step": 45 + }, + { + "epoch": 0.7086614173228346, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4610559344291687, + "eval_runtime": 108.7229, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 45 + }, + { + "epoch": 0.7244094488188977, + "grad_norm": 4.319406509399414, + "learning_rate": 1.5132275132275134e-05, + "loss": 0.5203, + "step": 46 + }, + { + "epoch": 0.7401574803149606, + "grad_norm": 3.885263442993164, + "learning_rate": 1.5026455026455027e-05, + "loss": 0.5084, + "step": 47 + }, + { + "epoch": 0.7559055118110236, + "grad_norm": 3.547327995300293, + "learning_rate": 1.4920634920634922e-05, + "loss": 0.442, + "step": 48 + }, + { + "epoch": 0.7716535433070866, + "grad_norm": 3.8868982791900635, + "learning_rate": 1.4814814814814815e-05, + "loss": 0.5848, + "step": 49 + }, + { + "epoch": 0.7874015748031497, + "grad_norm": 2.222346544265747, + "learning_rate": 1.470899470899471e-05, + "loss": 0.5455, + "step": 50 + }, + { + "epoch": 0.7874015748031497, + "eval_accuracy": 0.7620041753653445, + "eval_loss": 0.4531377851963043, + "eval_runtime": 108.7528, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 50 + }, + { + "epoch": 0.8031496062992126, + "grad_norm": 3.129575252532959, + "learning_rate": 1.4603174603174603e-05, + "loss": 0.4861, + "step": 51 + }, + { + "epoch": 0.8188976377952756, + "grad_norm": 4.924710750579834, + "learning_rate": 1.44973544973545e-05, + "loss": 0.5782, + "step": 52 + }, + { + "epoch": 0.8346456692913385, + "grad_norm": 5.2157182693481445, + "learning_rate": 1.4391534391534392e-05, + "loss": 0.7203, + "step": 53 + }, + { + "epoch": 0.8503937007874016, + "grad_norm": 4.697371959686279, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.4261, + "step": 54 + }, + { + "epoch": 0.8661417322834646, + "grad_norm": 2.8899056911468506, + "learning_rate": 1.417989417989418e-05, + "loss": 0.5696, + "step": 55 + }, + { + "epoch": 0.8661417322834646, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4459321200847626, + "eval_runtime": 108.7951, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.551, + "step": 55 + }, + { + "epoch": 0.8818897637795275, + "grad_norm": 4.532041072845459, + "learning_rate": 1.4074074074074075e-05, + "loss": 0.5723, + "step": 56 + }, + { + "epoch": 0.8976377952755905, + "grad_norm": 2.3436343669891357, + "learning_rate": 1.3968253968253968e-05, + "loss": 0.3629, + "step": 57 + }, + { + "epoch": 0.9133858267716536, + "grad_norm": 3.333158493041992, + "learning_rate": 1.3862433862433865e-05, + "loss": 0.5433, + "step": 58 + }, + { + "epoch": 0.9291338582677166, + "grad_norm": 4.177884101867676, + "learning_rate": 1.3756613756613758e-05, + "loss": 0.3747, + "step": 59 + }, + { + "epoch": 0.9448818897637795, + "grad_norm": 5.238712310791016, + "learning_rate": 1.3650793650793652e-05, + "loss": 0.7453, + "step": 60 + }, + { + "epoch": 0.9448818897637795, + "eval_accuracy": 0.7766179540709812, + "eval_loss": 0.4413756728172302, + "eval_runtime": 108.7463, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 60 + }, + { + "epoch": 0.9606299212598425, + "grad_norm": 4.022979736328125, + "learning_rate": 1.3544973544973545e-05, + "loss": 0.6177, + "step": 61 + }, + { + "epoch": 0.9763779527559056, + "grad_norm": 2.0528969764709473, + "learning_rate": 1.343915343915344e-05, + "loss": 0.3505, + "step": 62 + }, + { + "epoch": 0.9921259842519685, + "grad_norm": 3.9705586433410645, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5858, + "step": 63 + }, + { + "epoch": 1.0078740157480315, + "grad_norm": 8.341585159301758, + "learning_rate": 1.322751322751323e-05, + "loss": 0.6721, + "step": 64 + }, + { + "epoch": 1.0236220472440944, + "grad_norm": 4.031370162963867, + "learning_rate": 1.3121693121693123e-05, + "loss": 0.5369, + "step": 65 + }, + { + "epoch": 1.0236220472440944, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.43705105781555176, + "eval_runtime": 108.7278, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 65 + }, + { + "epoch": 1.0393700787401574, + "grad_norm": 2.898926019668579, + "learning_rate": 1.3015873015873018e-05, + "loss": 0.3628, + "step": 66 + }, + { + "epoch": 1.0551181102362204, + "grad_norm": 2.9200918674468994, + "learning_rate": 1.291005291005291e-05, + "loss": 0.3311, + "step": 67 + }, + { + "epoch": 1.0708661417322836, + "grad_norm": 4.506103992462158, + "learning_rate": 1.2804232804232805e-05, + "loss": 0.5813, + "step": 68 + }, + { + "epoch": 1.0866141732283465, + "grad_norm": 4.187809944152832, + "learning_rate": 1.2698412698412699e-05, + "loss": 0.4802, + "step": 69 + }, + { + "epoch": 1.1023622047244095, + "grad_norm": 3.5520920753479004, + "learning_rate": 1.2592592592592593e-05, + "loss": 0.3994, + "step": 70 + }, + { + "epoch": 1.1023622047244095, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.43335652351379395, + "eval_runtime": 108.738, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 70 + }, + { + "epoch": 1.1181102362204725, + "grad_norm": 3.6081998348236084, + "learning_rate": 1.2486772486772486e-05, + "loss": 0.5266, + "step": 71 + }, + { + "epoch": 1.1338582677165354, + "grad_norm": 3.6554276943206787, + "learning_rate": 1.2380952380952383e-05, + "loss": 0.5231, + "step": 72 + }, + { + "epoch": 1.1496062992125984, + "grad_norm": 3.551367998123169, + "learning_rate": 1.2275132275132276e-05, + "loss": 0.4538, + "step": 73 + }, + { + "epoch": 1.1653543307086613, + "grad_norm": 4.252958297729492, + "learning_rate": 1.216931216931217e-05, + "loss": 0.4688, + "step": 74 + }, + { + "epoch": 1.1811023622047245, + "grad_norm": 4.337672710418701, + "learning_rate": 1.2063492063492064e-05, + "loss": 0.4235, + "step": 75 + }, + { + "epoch": 1.1811023622047245, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.429808109998703, + "eval_runtime": 108.723, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 75 + }, + { + "epoch": 1.1968503937007875, + "grad_norm": 2.607356548309326, + "learning_rate": 1.1957671957671959e-05, + "loss": 0.3639, + "step": 76 + }, + { + "epoch": 1.2125984251968505, + "grad_norm": 3.198551654815674, + "learning_rate": 1.1851851851851852e-05, + "loss": 0.4066, + "step": 77 + }, + { + "epoch": 1.2283464566929134, + "grad_norm": 4.820532321929932, + "learning_rate": 1.1746031746031748e-05, + "loss": 0.5906, + "step": 78 + }, + { + "epoch": 1.2440944881889764, + "grad_norm": 3.5706419944763184, + "learning_rate": 1.1640211640211641e-05, + "loss": 0.5065, + "step": 79 + }, + { + "epoch": 1.2598425196850394, + "grad_norm": 4.763455867767334, + "learning_rate": 1.1534391534391536e-05, + "loss": 0.4811, + "step": 80 + }, + { + "epoch": 1.2598425196850394, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4265703856945038, + "eval_runtime": 108.7158, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 80 + }, + { + "epoch": 1.2755905511811023, + "grad_norm": 5.053676605224609, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.5317, + "step": 81 + }, + { + "epoch": 1.2913385826771653, + "grad_norm": 4.484920024871826, + "learning_rate": 1.1322751322751324e-05, + "loss": 0.5474, + "step": 82 + }, + { + "epoch": 1.3070866141732282, + "grad_norm": 4.059377193450928, + "learning_rate": 1.1216931216931217e-05, + "loss": 0.4936, + "step": 83 + }, + { + "epoch": 1.3228346456692912, + "grad_norm": 4.017063617706299, + "learning_rate": 1.1111111111111113e-05, + "loss": 0.3574, + "step": 84 + }, + { + "epoch": 1.3385826771653544, + "grad_norm": 3.275650978088379, + "learning_rate": 1.1005291005291006e-05, + "loss": 0.5072, + "step": 85 + }, + { + "epoch": 1.3385826771653544, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4252822697162628, + "eval_runtime": 108.7176, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 85 + }, + { + "epoch": 1.3543307086614174, + "grad_norm": 5.258458614349365, + "learning_rate": 1.0899470899470901e-05, + "loss": 0.6735, + "step": 86 + }, + { + "epoch": 1.3700787401574803, + "grad_norm": 3.070061445236206, + "learning_rate": 1.0793650793650794e-05, + "loss": 0.4365, + "step": 87 + }, + { + "epoch": 1.3858267716535433, + "grad_norm": 3.556974172592163, + "learning_rate": 1.0687830687830689e-05, + "loss": 0.5113, + "step": 88 + }, + { + "epoch": 1.4015748031496063, + "grad_norm": 2.5072743892669678, + "learning_rate": 1.0582010582010582e-05, + "loss": 0.286, + "step": 89 + }, + { + "epoch": 1.4173228346456692, + "grad_norm": 4.407125949859619, + "learning_rate": 1.0476190476190477e-05, + "loss": 0.4405, + "step": 90 + }, + { + "epoch": 1.4173228346456692, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.42280957102775574, + "eval_runtime": 108.7146, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 90 + }, + { + "epoch": 1.4330708661417324, + "grad_norm": 6.310215473175049, + "learning_rate": 1.037037037037037e-05, + "loss": 0.6205, + "step": 91 + }, + { + "epoch": 1.4488188976377954, + "grad_norm": 3.586291551589966, + "learning_rate": 1.0264550264550266e-05, + "loss": 0.4571, + "step": 92 + }, + { + "epoch": 1.4645669291338583, + "grad_norm": 4.950135707855225, + "learning_rate": 1.015873015873016e-05, + "loss": 0.7159, + "step": 93 + }, + { + "epoch": 1.4803149606299213, + "grad_norm": 2.9908485412597656, + "learning_rate": 1.0052910052910054e-05, + "loss": 0.4769, + "step": 94 + }, + { + "epoch": 1.4960629921259843, + "grad_norm": 4.945335865020752, + "learning_rate": 9.947089947089947e-06, + "loss": 0.5349, + "step": 95 + }, + { + "epoch": 1.4960629921259843, + "eval_accuracy": 0.7870563674321504, + "eval_loss": 0.41962236166000366, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 95 + }, + { + "epoch": 1.5118110236220472, + "grad_norm": 4.648383140563965, + "learning_rate": 9.841269841269842e-06, + "loss": 0.5035, + "step": 96 + }, + { + "epoch": 1.5275590551181102, + "grad_norm": 4.447684288024902, + "learning_rate": 9.735449735449735e-06, + "loss": 0.5128, + "step": 97 + }, + { + "epoch": 1.5433070866141732, + "grad_norm": 3.652973175048828, + "learning_rate": 9.62962962962963e-06, + "loss": 0.3454, + "step": 98 + }, + { + "epoch": 1.5590551181102361, + "grad_norm": 3.083529472351074, + "learning_rate": 9.523809523809525e-06, + "loss": 0.4522, + "step": 99 + }, + { + "epoch": 1.574803149606299, + "grad_norm": 2.6377124786376953, + "learning_rate": 9.417989417989418e-06, + "loss": 0.3342, + "step": 100 + }, + { + "epoch": 1.574803149606299, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.4169768989086151, + "eval_runtime": 108.7386, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 100 + } + ], + "logging_steps": 1.0, + "max_steps": 189, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..594d4dba0d053d5c7bfd636fe070834e9478eee4 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e8eb4d94fcecbd8ab0aa24ee61662b7a5da2eef5f366546a325524fc03e575 +size 5112 diff --git a/checkpoint-120/README.md b/checkpoint-120/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a53ea425933059074716d64973f94edb73a8eb80 --- /dev/null +++ b/checkpoint-120/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/checkpoint-120/adapter_config.json b/checkpoint-120/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e788c33bde47908737d80fe730414dabf5e14ea --- /dev/null +++ b/checkpoint-120/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj" + ], + "task_type": "SEQ_CLS", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-120/adapter_model.safetensors b/checkpoint-120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35fdd2ff2fa8b7647a024a8d4bba0fc247324369 --- /dev/null +++ b/checkpoint-120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f11851fe23583ec44e576c5fb18b237de7fc159a33346983ad2e4ff1fe4fa74 +size 57249936 diff --git a/checkpoint-120/optimizer.pt b/checkpoint-120/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4860d847d8337f5d3a7c14bc01d61d1ae1d754d3 --- /dev/null +++ b/checkpoint-120/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea5ab8da21f265c0e6801bfbf1ac9c3ccbd5b9278d7bbafabbd1157bdd838a0 +size 114624506 diff --git a/checkpoint-120/rng_state.pth b/checkpoint-120/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..273e2bdf852533f9f092cbae0eff5e98381bec3b --- /dev/null +++ b/checkpoint-120/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be7d6bfaa8df62eff912b94645d28825ecb422e4c4bb9e6d312ddb70ef3076e +size 14244 diff --git a/checkpoint-120/scheduler.pt b/checkpoint-120/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ad75972691afe07f44829babce8a0c0d1bb93a5 --- /dev/null +++ b/checkpoint-120/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78e16bde6c7a374dba3962de0b5722aa1796cc5d6269a54005075f87f7c4bbbd +size 1064 diff --git a/checkpoint-120/special_tokens_map.json b/checkpoint-120/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-120/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-120/tokenizer.json b/checkpoint-120/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/checkpoint-120/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/checkpoint-120/tokenizer_config.json b/checkpoint-120/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..50e79ef5e39127ac3280c4f578d33786f5afbb5c --- /dev/null +++ b/checkpoint-120/tokenizer_config.json @@ -0,0 +1,1756 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-120/trainer_state.json b/checkpoint-120/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8b40231e73d9f23418fc5b8f3fd45839470cebf2 --- /dev/null +++ b/checkpoint-120/trainer_state.json @@ -0,0 +1,1089 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.889763779527559, + "eval_steps": 5, + "global_step": 120, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015748031496062992, + "grad_norm": 5.510926246643066, + "learning_rate": 1.9894179894179895e-05, + "loss": 0.9249, + "step": 1 + }, + { + "epoch": 0.031496062992125984, + "grad_norm": 8.61505126953125, + "learning_rate": 1.978835978835979e-05, + "loss": 0.8445, + "step": 2 + }, + { + "epoch": 0.047244094488188976, + "grad_norm": 7.036591529846191, + "learning_rate": 1.9682539682539684e-05, + "loss": 0.9654, + "step": 3 + }, + { + "epoch": 0.06299212598425197, + "grad_norm": 5.803933143615723, + "learning_rate": 1.9576719576719577e-05, + "loss": 0.9276, + "step": 4 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 5.716428756713867, + "learning_rate": 1.947089947089947e-05, + "loss": 0.9241, + "step": 5 + }, + { + "epoch": 0.07874015748031496, + "eval_accuracy": 0.5678496868475992, + "eval_loss": 0.6996241807937622, + "eval_runtime": 108.7291, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 5 + }, + { + "epoch": 0.09448818897637795, + "grad_norm": 7.55866813659668, + "learning_rate": 1.9365079365079367e-05, + "loss": 0.9947, + "step": 6 + }, + { + "epoch": 0.11023622047244094, + "grad_norm": 6.801171779632568, + "learning_rate": 1.925925925925926e-05, + "loss": 0.972, + "step": 7 + }, + { + "epoch": 0.12598425196850394, + "grad_norm": 4.845946311950684, + "learning_rate": 1.9153439153439156e-05, + "loss": 0.6478, + "step": 8 + }, + { + "epoch": 0.14173228346456693, + "grad_norm": 10.487945556640625, + "learning_rate": 1.904761904761905e-05, + "loss": 0.8597, + "step": 9 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 5.452786445617676, + "learning_rate": 1.8941798941798943e-05, + "loss": 0.7708, + "step": 10 + }, + { + "epoch": 0.15748031496062992, + "eval_accuracy": 0.6659707724425887, + "eval_loss": 0.6283570528030396, + "eval_runtime": 108.7155, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 10 + }, + { + "epoch": 0.1732283464566929, + "grad_norm": 4.522532939910889, + "learning_rate": 1.8835978835978836e-05, + "loss": 0.6848, + "step": 11 + }, + { + "epoch": 0.1889763779527559, + "grad_norm": 6.4987688064575195, + "learning_rate": 1.8730158730158732e-05, + "loss": 0.6644, + "step": 12 + }, + { + "epoch": 0.2047244094488189, + "grad_norm": 4.2297682762146, + "learning_rate": 1.8624338624338625e-05, + "loss": 0.7227, + "step": 13 + }, + { + "epoch": 0.2204724409448819, + "grad_norm": 6.5658063888549805, + "learning_rate": 1.851851851851852e-05, + "loss": 0.6991, + "step": 14 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 6.549685001373291, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.7875, + "step": 15 + }, + { + "epoch": 0.23622047244094488, + "eval_accuracy": 0.7244258872651357, + "eval_loss": 0.5749094486236572, + "eval_runtime": 108.7121, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 15 + }, + { + "epoch": 0.25196850393700787, + "grad_norm": 3.6349198818206787, + "learning_rate": 1.8306878306878308e-05, + "loss": 0.5732, + "step": 16 + }, + { + "epoch": 0.2677165354330709, + "grad_norm": 4.741979598999023, + "learning_rate": 1.82010582010582e-05, + "loss": 0.5774, + "step": 17 + }, + { + "epoch": 0.28346456692913385, + "grad_norm": 4.751223087310791, + "learning_rate": 1.8095238095238097e-05, + "loss": 0.5738, + "step": 18 + }, + { + "epoch": 0.2992125984251969, + "grad_norm": 5.214819431304932, + "learning_rate": 1.798941798941799e-05, + "loss": 0.7182, + "step": 19 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 5.566962718963623, + "learning_rate": 1.7883597883597884e-05, + "loss": 0.6575, + "step": 20 + }, + { + "epoch": 0.31496062992125984, + "eval_accuracy": 0.7390396659707724, + "eval_loss": 0.5360159873962402, + "eval_runtime": 108.7252, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 20 + }, + { + "epoch": 0.33070866141732286, + "grad_norm": 4.060683727264404, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.5976, + "step": 21 + }, + { + "epoch": 0.3464566929133858, + "grad_norm": 5.9868621826171875, + "learning_rate": 1.7671957671957673e-05, + "loss": 0.7734, + "step": 22 + }, + { + "epoch": 0.36220472440944884, + "grad_norm": 3.4295496940612793, + "learning_rate": 1.7566137566137566e-05, + "loss": 0.5543, + "step": 23 + }, + { + "epoch": 0.3779527559055118, + "grad_norm": 4.587719917297363, + "learning_rate": 1.7460317460317463e-05, + "loss": 0.6497, + "step": 24 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 4.520890235900879, + "learning_rate": 1.7354497354497356e-05, + "loss": 0.6802, + "step": 25 + }, + { + "epoch": 0.3937007874015748, + "eval_accuracy": 0.7432150313152401, + "eval_loss": 0.5086582899093628, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 25 + }, + { + "epoch": 0.4094488188976378, + "grad_norm": 3.653116464614868, + "learning_rate": 1.724867724867725e-05, + "loss": 0.4888, + "step": 26 + }, + { + "epoch": 0.4251968503937008, + "grad_norm": 4.042315483093262, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6004, + "step": 27 + }, + { + "epoch": 0.4409448818897638, + "grad_norm": 5.317520618438721, + "learning_rate": 1.7037037037037038e-05, + "loss": 0.6253, + "step": 28 + }, + { + "epoch": 0.4566929133858268, + "grad_norm": 3.8642020225524902, + "learning_rate": 1.693121693121693e-05, + "loss": 0.5778, + "step": 29 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 2.3941361904144287, + "learning_rate": 1.6825396825396828e-05, + "loss": 0.3982, + "step": 30 + }, + { + "epoch": 0.47244094488188976, + "eval_accuracy": 0.7578288100208769, + "eval_loss": 0.4889708459377289, + "eval_runtime": 108.7183, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 30 + }, + { + "epoch": 0.4881889763779528, + "grad_norm": 4.1248650550842285, + "learning_rate": 1.671957671957672e-05, + "loss": 0.777, + "step": 31 + }, + { + "epoch": 0.5039370078740157, + "grad_norm": 3.369483470916748, + "learning_rate": 1.6613756613756614e-05, + "loss": 0.5675, + "step": 32 + }, + { + "epoch": 0.5196850393700787, + "grad_norm": 3.8457119464874268, + "learning_rate": 1.6507936507936507e-05, + "loss": 0.6227, + "step": 33 + }, + { + "epoch": 0.5354330708661418, + "grad_norm": 4.809354782104492, + "learning_rate": 1.6402116402116404e-05, + "loss": 0.7111, + "step": 34 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 2.84769868850708, + "learning_rate": 1.6296296296296297e-05, + "loss": 0.4555, + "step": 35 + }, + { + "epoch": 0.5511811023622047, + "eval_accuracy": 0.7599164926931107, + "eval_loss": 0.4774630665779114, + "eval_runtime": 108.7145, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 35 + }, + { + "epoch": 0.5669291338582677, + "grad_norm": 4.496406555175781, + "learning_rate": 1.6190476190476193e-05, + "loss": 0.6703, + "step": 36 + }, + { + "epoch": 0.5826771653543307, + "grad_norm": 5.721245288848877, + "learning_rate": 1.6084656084656086e-05, + "loss": 0.7066, + "step": 37 + }, + { + "epoch": 0.5984251968503937, + "grad_norm": 4.494580268859863, + "learning_rate": 1.597883597883598e-05, + "loss": 0.4907, + "step": 38 + }, + { + "epoch": 0.6141732283464567, + "grad_norm": 2.8905560970306396, + "learning_rate": 1.5873015873015872e-05, + "loss": 0.5501, + "step": 39 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 9.776362419128418, + "learning_rate": 1.576719576719577e-05, + "loss": 0.8838, + "step": 40 + }, + { + "epoch": 0.6299212598425197, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.46829721331596375, + "eval_runtime": 108.7189, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 40 + }, + { + "epoch": 0.6456692913385826, + "grad_norm": 3.8481881618499756, + "learning_rate": 1.5661375661375662e-05, + "loss": 0.5309, + "step": 41 + }, + { + "epoch": 0.6614173228346457, + "grad_norm": 6.0327839851379395, + "learning_rate": 1.555555555555556e-05, + "loss": 0.6414, + "step": 42 + }, + { + "epoch": 0.6771653543307087, + "grad_norm": 4.993657112121582, + "learning_rate": 1.544973544973545e-05, + "loss": 0.5727, + "step": 43 + }, + { + "epoch": 0.6929133858267716, + "grad_norm": 4.3265252113342285, + "learning_rate": 1.5343915343915344e-05, + "loss": 0.4913, + "step": 44 + }, + { + "epoch": 0.7086614173228346, + "grad_norm": 3.6012353897094727, + "learning_rate": 1.523809523809524e-05, + "loss": 0.4692, + "step": 45 + }, + { + "epoch": 0.7086614173228346, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4610559344291687, + "eval_runtime": 108.7229, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 45 + }, + { + "epoch": 0.7244094488188977, + "grad_norm": 4.319406509399414, + "learning_rate": 1.5132275132275134e-05, + "loss": 0.5203, + "step": 46 + }, + { + "epoch": 0.7401574803149606, + "grad_norm": 3.885263442993164, + "learning_rate": 1.5026455026455027e-05, + "loss": 0.5084, + "step": 47 + }, + { + "epoch": 0.7559055118110236, + "grad_norm": 3.547327995300293, + "learning_rate": 1.4920634920634922e-05, + "loss": 0.442, + "step": 48 + }, + { + "epoch": 0.7716535433070866, + "grad_norm": 3.8868982791900635, + "learning_rate": 1.4814814814814815e-05, + "loss": 0.5848, + "step": 49 + }, + { + "epoch": 0.7874015748031497, + "grad_norm": 2.222346544265747, + "learning_rate": 1.470899470899471e-05, + "loss": 0.5455, + "step": 50 + }, + { + "epoch": 0.7874015748031497, + "eval_accuracy": 0.7620041753653445, + "eval_loss": 0.4531377851963043, + "eval_runtime": 108.7528, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 50 + }, + { + "epoch": 0.8031496062992126, + "grad_norm": 3.129575252532959, + "learning_rate": 1.4603174603174603e-05, + "loss": 0.4861, + "step": 51 + }, + { + "epoch": 0.8188976377952756, + "grad_norm": 4.924710750579834, + "learning_rate": 1.44973544973545e-05, + "loss": 0.5782, + "step": 52 + }, + { + "epoch": 0.8346456692913385, + "grad_norm": 5.2157182693481445, + "learning_rate": 1.4391534391534392e-05, + "loss": 0.7203, + "step": 53 + }, + { + "epoch": 0.8503937007874016, + "grad_norm": 4.697371959686279, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.4261, + "step": 54 + }, + { + "epoch": 0.8661417322834646, + "grad_norm": 2.8899056911468506, + "learning_rate": 1.417989417989418e-05, + "loss": 0.5696, + "step": 55 + }, + { + "epoch": 0.8661417322834646, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4459321200847626, + "eval_runtime": 108.7951, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.551, + "step": 55 + }, + { + "epoch": 0.8818897637795275, + "grad_norm": 4.532041072845459, + "learning_rate": 1.4074074074074075e-05, + "loss": 0.5723, + "step": 56 + }, + { + "epoch": 0.8976377952755905, + "grad_norm": 2.3436343669891357, + "learning_rate": 1.3968253968253968e-05, + "loss": 0.3629, + "step": 57 + }, + { + "epoch": 0.9133858267716536, + "grad_norm": 3.333158493041992, + "learning_rate": 1.3862433862433865e-05, + "loss": 0.5433, + "step": 58 + }, + { + "epoch": 0.9291338582677166, + "grad_norm": 4.177884101867676, + "learning_rate": 1.3756613756613758e-05, + "loss": 0.3747, + "step": 59 + }, + { + "epoch": 0.9448818897637795, + "grad_norm": 5.238712310791016, + "learning_rate": 1.3650793650793652e-05, + "loss": 0.7453, + "step": 60 + }, + { + "epoch": 0.9448818897637795, + "eval_accuracy": 0.7766179540709812, + "eval_loss": 0.4413756728172302, + "eval_runtime": 108.7463, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 60 + }, + { + "epoch": 0.9606299212598425, + "grad_norm": 4.022979736328125, + "learning_rate": 1.3544973544973545e-05, + "loss": 0.6177, + "step": 61 + }, + { + "epoch": 0.9763779527559056, + "grad_norm": 2.0528969764709473, + "learning_rate": 1.343915343915344e-05, + "loss": 0.3505, + "step": 62 + }, + { + "epoch": 0.9921259842519685, + "grad_norm": 3.9705586433410645, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5858, + "step": 63 + }, + { + "epoch": 1.0078740157480315, + "grad_norm": 8.341585159301758, + "learning_rate": 1.322751322751323e-05, + "loss": 0.6721, + "step": 64 + }, + { + "epoch": 1.0236220472440944, + "grad_norm": 4.031370162963867, + "learning_rate": 1.3121693121693123e-05, + "loss": 0.5369, + "step": 65 + }, + { + "epoch": 1.0236220472440944, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.43705105781555176, + "eval_runtime": 108.7278, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 65 + }, + { + "epoch": 1.0393700787401574, + "grad_norm": 2.898926019668579, + "learning_rate": 1.3015873015873018e-05, + "loss": 0.3628, + "step": 66 + }, + { + "epoch": 1.0551181102362204, + "grad_norm": 2.9200918674468994, + "learning_rate": 1.291005291005291e-05, + "loss": 0.3311, + "step": 67 + }, + { + "epoch": 1.0708661417322836, + "grad_norm": 4.506103992462158, + "learning_rate": 1.2804232804232805e-05, + "loss": 0.5813, + "step": 68 + }, + { + "epoch": 1.0866141732283465, + "grad_norm": 4.187809944152832, + "learning_rate": 1.2698412698412699e-05, + "loss": 0.4802, + "step": 69 + }, + { + "epoch": 1.1023622047244095, + "grad_norm": 3.5520920753479004, + "learning_rate": 1.2592592592592593e-05, + "loss": 0.3994, + "step": 70 + }, + { + "epoch": 1.1023622047244095, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.43335652351379395, + "eval_runtime": 108.738, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 70 + }, + { + "epoch": 1.1181102362204725, + "grad_norm": 3.6081998348236084, + "learning_rate": 1.2486772486772486e-05, + "loss": 0.5266, + "step": 71 + }, + { + "epoch": 1.1338582677165354, + "grad_norm": 3.6554276943206787, + "learning_rate": 1.2380952380952383e-05, + "loss": 0.5231, + "step": 72 + }, + { + "epoch": 1.1496062992125984, + "grad_norm": 3.551367998123169, + "learning_rate": 1.2275132275132276e-05, + "loss": 0.4538, + "step": 73 + }, + { + "epoch": 1.1653543307086613, + "grad_norm": 4.252958297729492, + "learning_rate": 1.216931216931217e-05, + "loss": 0.4688, + "step": 74 + }, + { + "epoch": 1.1811023622047245, + "grad_norm": 4.337672710418701, + "learning_rate": 1.2063492063492064e-05, + "loss": 0.4235, + "step": 75 + }, + { + "epoch": 1.1811023622047245, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.429808109998703, + "eval_runtime": 108.723, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 75 + }, + { + "epoch": 1.1968503937007875, + "grad_norm": 2.607356548309326, + "learning_rate": 1.1957671957671959e-05, + "loss": 0.3639, + "step": 76 + }, + { + "epoch": 1.2125984251968505, + "grad_norm": 3.198551654815674, + "learning_rate": 1.1851851851851852e-05, + "loss": 0.4066, + "step": 77 + }, + { + "epoch": 1.2283464566929134, + "grad_norm": 4.820532321929932, + "learning_rate": 1.1746031746031748e-05, + "loss": 0.5906, + "step": 78 + }, + { + "epoch": 1.2440944881889764, + "grad_norm": 3.5706419944763184, + "learning_rate": 1.1640211640211641e-05, + "loss": 0.5065, + "step": 79 + }, + { + "epoch": 1.2598425196850394, + "grad_norm": 4.763455867767334, + "learning_rate": 1.1534391534391536e-05, + "loss": 0.4811, + "step": 80 + }, + { + "epoch": 1.2598425196850394, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4265703856945038, + "eval_runtime": 108.7158, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 80 + }, + { + "epoch": 1.2755905511811023, + "grad_norm": 5.053676605224609, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.5317, + "step": 81 + }, + { + "epoch": 1.2913385826771653, + "grad_norm": 4.484920024871826, + "learning_rate": 1.1322751322751324e-05, + "loss": 0.5474, + "step": 82 + }, + { + "epoch": 1.3070866141732282, + "grad_norm": 4.059377193450928, + "learning_rate": 1.1216931216931217e-05, + "loss": 0.4936, + "step": 83 + }, + { + "epoch": 1.3228346456692912, + "grad_norm": 4.017063617706299, + "learning_rate": 1.1111111111111113e-05, + "loss": 0.3574, + "step": 84 + }, + { + "epoch": 1.3385826771653544, + "grad_norm": 3.275650978088379, + "learning_rate": 1.1005291005291006e-05, + "loss": 0.5072, + "step": 85 + }, + { + "epoch": 1.3385826771653544, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4252822697162628, + "eval_runtime": 108.7176, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 85 + }, + { + "epoch": 1.3543307086614174, + "grad_norm": 5.258458614349365, + "learning_rate": 1.0899470899470901e-05, + "loss": 0.6735, + "step": 86 + }, + { + "epoch": 1.3700787401574803, + "grad_norm": 3.070061445236206, + "learning_rate": 1.0793650793650794e-05, + "loss": 0.4365, + "step": 87 + }, + { + "epoch": 1.3858267716535433, + "grad_norm": 3.556974172592163, + "learning_rate": 1.0687830687830689e-05, + "loss": 0.5113, + "step": 88 + }, + { + "epoch": 1.4015748031496063, + "grad_norm": 2.5072743892669678, + "learning_rate": 1.0582010582010582e-05, + "loss": 0.286, + "step": 89 + }, + { + "epoch": 1.4173228346456692, + "grad_norm": 4.407125949859619, + "learning_rate": 1.0476190476190477e-05, + "loss": 0.4405, + "step": 90 + }, + { + "epoch": 1.4173228346456692, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.42280957102775574, + "eval_runtime": 108.7146, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 90 + }, + { + "epoch": 1.4330708661417324, + "grad_norm": 6.310215473175049, + "learning_rate": 1.037037037037037e-05, + "loss": 0.6205, + "step": 91 + }, + { + "epoch": 1.4488188976377954, + "grad_norm": 3.586291551589966, + "learning_rate": 1.0264550264550266e-05, + "loss": 0.4571, + "step": 92 + }, + { + "epoch": 1.4645669291338583, + "grad_norm": 4.950135707855225, + "learning_rate": 1.015873015873016e-05, + "loss": 0.7159, + "step": 93 + }, + { + "epoch": 1.4803149606299213, + "grad_norm": 2.9908485412597656, + "learning_rate": 1.0052910052910054e-05, + "loss": 0.4769, + "step": 94 + }, + { + "epoch": 1.4960629921259843, + "grad_norm": 4.945335865020752, + "learning_rate": 9.947089947089947e-06, + "loss": 0.5349, + "step": 95 + }, + { + "epoch": 1.4960629921259843, + "eval_accuracy": 0.7870563674321504, + "eval_loss": 0.41962236166000366, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 95 + }, + { + "epoch": 1.5118110236220472, + "grad_norm": 4.648383140563965, + "learning_rate": 9.841269841269842e-06, + "loss": 0.5035, + "step": 96 + }, + { + "epoch": 1.5275590551181102, + "grad_norm": 4.447684288024902, + "learning_rate": 9.735449735449735e-06, + "loss": 0.5128, + "step": 97 + }, + { + "epoch": 1.5433070866141732, + "grad_norm": 3.652973175048828, + "learning_rate": 9.62962962962963e-06, + "loss": 0.3454, + "step": 98 + }, + { + "epoch": 1.5590551181102361, + "grad_norm": 3.083529472351074, + "learning_rate": 9.523809523809525e-06, + "loss": 0.4522, + "step": 99 + }, + { + "epoch": 1.574803149606299, + "grad_norm": 2.6377124786376953, + "learning_rate": 9.417989417989418e-06, + "loss": 0.3342, + "step": 100 + }, + { + "epoch": 1.574803149606299, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.4169768989086151, + "eval_runtime": 108.7386, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 100 + }, + { + "epoch": 1.590551181102362, + "grad_norm": 5.530861854553223, + "learning_rate": 9.312169312169313e-06, + "loss": 0.6154, + "step": 101 + }, + { + "epoch": 1.6062992125984252, + "grad_norm": 2.849217176437378, + "learning_rate": 9.206349206349207e-06, + "loss": 0.3823, + "step": 102 + }, + { + "epoch": 1.6220472440944882, + "grad_norm": 2.8741447925567627, + "learning_rate": 9.1005291005291e-06, + "loss": 0.2884, + "step": 103 + }, + { + "epoch": 1.6377952755905512, + "grad_norm": 5.099402904510498, + "learning_rate": 8.994708994708995e-06, + "loss": 0.4426, + "step": 104 + }, + { + "epoch": 1.6535433070866141, + "grad_norm": 3.130911350250244, + "learning_rate": 8.888888888888888e-06, + "loss": 0.5271, + "step": 105 + }, + { + "epoch": 1.6535433070866141, + "eval_accuracy": 0.7933194154488518, + "eval_loss": 0.41489914059638977, + "eval_runtime": 108.7149, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 105 + }, + { + "epoch": 1.6692913385826773, + "grad_norm": 3.4253950119018555, + "learning_rate": 8.783068783068783e-06, + "loss": 0.4482, + "step": 106 + }, + { + "epoch": 1.6850393700787403, + "grad_norm": 3.2515480518341064, + "learning_rate": 8.677248677248678e-06, + "loss": 0.5227, + "step": 107 + }, + { + "epoch": 1.7007874015748032, + "grad_norm": 3.6166484355926514, + "learning_rate": 8.571428571428571e-06, + "loss": 0.4545, + "step": 108 + }, + { + "epoch": 1.7165354330708662, + "grad_norm": 2.7220921516418457, + "learning_rate": 8.465608465608466e-06, + "loss": 0.3609, + "step": 109 + }, + { + "epoch": 1.7322834645669292, + "grad_norm": 2.6449429988861084, + "learning_rate": 8.35978835978836e-06, + "loss": 0.3463, + "step": 110 + }, + { + "epoch": 1.7322834645669292, + "eval_accuracy": 0.7974947807933194, + "eval_loss": 0.41358983516693115, + "eval_runtime": 108.7211, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 110 + }, + { + "epoch": 1.7480314960629921, + "grad_norm": 3.9665286540985107, + "learning_rate": 8.253968253968254e-06, + "loss": 0.5416, + "step": 111 + }, + { + "epoch": 1.763779527559055, + "grad_norm": 3.658632516860962, + "learning_rate": 8.148148148148148e-06, + "loss": 0.423, + "step": 112 + }, + { + "epoch": 1.779527559055118, + "grad_norm": 3.2784206867218018, + "learning_rate": 8.042328042328043e-06, + "loss": 0.4253, + "step": 113 + }, + { + "epoch": 1.795275590551181, + "grad_norm": 2.654160737991333, + "learning_rate": 7.936507936507936e-06, + "loss": 0.4836, + "step": 114 + }, + { + "epoch": 1.811023622047244, + "grad_norm": 4.960519313812256, + "learning_rate": 7.830687830687831e-06, + "loss": 0.4867, + "step": 115 + }, + { + "epoch": 1.811023622047244, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4127565622329712, + "eval_runtime": 108.7361, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 115 + }, + { + "epoch": 1.826771653543307, + "grad_norm": 2.9885411262512207, + "learning_rate": 7.724867724867726e-06, + "loss": 0.3218, + "step": 116 + }, + { + "epoch": 1.84251968503937, + "grad_norm": 3.868762254714966, + "learning_rate": 7.61904761904762e-06, + "loss": 0.5335, + "step": 117 + }, + { + "epoch": 1.858267716535433, + "grad_norm": 3.111746072769165, + "learning_rate": 7.5132275132275136e-06, + "loss": 0.4498, + "step": 118 + }, + { + "epoch": 1.874015748031496, + "grad_norm": 3.94144868850708, + "learning_rate": 7.4074074074074075e-06, + "loss": 0.3923, + "step": 119 + }, + { + "epoch": 1.889763779527559, + "grad_norm": 3.6796834468841553, + "learning_rate": 7.301587301587301e-06, + "loss": 0.3221, + "step": 120 + }, + { + "epoch": 1.889763779527559, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4124543368816376, + "eval_runtime": 108.7265, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 120 + } + ], + "logging_steps": 1.0, + "max_steps": 189, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-120/training_args.bin b/checkpoint-120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..594d4dba0d053d5c7bfd636fe070834e9478eee4 --- /dev/null +++ b/checkpoint-120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e8eb4d94fcecbd8ab0aa24ee61662b7a5da2eef5f366546a325524fc03e575 +size 5112 diff --git a/checkpoint-140/README.md b/checkpoint-140/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a53ea425933059074716d64973f94edb73a8eb80 --- /dev/null +++ b/checkpoint-140/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/checkpoint-140/adapter_config.json b/checkpoint-140/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e788c33bde47908737d80fe730414dabf5e14ea --- /dev/null +++ b/checkpoint-140/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj" + ], + "task_type": "SEQ_CLS", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-140/adapter_model.safetensors b/checkpoint-140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a708c2711a4ba323f13b716dd95e8786807e4a6 --- /dev/null +++ b/checkpoint-140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9634613a2c088c8b5d76cf128bb2b13d2983aa9f709416b5dccbeba996e4ce22 +size 57249936 diff --git a/checkpoint-140/optimizer.pt b/checkpoint-140/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e95a843ae967c9f819a457d2b51c41459c736a0e --- /dev/null +++ b/checkpoint-140/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f92ad2e56c69b32a8142929996b6fd6001e43e082d70288c597ba4ee6681a29a +size 114624506 diff --git a/checkpoint-140/rng_state.pth b/checkpoint-140/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..579f15e29f035a7776adfa69221048924c1af8a4 --- /dev/null +++ b/checkpoint-140/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ad6846cbe2b442315bce06b1bf11228dd1fe0fc34398f8f8dd07cb01087822 +size 14244 diff --git a/checkpoint-140/scheduler.pt b/checkpoint-140/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ae2410e808260cd6ca3daeee068ada2c97b2f74 --- /dev/null +++ b/checkpoint-140/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36b8a3f811898d722852b6120d7228b006381e652db9d750bc094cc5309ed790 +size 1064 diff --git a/checkpoint-140/special_tokens_map.json b/checkpoint-140/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-140/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-140/tokenizer.json b/checkpoint-140/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/checkpoint-140/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/checkpoint-140/tokenizer_config.json b/checkpoint-140/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..50e79ef5e39127ac3280c4f578d33786f5afbb5c --- /dev/null +++ b/checkpoint-140/tokenizer_config.json @@ -0,0 +1,1756 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-140/trainer_state.json b/checkpoint-140/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9cd43ae9a1bcb38d91dd11e5ce78ca0c9c1a35f3 --- /dev/null +++ b/checkpoint-140/trainer_state.json @@ -0,0 +1,1265 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.204724409448819, + "eval_steps": 5, + "global_step": 140, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015748031496062992, + "grad_norm": 5.510926246643066, + "learning_rate": 1.9894179894179895e-05, + "loss": 0.9249, + "step": 1 + }, + { + "epoch": 0.031496062992125984, + "grad_norm": 8.61505126953125, + "learning_rate": 1.978835978835979e-05, + "loss": 0.8445, + "step": 2 + }, + { + "epoch": 0.047244094488188976, + "grad_norm": 7.036591529846191, + "learning_rate": 1.9682539682539684e-05, + "loss": 0.9654, + "step": 3 + }, + { + "epoch": 0.06299212598425197, + "grad_norm": 5.803933143615723, + "learning_rate": 1.9576719576719577e-05, + "loss": 0.9276, + "step": 4 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 5.716428756713867, + "learning_rate": 1.947089947089947e-05, + "loss": 0.9241, + "step": 5 + }, + { + "epoch": 0.07874015748031496, + "eval_accuracy": 0.5678496868475992, + "eval_loss": 0.6996241807937622, + "eval_runtime": 108.7291, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 5 + }, + { + "epoch": 0.09448818897637795, + "grad_norm": 7.55866813659668, + "learning_rate": 1.9365079365079367e-05, + "loss": 0.9947, + "step": 6 + }, + { + "epoch": 0.11023622047244094, + "grad_norm": 6.801171779632568, + "learning_rate": 1.925925925925926e-05, + "loss": 0.972, + "step": 7 + }, + { + "epoch": 0.12598425196850394, + "grad_norm": 4.845946311950684, + "learning_rate": 1.9153439153439156e-05, + "loss": 0.6478, + "step": 8 + }, + { + "epoch": 0.14173228346456693, + "grad_norm": 10.487945556640625, + "learning_rate": 1.904761904761905e-05, + "loss": 0.8597, + "step": 9 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 5.452786445617676, + "learning_rate": 1.8941798941798943e-05, + "loss": 0.7708, + "step": 10 + }, + { + "epoch": 0.15748031496062992, + "eval_accuracy": 0.6659707724425887, + "eval_loss": 0.6283570528030396, + "eval_runtime": 108.7155, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 10 + }, + { + "epoch": 0.1732283464566929, + "grad_norm": 4.522532939910889, + "learning_rate": 1.8835978835978836e-05, + "loss": 0.6848, + "step": 11 + }, + { + "epoch": 0.1889763779527559, + "grad_norm": 6.4987688064575195, + "learning_rate": 1.8730158730158732e-05, + "loss": 0.6644, + "step": 12 + }, + { + "epoch": 0.2047244094488189, + "grad_norm": 4.2297682762146, + "learning_rate": 1.8624338624338625e-05, + "loss": 0.7227, + "step": 13 + }, + { + "epoch": 0.2204724409448819, + "grad_norm": 6.5658063888549805, + "learning_rate": 1.851851851851852e-05, + "loss": 0.6991, + "step": 14 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 6.549685001373291, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.7875, + "step": 15 + }, + { + "epoch": 0.23622047244094488, + "eval_accuracy": 0.7244258872651357, + "eval_loss": 0.5749094486236572, + "eval_runtime": 108.7121, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 15 + }, + { + "epoch": 0.25196850393700787, + "grad_norm": 3.6349198818206787, + "learning_rate": 1.8306878306878308e-05, + "loss": 0.5732, + "step": 16 + }, + { + "epoch": 0.2677165354330709, + "grad_norm": 4.741979598999023, + "learning_rate": 1.82010582010582e-05, + "loss": 0.5774, + "step": 17 + }, + { + "epoch": 0.28346456692913385, + "grad_norm": 4.751223087310791, + "learning_rate": 1.8095238095238097e-05, + "loss": 0.5738, + "step": 18 + }, + { + "epoch": 0.2992125984251969, + "grad_norm": 5.214819431304932, + "learning_rate": 1.798941798941799e-05, + "loss": 0.7182, + "step": 19 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 5.566962718963623, + "learning_rate": 1.7883597883597884e-05, + "loss": 0.6575, + "step": 20 + }, + { + "epoch": 0.31496062992125984, + "eval_accuracy": 0.7390396659707724, + "eval_loss": 0.5360159873962402, + "eval_runtime": 108.7252, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 20 + }, + { + "epoch": 0.33070866141732286, + "grad_norm": 4.060683727264404, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.5976, + "step": 21 + }, + { + "epoch": 0.3464566929133858, + "grad_norm": 5.9868621826171875, + "learning_rate": 1.7671957671957673e-05, + "loss": 0.7734, + "step": 22 + }, + { + "epoch": 0.36220472440944884, + "grad_norm": 3.4295496940612793, + "learning_rate": 1.7566137566137566e-05, + "loss": 0.5543, + "step": 23 + }, + { + "epoch": 0.3779527559055118, + "grad_norm": 4.587719917297363, + "learning_rate": 1.7460317460317463e-05, + "loss": 0.6497, + "step": 24 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 4.520890235900879, + "learning_rate": 1.7354497354497356e-05, + "loss": 0.6802, + "step": 25 + }, + { + "epoch": 0.3937007874015748, + "eval_accuracy": 0.7432150313152401, + "eval_loss": 0.5086582899093628, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 25 + }, + { + "epoch": 0.4094488188976378, + "grad_norm": 3.653116464614868, + "learning_rate": 1.724867724867725e-05, + "loss": 0.4888, + "step": 26 + }, + { + "epoch": 0.4251968503937008, + "grad_norm": 4.042315483093262, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6004, + "step": 27 + }, + { + "epoch": 0.4409448818897638, + "grad_norm": 5.317520618438721, + "learning_rate": 1.7037037037037038e-05, + "loss": 0.6253, + "step": 28 + }, + { + "epoch": 0.4566929133858268, + "grad_norm": 3.8642020225524902, + "learning_rate": 1.693121693121693e-05, + "loss": 0.5778, + "step": 29 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 2.3941361904144287, + "learning_rate": 1.6825396825396828e-05, + "loss": 0.3982, + "step": 30 + }, + { + "epoch": 0.47244094488188976, + "eval_accuracy": 0.7578288100208769, + "eval_loss": 0.4889708459377289, + "eval_runtime": 108.7183, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 30 + }, + { + "epoch": 0.4881889763779528, + "grad_norm": 4.1248650550842285, + "learning_rate": 1.671957671957672e-05, + "loss": 0.777, + "step": 31 + }, + { + "epoch": 0.5039370078740157, + "grad_norm": 3.369483470916748, + "learning_rate": 1.6613756613756614e-05, + "loss": 0.5675, + "step": 32 + }, + { + "epoch": 0.5196850393700787, + "grad_norm": 3.8457119464874268, + "learning_rate": 1.6507936507936507e-05, + "loss": 0.6227, + "step": 33 + }, + { + "epoch": 0.5354330708661418, + "grad_norm": 4.809354782104492, + "learning_rate": 1.6402116402116404e-05, + "loss": 0.7111, + "step": 34 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 2.84769868850708, + "learning_rate": 1.6296296296296297e-05, + "loss": 0.4555, + "step": 35 + }, + { + "epoch": 0.5511811023622047, + "eval_accuracy": 0.7599164926931107, + "eval_loss": 0.4774630665779114, + "eval_runtime": 108.7145, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 35 + }, + { + "epoch": 0.5669291338582677, + "grad_norm": 4.496406555175781, + "learning_rate": 1.6190476190476193e-05, + "loss": 0.6703, + "step": 36 + }, + { + "epoch": 0.5826771653543307, + "grad_norm": 5.721245288848877, + "learning_rate": 1.6084656084656086e-05, + "loss": 0.7066, + "step": 37 + }, + { + "epoch": 0.5984251968503937, + "grad_norm": 4.494580268859863, + "learning_rate": 1.597883597883598e-05, + "loss": 0.4907, + "step": 38 + }, + { + "epoch": 0.6141732283464567, + "grad_norm": 2.8905560970306396, + "learning_rate": 1.5873015873015872e-05, + "loss": 0.5501, + "step": 39 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 9.776362419128418, + "learning_rate": 1.576719576719577e-05, + "loss": 0.8838, + "step": 40 + }, + { + "epoch": 0.6299212598425197, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.46829721331596375, + "eval_runtime": 108.7189, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 40 + }, + { + "epoch": 0.6456692913385826, + "grad_norm": 3.8481881618499756, + "learning_rate": 1.5661375661375662e-05, + "loss": 0.5309, + "step": 41 + }, + { + "epoch": 0.6614173228346457, + "grad_norm": 6.0327839851379395, + "learning_rate": 1.555555555555556e-05, + "loss": 0.6414, + "step": 42 + }, + { + "epoch": 0.6771653543307087, + "grad_norm": 4.993657112121582, + "learning_rate": 1.544973544973545e-05, + "loss": 0.5727, + "step": 43 + }, + { + "epoch": 0.6929133858267716, + "grad_norm": 4.3265252113342285, + "learning_rate": 1.5343915343915344e-05, + "loss": 0.4913, + "step": 44 + }, + { + "epoch": 0.7086614173228346, + "grad_norm": 3.6012353897094727, + "learning_rate": 1.523809523809524e-05, + "loss": 0.4692, + "step": 45 + }, + { + "epoch": 0.7086614173228346, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4610559344291687, + "eval_runtime": 108.7229, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 45 + }, + { + "epoch": 0.7244094488188977, + "grad_norm": 4.319406509399414, + "learning_rate": 1.5132275132275134e-05, + "loss": 0.5203, + "step": 46 + }, + { + "epoch": 0.7401574803149606, + "grad_norm": 3.885263442993164, + "learning_rate": 1.5026455026455027e-05, + "loss": 0.5084, + "step": 47 + }, + { + "epoch": 0.7559055118110236, + "grad_norm": 3.547327995300293, + "learning_rate": 1.4920634920634922e-05, + "loss": 0.442, + "step": 48 + }, + { + "epoch": 0.7716535433070866, + "grad_norm": 3.8868982791900635, + "learning_rate": 1.4814814814814815e-05, + "loss": 0.5848, + "step": 49 + }, + { + "epoch": 0.7874015748031497, + "grad_norm": 2.222346544265747, + "learning_rate": 1.470899470899471e-05, + "loss": 0.5455, + "step": 50 + }, + { + "epoch": 0.7874015748031497, + "eval_accuracy": 0.7620041753653445, + "eval_loss": 0.4531377851963043, + "eval_runtime": 108.7528, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 50 + }, + { + "epoch": 0.8031496062992126, + "grad_norm": 3.129575252532959, + "learning_rate": 1.4603174603174603e-05, + "loss": 0.4861, + "step": 51 + }, + { + "epoch": 0.8188976377952756, + "grad_norm": 4.924710750579834, + "learning_rate": 1.44973544973545e-05, + "loss": 0.5782, + "step": 52 + }, + { + "epoch": 0.8346456692913385, + "grad_norm": 5.2157182693481445, + "learning_rate": 1.4391534391534392e-05, + "loss": 0.7203, + "step": 53 + }, + { + "epoch": 0.8503937007874016, + "grad_norm": 4.697371959686279, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.4261, + "step": 54 + }, + { + "epoch": 0.8661417322834646, + "grad_norm": 2.8899056911468506, + "learning_rate": 1.417989417989418e-05, + "loss": 0.5696, + "step": 55 + }, + { + "epoch": 0.8661417322834646, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4459321200847626, + "eval_runtime": 108.7951, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.551, + "step": 55 + }, + { + "epoch": 0.8818897637795275, + "grad_norm": 4.532041072845459, + "learning_rate": 1.4074074074074075e-05, + "loss": 0.5723, + "step": 56 + }, + { + "epoch": 0.8976377952755905, + "grad_norm": 2.3436343669891357, + "learning_rate": 1.3968253968253968e-05, + "loss": 0.3629, + "step": 57 + }, + { + "epoch": 0.9133858267716536, + "grad_norm": 3.333158493041992, + "learning_rate": 1.3862433862433865e-05, + "loss": 0.5433, + "step": 58 + }, + { + "epoch": 0.9291338582677166, + "grad_norm": 4.177884101867676, + "learning_rate": 1.3756613756613758e-05, + "loss": 0.3747, + "step": 59 + }, + { + "epoch": 0.9448818897637795, + "grad_norm": 5.238712310791016, + "learning_rate": 1.3650793650793652e-05, + "loss": 0.7453, + "step": 60 + }, + { + "epoch": 0.9448818897637795, + "eval_accuracy": 0.7766179540709812, + "eval_loss": 0.4413756728172302, + "eval_runtime": 108.7463, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 60 + }, + { + "epoch": 0.9606299212598425, + "grad_norm": 4.022979736328125, + "learning_rate": 1.3544973544973545e-05, + "loss": 0.6177, + "step": 61 + }, + { + "epoch": 0.9763779527559056, + "grad_norm": 2.0528969764709473, + "learning_rate": 1.343915343915344e-05, + "loss": 0.3505, + "step": 62 + }, + { + "epoch": 0.9921259842519685, + "grad_norm": 3.9705586433410645, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5858, + "step": 63 + }, + { + "epoch": 1.0078740157480315, + "grad_norm": 8.341585159301758, + "learning_rate": 1.322751322751323e-05, + "loss": 0.6721, + "step": 64 + }, + { + "epoch": 1.0236220472440944, + "grad_norm": 4.031370162963867, + "learning_rate": 1.3121693121693123e-05, + "loss": 0.5369, + "step": 65 + }, + { + "epoch": 1.0236220472440944, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.43705105781555176, + "eval_runtime": 108.7278, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 65 + }, + { + "epoch": 1.0393700787401574, + "grad_norm": 2.898926019668579, + "learning_rate": 1.3015873015873018e-05, + "loss": 0.3628, + "step": 66 + }, + { + "epoch": 1.0551181102362204, + "grad_norm": 2.9200918674468994, + "learning_rate": 1.291005291005291e-05, + "loss": 0.3311, + "step": 67 + }, + { + "epoch": 1.0708661417322836, + "grad_norm": 4.506103992462158, + "learning_rate": 1.2804232804232805e-05, + "loss": 0.5813, + "step": 68 + }, + { + "epoch": 1.0866141732283465, + "grad_norm": 4.187809944152832, + "learning_rate": 1.2698412698412699e-05, + "loss": 0.4802, + "step": 69 + }, + { + "epoch": 1.1023622047244095, + "grad_norm": 3.5520920753479004, + "learning_rate": 1.2592592592592593e-05, + "loss": 0.3994, + "step": 70 + }, + { + "epoch": 1.1023622047244095, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.43335652351379395, + "eval_runtime": 108.738, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 70 + }, + { + "epoch": 1.1181102362204725, + "grad_norm": 3.6081998348236084, + "learning_rate": 1.2486772486772486e-05, + "loss": 0.5266, + "step": 71 + }, + { + "epoch": 1.1338582677165354, + "grad_norm": 3.6554276943206787, + "learning_rate": 1.2380952380952383e-05, + "loss": 0.5231, + "step": 72 + }, + { + "epoch": 1.1496062992125984, + "grad_norm": 3.551367998123169, + "learning_rate": 1.2275132275132276e-05, + "loss": 0.4538, + "step": 73 + }, + { + "epoch": 1.1653543307086613, + "grad_norm": 4.252958297729492, + "learning_rate": 1.216931216931217e-05, + "loss": 0.4688, + "step": 74 + }, + { + "epoch": 1.1811023622047245, + "grad_norm": 4.337672710418701, + "learning_rate": 1.2063492063492064e-05, + "loss": 0.4235, + "step": 75 + }, + { + "epoch": 1.1811023622047245, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.429808109998703, + "eval_runtime": 108.723, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 75 + }, + { + "epoch": 1.1968503937007875, + "grad_norm": 2.607356548309326, + "learning_rate": 1.1957671957671959e-05, + "loss": 0.3639, + "step": 76 + }, + { + "epoch": 1.2125984251968505, + "grad_norm": 3.198551654815674, + "learning_rate": 1.1851851851851852e-05, + "loss": 0.4066, + "step": 77 + }, + { + "epoch": 1.2283464566929134, + "grad_norm": 4.820532321929932, + "learning_rate": 1.1746031746031748e-05, + "loss": 0.5906, + "step": 78 + }, + { + "epoch": 1.2440944881889764, + "grad_norm": 3.5706419944763184, + "learning_rate": 1.1640211640211641e-05, + "loss": 0.5065, + "step": 79 + }, + { + "epoch": 1.2598425196850394, + "grad_norm": 4.763455867767334, + "learning_rate": 1.1534391534391536e-05, + "loss": 0.4811, + "step": 80 + }, + { + "epoch": 1.2598425196850394, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4265703856945038, + "eval_runtime": 108.7158, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 80 + }, + { + "epoch": 1.2755905511811023, + "grad_norm": 5.053676605224609, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.5317, + "step": 81 + }, + { + "epoch": 1.2913385826771653, + "grad_norm": 4.484920024871826, + "learning_rate": 1.1322751322751324e-05, + "loss": 0.5474, + "step": 82 + }, + { + "epoch": 1.3070866141732282, + "grad_norm": 4.059377193450928, + "learning_rate": 1.1216931216931217e-05, + "loss": 0.4936, + "step": 83 + }, + { + "epoch": 1.3228346456692912, + "grad_norm": 4.017063617706299, + "learning_rate": 1.1111111111111113e-05, + "loss": 0.3574, + "step": 84 + }, + { + "epoch": 1.3385826771653544, + "grad_norm": 3.275650978088379, + "learning_rate": 1.1005291005291006e-05, + "loss": 0.5072, + "step": 85 + }, + { + "epoch": 1.3385826771653544, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4252822697162628, + "eval_runtime": 108.7176, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 85 + }, + { + "epoch": 1.3543307086614174, + "grad_norm": 5.258458614349365, + "learning_rate": 1.0899470899470901e-05, + "loss": 0.6735, + "step": 86 + }, + { + "epoch": 1.3700787401574803, + "grad_norm": 3.070061445236206, + "learning_rate": 1.0793650793650794e-05, + "loss": 0.4365, + "step": 87 + }, + { + "epoch": 1.3858267716535433, + "grad_norm": 3.556974172592163, + "learning_rate": 1.0687830687830689e-05, + "loss": 0.5113, + "step": 88 + }, + { + "epoch": 1.4015748031496063, + "grad_norm": 2.5072743892669678, + "learning_rate": 1.0582010582010582e-05, + "loss": 0.286, + "step": 89 + }, + { + "epoch": 1.4173228346456692, + "grad_norm": 4.407125949859619, + "learning_rate": 1.0476190476190477e-05, + "loss": 0.4405, + "step": 90 + }, + { + "epoch": 1.4173228346456692, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.42280957102775574, + "eval_runtime": 108.7146, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 90 + }, + { + "epoch": 1.4330708661417324, + "grad_norm": 6.310215473175049, + "learning_rate": 1.037037037037037e-05, + "loss": 0.6205, + "step": 91 + }, + { + "epoch": 1.4488188976377954, + "grad_norm": 3.586291551589966, + "learning_rate": 1.0264550264550266e-05, + "loss": 0.4571, + "step": 92 + }, + { + "epoch": 1.4645669291338583, + "grad_norm": 4.950135707855225, + "learning_rate": 1.015873015873016e-05, + "loss": 0.7159, + "step": 93 + }, + { + "epoch": 1.4803149606299213, + "grad_norm": 2.9908485412597656, + "learning_rate": 1.0052910052910054e-05, + "loss": 0.4769, + "step": 94 + }, + { + "epoch": 1.4960629921259843, + "grad_norm": 4.945335865020752, + "learning_rate": 9.947089947089947e-06, + "loss": 0.5349, + "step": 95 + }, + { + "epoch": 1.4960629921259843, + "eval_accuracy": 0.7870563674321504, + "eval_loss": 0.41962236166000366, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 95 + }, + { + "epoch": 1.5118110236220472, + "grad_norm": 4.648383140563965, + "learning_rate": 9.841269841269842e-06, + "loss": 0.5035, + "step": 96 + }, + { + "epoch": 1.5275590551181102, + "grad_norm": 4.447684288024902, + "learning_rate": 9.735449735449735e-06, + "loss": 0.5128, + "step": 97 + }, + { + "epoch": 1.5433070866141732, + "grad_norm": 3.652973175048828, + "learning_rate": 9.62962962962963e-06, + "loss": 0.3454, + "step": 98 + }, + { + "epoch": 1.5590551181102361, + "grad_norm": 3.083529472351074, + "learning_rate": 9.523809523809525e-06, + "loss": 0.4522, + "step": 99 + }, + { + "epoch": 1.574803149606299, + "grad_norm": 2.6377124786376953, + "learning_rate": 9.417989417989418e-06, + "loss": 0.3342, + "step": 100 + }, + { + "epoch": 1.574803149606299, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.4169768989086151, + "eval_runtime": 108.7386, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 100 + }, + { + "epoch": 1.590551181102362, + "grad_norm": 5.530861854553223, + "learning_rate": 9.312169312169313e-06, + "loss": 0.6154, + "step": 101 + }, + { + "epoch": 1.6062992125984252, + "grad_norm": 2.849217176437378, + "learning_rate": 9.206349206349207e-06, + "loss": 0.3823, + "step": 102 + }, + { + "epoch": 1.6220472440944882, + "grad_norm": 2.8741447925567627, + "learning_rate": 9.1005291005291e-06, + "loss": 0.2884, + "step": 103 + }, + { + "epoch": 1.6377952755905512, + "grad_norm": 5.099402904510498, + "learning_rate": 8.994708994708995e-06, + "loss": 0.4426, + "step": 104 + }, + { + "epoch": 1.6535433070866141, + "grad_norm": 3.130911350250244, + "learning_rate": 8.888888888888888e-06, + "loss": 0.5271, + "step": 105 + }, + { + "epoch": 1.6535433070866141, + "eval_accuracy": 0.7933194154488518, + "eval_loss": 0.41489914059638977, + "eval_runtime": 108.7149, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 105 + }, + { + "epoch": 1.6692913385826773, + "grad_norm": 3.4253950119018555, + "learning_rate": 8.783068783068783e-06, + "loss": 0.4482, + "step": 106 + }, + { + "epoch": 1.6850393700787403, + "grad_norm": 3.2515480518341064, + "learning_rate": 8.677248677248678e-06, + "loss": 0.5227, + "step": 107 + }, + { + "epoch": 1.7007874015748032, + "grad_norm": 3.6166484355926514, + "learning_rate": 8.571428571428571e-06, + "loss": 0.4545, + "step": 108 + }, + { + "epoch": 1.7165354330708662, + "grad_norm": 2.7220921516418457, + "learning_rate": 8.465608465608466e-06, + "loss": 0.3609, + "step": 109 + }, + { + "epoch": 1.7322834645669292, + "grad_norm": 2.6449429988861084, + "learning_rate": 8.35978835978836e-06, + "loss": 0.3463, + "step": 110 + }, + { + "epoch": 1.7322834645669292, + "eval_accuracy": 0.7974947807933194, + "eval_loss": 0.41358983516693115, + "eval_runtime": 108.7211, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 110 + }, + { + "epoch": 1.7480314960629921, + "grad_norm": 3.9665286540985107, + "learning_rate": 8.253968253968254e-06, + "loss": 0.5416, + "step": 111 + }, + { + "epoch": 1.763779527559055, + "grad_norm": 3.658632516860962, + "learning_rate": 8.148148148148148e-06, + "loss": 0.423, + "step": 112 + }, + { + "epoch": 1.779527559055118, + "grad_norm": 3.2784206867218018, + "learning_rate": 8.042328042328043e-06, + "loss": 0.4253, + "step": 113 + }, + { + "epoch": 1.795275590551181, + "grad_norm": 2.654160737991333, + "learning_rate": 7.936507936507936e-06, + "loss": 0.4836, + "step": 114 + }, + { + "epoch": 1.811023622047244, + "grad_norm": 4.960519313812256, + "learning_rate": 7.830687830687831e-06, + "loss": 0.4867, + "step": 115 + }, + { + "epoch": 1.811023622047244, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4127565622329712, + "eval_runtime": 108.7361, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 115 + }, + { + "epoch": 1.826771653543307, + "grad_norm": 2.9885411262512207, + "learning_rate": 7.724867724867726e-06, + "loss": 0.3218, + "step": 116 + }, + { + "epoch": 1.84251968503937, + "grad_norm": 3.868762254714966, + "learning_rate": 7.61904761904762e-06, + "loss": 0.5335, + "step": 117 + }, + { + "epoch": 1.858267716535433, + "grad_norm": 3.111746072769165, + "learning_rate": 7.5132275132275136e-06, + "loss": 0.4498, + "step": 118 + }, + { + "epoch": 1.874015748031496, + "grad_norm": 3.94144868850708, + "learning_rate": 7.4074074074074075e-06, + "loss": 0.3923, + "step": 119 + }, + { + "epoch": 1.889763779527559, + "grad_norm": 3.6796834468841553, + "learning_rate": 7.301587301587301e-06, + "loss": 0.3221, + "step": 120 + }, + { + "epoch": 1.889763779527559, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4124543368816376, + "eval_runtime": 108.7265, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 120 + }, + { + "epoch": 1.905511811023622, + "grad_norm": 3.597407341003418, + "learning_rate": 7.195767195767196e-06, + "loss": 0.5133, + "step": 121 + }, + { + "epoch": 1.9212598425196852, + "grad_norm": 2.6824302673339844, + "learning_rate": 7.08994708994709e-06, + "loss": 0.4173, + "step": 122 + }, + { + "epoch": 1.9370078740157481, + "grad_norm": 3.5606563091278076, + "learning_rate": 6.984126984126984e-06, + "loss": 0.5625, + "step": 123 + }, + { + "epoch": 1.952755905511811, + "grad_norm": 2.981217622756958, + "learning_rate": 6.878306878306879e-06, + "loss": 0.3826, + "step": 124 + }, + { + "epoch": 1.968503937007874, + "grad_norm": 2.708043336868286, + "learning_rate": 6.772486772486773e-06, + "loss": 0.3542, + "step": 125 + }, + { + "epoch": 1.968503937007874, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4116251766681671, + "eval_runtime": 108.7821, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.552, + "step": 125 + }, + { + "epoch": 1.984251968503937, + "grad_norm": 3.5302915573120117, + "learning_rate": 6.666666666666667e-06, + "loss": 0.5818, + "step": 126 + }, + { + "epoch": 2.0, + "grad_norm": 6.032024383544922, + "learning_rate": 6.560846560846561e-06, + "loss": 0.5345, + "step": 127 + }, + { + "epoch": 2.015748031496063, + "grad_norm": 1.919519305229187, + "learning_rate": 6.455026455026455e-06, + "loss": 0.312, + "step": 128 + }, + { + "epoch": 2.031496062992126, + "grad_norm": 3.025320291519165, + "learning_rate": 6.349206349206349e-06, + "loss": 0.4013, + "step": 129 + }, + { + "epoch": 2.047244094488189, + "grad_norm": 4.498544216156006, + "learning_rate": 6.243386243386243e-06, + "loss": 0.5465, + "step": 130 + }, + { + "epoch": 2.047244094488189, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4106995761394501, + "eval_runtime": 108.7564, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 130 + }, + { + "epoch": 2.062992125984252, + "grad_norm": 4.507568359375, + "learning_rate": 6.137566137566138e-06, + "loss": 0.4377, + "step": 131 + }, + { + "epoch": 2.078740157480315, + "grad_norm": 3.309709310531616, + "learning_rate": 6.031746031746032e-06, + "loss": 0.4066, + "step": 132 + }, + { + "epoch": 2.094488188976378, + "grad_norm": 3.5611140727996826, + "learning_rate": 5.925925925925926e-06, + "loss": 0.4138, + "step": 133 + }, + { + "epoch": 2.1102362204724407, + "grad_norm": 5.7032670974731445, + "learning_rate": 5.820105820105821e-06, + "loss": 0.592, + "step": 134 + }, + { + "epoch": 2.1259842519685037, + "grad_norm": 2.3446578979492188, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.3427, + "step": 135 + }, + { + "epoch": 2.1259842519685037, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.41008052229881287, + "eval_runtime": 108.7205, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 135 + }, + { + "epoch": 2.141732283464567, + "grad_norm": 3.090672254562378, + "learning_rate": 5.6084656084656084e-06, + "loss": 0.3996, + "step": 136 + }, + { + "epoch": 2.15748031496063, + "grad_norm": 2.8654699325561523, + "learning_rate": 5.502645502645503e-06, + "loss": 0.3874, + "step": 137 + }, + { + "epoch": 2.173228346456693, + "grad_norm": 2.6311564445495605, + "learning_rate": 5.396825396825397e-06, + "loss": 0.4439, + "step": 138 + }, + { + "epoch": 2.188976377952756, + "grad_norm": 3.5829622745513916, + "learning_rate": 5.291005291005291e-06, + "loss": 0.4736, + "step": 139 + }, + { + "epoch": 2.204724409448819, + "grad_norm": 3.072617769241333, + "learning_rate": 5.185185185185185e-06, + "loss": 0.4787, + "step": 140 + }, + { + "epoch": 2.204724409448819, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.40874549746513367, + "eval_runtime": 108.7212, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 140 + } + ], + "logging_steps": 1.0, + "max_steps": 189, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-140/training_args.bin b/checkpoint-140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..594d4dba0d053d5c7bfd636fe070834e9478eee4 --- /dev/null +++ b/checkpoint-140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e8eb4d94fcecbd8ab0aa24ee61662b7a5da2eef5f366546a325524fc03e575 +size 5112 diff --git a/checkpoint-160/README.md b/checkpoint-160/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a53ea425933059074716d64973f94edb73a8eb80 --- /dev/null +++ b/checkpoint-160/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/checkpoint-160/adapter_config.json b/checkpoint-160/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e788c33bde47908737d80fe730414dabf5e14ea --- /dev/null +++ b/checkpoint-160/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj" + ], + "task_type": "SEQ_CLS", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-160/adapter_model.safetensors b/checkpoint-160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0426daadf332df2e00420aaf33c2f197fcb2ace1 --- /dev/null +++ b/checkpoint-160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b3fd4c30d33e8e09b99b89e86c4bd8b6495e91b19c63efb6e3cfef0e8f8dd3 +size 57249936 diff --git a/checkpoint-160/optimizer.pt b/checkpoint-160/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d277530e1eaeb60cadae7c9107127ba3fd82738 --- /dev/null +++ b/checkpoint-160/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a7dccef8611036913b2427ee79c1f14be4186562ee2be92c4f054febcc30612 +size 114624506 diff --git a/checkpoint-160/rng_state.pth b/checkpoint-160/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..60f019e49b971b800345a6697e700401aa219622 --- /dev/null +++ b/checkpoint-160/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8bc0c5819f0483d9bd65978b43e15530444b6c6701e9955cde599f10ea2242 +size 14244 diff --git a/checkpoint-160/scheduler.pt b/checkpoint-160/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0140cf9bca2ef4310c6bbf11ac09e3c8b24f3309 --- /dev/null +++ b/checkpoint-160/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa7e794dd59318981e3bde8ec9797135c2b32e33113140bfb35685b86f266ba6 +size 1064 diff --git a/checkpoint-160/special_tokens_map.json b/checkpoint-160/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-160/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-160/tokenizer.json b/checkpoint-160/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/checkpoint-160/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/checkpoint-160/tokenizer_config.json b/checkpoint-160/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..50e79ef5e39127ac3280c4f578d33786f5afbb5c --- /dev/null +++ b/checkpoint-160/tokenizer_config.json @@ -0,0 +1,1756 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-160/trainer_state.json b/checkpoint-160/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e6f705dc8680b4cfa36dd633fd4724e046627b37 --- /dev/null +++ b/checkpoint-160/trainer_state.json @@ -0,0 +1,1441 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.5196850393700787, + "eval_steps": 5, + "global_step": 160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015748031496062992, + "grad_norm": 5.510926246643066, + "learning_rate": 1.9894179894179895e-05, + "loss": 0.9249, + "step": 1 + }, + { + "epoch": 0.031496062992125984, + "grad_norm": 8.61505126953125, + "learning_rate": 1.978835978835979e-05, + "loss": 0.8445, + "step": 2 + }, + { + "epoch": 0.047244094488188976, + "grad_norm": 7.036591529846191, + "learning_rate": 1.9682539682539684e-05, + "loss": 0.9654, + "step": 3 + }, + { + "epoch": 0.06299212598425197, + "grad_norm": 5.803933143615723, + "learning_rate": 1.9576719576719577e-05, + "loss": 0.9276, + "step": 4 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 5.716428756713867, + "learning_rate": 1.947089947089947e-05, + "loss": 0.9241, + "step": 5 + }, + { + "epoch": 0.07874015748031496, + "eval_accuracy": 0.5678496868475992, + "eval_loss": 0.6996241807937622, + "eval_runtime": 108.7291, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 5 + }, + { + "epoch": 0.09448818897637795, + "grad_norm": 7.55866813659668, + "learning_rate": 1.9365079365079367e-05, + "loss": 0.9947, + "step": 6 + }, + { + "epoch": 0.11023622047244094, + "grad_norm": 6.801171779632568, + "learning_rate": 1.925925925925926e-05, + "loss": 0.972, + "step": 7 + }, + { + "epoch": 0.12598425196850394, + "grad_norm": 4.845946311950684, + "learning_rate": 1.9153439153439156e-05, + "loss": 0.6478, + "step": 8 + }, + { + "epoch": 0.14173228346456693, + "grad_norm": 10.487945556640625, + "learning_rate": 1.904761904761905e-05, + "loss": 0.8597, + "step": 9 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 5.452786445617676, + "learning_rate": 1.8941798941798943e-05, + "loss": 0.7708, + "step": 10 + }, + { + "epoch": 0.15748031496062992, + "eval_accuracy": 0.6659707724425887, + "eval_loss": 0.6283570528030396, + "eval_runtime": 108.7155, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 10 + }, + { + "epoch": 0.1732283464566929, + "grad_norm": 4.522532939910889, + "learning_rate": 1.8835978835978836e-05, + "loss": 0.6848, + "step": 11 + }, + { + "epoch": 0.1889763779527559, + "grad_norm": 6.4987688064575195, + "learning_rate": 1.8730158730158732e-05, + "loss": 0.6644, + "step": 12 + }, + { + "epoch": 0.2047244094488189, + "grad_norm": 4.2297682762146, + "learning_rate": 1.8624338624338625e-05, + "loss": 0.7227, + "step": 13 + }, + { + "epoch": 0.2204724409448819, + "grad_norm": 6.5658063888549805, + "learning_rate": 1.851851851851852e-05, + "loss": 0.6991, + "step": 14 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 6.549685001373291, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.7875, + "step": 15 + }, + { + "epoch": 0.23622047244094488, + "eval_accuracy": 0.7244258872651357, + "eval_loss": 0.5749094486236572, + "eval_runtime": 108.7121, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 15 + }, + { + "epoch": 0.25196850393700787, + "grad_norm": 3.6349198818206787, + "learning_rate": 1.8306878306878308e-05, + "loss": 0.5732, + "step": 16 + }, + { + "epoch": 0.2677165354330709, + "grad_norm": 4.741979598999023, + "learning_rate": 1.82010582010582e-05, + "loss": 0.5774, + "step": 17 + }, + { + "epoch": 0.28346456692913385, + "grad_norm": 4.751223087310791, + "learning_rate": 1.8095238095238097e-05, + "loss": 0.5738, + "step": 18 + }, + { + "epoch": 0.2992125984251969, + "grad_norm": 5.214819431304932, + "learning_rate": 1.798941798941799e-05, + "loss": 0.7182, + "step": 19 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 5.566962718963623, + "learning_rate": 1.7883597883597884e-05, + "loss": 0.6575, + "step": 20 + }, + { + "epoch": 0.31496062992125984, + "eval_accuracy": 0.7390396659707724, + "eval_loss": 0.5360159873962402, + "eval_runtime": 108.7252, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 20 + }, + { + "epoch": 0.33070866141732286, + "grad_norm": 4.060683727264404, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.5976, + "step": 21 + }, + { + "epoch": 0.3464566929133858, + "grad_norm": 5.9868621826171875, + "learning_rate": 1.7671957671957673e-05, + "loss": 0.7734, + "step": 22 + }, + { + "epoch": 0.36220472440944884, + "grad_norm": 3.4295496940612793, + "learning_rate": 1.7566137566137566e-05, + "loss": 0.5543, + "step": 23 + }, + { + "epoch": 0.3779527559055118, + "grad_norm": 4.587719917297363, + "learning_rate": 1.7460317460317463e-05, + "loss": 0.6497, + "step": 24 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 4.520890235900879, + "learning_rate": 1.7354497354497356e-05, + "loss": 0.6802, + "step": 25 + }, + { + "epoch": 0.3937007874015748, + "eval_accuracy": 0.7432150313152401, + "eval_loss": 0.5086582899093628, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 25 + }, + { + "epoch": 0.4094488188976378, + "grad_norm": 3.653116464614868, + "learning_rate": 1.724867724867725e-05, + "loss": 0.4888, + "step": 26 + }, + { + "epoch": 0.4251968503937008, + "grad_norm": 4.042315483093262, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6004, + "step": 27 + }, + { + "epoch": 0.4409448818897638, + "grad_norm": 5.317520618438721, + "learning_rate": 1.7037037037037038e-05, + "loss": 0.6253, + "step": 28 + }, + { + "epoch": 0.4566929133858268, + "grad_norm": 3.8642020225524902, + "learning_rate": 1.693121693121693e-05, + "loss": 0.5778, + "step": 29 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 2.3941361904144287, + "learning_rate": 1.6825396825396828e-05, + "loss": 0.3982, + "step": 30 + }, + { + "epoch": 0.47244094488188976, + "eval_accuracy": 0.7578288100208769, + "eval_loss": 0.4889708459377289, + "eval_runtime": 108.7183, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 30 + }, + { + "epoch": 0.4881889763779528, + "grad_norm": 4.1248650550842285, + "learning_rate": 1.671957671957672e-05, + "loss": 0.777, + "step": 31 + }, + { + "epoch": 0.5039370078740157, + "grad_norm": 3.369483470916748, + "learning_rate": 1.6613756613756614e-05, + "loss": 0.5675, + "step": 32 + }, + { + "epoch": 0.5196850393700787, + "grad_norm": 3.8457119464874268, + "learning_rate": 1.6507936507936507e-05, + "loss": 0.6227, + "step": 33 + }, + { + "epoch": 0.5354330708661418, + "grad_norm": 4.809354782104492, + "learning_rate": 1.6402116402116404e-05, + "loss": 0.7111, + "step": 34 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 2.84769868850708, + "learning_rate": 1.6296296296296297e-05, + "loss": 0.4555, + "step": 35 + }, + { + "epoch": 0.5511811023622047, + "eval_accuracy": 0.7599164926931107, + "eval_loss": 0.4774630665779114, + "eval_runtime": 108.7145, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 35 + }, + { + "epoch": 0.5669291338582677, + "grad_norm": 4.496406555175781, + "learning_rate": 1.6190476190476193e-05, + "loss": 0.6703, + "step": 36 + }, + { + "epoch": 0.5826771653543307, + "grad_norm": 5.721245288848877, + "learning_rate": 1.6084656084656086e-05, + "loss": 0.7066, + "step": 37 + }, + { + "epoch": 0.5984251968503937, + "grad_norm": 4.494580268859863, + "learning_rate": 1.597883597883598e-05, + "loss": 0.4907, + "step": 38 + }, + { + "epoch": 0.6141732283464567, + "grad_norm": 2.8905560970306396, + "learning_rate": 1.5873015873015872e-05, + "loss": 0.5501, + "step": 39 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 9.776362419128418, + "learning_rate": 1.576719576719577e-05, + "loss": 0.8838, + "step": 40 + }, + { + "epoch": 0.6299212598425197, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.46829721331596375, + "eval_runtime": 108.7189, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 40 + }, + { + "epoch": 0.6456692913385826, + "grad_norm": 3.8481881618499756, + "learning_rate": 1.5661375661375662e-05, + "loss": 0.5309, + "step": 41 + }, + { + "epoch": 0.6614173228346457, + "grad_norm": 6.0327839851379395, + "learning_rate": 1.555555555555556e-05, + "loss": 0.6414, + "step": 42 + }, + { + "epoch": 0.6771653543307087, + "grad_norm": 4.993657112121582, + "learning_rate": 1.544973544973545e-05, + "loss": 0.5727, + "step": 43 + }, + { + "epoch": 0.6929133858267716, + "grad_norm": 4.3265252113342285, + "learning_rate": 1.5343915343915344e-05, + "loss": 0.4913, + "step": 44 + }, + { + "epoch": 0.7086614173228346, + "grad_norm": 3.6012353897094727, + "learning_rate": 1.523809523809524e-05, + "loss": 0.4692, + "step": 45 + }, + { + "epoch": 0.7086614173228346, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4610559344291687, + "eval_runtime": 108.7229, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 45 + }, + { + "epoch": 0.7244094488188977, + "grad_norm": 4.319406509399414, + "learning_rate": 1.5132275132275134e-05, + "loss": 0.5203, + "step": 46 + }, + { + "epoch": 0.7401574803149606, + "grad_norm": 3.885263442993164, + "learning_rate": 1.5026455026455027e-05, + "loss": 0.5084, + "step": 47 + }, + { + "epoch": 0.7559055118110236, + "grad_norm": 3.547327995300293, + "learning_rate": 1.4920634920634922e-05, + "loss": 0.442, + "step": 48 + }, + { + "epoch": 0.7716535433070866, + "grad_norm": 3.8868982791900635, + "learning_rate": 1.4814814814814815e-05, + "loss": 0.5848, + "step": 49 + }, + { + "epoch": 0.7874015748031497, + "grad_norm": 2.222346544265747, + "learning_rate": 1.470899470899471e-05, + "loss": 0.5455, + "step": 50 + }, + { + "epoch": 0.7874015748031497, + "eval_accuracy": 0.7620041753653445, + "eval_loss": 0.4531377851963043, + "eval_runtime": 108.7528, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 50 + }, + { + "epoch": 0.8031496062992126, + "grad_norm": 3.129575252532959, + "learning_rate": 1.4603174603174603e-05, + "loss": 0.4861, + "step": 51 + }, + { + "epoch": 0.8188976377952756, + "grad_norm": 4.924710750579834, + "learning_rate": 1.44973544973545e-05, + "loss": 0.5782, + "step": 52 + }, + { + "epoch": 0.8346456692913385, + "grad_norm": 5.2157182693481445, + "learning_rate": 1.4391534391534392e-05, + "loss": 0.7203, + "step": 53 + }, + { + "epoch": 0.8503937007874016, + "grad_norm": 4.697371959686279, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.4261, + "step": 54 + }, + { + "epoch": 0.8661417322834646, + "grad_norm": 2.8899056911468506, + "learning_rate": 1.417989417989418e-05, + "loss": 0.5696, + "step": 55 + }, + { + "epoch": 0.8661417322834646, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4459321200847626, + "eval_runtime": 108.7951, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.551, + "step": 55 + }, + { + "epoch": 0.8818897637795275, + "grad_norm": 4.532041072845459, + "learning_rate": 1.4074074074074075e-05, + "loss": 0.5723, + "step": 56 + }, + { + "epoch": 0.8976377952755905, + "grad_norm": 2.3436343669891357, + "learning_rate": 1.3968253968253968e-05, + "loss": 0.3629, + "step": 57 + }, + { + "epoch": 0.9133858267716536, + "grad_norm": 3.333158493041992, + "learning_rate": 1.3862433862433865e-05, + "loss": 0.5433, + "step": 58 + }, + { + "epoch": 0.9291338582677166, + "grad_norm": 4.177884101867676, + "learning_rate": 1.3756613756613758e-05, + "loss": 0.3747, + "step": 59 + }, + { + "epoch": 0.9448818897637795, + "grad_norm": 5.238712310791016, + "learning_rate": 1.3650793650793652e-05, + "loss": 0.7453, + "step": 60 + }, + { + "epoch": 0.9448818897637795, + "eval_accuracy": 0.7766179540709812, + "eval_loss": 0.4413756728172302, + "eval_runtime": 108.7463, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 60 + }, + { + "epoch": 0.9606299212598425, + "grad_norm": 4.022979736328125, + "learning_rate": 1.3544973544973545e-05, + "loss": 0.6177, + "step": 61 + }, + { + "epoch": 0.9763779527559056, + "grad_norm": 2.0528969764709473, + "learning_rate": 1.343915343915344e-05, + "loss": 0.3505, + "step": 62 + }, + { + "epoch": 0.9921259842519685, + "grad_norm": 3.9705586433410645, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5858, + "step": 63 + }, + { + "epoch": 1.0078740157480315, + "grad_norm": 8.341585159301758, + "learning_rate": 1.322751322751323e-05, + "loss": 0.6721, + "step": 64 + }, + { + "epoch": 1.0236220472440944, + "grad_norm": 4.031370162963867, + "learning_rate": 1.3121693121693123e-05, + "loss": 0.5369, + "step": 65 + }, + { + "epoch": 1.0236220472440944, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.43705105781555176, + "eval_runtime": 108.7278, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 65 + }, + { + "epoch": 1.0393700787401574, + "grad_norm": 2.898926019668579, + "learning_rate": 1.3015873015873018e-05, + "loss": 0.3628, + "step": 66 + }, + { + "epoch": 1.0551181102362204, + "grad_norm": 2.9200918674468994, + "learning_rate": 1.291005291005291e-05, + "loss": 0.3311, + "step": 67 + }, + { + "epoch": 1.0708661417322836, + "grad_norm": 4.506103992462158, + "learning_rate": 1.2804232804232805e-05, + "loss": 0.5813, + "step": 68 + }, + { + "epoch": 1.0866141732283465, + "grad_norm": 4.187809944152832, + "learning_rate": 1.2698412698412699e-05, + "loss": 0.4802, + "step": 69 + }, + { + "epoch": 1.1023622047244095, + "grad_norm": 3.5520920753479004, + "learning_rate": 1.2592592592592593e-05, + "loss": 0.3994, + "step": 70 + }, + { + "epoch": 1.1023622047244095, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.43335652351379395, + "eval_runtime": 108.738, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 70 + }, + { + "epoch": 1.1181102362204725, + "grad_norm": 3.6081998348236084, + "learning_rate": 1.2486772486772486e-05, + "loss": 0.5266, + "step": 71 + }, + { + "epoch": 1.1338582677165354, + "grad_norm": 3.6554276943206787, + "learning_rate": 1.2380952380952383e-05, + "loss": 0.5231, + "step": 72 + }, + { + "epoch": 1.1496062992125984, + "grad_norm": 3.551367998123169, + "learning_rate": 1.2275132275132276e-05, + "loss": 0.4538, + "step": 73 + }, + { + "epoch": 1.1653543307086613, + "grad_norm": 4.252958297729492, + "learning_rate": 1.216931216931217e-05, + "loss": 0.4688, + "step": 74 + }, + { + "epoch": 1.1811023622047245, + "grad_norm": 4.337672710418701, + "learning_rate": 1.2063492063492064e-05, + "loss": 0.4235, + "step": 75 + }, + { + "epoch": 1.1811023622047245, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.429808109998703, + "eval_runtime": 108.723, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 75 + }, + { + "epoch": 1.1968503937007875, + "grad_norm": 2.607356548309326, + "learning_rate": 1.1957671957671959e-05, + "loss": 0.3639, + "step": 76 + }, + { + "epoch": 1.2125984251968505, + "grad_norm": 3.198551654815674, + "learning_rate": 1.1851851851851852e-05, + "loss": 0.4066, + "step": 77 + }, + { + "epoch": 1.2283464566929134, + "grad_norm": 4.820532321929932, + "learning_rate": 1.1746031746031748e-05, + "loss": 0.5906, + "step": 78 + }, + { + "epoch": 1.2440944881889764, + "grad_norm": 3.5706419944763184, + "learning_rate": 1.1640211640211641e-05, + "loss": 0.5065, + "step": 79 + }, + { + "epoch": 1.2598425196850394, + "grad_norm": 4.763455867767334, + "learning_rate": 1.1534391534391536e-05, + "loss": 0.4811, + "step": 80 + }, + { + "epoch": 1.2598425196850394, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4265703856945038, + "eval_runtime": 108.7158, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 80 + }, + { + "epoch": 1.2755905511811023, + "grad_norm": 5.053676605224609, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.5317, + "step": 81 + }, + { + "epoch": 1.2913385826771653, + "grad_norm": 4.484920024871826, + "learning_rate": 1.1322751322751324e-05, + "loss": 0.5474, + "step": 82 + }, + { + "epoch": 1.3070866141732282, + "grad_norm": 4.059377193450928, + "learning_rate": 1.1216931216931217e-05, + "loss": 0.4936, + "step": 83 + }, + { + "epoch": 1.3228346456692912, + "grad_norm": 4.017063617706299, + "learning_rate": 1.1111111111111113e-05, + "loss": 0.3574, + "step": 84 + }, + { + "epoch": 1.3385826771653544, + "grad_norm": 3.275650978088379, + "learning_rate": 1.1005291005291006e-05, + "loss": 0.5072, + "step": 85 + }, + { + "epoch": 1.3385826771653544, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4252822697162628, + "eval_runtime": 108.7176, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 85 + }, + { + "epoch": 1.3543307086614174, + "grad_norm": 5.258458614349365, + "learning_rate": 1.0899470899470901e-05, + "loss": 0.6735, + "step": 86 + }, + { + "epoch": 1.3700787401574803, + "grad_norm": 3.070061445236206, + "learning_rate": 1.0793650793650794e-05, + "loss": 0.4365, + "step": 87 + }, + { + "epoch": 1.3858267716535433, + "grad_norm": 3.556974172592163, + "learning_rate": 1.0687830687830689e-05, + "loss": 0.5113, + "step": 88 + }, + { + "epoch": 1.4015748031496063, + "grad_norm": 2.5072743892669678, + "learning_rate": 1.0582010582010582e-05, + "loss": 0.286, + "step": 89 + }, + { + "epoch": 1.4173228346456692, + "grad_norm": 4.407125949859619, + "learning_rate": 1.0476190476190477e-05, + "loss": 0.4405, + "step": 90 + }, + { + "epoch": 1.4173228346456692, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.42280957102775574, + "eval_runtime": 108.7146, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 90 + }, + { + "epoch": 1.4330708661417324, + "grad_norm": 6.310215473175049, + "learning_rate": 1.037037037037037e-05, + "loss": 0.6205, + "step": 91 + }, + { + "epoch": 1.4488188976377954, + "grad_norm": 3.586291551589966, + "learning_rate": 1.0264550264550266e-05, + "loss": 0.4571, + "step": 92 + }, + { + "epoch": 1.4645669291338583, + "grad_norm": 4.950135707855225, + "learning_rate": 1.015873015873016e-05, + "loss": 0.7159, + "step": 93 + }, + { + "epoch": 1.4803149606299213, + "grad_norm": 2.9908485412597656, + "learning_rate": 1.0052910052910054e-05, + "loss": 0.4769, + "step": 94 + }, + { + "epoch": 1.4960629921259843, + "grad_norm": 4.945335865020752, + "learning_rate": 9.947089947089947e-06, + "loss": 0.5349, + "step": 95 + }, + { + "epoch": 1.4960629921259843, + "eval_accuracy": 0.7870563674321504, + "eval_loss": 0.41962236166000366, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 95 + }, + { + "epoch": 1.5118110236220472, + "grad_norm": 4.648383140563965, + "learning_rate": 9.841269841269842e-06, + "loss": 0.5035, + "step": 96 + }, + { + "epoch": 1.5275590551181102, + "grad_norm": 4.447684288024902, + "learning_rate": 9.735449735449735e-06, + "loss": 0.5128, + "step": 97 + }, + { + "epoch": 1.5433070866141732, + "grad_norm": 3.652973175048828, + "learning_rate": 9.62962962962963e-06, + "loss": 0.3454, + "step": 98 + }, + { + "epoch": 1.5590551181102361, + "grad_norm": 3.083529472351074, + "learning_rate": 9.523809523809525e-06, + "loss": 0.4522, + "step": 99 + }, + { + "epoch": 1.574803149606299, + "grad_norm": 2.6377124786376953, + "learning_rate": 9.417989417989418e-06, + "loss": 0.3342, + "step": 100 + }, + { + "epoch": 1.574803149606299, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.4169768989086151, + "eval_runtime": 108.7386, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 100 + }, + { + "epoch": 1.590551181102362, + "grad_norm": 5.530861854553223, + "learning_rate": 9.312169312169313e-06, + "loss": 0.6154, + "step": 101 + }, + { + "epoch": 1.6062992125984252, + "grad_norm": 2.849217176437378, + "learning_rate": 9.206349206349207e-06, + "loss": 0.3823, + "step": 102 + }, + { + "epoch": 1.6220472440944882, + "grad_norm": 2.8741447925567627, + "learning_rate": 9.1005291005291e-06, + "loss": 0.2884, + "step": 103 + }, + { + "epoch": 1.6377952755905512, + "grad_norm": 5.099402904510498, + "learning_rate": 8.994708994708995e-06, + "loss": 0.4426, + "step": 104 + }, + { + "epoch": 1.6535433070866141, + "grad_norm": 3.130911350250244, + "learning_rate": 8.888888888888888e-06, + "loss": 0.5271, + "step": 105 + }, + { + "epoch": 1.6535433070866141, + "eval_accuracy": 0.7933194154488518, + "eval_loss": 0.41489914059638977, + "eval_runtime": 108.7149, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 105 + }, + { + "epoch": 1.6692913385826773, + "grad_norm": 3.4253950119018555, + "learning_rate": 8.783068783068783e-06, + "loss": 0.4482, + "step": 106 + }, + { + "epoch": 1.6850393700787403, + "grad_norm": 3.2515480518341064, + "learning_rate": 8.677248677248678e-06, + "loss": 0.5227, + "step": 107 + }, + { + "epoch": 1.7007874015748032, + "grad_norm": 3.6166484355926514, + "learning_rate": 8.571428571428571e-06, + "loss": 0.4545, + "step": 108 + }, + { + "epoch": 1.7165354330708662, + "grad_norm": 2.7220921516418457, + "learning_rate": 8.465608465608466e-06, + "loss": 0.3609, + "step": 109 + }, + { + "epoch": 1.7322834645669292, + "grad_norm": 2.6449429988861084, + "learning_rate": 8.35978835978836e-06, + "loss": 0.3463, + "step": 110 + }, + { + "epoch": 1.7322834645669292, + "eval_accuracy": 0.7974947807933194, + "eval_loss": 0.41358983516693115, + "eval_runtime": 108.7211, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 110 + }, + { + "epoch": 1.7480314960629921, + "grad_norm": 3.9665286540985107, + "learning_rate": 8.253968253968254e-06, + "loss": 0.5416, + "step": 111 + }, + { + "epoch": 1.763779527559055, + "grad_norm": 3.658632516860962, + "learning_rate": 8.148148148148148e-06, + "loss": 0.423, + "step": 112 + }, + { + "epoch": 1.779527559055118, + "grad_norm": 3.2784206867218018, + "learning_rate": 8.042328042328043e-06, + "loss": 0.4253, + "step": 113 + }, + { + "epoch": 1.795275590551181, + "grad_norm": 2.654160737991333, + "learning_rate": 7.936507936507936e-06, + "loss": 0.4836, + "step": 114 + }, + { + "epoch": 1.811023622047244, + "grad_norm": 4.960519313812256, + "learning_rate": 7.830687830687831e-06, + "loss": 0.4867, + "step": 115 + }, + { + "epoch": 1.811023622047244, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4127565622329712, + "eval_runtime": 108.7361, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 115 + }, + { + "epoch": 1.826771653543307, + "grad_norm": 2.9885411262512207, + "learning_rate": 7.724867724867726e-06, + "loss": 0.3218, + "step": 116 + }, + { + "epoch": 1.84251968503937, + "grad_norm": 3.868762254714966, + "learning_rate": 7.61904761904762e-06, + "loss": 0.5335, + "step": 117 + }, + { + "epoch": 1.858267716535433, + "grad_norm": 3.111746072769165, + "learning_rate": 7.5132275132275136e-06, + "loss": 0.4498, + "step": 118 + }, + { + "epoch": 1.874015748031496, + "grad_norm": 3.94144868850708, + "learning_rate": 7.4074074074074075e-06, + "loss": 0.3923, + "step": 119 + }, + { + "epoch": 1.889763779527559, + "grad_norm": 3.6796834468841553, + "learning_rate": 7.301587301587301e-06, + "loss": 0.3221, + "step": 120 + }, + { + "epoch": 1.889763779527559, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4124543368816376, + "eval_runtime": 108.7265, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 120 + }, + { + "epoch": 1.905511811023622, + "grad_norm": 3.597407341003418, + "learning_rate": 7.195767195767196e-06, + "loss": 0.5133, + "step": 121 + }, + { + "epoch": 1.9212598425196852, + "grad_norm": 2.6824302673339844, + "learning_rate": 7.08994708994709e-06, + "loss": 0.4173, + "step": 122 + }, + { + "epoch": 1.9370078740157481, + "grad_norm": 3.5606563091278076, + "learning_rate": 6.984126984126984e-06, + "loss": 0.5625, + "step": 123 + }, + { + "epoch": 1.952755905511811, + "grad_norm": 2.981217622756958, + "learning_rate": 6.878306878306879e-06, + "loss": 0.3826, + "step": 124 + }, + { + "epoch": 1.968503937007874, + "grad_norm": 2.708043336868286, + "learning_rate": 6.772486772486773e-06, + "loss": 0.3542, + "step": 125 + }, + { + "epoch": 1.968503937007874, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4116251766681671, + "eval_runtime": 108.7821, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.552, + "step": 125 + }, + { + "epoch": 1.984251968503937, + "grad_norm": 3.5302915573120117, + "learning_rate": 6.666666666666667e-06, + "loss": 0.5818, + "step": 126 + }, + { + "epoch": 2.0, + "grad_norm": 6.032024383544922, + "learning_rate": 6.560846560846561e-06, + "loss": 0.5345, + "step": 127 + }, + { + "epoch": 2.015748031496063, + "grad_norm": 1.919519305229187, + "learning_rate": 6.455026455026455e-06, + "loss": 0.312, + "step": 128 + }, + { + "epoch": 2.031496062992126, + "grad_norm": 3.025320291519165, + "learning_rate": 6.349206349206349e-06, + "loss": 0.4013, + "step": 129 + }, + { + "epoch": 2.047244094488189, + "grad_norm": 4.498544216156006, + "learning_rate": 6.243386243386243e-06, + "loss": 0.5465, + "step": 130 + }, + { + "epoch": 2.047244094488189, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4106995761394501, + "eval_runtime": 108.7564, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 130 + }, + { + "epoch": 2.062992125984252, + "grad_norm": 4.507568359375, + "learning_rate": 6.137566137566138e-06, + "loss": 0.4377, + "step": 131 + }, + { + "epoch": 2.078740157480315, + "grad_norm": 3.309709310531616, + "learning_rate": 6.031746031746032e-06, + "loss": 0.4066, + "step": 132 + }, + { + "epoch": 2.094488188976378, + "grad_norm": 3.5611140727996826, + "learning_rate": 5.925925925925926e-06, + "loss": 0.4138, + "step": 133 + }, + { + "epoch": 2.1102362204724407, + "grad_norm": 5.7032670974731445, + "learning_rate": 5.820105820105821e-06, + "loss": 0.592, + "step": 134 + }, + { + "epoch": 2.1259842519685037, + "grad_norm": 2.3446578979492188, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.3427, + "step": 135 + }, + { + "epoch": 2.1259842519685037, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.41008052229881287, + "eval_runtime": 108.7205, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 135 + }, + { + "epoch": 2.141732283464567, + "grad_norm": 3.090672254562378, + "learning_rate": 5.6084656084656084e-06, + "loss": 0.3996, + "step": 136 + }, + { + "epoch": 2.15748031496063, + "grad_norm": 2.8654699325561523, + "learning_rate": 5.502645502645503e-06, + "loss": 0.3874, + "step": 137 + }, + { + "epoch": 2.173228346456693, + "grad_norm": 2.6311564445495605, + "learning_rate": 5.396825396825397e-06, + "loss": 0.4439, + "step": 138 + }, + { + "epoch": 2.188976377952756, + "grad_norm": 3.5829622745513916, + "learning_rate": 5.291005291005291e-06, + "loss": 0.4736, + "step": 139 + }, + { + "epoch": 2.204724409448819, + "grad_norm": 3.072617769241333, + "learning_rate": 5.185185185185185e-06, + "loss": 0.4787, + "step": 140 + }, + { + "epoch": 2.204724409448819, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.40874549746513367, + "eval_runtime": 108.7212, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 140 + }, + { + "epoch": 2.220472440944882, + "grad_norm": 3.076615571975708, + "learning_rate": 5.07936507936508e-06, + "loss": 0.3407, + "step": 141 + }, + { + "epoch": 2.236220472440945, + "grad_norm": 3.896620273590088, + "learning_rate": 4.973544973544974e-06, + "loss": 0.2494, + "step": 142 + }, + { + "epoch": 2.251968503937008, + "grad_norm": 2.670351266860962, + "learning_rate": 4.867724867724868e-06, + "loss": 0.2989, + "step": 143 + }, + { + "epoch": 2.267716535433071, + "grad_norm": 3.2045724391937256, + "learning_rate": 4.761904761904762e-06, + "loss": 0.5389, + "step": 144 + }, + { + "epoch": 2.283464566929134, + "grad_norm": 2.343071699142456, + "learning_rate": 4.656084656084656e-06, + "loss": 0.4229, + "step": 145 + }, + { + "epoch": 2.283464566929134, + "eval_accuracy": 0.8016701461377871, + "eval_loss": 0.40726396441459656, + "eval_runtime": 108.722, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 145 + }, + { + "epoch": 2.2992125984251968, + "grad_norm": 4.241441249847412, + "learning_rate": 4.55026455026455e-06, + "loss": 0.3901, + "step": 146 + }, + { + "epoch": 2.3149606299212597, + "grad_norm": 3.265329360961914, + "learning_rate": 4.444444444444444e-06, + "loss": 0.5216, + "step": 147 + }, + { + "epoch": 2.3307086614173227, + "grad_norm": 3.340651273727417, + "learning_rate": 4.338624338624339e-06, + "loss": 0.4494, + "step": 148 + }, + { + "epoch": 2.3464566929133857, + "grad_norm": 3.6961324214935303, + "learning_rate": 4.232804232804233e-06, + "loss": 0.5125, + "step": 149 + }, + { + "epoch": 2.362204724409449, + "grad_norm": 3.192697286605835, + "learning_rate": 4.126984126984127e-06, + "loss": 0.4514, + "step": 150 + }, + { + "epoch": 2.362204724409449, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.40633270144462585, + "eval_runtime": 108.7184, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 150 + }, + { + "epoch": 2.377952755905512, + "grad_norm": 2.322136878967285, + "learning_rate": 4.0211640211640215e-06, + "loss": 0.2585, + "step": 151 + }, + { + "epoch": 2.393700787401575, + "grad_norm": 3.902898073196411, + "learning_rate": 3.9153439153439155e-06, + "loss": 0.4682, + "step": 152 + }, + { + "epoch": 2.409448818897638, + "grad_norm": 4.637646675109863, + "learning_rate": 3.80952380952381e-06, + "loss": 0.4195, + "step": 153 + }, + { + "epoch": 2.425196850393701, + "grad_norm": 3.0741162300109863, + "learning_rate": 3.7037037037037037e-06, + "loss": 0.3182, + "step": 154 + }, + { + "epoch": 2.440944881889764, + "grad_norm": 3.675708055496216, + "learning_rate": 3.597883597883598e-06, + "loss": 0.5116, + "step": 155 + }, + { + "epoch": 2.440944881889764, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.40514081716537476, + "eval_runtime": 108.7241, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 155 + }, + { + "epoch": 2.456692913385827, + "grad_norm": 5.543003082275391, + "learning_rate": 3.492063492063492e-06, + "loss": 0.4976, + "step": 156 + }, + { + "epoch": 2.47244094488189, + "grad_norm": 4.1292595863342285, + "learning_rate": 3.3862433862433864e-06, + "loss": 0.415, + "step": 157 + }, + { + "epoch": 2.4881889763779528, + "grad_norm": 4.066864967346191, + "learning_rate": 3.2804232804232807e-06, + "loss": 0.4172, + "step": 158 + }, + { + "epoch": 2.5039370078740157, + "grad_norm": 3.1275038719177246, + "learning_rate": 3.1746031746031746e-06, + "loss": 0.4113, + "step": 159 + }, + { + "epoch": 2.5196850393700787, + "grad_norm": 4.276533603668213, + "learning_rate": 3.068783068783069e-06, + "loss": 0.3234, + "step": 160 + }, + { + "epoch": 2.5196850393700787, + "eval_accuracy": 0.8058455114822547, + "eval_loss": 0.40446344017982483, + "eval_runtime": 108.7229, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 160 + } + ], + "logging_steps": 1.0, + "max_steps": 189, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-160/training_args.bin b/checkpoint-160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..594d4dba0d053d5c7bfd636fe070834e9478eee4 --- /dev/null +++ b/checkpoint-160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e8eb4d94fcecbd8ab0aa24ee61662b7a5da2eef5f366546a325524fc03e575 +size 5112 diff --git a/checkpoint-180/README.md b/checkpoint-180/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a53ea425933059074716d64973f94edb73a8eb80 --- /dev/null +++ b/checkpoint-180/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/checkpoint-180/adapter_config.json b/checkpoint-180/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e788c33bde47908737d80fe730414dabf5e14ea --- /dev/null +++ b/checkpoint-180/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj" + ], + "task_type": "SEQ_CLS", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-180/adapter_model.safetensors b/checkpoint-180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca1e0cdc0c4486108668274e448b05ffb41dbe30 --- /dev/null +++ b/checkpoint-180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c44c4d00d9a78bb8f56cef9d16ac5f761dcc3d0af43286227eaeae18d976d704 +size 57249936 diff --git a/checkpoint-180/optimizer.pt b/checkpoint-180/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3601ebe011b8c5fd98078bbc38932b153b21ed5b --- /dev/null +++ b/checkpoint-180/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67bea9ede440dd95febb5ae30ee5940b758b9f4c3f9ec544f47a5067829ad505 +size 114624506 diff --git a/checkpoint-180/rng_state.pth b/checkpoint-180/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6df6d0fd22f7f82a2d0aec3487cd5500d0b5549a --- /dev/null +++ b/checkpoint-180/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e50f58c9b7c255404f383e3b85d5b02c9432a50fbc06febab2029b8e1592afb3 +size 14244 diff --git a/checkpoint-180/scheduler.pt b/checkpoint-180/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..59c486f8b763a42647a6edff28ab9181b1b046d0 --- /dev/null +++ b/checkpoint-180/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0379d1e1f61a294d52fd30a62e4f218e7500172e4684e8f28f55f76a47fb92e +size 1064 diff --git a/checkpoint-180/special_tokens_map.json b/checkpoint-180/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-180/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-180/tokenizer.json b/checkpoint-180/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/checkpoint-180/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/checkpoint-180/tokenizer_config.json b/checkpoint-180/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..50e79ef5e39127ac3280c4f578d33786f5afbb5c --- /dev/null +++ b/checkpoint-180/tokenizer_config.json @@ -0,0 +1,1756 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-180/trainer_state.json b/checkpoint-180/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..474d2514509f48adc1d44cefc186109629731404 --- /dev/null +++ b/checkpoint-180/trainer_state.json @@ -0,0 +1,1617 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.8346456692913384, + "eval_steps": 5, + "global_step": 180, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015748031496062992, + "grad_norm": 5.510926246643066, + "learning_rate": 1.9894179894179895e-05, + "loss": 0.9249, + "step": 1 + }, + { + "epoch": 0.031496062992125984, + "grad_norm": 8.61505126953125, + "learning_rate": 1.978835978835979e-05, + "loss": 0.8445, + "step": 2 + }, + { + "epoch": 0.047244094488188976, + "grad_norm": 7.036591529846191, + "learning_rate": 1.9682539682539684e-05, + "loss": 0.9654, + "step": 3 + }, + { + "epoch": 0.06299212598425197, + "grad_norm": 5.803933143615723, + "learning_rate": 1.9576719576719577e-05, + "loss": 0.9276, + "step": 4 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 5.716428756713867, + "learning_rate": 1.947089947089947e-05, + "loss": 0.9241, + "step": 5 + }, + { + "epoch": 0.07874015748031496, + "eval_accuracy": 0.5678496868475992, + "eval_loss": 0.6996241807937622, + "eval_runtime": 108.7291, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 5 + }, + { + "epoch": 0.09448818897637795, + "grad_norm": 7.55866813659668, + "learning_rate": 1.9365079365079367e-05, + "loss": 0.9947, + "step": 6 + }, + { + "epoch": 0.11023622047244094, + "grad_norm": 6.801171779632568, + "learning_rate": 1.925925925925926e-05, + "loss": 0.972, + "step": 7 + }, + { + "epoch": 0.12598425196850394, + "grad_norm": 4.845946311950684, + "learning_rate": 1.9153439153439156e-05, + "loss": 0.6478, + "step": 8 + }, + { + "epoch": 0.14173228346456693, + "grad_norm": 10.487945556640625, + "learning_rate": 1.904761904761905e-05, + "loss": 0.8597, + "step": 9 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 5.452786445617676, + "learning_rate": 1.8941798941798943e-05, + "loss": 0.7708, + "step": 10 + }, + { + "epoch": 0.15748031496062992, + "eval_accuracy": 0.6659707724425887, + "eval_loss": 0.6283570528030396, + "eval_runtime": 108.7155, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 10 + }, + { + "epoch": 0.1732283464566929, + "grad_norm": 4.522532939910889, + "learning_rate": 1.8835978835978836e-05, + "loss": 0.6848, + "step": 11 + }, + { + "epoch": 0.1889763779527559, + "grad_norm": 6.4987688064575195, + "learning_rate": 1.8730158730158732e-05, + "loss": 0.6644, + "step": 12 + }, + { + "epoch": 0.2047244094488189, + "grad_norm": 4.2297682762146, + "learning_rate": 1.8624338624338625e-05, + "loss": 0.7227, + "step": 13 + }, + { + "epoch": 0.2204724409448819, + "grad_norm": 6.5658063888549805, + "learning_rate": 1.851851851851852e-05, + "loss": 0.6991, + "step": 14 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 6.549685001373291, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.7875, + "step": 15 + }, + { + "epoch": 0.23622047244094488, + "eval_accuracy": 0.7244258872651357, + "eval_loss": 0.5749094486236572, + "eval_runtime": 108.7121, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 15 + }, + { + "epoch": 0.25196850393700787, + "grad_norm": 3.6349198818206787, + "learning_rate": 1.8306878306878308e-05, + "loss": 0.5732, + "step": 16 + }, + { + "epoch": 0.2677165354330709, + "grad_norm": 4.741979598999023, + "learning_rate": 1.82010582010582e-05, + "loss": 0.5774, + "step": 17 + }, + { + "epoch": 0.28346456692913385, + "grad_norm": 4.751223087310791, + "learning_rate": 1.8095238095238097e-05, + "loss": 0.5738, + "step": 18 + }, + { + "epoch": 0.2992125984251969, + "grad_norm": 5.214819431304932, + "learning_rate": 1.798941798941799e-05, + "loss": 0.7182, + "step": 19 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 5.566962718963623, + "learning_rate": 1.7883597883597884e-05, + "loss": 0.6575, + "step": 20 + }, + { + "epoch": 0.31496062992125984, + "eval_accuracy": 0.7390396659707724, + "eval_loss": 0.5360159873962402, + "eval_runtime": 108.7252, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 20 + }, + { + "epoch": 0.33070866141732286, + "grad_norm": 4.060683727264404, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.5976, + "step": 21 + }, + { + "epoch": 0.3464566929133858, + "grad_norm": 5.9868621826171875, + "learning_rate": 1.7671957671957673e-05, + "loss": 0.7734, + "step": 22 + }, + { + "epoch": 0.36220472440944884, + "grad_norm": 3.4295496940612793, + "learning_rate": 1.7566137566137566e-05, + "loss": 0.5543, + "step": 23 + }, + { + "epoch": 0.3779527559055118, + "grad_norm": 4.587719917297363, + "learning_rate": 1.7460317460317463e-05, + "loss": 0.6497, + "step": 24 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 4.520890235900879, + "learning_rate": 1.7354497354497356e-05, + "loss": 0.6802, + "step": 25 + }, + { + "epoch": 0.3937007874015748, + "eval_accuracy": 0.7432150313152401, + "eval_loss": 0.5086582899093628, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 25 + }, + { + "epoch": 0.4094488188976378, + "grad_norm": 3.653116464614868, + "learning_rate": 1.724867724867725e-05, + "loss": 0.4888, + "step": 26 + }, + { + "epoch": 0.4251968503937008, + "grad_norm": 4.042315483093262, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6004, + "step": 27 + }, + { + "epoch": 0.4409448818897638, + "grad_norm": 5.317520618438721, + "learning_rate": 1.7037037037037038e-05, + "loss": 0.6253, + "step": 28 + }, + { + "epoch": 0.4566929133858268, + "grad_norm": 3.8642020225524902, + "learning_rate": 1.693121693121693e-05, + "loss": 0.5778, + "step": 29 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 2.3941361904144287, + "learning_rate": 1.6825396825396828e-05, + "loss": 0.3982, + "step": 30 + }, + { + "epoch": 0.47244094488188976, + "eval_accuracy": 0.7578288100208769, + "eval_loss": 0.4889708459377289, + "eval_runtime": 108.7183, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 30 + }, + { + "epoch": 0.4881889763779528, + "grad_norm": 4.1248650550842285, + "learning_rate": 1.671957671957672e-05, + "loss": 0.777, + "step": 31 + }, + { + "epoch": 0.5039370078740157, + "grad_norm": 3.369483470916748, + "learning_rate": 1.6613756613756614e-05, + "loss": 0.5675, + "step": 32 + }, + { + "epoch": 0.5196850393700787, + "grad_norm": 3.8457119464874268, + "learning_rate": 1.6507936507936507e-05, + "loss": 0.6227, + "step": 33 + }, + { + "epoch": 0.5354330708661418, + "grad_norm": 4.809354782104492, + "learning_rate": 1.6402116402116404e-05, + "loss": 0.7111, + "step": 34 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 2.84769868850708, + "learning_rate": 1.6296296296296297e-05, + "loss": 0.4555, + "step": 35 + }, + { + "epoch": 0.5511811023622047, + "eval_accuracy": 0.7599164926931107, + "eval_loss": 0.4774630665779114, + "eval_runtime": 108.7145, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 35 + }, + { + "epoch": 0.5669291338582677, + "grad_norm": 4.496406555175781, + "learning_rate": 1.6190476190476193e-05, + "loss": 0.6703, + "step": 36 + }, + { + "epoch": 0.5826771653543307, + "grad_norm": 5.721245288848877, + "learning_rate": 1.6084656084656086e-05, + "loss": 0.7066, + "step": 37 + }, + { + "epoch": 0.5984251968503937, + "grad_norm": 4.494580268859863, + "learning_rate": 1.597883597883598e-05, + "loss": 0.4907, + "step": 38 + }, + { + "epoch": 0.6141732283464567, + "grad_norm": 2.8905560970306396, + "learning_rate": 1.5873015873015872e-05, + "loss": 0.5501, + "step": 39 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 9.776362419128418, + "learning_rate": 1.576719576719577e-05, + "loss": 0.8838, + "step": 40 + }, + { + "epoch": 0.6299212598425197, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.46829721331596375, + "eval_runtime": 108.7189, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 40 + }, + { + "epoch": 0.6456692913385826, + "grad_norm": 3.8481881618499756, + "learning_rate": 1.5661375661375662e-05, + "loss": 0.5309, + "step": 41 + }, + { + "epoch": 0.6614173228346457, + "grad_norm": 6.0327839851379395, + "learning_rate": 1.555555555555556e-05, + "loss": 0.6414, + "step": 42 + }, + { + "epoch": 0.6771653543307087, + "grad_norm": 4.993657112121582, + "learning_rate": 1.544973544973545e-05, + "loss": 0.5727, + "step": 43 + }, + { + "epoch": 0.6929133858267716, + "grad_norm": 4.3265252113342285, + "learning_rate": 1.5343915343915344e-05, + "loss": 0.4913, + "step": 44 + }, + { + "epoch": 0.7086614173228346, + "grad_norm": 3.6012353897094727, + "learning_rate": 1.523809523809524e-05, + "loss": 0.4692, + "step": 45 + }, + { + "epoch": 0.7086614173228346, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4610559344291687, + "eval_runtime": 108.7229, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 45 + }, + { + "epoch": 0.7244094488188977, + "grad_norm": 4.319406509399414, + "learning_rate": 1.5132275132275134e-05, + "loss": 0.5203, + "step": 46 + }, + { + "epoch": 0.7401574803149606, + "grad_norm": 3.885263442993164, + "learning_rate": 1.5026455026455027e-05, + "loss": 0.5084, + "step": 47 + }, + { + "epoch": 0.7559055118110236, + "grad_norm": 3.547327995300293, + "learning_rate": 1.4920634920634922e-05, + "loss": 0.442, + "step": 48 + }, + { + "epoch": 0.7716535433070866, + "grad_norm": 3.8868982791900635, + "learning_rate": 1.4814814814814815e-05, + "loss": 0.5848, + "step": 49 + }, + { + "epoch": 0.7874015748031497, + "grad_norm": 2.222346544265747, + "learning_rate": 1.470899470899471e-05, + "loss": 0.5455, + "step": 50 + }, + { + "epoch": 0.7874015748031497, + "eval_accuracy": 0.7620041753653445, + "eval_loss": 0.4531377851963043, + "eval_runtime": 108.7528, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 50 + }, + { + "epoch": 0.8031496062992126, + "grad_norm": 3.129575252532959, + "learning_rate": 1.4603174603174603e-05, + "loss": 0.4861, + "step": 51 + }, + { + "epoch": 0.8188976377952756, + "grad_norm": 4.924710750579834, + "learning_rate": 1.44973544973545e-05, + "loss": 0.5782, + "step": 52 + }, + { + "epoch": 0.8346456692913385, + "grad_norm": 5.2157182693481445, + "learning_rate": 1.4391534391534392e-05, + "loss": 0.7203, + "step": 53 + }, + { + "epoch": 0.8503937007874016, + "grad_norm": 4.697371959686279, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.4261, + "step": 54 + }, + { + "epoch": 0.8661417322834646, + "grad_norm": 2.8899056911468506, + "learning_rate": 1.417989417989418e-05, + "loss": 0.5696, + "step": 55 + }, + { + "epoch": 0.8661417322834646, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4459321200847626, + "eval_runtime": 108.7951, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.551, + "step": 55 + }, + { + "epoch": 0.8818897637795275, + "grad_norm": 4.532041072845459, + "learning_rate": 1.4074074074074075e-05, + "loss": 0.5723, + "step": 56 + }, + { + "epoch": 0.8976377952755905, + "grad_norm": 2.3436343669891357, + "learning_rate": 1.3968253968253968e-05, + "loss": 0.3629, + "step": 57 + }, + { + "epoch": 0.9133858267716536, + "grad_norm": 3.333158493041992, + "learning_rate": 1.3862433862433865e-05, + "loss": 0.5433, + "step": 58 + }, + { + "epoch": 0.9291338582677166, + "grad_norm": 4.177884101867676, + "learning_rate": 1.3756613756613758e-05, + "loss": 0.3747, + "step": 59 + }, + { + "epoch": 0.9448818897637795, + "grad_norm": 5.238712310791016, + "learning_rate": 1.3650793650793652e-05, + "loss": 0.7453, + "step": 60 + }, + { + "epoch": 0.9448818897637795, + "eval_accuracy": 0.7766179540709812, + "eval_loss": 0.4413756728172302, + "eval_runtime": 108.7463, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 60 + }, + { + "epoch": 0.9606299212598425, + "grad_norm": 4.022979736328125, + "learning_rate": 1.3544973544973545e-05, + "loss": 0.6177, + "step": 61 + }, + { + "epoch": 0.9763779527559056, + "grad_norm": 2.0528969764709473, + "learning_rate": 1.343915343915344e-05, + "loss": 0.3505, + "step": 62 + }, + { + "epoch": 0.9921259842519685, + "grad_norm": 3.9705586433410645, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5858, + "step": 63 + }, + { + "epoch": 1.0078740157480315, + "grad_norm": 8.341585159301758, + "learning_rate": 1.322751322751323e-05, + "loss": 0.6721, + "step": 64 + }, + { + "epoch": 1.0236220472440944, + "grad_norm": 4.031370162963867, + "learning_rate": 1.3121693121693123e-05, + "loss": 0.5369, + "step": 65 + }, + { + "epoch": 1.0236220472440944, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.43705105781555176, + "eval_runtime": 108.7278, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 65 + }, + { + "epoch": 1.0393700787401574, + "grad_norm": 2.898926019668579, + "learning_rate": 1.3015873015873018e-05, + "loss": 0.3628, + "step": 66 + }, + { + "epoch": 1.0551181102362204, + "grad_norm": 2.9200918674468994, + "learning_rate": 1.291005291005291e-05, + "loss": 0.3311, + "step": 67 + }, + { + "epoch": 1.0708661417322836, + "grad_norm": 4.506103992462158, + "learning_rate": 1.2804232804232805e-05, + "loss": 0.5813, + "step": 68 + }, + { + "epoch": 1.0866141732283465, + "grad_norm": 4.187809944152832, + "learning_rate": 1.2698412698412699e-05, + "loss": 0.4802, + "step": 69 + }, + { + "epoch": 1.1023622047244095, + "grad_norm": 3.5520920753479004, + "learning_rate": 1.2592592592592593e-05, + "loss": 0.3994, + "step": 70 + }, + { + "epoch": 1.1023622047244095, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.43335652351379395, + "eval_runtime": 108.738, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 70 + }, + { + "epoch": 1.1181102362204725, + "grad_norm": 3.6081998348236084, + "learning_rate": 1.2486772486772486e-05, + "loss": 0.5266, + "step": 71 + }, + { + "epoch": 1.1338582677165354, + "grad_norm": 3.6554276943206787, + "learning_rate": 1.2380952380952383e-05, + "loss": 0.5231, + "step": 72 + }, + { + "epoch": 1.1496062992125984, + "grad_norm": 3.551367998123169, + "learning_rate": 1.2275132275132276e-05, + "loss": 0.4538, + "step": 73 + }, + { + "epoch": 1.1653543307086613, + "grad_norm": 4.252958297729492, + "learning_rate": 1.216931216931217e-05, + "loss": 0.4688, + "step": 74 + }, + { + "epoch": 1.1811023622047245, + "grad_norm": 4.337672710418701, + "learning_rate": 1.2063492063492064e-05, + "loss": 0.4235, + "step": 75 + }, + { + "epoch": 1.1811023622047245, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.429808109998703, + "eval_runtime": 108.723, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 75 + }, + { + "epoch": 1.1968503937007875, + "grad_norm": 2.607356548309326, + "learning_rate": 1.1957671957671959e-05, + "loss": 0.3639, + "step": 76 + }, + { + "epoch": 1.2125984251968505, + "grad_norm": 3.198551654815674, + "learning_rate": 1.1851851851851852e-05, + "loss": 0.4066, + "step": 77 + }, + { + "epoch": 1.2283464566929134, + "grad_norm": 4.820532321929932, + "learning_rate": 1.1746031746031748e-05, + "loss": 0.5906, + "step": 78 + }, + { + "epoch": 1.2440944881889764, + "grad_norm": 3.5706419944763184, + "learning_rate": 1.1640211640211641e-05, + "loss": 0.5065, + "step": 79 + }, + { + "epoch": 1.2598425196850394, + "grad_norm": 4.763455867767334, + "learning_rate": 1.1534391534391536e-05, + "loss": 0.4811, + "step": 80 + }, + { + "epoch": 1.2598425196850394, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4265703856945038, + "eval_runtime": 108.7158, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 80 + }, + { + "epoch": 1.2755905511811023, + "grad_norm": 5.053676605224609, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.5317, + "step": 81 + }, + { + "epoch": 1.2913385826771653, + "grad_norm": 4.484920024871826, + "learning_rate": 1.1322751322751324e-05, + "loss": 0.5474, + "step": 82 + }, + { + "epoch": 1.3070866141732282, + "grad_norm": 4.059377193450928, + "learning_rate": 1.1216931216931217e-05, + "loss": 0.4936, + "step": 83 + }, + { + "epoch": 1.3228346456692912, + "grad_norm": 4.017063617706299, + "learning_rate": 1.1111111111111113e-05, + "loss": 0.3574, + "step": 84 + }, + { + "epoch": 1.3385826771653544, + "grad_norm": 3.275650978088379, + "learning_rate": 1.1005291005291006e-05, + "loss": 0.5072, + "step": 85 + }, + { + "epoch": 1.3385826771653544, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4252822697162628, + "eval_runtime": 108.7176, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 85 + }, + { + "epoch": 1.3543307086614174, + "grad_norm": 5.258458614349365, + "learning_rate": 1.0899470899470901e-05, + "loss": 0.6735, + "step": 86 + }, + { + "epoch": 1.3700787401574803, + "grad_norm": 3.070061445236206, + "learning_rate": 1.0793650793650794e-05, + "loss": 0.4365, + "step": 87 + }, + { + "epoch": 1.3858267716535433, + "grad_norm": 3.556974172592163, + "learning_rate": 1.0687830687830689e-05, + "loss": 0.5113, + "step": 88 + }, + { + "epoch": 1.4015748031496063, + "grad_norm": 2.5072743892669678, + "learning_rate": 1.0582010582010582e-05, + "loss": 0.286, + "step": 89 + }, + { + "epoch": 1.4173228346456692, + "grad_norm": 4.407125949859619, + "learning_rate": 1.0476190476190477e-05, + "loss": 0.4405, + "step": 90 + }, + { + "epoch": 1.4173228346456692, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.42280957102775574, + "eval_runtime": 108.7146, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 90 + }, + { + "epoch": 1.4330708661417324, + "grad_norm": 6.310215473175049, + "learning_rate": 1.037037037037037e-05, + "loss": 0.6205, + "step": 91 + }, + { + "epoch": 1.4488188976377954, + "grad_norm": 3.586291551589966, + "learning_rate": 1.0264550264550266e-05, + "loss": 0.4571, + "step": 92 + }, + { + "epoch": 1.4645669291338583, + "grad_norm": 4.950135707855225, + "learning_rate": 1.015873015873016e-05, + "loss": 0.7159, + "step": 93 + }, + { + "epoch": 1.4803149606299213, + "grad_norm": 2.9908485412597656, + "learning_rate": 1.0052910052910054e-05, + "loss": 0.4769, + "step": 94 + }, + { + "epoch": 1.4960629921259843, + "grad_norm": 4.945335865020752, + "learning_rate": 9.947089947089947e-06, + "loss": 0.5349, + "step": 95 + }, + { + "epoch": 1.4960629921259843, + "eval_accuracy": 0.7870563674321504, + "eval_loss": 0.41962236166000366, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 95 + }, + { + "epoch": 1.5118110236220472, + "grad_norm": 4.648383140563965, + "learning_rate": 9.841269841269842e-06, + "loss": 0.5035, + "step": 96 + }, + { + "epoch": 1.5275590551181102, + "grad_norm": 4.447684288024902, + "learning_rate": 9.735449735449735e-06, + "loss": 0.5128, + "step": 97 + }, + { + "epoch": 1.5433070866141732, + "grad_norm": 3.652973175048828, + "learning_rate": 9.62962962962963e-06, + "loss": 0.3454, + "step": 98 + }, + { + "epoch": 1.5590551181102361, + "grad_norm": 3.083529472351074, + "learning_rate": 9.523809523809525e-06, + "loss": 0.4522, + "step": 99 + }, + { + "epoch": 1.574803149606299, + "grad_norm": 2.6377124786376953, + "learning_rate": 9.417989417989418e-06, + "loss": 0.3342, + "step": 100 + }, + { + "epoch": 1.574803149606299, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.4169768989086151, + "eval_runtime": 108.7386, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 100 + }, + { + "epoch": 1.590551181102362, + "grad_norm": 5.530861854553223, + "learning_rate": 9.312169312169313e-06, + "loss": 0.6154, + "step": 101 + }, + { + "epoch": 1.6062992125984252, + "grad_norm": 2.849217176437378, + "learning_rate": 9.206349206349207e-06, + "loss": 0.3823, + "step": 102 + }, + { + "epoch": 1.6220472440944882, + "grad_norm": 2.8741447925567627, + "learning_rate": 9.1005291005291e-06, + "loss": 0.2884, + "step": 103 + }, + { + "epoch": 1.6377952755905512, + "grad_norm": 5.099402904510498, + "learning_rate": 8.994708994708995e-06, + "loss": 0.4426, + "step": 104 + }, + { + "epoch": 1.6535433070866141, + "grad_norm": 3.130911350250244, + "learning_rate": 8.888888888888888e-06, + "loss": 0.5271, + "step": 105 + }, + { + "epoch": 1.6535433070866141, + "eval_accuracy": 0.7933194154488518, + "eval_loss": 0.41489914059638977, + "eval_runtime": 108.7149, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 105 + }, + { + "epoch": 1.6692913385826773, + "grad_norm": 3.4253950119018555, + "learning_rate": 8.783068783068783e-06, + "loss": 0.4482, + "step": 106 + }, + { + "epoch": 1.6850393700787403, + "grad_norm": 3.2515480518341064, + "learning_rate": 8.677248677248678e-06, + "loss": 0.5227, + "step": 107 + }, + { + "epoch": 1.7007874015748032, + "grad_norm": 3.6166484355926514, + "learning_rate": 8.571428571428571e-06, + "loss": 0.4545, + "step": 108 + }, + { + "epoch": 1.7165354330708662, + "grad_norm": 2.7220921516418457, + "learning_rate": 8.465608465608466e-06, + "loss": 0.3609, + "step": 109 + }, + { + "epoch": 1.7322834645669292, + "grad_norm": 2.6449429988861084, + "learning_rate": 8.35978835978836e-06, + "loss": 0.3463, + "step": 110 + }, + { + "epoch": 1.7322834645669292, + "eval_accuracy": 0.7974947807933194, + "eval_loss": 0.41358983516693115, + "eval_runtime": 108.7211, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 110 + }, + { + "epoch": 1.7480314960629921, + "grad_norm": 3.9665286540985107, + "learning_rate": 8.253968253968254e-06, + "loss": 0.5416, + "step": 111 + }, + { + "epoch": 1.763779527559055, + "grad_norm": 3.658632516860962, + "learning_rate": 8.148148148148148e-06, + "loss": 0.423, + "step": 112 + }, + { + "epoch": 1.779527559055118, + "grad_norm": 3.2784206867218018, + "learning_rate": 8.042328042328043e-06, + "loss": 0.4253, + "step": 113 + }, + { + "epoch": 1.795275590551181, + "grad_norm": 2.654160737991333, + "learning_rate": 7.936507936507936e-06, + "loss": 0.4836, + "step": 114 + }, + { + "epoch": 1.811023622047244, + "grad_norm": 4.960519313812256, + "learning_rate": 7.830687830687831e-06, + "loss": 0.4867, + "step": 115 + }, + { + "epoch": 1.811023622047244, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4127565622329712, + "eval_runtime": 108.7361, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 115 + }, + { + "epoch": 1.826771653543307, + "grad_norm": 2.9885411262512207, + "learning_rate": 7.724867724867726e-06, + "loss": 0.3218, + "step": 116 + }, + { + "epoch": 1.84251968503937, + "grad_norm": 3.868762254714966, + "learning_rate": 7.61904761904762e-06, + "loss": 0.5335, + "step": 117 + }, + { + "epoch": 1.858267716535433, + "grad_norm": 3.111746072769165, + "learning_rate": 7.5132275132275136e-06, + "loss": 0.4498, + "step": 118 + }, + { + "epoch": 1.874015748031496, + "grad_norm": 3.94144868850708, + "learning_rate": 7.4074074074074075e-06, + "loss": 0.3923, + "step": 119 + }, + { + "epoch": 1.889763779527559, + "grad_norm": 3.6796834468841553, + "learning_rate": 7.301587301587301e-06, + "loss": 0.3221, + "step": 120 + }, + { + "epoch": 1.889763779527559, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4124543368816376, + "eval_runtime": 108.7265, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 120 + }, + { + "epoch": 1.905511811023622, + "grad_norm": 3.597407341003418, + "learning_rate": 7.195767195767196e-06, + "loss": 0.5133, + "step": 121 + }, + { + "epoch": 1.9212598425196852, + "grad_norm": 2.6824302673339844, + "learning_rate": 7.08994708994709e-06, + "loss": 0.4173, + "step": 122 + }, + { + "epoch": 1.9370078740157481, + "grad_norm": 3.5606563091278076, + "learning_rate": 6.984126984126984e-06, + "loss": 0.5625, + "step": 123 + }, + { + "epoch": 1.952755905511811, + "grad_norm": 2.981217622756958, + "learning_rate": 6.878306878306879e-06, + "loss": 0.3826, + "step": 124 + }, + { + "epoch": 1.968503937007874, + "grad_norm": 2.708043336868286, + "learning_rate": 6.772486772486773e-06, + "loss": 0.3542, + "step": 125 + }, + { + "epoch": 1.968503937007874, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4116251766681671, + "eval_runtime": 108.7821, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.552, + "step": 125 + }, + { + "epoch": 1.984251968503937, + "grad_norm": 3.5302915573120117, + "learning_rate": 6.666666666666667e-06, + "loss": 0.5818, + "step": 126 + }, + { + "epoch": 2.0, + "grad_norm": 6.032024383544922, + "learning_rate": 6.560846560846561e-06, + "loss": 0.5345, + "step": 127 + }, + { + "epoch": 2.015748031496063, + "grad_norm": 1.919519305229187, + "learning_rate": 6.455026455026455e-06, + "loss": 0.312, + "step": 128 + }, + { + "epoch": 2.031496062992126, + "grad_norm": 3.025320291519165, + "learning_rate": 6.349206349206349e-06, + "loss": 0.4013, + "step": 129 + }, + { + "epoch": 2.047244094488189, + "grad_norm": 4.498544216156006, + "learning_rate": 6.243386243386243e-06, + "loss": 0.5465, + "step": 130 + }, + { + "epoch": 2.047244094488189, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4106995761394501, + "eval_runtime": 108.7564, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 130 + }, + { + "epoch": 2.062992125984252, + "grad_norm": 4.507568359375, + "learning_rate": 6.137566137566138e-06, + "loss": 0.4377, + "step": 131 + }, + { + "epoch": 2.078740157480315, + "grad_norm": 3.309709310531616, + "learning_rate": 6.031746031746032e-06, + "loss": 0.4066, + "step": 132 + }, + { + "epoch": 2.094488188976378, + "grad_norm": 3.5611140727996826, + "learning_rate": 5.925925925925926e-06, + "loss": 0.4138, + "step": 133 + }, + { + "epoch": 2.1102362204724407, + "grad_norm": 5.7032670974731445, + "learning_rate": 5.820105820105821e-06, + "loss": 0.592, + "step": 134 + }, + { + "epoch": 2.1259842519685037, + "grad_norm": 2.3446578979492188, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.3427, + "step": 135 + }, + { + "epoch": 2.1259842519685037, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.41008052229881287, + "eval_runtime": 108.7205, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 135 + }, + { + "epoch": 2.141732283464567, + "grad_norm": 3.090672254562378, + "learning_rate": 5.6084656084656084e-06, + "loss": 0.3996, + "step": 136 + }, + { + "epoch": 2.15748031496063, + "grad_norm": 2.8654699325561523, + "learning_rate": 5.502645502645503e-06, + "loss": 0.3874, + "step": 137 + }, + { + "epoch": 2.173228346456693, + "grad_norm": 2.6311564445495605, + "learning_rate": 5.396825396825397e-06, + "loss": 0.4439, + "step": 138 + }, + { + "epoch": 2.188976377952756, + "grad_norm": 3.5829622745513916, + "learning_rate": 5.291005291005291e-06, + "loss": 0.4736, + "step": 139 + }, + { + "epoch": 2.204724409448819, + "grad_norm": 3.072617769241333, + "learning_rate": 5.185185185185185e-06, + "loss": 0.4787, + "step": 140 + }, + { + "epoch": 2.204724409448819, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.40874549746513367, + "eval_runtime": 108.7212, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 140 + }, + { + "epoch": 2.220472440944882, + "grad_norm": 3.076615571975708, + "learning_rate": 5.07936507936508e-06, + "loss": 0.3407, + "step": 141 + }, + { + "epoch": 2.236220472440945, + "grad_norm": 3.896620273590088, + "learning_rate": 4.973544973544974e-06, + "loss": 0.2494, + "step": 142 + }, + { + "epoch": 2.251968503937008, + "grad_norm": 2.670351266860962, + "learning_rate": 4.867724867724868e-06, + "loss": 0.2989, + "step": 143 + }, + { + "epoch": 2.267716535433071, + "grad_norm": 3.2045724391937256, + "learning_rate": 4.761904761904762e-06, + "loss": 0.5389, + "step": 144 + }, + { + "epoch": 2.283464566929134, + "grad_norm": 2.343071699142456, + "learning_rate": 4.656084656084656e-06, + "loss": 0.4229, + "step": 145 + }, + { + "epoch": 2.283464566929134, + "eval_accuracy": 0.8016701461377871, + "eval_loss": 0.40726396441459656, + "eval_runtime": 108.722, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 145 + }, + { + "epoch": 2.2992125984251968, + "grad_norm": 4.241441249847412, + "learning_rate": 4.55026455026455e-06, + "loss": 0.3901, + "step": 146 + }, + { + "epoch": 2.3149606299212597, + "grad_norm": 3.265329360961914, + "learning_rate": 4.444444444444444e-06, + "loss": 0.5216, + "step": 147 + }, + { + "epoch": 2.3307086614173227, + "grad_norm": 3.340651273727417, + "learning_rate": 4.338624338624339e-06, + "loss": 0.4494, + "step": 148 + }, + { + "epoch": 2.3464566929133857, + "grad_norm": 3.6961324214935303, + "learning_rate": 4.232804232804233e-06, + "loss": 0.5125, + "step": 149 + }, + { + "epoch": 2.362204724409449, + "grad_norm": 3.192697286605835, + "learning_rate": 4.126984126984127e-06, + "loss": 0.4514, + "step": 150 + }, + { + "epoch": 2.362204724409449, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.40633270144462585, + "eval_runtime": 108.7184, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 150 + }, + { + "epoch": 2.377952755905512, + "grad_norm": 2.322136878967285, + "learning_rate": 4.0211640211640215e-06, + "loss": 0.2585, + "step": 151 + }, + { + "epoch": 2.393700787401575, + "grad_norm": 3.902898073196411, + "learning_rate": 3.9153439153439155e-06, + "loss": 0.4682, + "step": 152 + }, + { + "epoch": 2.409448818897638, + "grad_norm": 4.637646675109863, + "learning_rate": 3.80952380952381e-06, + "loss": 0.4195, + "step": 153 + }, + { + "epoch": 2.425196850393701, + "grad_norm": 3.0741162300109863, + "learning_rate": 3.7037037037037037e-06, + "loss": 0.3182, + "step": 154 + }, + { + "epoch": 2.440944881889764, + "grad_norm": 3.675708055496216, + "learning_rate": 3.597883597883598e-06, + "loss": 0.5116, + "step": 155 + }, + { + "epoch": 2.440944881889764, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.40514081716537476, + "eval_runtime": 108.7241, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 155 + }, + { + "epoch": 2.456692913385827, + "grad_norm": 5.543003082275391, + "learning_rate": 3.492063492063492e-06, + "loss": 0.4976, + "step": 156 + }, + { + "epoch": 2.47244094488189, + "grad_norm": 4.1292595863342285, + "learning_rate": 3.3862433862433864e-06, + "loss": 0.415, + "step": 157 + }, + { + "epoch": 2.4881889763779528, + "grad_norm": 4.066864967346191, + "learning_rate": 3.2804232804232807e-06, + "loss": 0.4172, + "step": 158 + }, + { + "epoch": 2.5039370078740157, + "grad_norm": 3.1275038719177246, + "learning_rate": 3.1746031746031746e-06, + "loss": 0.4113, + "step": 159 + }, + { + "epoch": 2.5196850393700787, + "grad_norm": 4.276533603668213, + "learning_rate": 3.068783068783069e-06, + "loss": 0.3234, + "step": 160 + }, + { + "epoch": 2.5196850393700787, + "eval_accuracy": 0.8058455114822547, + "eval_loss": 0.40446344017982483, + "eval_runtime": 108.7229, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 160 + }, + { + "epoch": 2.5354330708661417, + "grad_norm": 5.183084011077881, + "learning_rate": 2.962962962962963e-06, + "loss": 0.5434, + "step": 161 + }, + { + "epoch": 2.5511811023622046, + "grad_norm": 2.6602373123168945, + "learning_rate": 2.8571428571428573e-06, + "loss": 0.3082, + "step": 162 + }, + { + "epoch": 2.5669291338582676, + "grad_norm": 3.0452637672424316, + "learning_rate": 2.7513227513227516e-06, + "loss": 0.3836, + "step": 163 + }, + { + "epoch": 2.5826771653543306, + "grad_norm": 2.222245931625366, + "learning_rate": 2.6455026455026455e-06, + "loss": 0.3992, + "step": 164 + }, + { + "epoch": 2.5984251968503935, + "grad_norm": 3.8691773414611816, + "learning_rate": 2.53968253968254e-06, + "loss": 0.3993, + "step": 165 + }, + { + "epoch": 2.5984251968503935, + "eval_accuracy": 0.8058455114822547, + "eval_loss": 0.40404006838798523, + "eval_runtime": 108.7316, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 165 + }, + { + "epoch": 2.6141732283464565, + "grad_norm": 3.0435469150543213, + "learning_rate": 2.433862433862434e-06, + "loss": 0.4177, + "step": 166 + }, + { + "epoch": 2.6299212598425195, + "grad_norm": 2.3059229850769043, + "learning_rate": 2.328042328042328e-06, + "loss": 0.2918, + "step": 167 + }, + { + "epoch": 2.6456692913385824, + "grad_norm": 3.192077875137329, + "learning_rate": 2.222222222222222e-06, + "loss": 0.3203, + "step": 168 + }, + { + "epoch": 2.661417322834646, + "grad_norm": 3.519620418548584, + "learning_rate": 2.1164021164021164e-06, + "loss": 0.2996, + "step": 169 + }, + { + "epoch": 2.677165354330709, + "grad_norm": 3.1694862842559814, + "learning_rate": 2.0105820105820108e-06, + "loss": 0.3264, + "step": 170 + }, + { + "epoch": 2.677165354330709, + "eval_accuracy": 0.8058455114822547, + "eval_loss": 0.40368661284446716, + "eval_runtime": 108.7143, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 170 + }, + { + "epoch": 2.6929133858267718, + "grad_norm": 3.3654487133026123, + "learning_rate": 1.904761904761905e-06, + "loss": 0.3288, + "step": 171 + }, + { + "epoch": 2.7086614173228347, + "grad_norm": 4.05765438079834, + "learning_rate": 1.798941798941799e-06, + "loss": 0.4648, + "step": 172 + }, + { + "epoch": 2.7244094488188977, + "grad_norm": 4.653472900390625, + "learning_rate": 1.6931216931216932e-06, + "loss": 0.5563, + "step": 173 + }, + { + "epoch": 2.7401574803149606, + "grad_norm": 4.682580471038818, + "learning_rate": 1.5873015873015873e-06, + "loss": 0.4531, + "step": 174 + }, + { + "epoch": 2.7559055118110236, + "grad_norm": 2.616373300552368, + "learning_rate": 1.4814814814814815e-06, + "loss": 0.3316, + "step": 175 + }, + { + "epoch": 2.7559055118110236, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.4034806787967682, + "eval_runtime": 108.7256, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 175 + }, + { + "epoch": 2.7716535433070866, + "grad_norm": 4.228743076324463, + "learning_rate": 1.3756613756613758e-06, + "loss": 0.5363, + "step": 176 + }, + { + "epoch": 2.7874015748031495, + "grad_norm": 4.0345845222473145, + "learning_rate": 1.26984126984127e-06, + "loss": 0.3705, + "step": 177 + }, + { + "epoch": 2.8031496062992125, + "grad_norm": 3.482527494430542, + "learning_rate": 1.164021164021164e-06, + "loss": 0.3347, + "step": 178 + }, + { + "epoch": 2.8188976377952755, + "grad_norm": 2.3308494091033936, + "learning_rate": 1.0582010582010582e-06, + "loss": 0.3579, + "step": 179 + }, + { + "epoch": 2.8346456692913384, + "grad_norm": 4.280152797698975, + "learning_rate": 9.523809523809525e-07, + "loss": 0.4855, + "step": 180 + }, + { + "epoch": 2.8346456692913384, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.4035497307777405, + "eval_runtime": 108.7255, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 180 + } + ], + "logging_steps": 1.0, + "max_steps": 189, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-180/training_args.bin b/checkpoint-180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..594d4dba0d053d5c7bfd636fe070834e9478eee4 --- /dev/null +++ b/checkpoint-180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e8eb4d94fcecbd8ab0aa24ee61662b7a5da2eef5f366546a325524fc03e575 +size 5112 diff --git a/checkpoint-189/README.md b/checkpoint-189/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a53ea425933059074716d64973f94edb73a8eb80 --- /dev/null +++ b/checkpoint-189/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/checkpoint-189/adapter_config.json b/checkpoint-189/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e788c33bde47908737d80fe730414dabf5e14ea --- /dev/null +++ b/checkpoint-189/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj" + ], + "task_type": "SEQ_CLS", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-189/adapter_model.safetensors b/checkpoint-189/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4ca4edbf83a134d5a78b4d59470a3d59c4c1b58 --- /dev/null +++ b/checkpoint-189/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f43bfb892b5ea91afa7f86c8bada7be90677e5c70b940a97a654b8d9d8033873 +size 57249936 diff --git a/checkpoint-189/optimizer.pt b/checkpoint-189/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4eb02610f99211c624c1415b8e731c187e6a22f9 --- /dev/null +++ b/checkpoint-189/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:728bbbbaa90d1b7b2b0a260f25cb73ccbf87ce05fe3f3cc11ad60e48a494fe47 +size 114624506 diff --git a/checkpoint-189/rng_state.pth b/checkpoint-189/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e1e6e821a3fe76adbaf50e1ba4c9c21a7df58bbf --- /dev/null +++ b/checkpoint-189/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:425537aba3077f51ea7fba5fabf5e86d95cccd1052e7da0102dc4813cc9d6cc2 +size 14244 diff --git a/checkpoint-189/scheduler.pt b/checkpoint-189/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1eea70e235e5919b790335a7caeb1baf3bb143b --- /dev/null +++ b/checkpoint-189/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d394d7525855a5e22bbd60f0290ae7de0d8e65c174c5d85d32a746866a18f755 +size 1064 diff --git a/checkpoint-189/special_tokens_map.json b/checkpoint-189/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-189/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-189/tokenizer.json b/checkpoint-189/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/checkpoint-189/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/checkpoint-189/tokenizer_config.json b/checkpoint-189/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..50e79ef5e39127ac3280c4f578d33786f5afbb5c --- /dev/null +++ b/checkpoint-189/tokenizer_config.json @@ -0,0 +1,1756 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-189/trainer_state.json b/checkpoint-189/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d225300290eeea5d170fc51b099b19818944aa9a --- /dev/null +++ b/checkpoint-189/trainer_state.json @@ -0,0 +1,1689 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9763779527559056, + "eval_steps": 5, + "global_step": 189, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015748031496062992, + "grad_norm": 5.510926246643066, + "learning_rate": 1.9894179894179895e-05, + "loss": 0.9249, + "step": 1 + }, + { + "epoch": 0.031496062992125984, + "grad_norm": 8.61505126953125, + "learning_rate": 1.978835978835979e-05, + "loss": 0.8445, + "step": 2 + }, + { + "epoch": 0.047244094488188976, + "grad_norm": 7.036591529846191, + "learning_rate": 1.9682539682539684e-05, + "loss": 0.9654, + "step": 3 + }, + { + "epoch": 0.06299212598425197, + "grad_norm": 5.803933143615723, + "learning_rate": 1.9576719576719577e-05, + "loss": 0.9276, + "step": 4 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 5.716428756713867, + "learning_rate": 1.947089947089947e-05, + "loss": 0.9241, + "step": 5 + }, + { + "epoch": 0.07874015748031496, + "eval_accuracy": 0.5678496868475992, + "eval_loss": 0.6996241807937622, + "eval_runtime": 108.7291, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 5 + }, + { + "epoch": 0.09448818897637795, + "grad_norm": 7.55866813659668, + "learning_rate": 1.9365079365079367e-05, + "loss": 0.9947, + "step": 6 + }, + { + "epoch": 0.11023622047244094, + "grad_norm": 6.801171779632568, + "learning_rate": 1.925925925925926e-05, + "loss": 0.972, + "step": 7 + }, + { + "epoch": 0.12598425196850394, + "grad_norm": 4.845946311950684, + "learning_rate": 1.9153439153439156e-05, + "loss": 0.6478, + "step": 8 + }, + { + "epoch": 0.14173228346456693, + "grad_norm": 10.487945556640625, + "learning_rate": 1.904761904761905e-05, + "loss": 0.8597, + "step": 9 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 5.452786445617676, + "learning_rate": 1.8941798941798943e-05, + "loss": 0.7708, + "step": 10 + }, + { + "epoch": 0.15748031496062992, + "eval_accuracy": 0.6659707724425887, + "eval_loss": 0.6283570528030396, + "eval_runtime": 108.7155, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 10 + }, + { + "epoch": 0.1732283464566929, + "grad_norm": 4.522532939910889, + "learning_rate": 1.8835978835978836e-05, + "loss": 0.6848, + "step": 11 + }, + { + "epoch": 0.1889763779527559, + "grad_norm": 6.4987688064575195, + "learning_rate": 1.8730158730158732e-05, + "loss": 0.6644, + "step": 12 + }, + { + "epoch": 0.2047244094488189, + "grad_norm": 4.2297682762146, + "learning_rate": 1.8624338624338625e-05, + "loss": 0.7227, + "step": 13 + }, + { + "epoch": 0.2204724409448819, + "grad_norm": 6.5658063888549805, + "learning_rate": 1.851851851851852e-05, + "loss": 0.6991, + "step": 14 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 6.549685001373291, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.7875, + "step": 15 + }, + { + "epoch": 0.23622047244094488, + "eval_accuracy": 0.7244258872651357, + "eval_loss": 0.5749094486236572, + "eval_runtime": 108.7121, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 15 + }, + { + "epoch": 0.25196850393700787, + "grad_norm": 3.6349198818206787, + "learning_rate": 1.8306878306878308e-05, + "loss": 0.5732, + "step": 16 + }, + { + "epoch": 0.2677165354330709, + "grad_norm": 4.741979598999023, + "learning_rate": 1.82010582010582e-05, + "loss": 0.5774, + "step": 17 + }, + { + "epoch": 0.28346456692913385, + "grad_norm": 4.751223087310791, + "learning_rate": 1.8095238095238097e-05, + "loss": 0.5738, + "step": 18 + }, + { + "epoch": 0.2992125984251969, + "grad_norm": 5.214819431304932, + "learning_rate": 1.798941798941799e-05, + "loss": 0.7182, + "step": 19 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 5.566962718963623, + "learning_rate": 1.7883597883597884e-05, + "loss": 0.6575, + "step": 20 + }, + { + "epoch": 0.31496062992125984, + "eval_accuracy": 0.7390396659707724, + "eval_loss": 0.5360159873962402, + "eval_runtime": 108.7252, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 20 + }, + { + "epoch": 0.33070866141732286, + "grad_norm": 4.060683727264404, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.5976, + "step": 21 + }, + { + "epoch": 0.3464566929133858, + "grad_norm": 5.9868621826171875, + "learning_rate": 1.7671957671957673e-05, + "loss": 0.7734, + "step": 22 + }, + { + "epoch": 0.36220472440944884, + "grad_norm": 3.4295496940612793, + "learning_rate": 1.7566137566137566e-05, + "loss": 0.5543, + "step": 23 + }, + { + "epoch": 0.3779527559055118, + "grad_norm": 4.587719917297363, + "learning_rate": 1.7460317460317463e-05, + "loss": 0.6497, + "step": 24 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 4.520890235900879, + "learning_rate": 1.7354497354497356e-05, + "loss": 0.6802, + "step": 25 + }, + { + "epoch": 0.3937007874015748, + "eval_accuracy": 0.7432150313152401, + "eval_loss": 0.5086582899093628, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 25 + }, + { + "epoch": 0.4094488188976378, + "grad_norm": 3.653116464614868, + "learning_rate": 1.724867724867725e-05, + "loss": 0.4888, + "step": 26 + }, + { + "epoch": 0.4251968503937008, + "grad_norm": 4.042315483093262, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6004, + "step": 27 + }, + { + "epoch": 0.4409448818897638, + "grad_norm": 5.317520618438721, + "learning_rate": 1.7037037037037038e-05, + "loss": 0.6253, + "step": 28 + }, + { + "epoch": 0.4566929133858268, + "grad_norm": 3.8642020225524902, + "learning_rate": 1.693121693121693e-05, + "loss": 0.5778, + "step": 29 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 2.3941361904144287, + "learning_rate": 1.6825396825396828e-05, + "loss": 0.3982, + "step": 30 + }, + { + "epoch": 0.47244094488188976, + "eval_accuracy": 0.7578288100208769, + "eval_loss": 0.4889708459377289, + "eval_runtime": 108.7183, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 30 + }, + { + "epoch": 0.4881889763779528, + "grad_norm": 4.1248650550842285, + "learning_rate": 1.671957671957672e-05, + "loss": 0.777, + "step": 31 + }, + { + "epoch": 0.5039370078740157, + "grad_norm": 3.369483470916748, + "learning_rate": 1.6613756613756614e-05, + "loss": 0.5675, + "step": 32 + }, + { + "epoch": 0.5196850393700787, + "grad_norm": 3.8457119464874268, + "learning_rate": 1.6507936507936507e-05, + "loss": 0.6227, + "step": 33 + }, + { + "epoch": 0.5354330708661418, + "grad_norm": 4.809354782104492, + "learning_rate": 1.6402116402116404e-05, + "loss": 0.7111, + "step": 34 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 2.84769868850708, + "learning_rate": 1.6296296296296297e-05, + "loss": 0.4555, + "step": 35 + }, + { + "epoch": 0.5511811023622047, + "eval_accuracy": 0.7599164926931107, + "eval_loss": 0.4774630665779114, + "eval_runtime": 108.7145, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 35 + }, + { + "epoch": 0.5669291338582677, + "grad_norm": 4.496406555175781, + "learning_rate": 1.6190476190476193e-05, + "loss": 0.6703, + "step": 36 + }, + { + "epoch": 0.5826771653543307, + "grad_norm": 5.721245288848877, + "learning_rate": 1.6084656084656086e-05, + "loss": 0.7066, + "step": 37 + }, + { + "epoch": 0.5984251968503937, + "grad_norm": 4.494580268859863, + "learning_rate": 1.597883597883598e-05, + "loss": 0.4907, + "step": 38 + }, + { + "epoch": 0.6141732283464567, + "grad_norm": 2.8905560970306396, + "learning_rate": 1.5873015873015872e-05, + "loss": 0.5501, + "step": 39 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 9.776362419128418, + "learning_rate": 1.576719576719577e-05, + "loss": 0.8838, + "step": 40 + }, + { + "epoch": 0.6299212598425197, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.46829721331596375, + "eval_runtime": 108.7189, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 40 + }, + { + "epoch": 0.6456692913385826, + "grad_norm": 3.8481881618499756, + "learning_rate": 1.5661375661375662e-05, + "loss": 0.5309, + "step": 41 + }, + { + "epoch": 0.6614173228346457, + "grad_norm": 6.0327839851379395, + "learning_rate": 1.555555555555556e-05, + "loss": 0.6414, + "step": 42 + }, + { + "epoch": 0.6771653543307087, + "grad_norm": 4.993657112121582, + "learning_rate": 1.544973544973545e-05, + "loss": 0.5727, + "step": 43 + }, + { + "epoch": 0.6929133858267716, + "grad_norm": 4.3265252113342285, + "learning_rate": 1.5343915343915344e-05, + "loss": 0.4913, + "step": 44 + }, + { + "epoch": 0.7086614173228346, + "grad_norm": 3.6012353897094727, + "learning_rate": 1.523809523809524e-05, + "loss": 0.4692, + "step": 45 + }, + { + "epoch": 0.7086614173228346, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4610559344291687, + "eval_runtime": 108.7229, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 45 + }, + { + "epoch": 0.7244094488188977, + "grad_norm": 4.319406509399414, + "learning_rate": 1.5132275132275134e-05, + "loss": 0.5203, + "step": 46 + }, + { + "epoch": 0.7401574803149606, + "grad_norm": 3.885263442993164, + "learning_rate": 1.5026455026455027e-05, + "loss": 0.5084, + "step": 47 + }, + { + "epoch": 0.7559055118110236, + "grad_norm": 3.547327995300293, + "learning_rate": 1.4920634920634922e-05, + "loss": 0.442, + "step": 48 + }, + { + "epoch": 0.7716535433070866, + "grad_norm": 3.8868982791900635, + "learning_rate": 1.4814814814814815e-05, + "loss": 0.5848, + "step": 49 + }, + { + "epoch": 0.7874015748031497, + "grad_norm": 2.222346544265747, + "learning_rate": 1.470899470899471e-05, + "loss": 0.5455, + "step": 50 + }, + { + "epoch": 0.7874015748031497, + "eval_accuracy": 0.7620041753653445, + "eval_loss": 0.4531377851963043, + "eval_runtime": 108.7528, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 50 + }, + { + "epoch": 0.8031496062992126, + "grad_norm": 3.129575252532959, + "learning_rate": 1.4603174603174603e-05, + "loss": 0.4861, + "step": 51 + }, + { + "epoch": 0.8188976377952756, + "grad_norm": 4.924710750579834, + "learning_rate": 1.44973544973545e-05, + "loss": 0.5782, + "step": 52 + }, + { + "epoch": 0.8346456692913385, + "grad_norm": 5.2157182693481445, + "learning_rate": 1.4391534391534392e-05, + "loss": 0.7203, + "step": 53 + }, + { + "epoch": 0.8503937007874016, + "grad_norm": 4.697371959686279, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.4261, + "step": 54 + }, + { + "epoch": 0.8661417322834646, + "grad_norm": 2.8899056911468506, + "learning_rate": 1.417989417989418e-05, + "loss": 0.5696, + "step": 55 + }, + { + "epoch": 0.8661417322834646, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4459321200847626, + "eval_runtime": 108.7951, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.551, + "step": 55 + }, + { + "epoch": 0.8818897637795275, + "grad_norm": 4.532041072845459, + "learning_rate": 1.4074074074074075e-05, + "loss": 0.5723, + "step": 56 + }, + { + "epoch": 0.8976377952755905, + "grad_norm": 2.3436343669891357, + "learning_rate": 1.3968253968253968e-05, + "loss": 0.3629, + "step": 57 + }, + { + "epoch": 0.9133858267716536, + "grad_norm": 3.333158493041992, + "learning_rate": 1.3862433862433865e-05, + "loss": 0.5433, + "step": 58 + }, + { + "epoch": 0.9291338582677166, + "grad_norm": 4.177884101867676, + "learning_rate": 1.3756613756613758e-05, + "loss": 0.3747, + "step": 59 + }, + { + "epoch": 0.9448818897637795, + "grad_norm": 5.238712310791016, + "learning_rate": 1.3650793650793652e-05, + "loss": 0.7453, + "step": 60 + }, + { + "epoch": 0.9448818897637795, + "eval_accuracy": 0.7766179540709812, + "eval_loss": 0.4413756728172302, + "eval_runtime": 108.7463, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 60 + }, + { + "epoch": 0.9606299212598425, + "grad_norm": 4.022979736328125, + "learning_rate": 1.3544973544973545e-05, + "loss": 0.6177, + "step": 61 + }, + { + "epoch": 0.9763779527559056, + "grad_norm": 2.0528969764709473, + "learning_rate": 1.343915343915344e-05, + "loss": 0.3505, + "step": 62 + }, + { + "epoch": 0.9921259842519685, + "grad_norm": 3.9705586433410645, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5858, + "step": 63 + }, + { + "epoch": 1.0078740157480315, + "grad_norm": 8.341585159301758, + "learning_rate": 1.322751322751323e-05, + "loss": 0.6721, + "step": 64 + }, + { + "epoch": 1.0236220472440944, + "grad_norm": 4.031370162963867, + "learning_rate": 1.3121693121693123e-05, + "loss": 0.5369, + "step": 65 + }, + { + "epoch": 1.0236220472440944, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.43705105781555176, + "eval_runtime": 108.7278, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 65 + }, + { + "epoch": 1.0393700787401574, + "grad_norm": 2.898926019668579, + "learning_rate": 1.3015873015873018e-05, + "loss": 0.3628, + "step": 66 + }, + { + "epoch": 1.0551181102362204, + "grad_norm": 2.9200918674468994, + "learning_rate": 1.291005291005291e-05, + "loss": 0.3311, + "step": 67 + }, + { + "epoch": 1.0708661417322836, + "grad_norm": 4.506103992462158, + "learning_rate": 1.2804232804232805e-05, + "loss": 0.5813, + "step": 68 + }, + { + "epoch": 1.0866141732283465, + "grad_norm": 4.187809944152832, + "learning_rate": 1.2698412698412699e-05, + "loss": 0.4802, + "step": 69 + }, + { + "epoch": 1.1023622047244095, + "grad_norm": 3.5520920753479004, + "learning_rate": 1.2592592592592593e-05, + "loss": 0.3994, + "step": 70 + }, + { + "epoch": 1.1023622047244095, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.43335652351379395, + "eval_runtime": 108.738, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 70 + }, + { + "epoch": 1.1181102362204725, + "grad_norm": 3.6081998348236084, + "learning_rate": 1.2486772486772486e-05, + "loss": 0.5266, + "step": 71 + }, + { + "epoch": 1.1338582677165354, + "grad_norm": 3.6554276943206787, + "learning_rate": 1.2380952380952383e-05, + "loss": 0.5231, + "step": 72 + }, + { + "epoch": 1.1496062992125984, + "grad_norm": 3.551367998123169, + "learning_rate": 1.2275132275132276e-05, + "loss": 0.4538, + "step": 73 + }, + { + "epoch": 1.1653543307086613, + "grad_norm": 4.252958297729492, + "learning_rate": 1.216931216931217e-05, + "loss": 0.4688, + "step": 74 + }, + { + "epoch": 1.1811023622047245, + "grad_norm": 4.337672710418701, + "learning_rate": 1.2063492063492064e-05, + "loss": 0.4235, + "step": 75 + }, + { + "epoch": 1.1811023622047245, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.429808109998703, + "eval_runtime": 108.723, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 75 + }, + { + "epoch": 1.1968503937007875, + "grad_norm": 2.607356548309326, + "learning_rate": 1.1957671957671959e-05, + "loss": 0.3639, + "step": 76 + }, + { + "epoch": 1.2125984251968505, + "grad_norm": 3.198551654815674, + "learning_rate": 1.1851851851851852e-05, + "loss": 0.4066, + "step": 77 + }, + { + "epoch": 1.2283464566929134, + "grad_norm": 4.820532321929932, + "learning_rate": 1.1746031746031748e-05, + "loss": 0.5906, + "step": 78 + }, + { + "epoch": 1.2440944881889764, + "grad_norm": 3.5706419944763184, + "learning_rate": 1.1640211640211641e-05, + "loss": 0.5065, + "step": 79 + }, + { + "epoch": 1.2598425196850394, + "grad_norm": 4.763455867767334, + "learning_rate": 1.1534391534391536e-05, + "loss": 0.4811, + "step": 80 + }, + { + "epoch": 1.2598425196850394, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4265703856945038, + "eval_runtime": 108.7158, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 80 + }, + { + "epoch": 1.2755905511811023, + "grad_norm": 5.053676605224609, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.5317, + "step": 81 + }, + { + "epoch": 1.2913385826771653, + "grad_norm": 4.484920024871826, + "learning_rate": 1.1322751322751324e-05, + "loss": 0.5474, + "step": 82 + }, + { + "epoch": 1.3070866141732282, + "grad_norm": 4.059377193450928, + "learning_rate": 1.1216931216931217e-05, + "loss": 0.4936, + "step": 83 + }, + { + "epoch": 1.3228346456692912, + "grad_norm": 4.017063617706299, + "learning_rate": 1.1111111111111113e-05, + "loss": 0.3574, + "step": 84 + }, + { + "epoch": 1.3385826771653544, + "grad_norm": 3.275650978088379, + "learning_rate": 1.1005291005291006e-05, + "loss": 0.5072, + "step": 85 + }, + { + "epoch": 1.3385826771653544, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4252822697162628, + "eval_runtime": 108.7176, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 85 + }, + { + "epoch": 1.3543307086614174, + "grad_norm": 5.258458614349365, + "learning_rate": 1.0899470899470901e-05, + "loss": 0.6735, + "step": 86 + }, + { + "epoch": 1.3700787401574803, + "grad_norm": 3.070061445236206, + "learning_rate": 1.0793650793650794e-05, + "loss": 0.4365, + "step": 87 + }, + { + "epoch": 1.3858267716535433, + "grad_norm": 3.556974172592163, + "learning_rate": 1.0687830687830689e-05, + "loss": 0.5113, + "step": 88 + }, + { + "epoch": 1.4015748031496063, + "grad_norm": 2.5072743892669678, + "learning_rate": 1.0582010582010582e-05, + "loss": 0.286, + "step": 89 + }, + { + "epoch": 1.4173228346456692, + "grad_norm": 4.407125949859619, + "learning_rate": 1.0476190476190477e-05, + "loss": 0.4405, + "step": 90 + }, + { + "epoch": 1.4173228346456692, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.42280957102775574, + "eval_runtime": 108.7146, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 90 + }, + { + "epoch": 1.4330708661417324, + "grad_norm": 6.310215473175049, + "learning_rate": 1.037037037037037e-05, + "loss": 0.6205, + "step": 91 + }, + { + "epoch": 1.4488188976377954, + "grad_norm": 3.586291551589966, + "learning_rate": 1.0264550264550266e-05, + "loss": 0.4571, + "step": 92 + }, + { + "epoch": 1.4645669291338583, + "grad_norm": 4.950135707855225, + "learning_rate": 1.015873015873016e-05, + "loss": 0.7159, + "step": 93 + }, + { + "epoch": 1.4803149606299213, + "grad_norm": 2.9908485412597656, + "learning_rate": 1.0052910052910054e-05, + "loss": 0.4769, + "step": 94 + }, + { + "epoch": 1.4960629921259843, + "grad_norm": 4.945335865020752, + "learning_rate": 9.947089947089947e-06, + "loss": 0.5349, + "step": 95 + }, + { + "epoch": 1.4960629921259843, + "eval_accuracy": 0.7870563674321504, + "eval_loss": 0.41962236166000366, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 95 + }, + { + "epoch": 1.5118110236220472, + "grad_norm": 4.648383140563965, + "learning_rate": 9.841269841269842e-06, + "loss": 0.5035, + "step": 96 + }, + { + "epoch": 1.5275590551181102, + "grad_norm": 4.447684288024902, + "learning_rate": 9.735449735449735e-06, + "loss": 0.5128, + "step": 97 + }, + { + "epoch": 1.5433070866141732, + "grad_norm": 3.652973175048828, + "learning_rate": 9.62962962962963e-06, + "loss": 0.3454, + "step": 98 + }, + { + "epoch": 1.5590551181102361, + "grad_norm": 3.083529472351074, + "learning_rate": 9.523809523809525e-06, + "loss": 0.4522, + "step": 99 + }, + { + "epoch": 1.574803149606299, + "grad_norm": 2.6377124786376953, + "learning_rate": 9.417989417989418e-06, + "loss": 0.3342, + "step": 100 + }, + { + "epoch": 1.574803149606299, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.4169768989086151, + "eval_runtime": 108.7386, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 100 + }, + { + "epoch": 1.590551181102362, + "grad_norm": 5.530861854553223, + "learning_rate": 9.312169312169313e-06, + "loss": 0.6154, + "step": 101 + }, + { + "epoch": 1.6062992125984252, + "grad_norm": 2.849217176437378, + "learning_rate": 9.206349206349207e-06, + "loss": 0.3823, + "step": 102 + }, + { + "epoch": 1.6220472440944882, + "grad_norm": 2.8741447925567627, + "learning_rate": 9.1005291005291e-06, + "loss": 0.2884, + "step": 103 + }, + { + "epoch": 1.6377952755905512, + "grad_norm": 5.099402904510498, + "learning_rate": 8.994708994708995e-06, + "loss": 0.4426, + "step": 104 + }, + { + "epoch": 1.6535433070866141, + "grad_norm": 3.130911350250244, + "learning_rate": 8.888888888888888e-06, + "loss": 0.5271, + "step": 105 + }, + { + "epoch": 1.6535433070866141, + "eval_accuracy": 0.7933194154488518, + "eval_loss": 0.41489914059638977, + "eval_runtime": 108.7149, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 105 + }, + { + "epoch": 1.6692913385826773, + "grad_norm": 3.4253950119018555, + "learning_rate": 8.783068783068783e-06, + "loss": 0.4482, + "step": 106 + }, + { + "epoch": 1.6850393700787403, + "grad_norm": 3.2515480518341064, + "learning_rate": 8.677248677248678e-06, + "loss": 0.5227, + "step": 107 + }, + { + "epoch": 1.7007874015748032, + "grad_norm": 3.6166484355926514, + "learning_rate": 8.571428571428571e-06, + "loss": 0.4545, + "step": 108 + }, + { + "epoch": 1.7165354330708662, + "grad_norm": 2.7220921516418457, + "learning_rate": 8.465608465608466e-06, + "loss": 0.3609, + "step": 109 + }, + { + "epoch": 1.7322834645669292, + "grad_norm": 2.6449429988861084, + "learning_rate": 8.35978835978836e-06, + "loss": 0.3463, + "step": 110 + }, + { + "epoch": 1.7322834645669292, + "eval_accuracy": 0.7974947807933194, + "eval_loss": 0.41358983516693115, + "eval_runtime": 108.7211, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 110 + }, + { + "epoch": 1.7480314960629921, + "grad_norm": 3.9665286540985107, + "learning_rate": 8.253968253968254e-06, + "loss": 0.5416, + "step": 111 + }, + { + "epoch": 1.763779527559055, + "grad_norm": 3.658632516860962, + "learning_rate": 8.148148148148148e-06, + "loss": 0.423, + "step": 112 + }, + { + "epoch": 1.779527559055118, + "grad_norm": 3.2784206867218018, + "learning_rate": 8.042328042328043e-06, + "loss": 0.4253, + "step": 113 + }, + { + "epoch": 1.795275590551181, + "grad_norm": 2.654160737991333, + "learning_rate": 7.936507936507936e-06, + "loss": 0.4836, + "step": 114 + }, + { + "epoch": 1.811023622047244, + "grad_norm": 4.960519313812256, + "learning_rate": 7.830687830687831e-06, + "loss": 0.4867, + "step": 115 + }, + { + "epoch": 1.811023622047244, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4127565622329712, + "eval_runtime": 108.7361, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 115 + }, + { + "epoch": 1.826771653543307, + "grad_norm": 2.9885411262512207, + "learning_rate": 7.724867724867726e-06, + "loss": 0.3218, + "step": 116 + }, + { + "epoch": 1.84251968503937, + "grad_norm": 3.868762254714966, + "learning_rate": 7.61904761904762e-06, + "loss": 0.5335, + "step": 117 + }, + { + "epoch": 1.858267716535433, + "grad_norm": 3.111746072769165, + "learning_rate": 7.5132275132275136e-06, + "loss": 0.4498, + "step": 118 + }, + { + "epoch": 1.874015748031496, + "grad_norm": 3.94144868850708, + "learning_rate": 7.4074074074074075e-06, + "loss": 0.3923, + "step": 119 + }, + { + "epoch": 1.889763779527559, + "grad_norm": 3.6796834468841553, + "learning_rate": 7.301587301587301e-06, + "loss": 0.3221, + "step": 120 + }, + { + "epoch": 1.889763779527559, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4124543368816376, + "eval_runtime": 108.7265, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 120 + }, + { + "epoch": 1.905511811023622, + "grad_norm": 3.597407341003418, + "learning_rate": 7.195767195767196e-06, + "loss": 0.5133, + "step": 121 + }, + { + "epoch": 1.9212598425196852, + "grad_norm": 2.6824302673339844, + "learning_rate": 7.08994708994709e-06, + "loss": 0.4173, + "step": 122 + }, + { + "epoch": 1.9370078740157481, + "grad_norm": 3.5606563091278076, + "learning_rate": 6.984126984126984e-06, + "loss": 0.5625, + "step": 123 + }, + { + "epoch": 1.952755905511811, + "grad_norm": 2.981217622756958, + "learning_rate": 6.878306878306879e-06, + "loss": 0.3826, + "step": 124 + }, + { + "epoch": 1.968503937007874, + "grad_norm": 2.708043336868286, + "learning_rate": 6.772486772486773e-06, + "loss": 0.3542, + "step": 125 + }, + { + "epoch": 1.968503937007874, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4116251766681671, + "eval_runtime": 108.7821, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.552, + "step": 125 + }, + { + "epoch": 1.984251968503937, + "grad_norm": 3.5302915573120117, + "learning_rate": 6.666666666666667e-06, + "loss": 0.5818, + "step": 126 + }, + { + "epoch": 2.0, + "grad_norm": 6.032024383544922, + "learning_rate": 6.560846560846561e-06, + "loss": 0.5345, + "step": 127 + }, + { + "epoch": 2.015748031496063, + "grad_norm": 1.919519305229187, + "learning_rate": 6.455026455026455e-06, + "loss": 0.312, + "step": 128 + }, + { + "epoch": 2.031496062992126, + "grad_norm": 3.025320291519165, + "learning_rate": 6.349206349206349e-06, + "loss": 0.4013, + "step": 129 + }, + { + "epoch": 2.047244094488189, + "grad_norm": 4.498544216156006, + "learning_rate": 6.243386243386243e-06, + "loss": 0.5465, + "step": 130 + }, + { + "epoch": 2.047244094488189, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.4106995761394501, + "eval_runtime": 108.7564, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 130 + }, + { + "epoch": 2.062992125984252, + "grad_norm": 4.507568359375, + "learning_rate": 6.137566137566138e-06, + "loss": 0.4377, + "step": 131 + }, + { + "epoch": 2.078740157480315, + "grad_norm": 3.309709310531616, + "learning_rate": 6.031746031746032e-06, + "loss": 0.4066, + "step": 132 + }, + { + "epoch": 2.094488188976378, + "grad_norm": 3.5611140727996826, + "learning_rate": 5.925925925925926e-06, + "loss": 0.4138, + "step": 133 + }, + { + "epoch": 2.1102362204724407, + "grad_norm": 5.7032670974731445, + "learning_rate": 5.820105820105821e-06, + "loss": 0.592, + "step": 134 + }, + { + "epoch": 2.1259842519685037, + "grad_norm": 2.3446578979492188, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.3427, + "step": 135 + }, + { + "epoch": 2.1259842519685037, + "eval_accuracy": 0.7995824634655533, + "eval_loss": 0.41008052229881287, + "eval_runtime": 108.7205, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 135 + }, + { + "epoch": 2.141732283464567, + "grad_norm": 3.090672254562378, + "learning_rate": 5.6084656084656084e-06, + "loss": 0.3996, + "step": 136 + }, + { + "epoch": 2.15748031496063, + "grad_norm": 2.8654699325561523, + "learning_rate": 5.502645502645503e-06, + "loss": 0.3874, + "step": 137 + }, + { + "epoch": 2.173228346456693, + "grad_norm": 2.6311564445495605, + "learning_rate": 5.396825396825397e-06, + "loss": 0.4439, + "step": 138 + }, + { + "epoch": 2.188976377952756, + "grad_norm": 3.5829622745513916, + "learning_rate": 5.291005291005291e-06, + "loss": 0.4736, + "step": 139 + }, + { + "epoch": 2.204724409448819, + "grad_norm": 3.072617769241333, + "learning_rate": 5.185185185185185e-06, + "loss": 0.4787, + "step": 140 + }, + { + "epoch": 2.204724409448819, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.40874549746513367, + "eval_runtime": 108.7212, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 140 + }, + { + "epoch": 2.220472440944882, + "grad_norm": 3.076615571975708, + "learning_rate": 5.07936507936508e-06, + "loss": 0.3407, + "step": 141 + }, + { + "epoch": 2.236220472440945, + "grad_norm": 3.896620273590088, + "learning_rate": 4.973544973544974e-06, + "loss": 0.2494, + "step": 142 + }, + { + "epoch": 2.251968503937008, + "grad_norm": 2.670351266860962, + "learning_rate": 4.867724867724868e-06, + "loss": 0.2989, + "step": 143 + }, + { + "epoch": 2.267716535433071, + "grad_norm": 3.2045724391937256, + "learning_rate": 4.761904761904762e-06, + "loss": 0.5389, + "step": 144 + }, + { + "epoch": 2.283464566929134, + "grad_norm": 2.343071699142456, + "learning_rate": 4.656084656084656e-06, + "loss": 0.4229, + "step": 145 + }, + { + "epoch": 2.283464566929134, + "eval_accuracy": 0.8016701461377871, + "eval_loss": 0.40726396441459656, + "eval_runtime": 108.722, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 145 + }, + { + "epoch": 2.2992125984251968, + "grad_norm": 4.241441249847412, + "learning_rate": 4.55026455026455e-06, + "loss": 0.3901, + "step": 146 + }, + { + "epoch": 2.3149606299212597, + "grad_norm": 3.265329360961914, + "learning_rate": 4.444444444444444e-06, + "loss": 0.5216, + "step": 147 + }, + { + "epoch": 2.3307086614173227, + "grad_norm": 3.340651273727417, + "learning_rate": 4.338624338624339e-06, + "loss": 0.4494, + "step": 148 + }, + { + "epoch": 2.3464566929133857, + "grad_norm": 3.6961324214935303, + "learning_rate": 4.232804232804233e-06, + "loss": 0.5125, + "step": 149 + }, + { + "epoch": 2.362204724409449, + "grad_norm": 3.192697286605835, + "learning_rate": 4.126984126984127e-06, + "loss": 0.4514, + "step": 150 + }, + { + "epoch": 2.362204724409449, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.40633270144462585, + "eval_runtime": 108.7184, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 150 + }, + { + "epoch": 2.377952755905512, + "grad_norm": 2.322136878967285, + "learning_rate": 4.0211640211640215e-06, + "loss": 0.2585, + "step": 151 + }, + { + "epoch": 2.393700787401575, + "grad_norm": 3.902898073196411, + "learning_rate": 3.9153439153439155e-06, + "loss": 0.4682, + "step": 152 + }, + { + "epoch": 2.409448818897638, + "grad_norm": 4.637646675109863, + "learning_rate": 3.80952380952381e-06, + "loss": 0.4195, + "step": 153 + }, + { + "epoch": 2.425196850393701, + "grad_norm": 3.0741162300109863, + "learning_rate": 3.7037037037037037e-06, + "loss": 0.3182, + "step": 154 + }, + { + "epoch": 2.440944881889764, + "grad_norm": 3.675708055496216, + "learning_rate": 3.597883597883598e-06, + "loss": 0.5116, + "step": 155 + }, + { + "epoch": 2.440944881889764, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.40514081716537476, + "eval_runtime": 108.7241, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 155 + }, + { + "epoch": 2.456692913385827, + "grad_norm": 5.543003082275391, + "learning_rate": 3.492063492063492e-06, + "loss": 0.4976, + "step": 156 + }, + { + "epoch": 2.47244094488189, + "grad_norm": 4.1292595863342285, + "learning_rate": 3.3862433862433864e-06, + "loss": 0.415, + "step": 157 + }, + { + "epoch": 2.4881889763779528, + "grad_norm": 4.066864967346191, + "learning_rate": 3.2804232804232807e-06, + "loss": 0.4172, + "step": 158 + }, + { + "epoch": 2.5039370078740157, + "grad_norm": 3.1275038719177246, + "learning_rate": 3.1746031746031746e-06, + "loss": 0.4113, + "step": 159 + }, + { + "epoch": 2.5196850393700787, + "grad_norm": 4.276533603668213, + "learning_rate": 3.068783068783069e-06, + "loss": 0.3234, + "step": 160 + }, + { + "epoch": 2.5196850393700787, + "eval_accuracy": 0.8058455114822547, + "eval_loss": 0.40446344017982483, + "eval_runtime": 108.7229, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 160 + }, + { + "epoch": 2.5354330708661417, + "grad_norm": 5.183084011077881, + "learning_rate": 2.962962962962963e-06, + "loss": 0.5434, + "step": 161 + }, + { + "epoch": 2.5511811023622046, + "grad_norm": 2.6602373123168945, + "learning_rate": 2.8571428571428573e-06, + "loss": 0.3082, + "step": 162 + }, + { + "epoch": 2.5669291338582676, + "grad_norm": 3.0452637672424316, + "learning_rate": 2.7513227513227516e-06, + "loss": 0.3836, + "step": 163 + }, + { + "epoch": 2.5826771653543306, + "grad_norm": 2.222245931625366, + "learning_rate": 2.6455026455026455e-06, + "loss": 0.3992, + "step": 164 + }, + { + "epoch": 2.5984251968503935, + "grad_norm": 3.8691773414611816, + "learning_rate": 2.53968253968254e-06, + "loss": 0.3993, + "step": 165 + }, + { + "epoch": 2.5984251968503935, + "eval_accuracy": 0.8058455114822547, + "eval_loss": 0.40404006838798523, + "eval_runtime": 108.7316, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 165 + }, + { + "epoch": 2.6141732283464565, + "grad_norm": 3.0435469150543213, + "learning_rate": 2.433862433862434e-06, + "loss": 0.4177, + "step": 166 + }, + { + "epoch": 2.6299212598425195, + "grad_norm": 2.3059229850769043, + "learning_rate": 2.328042328042328e-06, + "loss": 0.2918, + "step": 167 + }, + { + "epoch": 2.6456692913385824, + "grad_norm": 3.192077875137329, + "learning_rate": 2.222222222222222e-06, + "loss": 0.3203, + "step": 168 + }, + { + "epoch": 2.661417322834646, + "grad_norm": 3.519620418548584, + "learning_rate": 2.1164021164021164e-06, + "loss": 0.2996, + "step": 169 + }, + { + "epoch": 2.677165354330709, + "grad_norm": 3.1694862842559814, + "learning_rate": 2.0105820105820108e-06, + "loss": 0.3264, + "step": 170 + }, + { + "epoch": 2.677165354330709, + "eval_accuracy": 0.8058455114822547, + "eval_loss": 0.40368661284446716, + "eval_runtime": 108.7143, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 170 + }, + { + "epoch": 2.6929133858267718, + "grad_norm": 3.3654487133026123, + "learning_rate": 1.904761904761905e-06, + "loss": 0.3288, + "step": 171 + }, + { + "epoch": 2.7086614173228347, + "grad_norm": 4.05765438079834, + "learning_rate": 1.798941798941799e-06, + "loss": 0.4648, + "step": 172 + }, + { + "epoch": 2.7244094488188977, + "grad_norm": 4.653472900390625, + "learning_rate": 1.6931216931216932e-06, + "loss": 0.5563, + "step": 173 + }, + { + "epoch": 2.7401574803149606, + "grad_norm": 4.682580471038818, + "learning_rate": 1.5873015873015873e-06, + "loss": 0.4531, + "step": 174 + }, + { + "epoch": 2.7559055118110236, + "grad_norm": 2.616373300552368, + "learning_rate": 1.4814814814814815e-06, + "loss": 0.3316, + "step": 175 + }, + { + "epoch": 2.7559055118110236, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.4034806787967682, + "eval_runtime": 108.7256, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 175 + }, + { + "epoch": 2.7716535433070866, + "grad_norm": 4.228743076324463, + "learning_rate": 1.3756613756613758e-06, + "loss": 0.5363, + "step": 176 + }, + { + "epoch": 2.7874015748031495, + "grad_norm": 4.0345845222473145, + "learning_rate": 1.26984126984127e-06, + "loss": 0.3705, + "step": 177 + }, + { + "epoch": 2.8031496062992125, + "grad_norm": 3.482527494430542, + "learning_rate": 1.164021164021164e-06, + "loss": 0.3347, + "step": 178 + }, + { + "epoch": 2.8188976377952755, + "grad_norm": 2.3308494091033936, + "learning_rate": 1.0582010582010582e-06, + "loss": 0.3579, + "step": 179 + }, + { + "epoch": 2.8346456692913384, + "grad_norm": 4.280152797698975, + "learning_rate": 9.523809523809525e-07, + "loss": 0.4855, + "step": 180 + }, + { + "epoch": 2.8346456692913384, + "eval_accuracy": 0.8037578288100209, + "eval_loss": 0.4035497307777405, + "eval_runtime": 108.7255, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 180 + }, + { + "epoch": 2.850393700787402, + "grad_norm": 3.315803289413452, + "learning_rate": 8.465608465608466e-07, + "loss": 0.4837, + "step": 181 + }, + { + "epoch": 2.866141732283465, + "grad_norm": 2.9597713947296143, + "learning_rate": 7.407407407407407e-07, + "loss": 0.3521, + "step": 182 + }, + { + "epoch": 2.8818897637795278, + "grad_norm": 4.547348976135254, + "learning_rate": 6.34920634920635e-07, + "loss": 0.4159, + "step": 183 + }, + { + "epoch": 2.8976377952755907, + "grad_norm": 3.0314743518829346, + "learning_rate": 5.291005291005291e-07, + "loss": 0.3792, + "step": 184 + }, + { + "epoch": 2.9133858267716537, + "grad_norm": 4.220534801483154, + "learning_rate": 4.232804232804233e-07, + "loss": 0.536, + "step": 185 + }, + { + "epoch": 2.9133858267716537, + "eval_accuracy": 0.8058455114822547, + "eval_loss": 0.4035782217979431, + "eval_runtime": 108.7202, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 185 + }, + { + "epoch": 2.9291338582677167, + "grad_norm": 3.666822910308838, + "learning_rate": 3.174603174603175e-07, + "loss": 0.2668, + "step": 186 + }, + { + "epoch": 2.9448818897637796, + "grad_norm": 5.5073137283325195, + "learning_rate": 2.1164021164021165e-07, + "loss": 0.4872, + "step": 187 + }, + { + "epoch": 2.9606299212598426, + "grad_norm": 2.3061676025390625, + "learning_rate": 1.0582010582010582e-07, + "loss": 0.3233, + "step": 188 + }, + { + "epoch": 2.9763779527559056, + "grad_norm": 3.6440348625183105, + "learning_rate": 0.0, + "loss": 0.4471, + "step": 189 + } + ], + "logging_steps": 1.0, + "max_steps": 189, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-189/training_args.bin b/checkpoint-189/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..594d4dba0d053d5c7bfd636fe070834e9478eee4 --- /dev/null +++ b/checkpoint-189/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e8eb4d94fcecbd8ab0aa24ee61662b7a5da2eef5f366546a325524fc03e575 +size 5112 diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a53ea425933059074716d64973f94edb73a8eb80 --- /dev/null +++ b/checkpoint-20/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e788c33bde47908737d80fe730414dabf5e14ea --- /dev/null +++ b/checkpoint-20/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj" + ], + "task_type": "SEQ_CLS", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff4bcaa481f567fb91ac8c046bf7ba29e2a86d45 --- /dev/null +++ b/checkpoint-20/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9893f30a9cb7613743f07da410eb3552d6b666da363332a5a99241795acefe9d +size 57249936 diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe90a803a07ea770fd17b4a85f96298bf17c99aa --- /dev/null +++ b/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdc5e40985dc8f943ea8b77942dcf14c2048602819e9ebf43bc86b01aef38075 +size 114624506 diff --git a/checkpoint-20/rng_state.pth b/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4401b64fc3bb7945173b4e9971000e1268e6e0a4 --- /dev/null +++ b/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f18f657ed475aca3667f23a89001edaf7f2c335182203fec1e1fd71cf4479bc +size 14244 diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5f5f98b066aa6ac1c04e7ef3c28693dc5ffe7bf --- /dev/null +++ b/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75fa1daf363a60e375e535145fb28dce54bce28f2f8899ac379c3926c6446eef +size 1064 diff --git a/checkpoint-20/special_tokens_map.json b/checkpoint-20/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-20/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-20/tokenizer.json b/checkpoint-20/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/checkpoint-20/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/checkpoint-20/tokenizer_config.json b/checkpoint-20/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..50e79ef5e39127ac3280c4f578d33786f5afbb5c --- /dev/null +++ b/checkpoint-20/tokenizer_config.json @@ -0,0 +1,1756 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e8e55b342f3e80e5eb3838098b3212a7c9d836e3 --- /dev/null +++ b/checkpoint-20/trainer_state.json @@ -0,0 +1,209 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.31496062992125984, + "eval_steps": 5, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015748031496062992, + "grad_norm": 5.510926246643066, + "learning_rate": 1.9894179894179895e-05, + "loss": 0.9249, + "step": 1 + }, + { + "epoch": 0.031496062992125984, + "grad_norm": 8.61505126953125, + "learning_rate": 1.978835978835979e-05, + "loss": 0.8445, + "step": 2 + }, + { + "epoch": 0.047244094488188976, + "grad_norm": 7.036591529846191, + "learning_rate": 1.9682539682539684e-05, + "loss": 0.9654, + "step": 3 + }, + { + "epoch": 0.06299212598425197, + "grad_norm": 5.803933143615723, + "learning_rate": 1.9576719576719577e-05, + "loss": 0.9276, + "step": 4 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 5.716428756713867, + "learning_rate": 1.947089947089947e-05, + "loss": 0.9241, + "step": 5 + }, + { + "epoch": 0.07874015748031496, + "eval_accuracy": 0.5678496868475992, + "eval_loss": 0.6996241807937622, + "eval_runtime": 108.7291, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 5 + }, + { + "epoch": 0.09448818897637795, + "grad_norm": 7.55866813659668, + "learning_rate": 1.9365079365079367e-05, + "loss": 0.9947, + "step": 6 + }, + { + "epoch": 0.11023622047244094, + "grad_norm": 6.801171779632568, + "learning_rate": 1.925925925925926e-05, + "loss": 0.972, + "step": 7 + }, + { + "epoch": 0.12598425196850394, + "grad_norm": 4.845946311950684, + "learning_rate": 1.9153439153439156e-05, + "loss": 0.6478, + "step": 8 + }, + { + "epoch": 0.14173228346456693, + "grad_norm": 10.487945556640625, + "learning_rate": 1.904761904761905e-05, + "loss": 0.8597, + "step": 9 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 5.452786445617676, + "learning_rate": 1.8941798941798943e-05, + "loss": 0.7708, + "step": 10 + }, + { + "epoch": 0.15748031496062992, + "eval_accuracy": 0.6659707724425887, + "eval_loss": 0.6283570528030396, + "eval_runtime": 108.7155, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 10 + }, + { + "epoch": 0.1732283464566929, + "grad_norm": 4.522532939910889, + "learning_rate": 1.8835978835978836e-05, + "loss": 0.6848, + "step": 11 + }, + { + "epoch": 0.1889763779527559, + "grad_norm": 6.4987688064575195, + "learning_rate": 1.8730158730158732e-05, + "loss": 0.6644, + "step": 12 + }, + { + "epoch": 0.2047244094488189, + "grad_norm": 4.2297682762146, + "learning_rate": 1.8624338624338625e-05, + "loss": 0.7227, + "step": 13 + }, + { + "epoch": 0.2204724409448819, + "grad_norm": 6.5658063888549805, + "learning_rate": 1.851851851851852e-05, + "loss": 0.6991, + "step": 14 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 6.549685001373291, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.7875, + "step": 15 + }, + { + "epoch": 0.23622047244094488, + "eval_accuracy": 0.7244258872651357, + "eval_loss": 0.5749094486236572, + "eval_runtime": 108.7121, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 15 + }, + { + "epoch": 0.25196850393700787, + "grad_norm": 3.6349198818206787, + "learning_rate": 1.8306878306878308e-05, + "loss": 0.5732, + "step": 16 + }, + { + "epoch": 0.2677165354330709, + "grad_norm": 4.741979598999023, + "learning_rate": 1.82010582010582e-05, + "loss": 0.5774, + "step": 17 + }, + { + "epoch": 0.28346456692913385, + "grad_norm": 4.751223087310791, + "learning_rate": 1.8095238095238097e-05, + "loss": 0.5738, + "step": 18 + }, + { + "epoch": 0.2992125984251969, + "grad_norm": 5.214819431304932, + "learning_rate": 1.798941798941799e-05, + "loss": 0.7182, + "step": 19 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 5.566962718963623, + "learning_rate": 1.7883597883597884e-05, + "loss": 0.6575, + "step": 20 + }, + { + "epoch": 0.31496062992125984, + "eval_accuracy": 0.7390396659707724, + "eval_loss": 0.5360159873962402, + "eval_runtime": 108.7252, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 20 + } + ], + "logging_steps": 1.0, + "max_steps": 189, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..594d4dba0d053d5c7bfd636fe070834e9478eee4 --- /dev/null +++ b/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e8eb4d94fcecbd8ab0aa24ee61662b7a5da2eef5f366546a325524fc03e575 +size 5112 diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a53ea425933059074716d64973f94edb73a8eb80 --- /dev/null +++ b/checkpoint-40/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e788c33bde47908737d80fe730414dabf5e14ea --- /dev/null +++ b/checkpoint-40/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj" + ], + "task_type": "SEQ_CLS", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-40/adapter_model.safetensors b/checkpoint-40/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05e60491563177ee700447e9740ee0d2f2e6a565 --- /dev/null +++ b/checkpoint-40/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:734da297e665b33a9e2959b350bcb3fc4bc78d3c8823fdb67544bc507dc39b8e +size 57249936 diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..99e114b63fbd446d34b1a281f8c732be66d8f2f0 --- /dev/null +++ b/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e686c1a3fdb1d8780872f6ecc5cfaa92c12ad9d24cae8ea7742c529c95ab981 +size 114624506 diff --git a/checkpoint-40/rng_state.pth b/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d3138eda43881f8924b17c2c4cb76cd5addd8f94 --- /dev/null +++ b/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc40b4ef328132ff763caef3899e5db015ab3d9602d8a6ee76c5af85f870582 +size 14244 diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb0192ee3ba2c3fd05f98eef02719646693f7d4a --- /dev/null +++ b/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f6bbd05485692ec6eb3052c0c85cc1aba69f783d8dbaf473232092c122a00b +size 1064 diff --git a/checkpoint-40/special_tokens_map.json b/checkpoint-40/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-40/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-40/tokenizer.json b/checkpoint-40/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/checkpoint-40/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/checkpoint-40/tokenizer_config.json b/checkpoint-40/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..50e79ef5e39127ac3280c4f578d33786f5afbb5c --- /dev/null +++ b/checkpoint-40/tokenizer_config.json @@ -0,0 +1,1756 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..25e458bd0036acab68fd9da490dcda46e976109a --- /dev/null +++ b/checkpoint-40/trainer_state.json @@ -0,0 +1,385 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6299212598425197, + "eval_steps": 5, + "global_step": 40, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015748031496062992, + "grad_norm": 5.510926246643066, + "learning_rate": 1.9894179894179895e-05, + "loss": 0.9249, + "step": 1 + }, + { + "epoch": 0.031496062992125984, + "grad_norm": 8.61505126953125, + "learning_rate": 1.978835978835979e-05, + "loss": 0.8445, + "step": 2 + }, + { + "epoch": 0.047244094488188976, + "grad_norm": 7.036591529846191, + "learning_rate": 1.9682539682539684e-05, + "loss": 0.9654, + "step": 3 + }, + { + "epoch": 0.06299212598425197, + "grad_norm": 5.803933143615723, + "learning_rate": 1.9576719576719577e-05, + "loss": 0.9276, + "step": 4 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 5.716428756713867, + "learning_rate": 1.947089947089947e-05, + "loss": 0.9241, + "step": 5 + }, + { + "epoch": 0.07874015748031496, + "eval_accuracy": 0.5678496868475992, + "eval_loss": 0.6996241807937622, + "eval_runtime": 108.7291, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 5 + }, + { + "epoch": 0.09448818897637795, + "grad_norm": 7.55866813659668, + "learning_rate": 1.9365079365079367e-05, + "loss": 0.9947, + "step": 6 + }, + { + "epoch": 0.11023622047244094, + "grad_norm": 6.801171779632568, + "learning_rate": 1.925925925925926e-05, + "loss": 0.972, + "step": 7 + }, + { + "epoch": 0.12598425196850394, + "grad_norm": 4.845946311950684, + "learning_rate": 1.9153439153439156e-05, + "loss": 0.6478, + "step": 8 + }, + { + "epoch": 0.14173228346456693, + "grad_norm": 10.487945556640625, + "learning_rate": 1.904761904761905e-05, + "loss": 0.8597, + "step": 9 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 5.452786445617676, + "learning_rate": 1.8941798941798943e-05, + "loss": 0.7708, + "step": 10 + }, + { + "epoch": 0.15748031496062992, + "eval_accuracy": 0.6659707724425887, + "eval_loss": 0.6283570528030396, + "eval_runtime": 108.7155, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 10 + }, + { + "epoch": 0.1732283464566929, + "grad_norm": 4.522532939910889, + "learning_rate": 1.8835978835978836e-05, + "loss": 0.6848, + "step": 11 + }, + { + "epoch": 0.1889763779527559, + "grad_norm": 6.4987688064575195, + "learning_rate": 1.8730158730158732e-05, + "loss": 0.6644, + "step": 12 + }, + { + "epoch": 0.2047244094488189, + "grad_norm": 4.2297682762146, + "learning_rate": 1.8624338624338625e-05, + "loss": 0.7227, + "step": 13 + }, + { + "epoch": 0.2204724409448819, + "grad_norm": 6.5658063888549805, + "learning_rate": 1.851851851851852e-05, + "loss": 0.6991, + "step": 14 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 6.549685001373291, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.7875, + "step": 15 + }, + { + "epoch": 0.23622047244094488, + "eval_accuracy": 0.7244258872651357, + "eval_loss": 0.5749094486236572, + "eval_runtime": 108.7121, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 15 + }, + { + "epoch": 0.25196850393700787, + "grad_norm": 3.6349198818206787, + "learning_rate": 1.8306878306878308e-05, + "loss": 0.5732, + "step": 16 + }, + { + "epoch": 0.2677165354330709, + "grad_norm": 4.741979598999023, + "learning_rate": 1.82010582010582e-05, + "loss": 0.5774, + "step": 17 + }, + { + "epoch": 0.28346456692913385, + "grad_norm": 4.751223087310791, + "learning_rate": 1.8095238095238097e-05, + "loss": 0.5738, + "step": 18 + }, + { + "epoch": 0.2992125984251969, + "grad_norm": 5.214819431304932, + "learning_rate": 1.798941798941799e-05, + "loss": 0.7182, + "step": 19 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 5.566962718963623, + "learning_rate": 1.7883597883597884e-05, + "loss": 0.6575, + "step": 20 + }, + { + "epoch": 0.31496062992125984, + "eval_accuracy": 0.7390396659707724, + "eval_loss": 0.5360159873962402, + "eval_runtime": 108.7252, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 20 + }, + { + "epoch": 0.33070866141732286, + "grad_norm": 4.060683727264404, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.5976, + "step": 21 + }, + { + "epoch": 0.3464566929133858, + "grad_norm": 5.9868621826171875, + "learning_rate": 1.7671957671957673e-05, + "loss": 0.7734, + "step": 22 + }, + { + "epoch": 0.36220472440944884, + "grad_norm": 3.4295496940612793, + "learning_rate": 1.7566137566137566e-05, + "loss": 0.5543, + "step": 23 + }, + { + "epoch": 0.3779527559055118, + "grad_norm": 4.587719917297363, + "learning_rate": 1.7460317460317463e-05, + "loss": 0.6497, + "step": 24 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 4.520890235900879, + "learning_rate": 1.7354497354497356e-05, + "loss": 0.6802, + "step": 25 + }, + { + "epoch": 0.3937007874015748, + "eval_accuracy": 0.7432150313152401, + "eval_loss": 0.5086582899093628, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 25 + }, + { + "epoch": 0.4094488188976378, + "grad_norm": 3.653116464614868, + "learning_rate": 1.724867724867725e-05, + "loss": 0.4888, + "step": 26 + }, + { + "epoch": 0.4251968503937008, + "grad_norm": 4.042315483093262, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6004, + "step": 27 + }, + { + "epoch": 0.4409448818897638, + "grad_norm": 5.317520618438721, + "learning_rate": 1.7037037037037038e-05, + "loss": 0.6253, + "step": 28 + }, + { + "epoch": 0.4566929133858268, + "grad_norm": 3.8642020225524902, + "learning_rate": 1.693121693121693e-05, + "loss": 0.5778, + "step": 29 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 2.3941361904144287, + "learning_rate": 1.6825396825396828e-05, + "loss": 0.3982, + "step": 30 + }, + { + "epoch": 0.47244094488188976, + "eval_accuracy": 0.7578288100208769, + "eval_loss": 0.4889708459377289, + "eval_runtime": 108.7183, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 30 + }, + { + "epoch": 0.4881889763779528, + "grad_norm": 4.1248650550842285, + "learning_rate": 1.671957671957672e-05, + "loss": 0.777, + "step": 31 + }, + { + "epoch": 0.5039370078740157, + "grad_norm": 3.369483470916748, + "learning_rate": 1.6613756613756614e-05, + "loss": 0.5675, + "step": 32 + }, + { + "epoch": 0.5196850393700787, + "grad_norm": 3.8457119464874268, + "learning_rate": 1.6507936507936507e-05, + "loss": 0.6227, + "step": 33 + }, + { + "epoch": 0.5354330708661418, + "grad_norm": 4.809354782104492, + "learning_rate": 1.6402116402116404e-05, + "loss": 0.7111, + "step": 34 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 2.84769868850708, + "learning_rate": 1.6296296296296297e-05, + "loss": 0.4555, + "step": 35 + }, + { + "epoch": 0.5511811023622047, + "eval_accuracy": 0.7599164926931107, + "eval_loss": 0.4774630665779114, + "eval_runtime": 108.7145, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 35 + }, + { + "epoch": 0.5669291338582677, + "grad_norm": 4.496406555175781, + "learning_rate": 1.6190476190476193e-05, + "loss": 0.6703, + "step": 36 + }, + { + "epoch": 0.5826771653543307, + "grad_norm": 5.721245288848877, + "learning_rate": 1.6084656084656086e-05, + "loss": 0.7066, + "step": 37 + }, + { + "epoch": 0.5984251968503937, + "grad_norm": 4.494580268859863, + "learning_rate": 1.597883597883598e-05, + "loss": 0.4907, + "step": 38 + }, + { + "epoch": 0.6141732283464567, + "grad_norm": 2.8905560970306396, + "learning_rate": 1.5873015873015872e-05, + "loss": 0.5501, + "step": 39 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 9.776362419128418, + "learning_rate": 1.576719576719577e-05, + "loss": 0.8838, + "step": 40 + }, + { + "epoch": 0.6299212598425197, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.46829721331596375, + "eval_runtime": 108.7189, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 40 + } + ], + "logging_steps": 1.0, + "max_steps": 189, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..594d4dba0d053d5c7bfd636fe070834e9478eee4 --- /dev/null +++ b/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e8eb4d94fcecbd8ab0aa24ee61662b7a5da2eef5f366546a325524fc03e575 +size 5112 diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a53ea425933059074716d64973f94edb73a8eb80 --- /dev/null +++ b/checkpoint-60/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e788c33bde47908737d80fe730414dabf5e14ea --- /dev/null +++ b/checkpoint-60/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj" + ], + "task_type": "SEQ_CLS", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-60/adapter_model.safetensors b/checkpoint-60/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b6f98c3bb6b75b6756a0136bfadf2bfdb9ff2a6 --- /dev/null +++ b/checkpoint-60/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6035913dd63cede441fd3d69bfacfb58d7d5a0c3be7c68723a5a5f5d9492f7ef +size 57249936 diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f65fa7dfe9427a6a2c7e53f5f420bf9ed0051227 --- /dev/null +++ b/checkpoint-60/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3a540f9892eda963164517fb3a02bbdca83ffccfcc582f204977e1da2694e37 +size 114624506 diff --git a/checkpoint-60/rng_state.pth b/checkpoint-60/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5912df8b3700c9998be466d50f294925cdb3d325 --- /dev/null +++ b/checkpoint-60/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f9ead20217fb1cc96e2d13581e736ddd21380cf6c8bbf2aff8b98b2109b343a +size 14244 diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..546b9db94f1843639360c09b82383d67c61737b0 --- /dev/null +++ b/checkpoint-60/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acc4a5c4fd0719ef6b857b8e086935250954df62f56a4a7eb30f9b0554773f16 +size 1064 diff --git a/checkpoint-60/special_tokens_map.json b/checkpoint-60/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-60/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-60/tokenizer.json b/checkpoint-60/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/checkpoint-60/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/checkpoint-60/tokenizer_config.json b/checkpoint-60/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..50e79ef5e39127ac3280c4f578d33786f5afbb5c --- /dev/null +++ b/checkpoint-60/tokenizer_config.json @@ -0,0 +1,1756 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ecd390ed699159506468cb353014aba7e8754557 --- /dev/null +++ b/checkpoint-60/trainer_state.json @@ -0,0 +1,561 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9448818897637795, + "eval_steps": 5, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015748031496062992, + "grad_norm": 5.510926246643066, + "learning_rate": 1.9894179894179895e-05, + "loss": 0.9249, + "step": 1 + }, + { + "epoch": 0.031496062992125984, + "grad_norm": 8.61505126953125, + "learning_rate": 1.978835978835979e-05, + "loss": 0.8445, + "step": 2 + }, + { + "epoch": 0.047244094488188976, + "grad_norm": 7.036591529846191, + "learning_rate": 1.9682539682539684e-05, + "loss": 0.9654, + "step": 3 + }, + { + "epoch": 0.06299212598425197, + "grad_norm": 5.803933143615723, + "learning_rate": 1.9576719576719577e-05, + "loss": 0.9276, + "step": 4 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 5.716428756713867, + "learning_rate": 1.947089947089947e-05, + "loss": 0.9241, + "step": 5 + }, + { + "epoch": 0.07874015748031496, + "eval_accuracy": 0.5678496868475992, + "eval_loss": 0.6996241807937622, + "eval_runtime": 108.7291, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 5 + }, + { + "epoch": 0.09448818897637795, + "grad_norm": 7.55866813659668, + "learning_rate": 1.9365079365079367e-05, + "loss": 0.9947, + "step": 6 + }, + { + "epoch": 0.11023622047244094, + "grad_norm": 6.801171779632568, + "learning_rate": 1.925925925925926e-05, + "loss": 0.972, + "step": 7 + }, + { + "epoch": 0.12598425196850394, + "grad_norm": 4.845946311950684, + "learning_rate": 1.9153439153439156e-05, + "loss": 0.6478, + "step": 8 + }, + { + "epoch": 0.14173228346456693, + "grad_norm": 10.487945556640625, + "learning_rate": 1.904761904761905e-05, + "loss": 0.8597, + "step": 9 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 5.452786445617676, + "learning_rate": 1.8941798941798943e-05, + "loss": 0.7708, + "step": 10 + }, + { + "epoch": 0.15748031496062992, + "eval_accuracy": 0.6659707724425887, + "eval_loss": 0.6283570528030396, + "eval_runtime": 108.7155, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 10 + }, + { + "epoch": 0.1732283464566929, + "grad_norm": 4.522532939910889, + "learning_rate": 1.8835978835978836e-05, + "loss": 0.6848, + "step": 11 + }, + { + "epoch": 0.1889763779527559, + "grad_norm": 6.4987688064575195, + "learning_rate": 1.8730158730158732e-05, + "loss": 0.6644, + "step": 12 + }, + { + "epoch": 0.2047244094488189, + "grad_norm": 4.2297682762146, + "learning_rate": 1.8624338624338625e-05, + "loss": 0.7227, + "step": 13 + }, + { + "epoch": 0.2204724409448819, + "grad_norm": 6.5658063888549805, + "learning_rate": 1.851851851851852e-05, + "loss": 0.6991, + "step": 14 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 6.549685001373291, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.7875, + "step": 15 + }, + { + "epoch": 0.23622047244094488, + "eval_accuracy": 0.7244258872651357, + "eval_loss": 0.5749094486236572, + "eval_runtime": 108.7121, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 15 + }, + { + "epoch": 0.25196850393700787, + "grad_norm": 3.6349198818206787, + "learning_rate": 1.8306878306878308e-05, + "loss": 0.5732, + "step": 16 + }, + { + "epoch": 0.2677165354330709, + "grad_norm": 4.741979598999023, + "learning_rate": 1.82010582010582e-05, + "loss": 0.5774, + "step": 17 + }, + { + "epoch": 0.28346456692913385, + "grad_norm": 4.751223087310791, + "learning_rate": 1.8095238095238097e-05, + "loss": 0.5738, + "step": 18 + }, + { + "epoch": 0.2992125984251969, + "grad_norm": 5.214819431304932, + "learning_rate": 1.798941798941799e-05, + "loss": 0.7182, + "step": 19 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 5.566962718963623, + "learning_rate": 1.7883597883597884e-05, + "loss": 0.6575, + "step": 20 + }, + { + "epoch": 0.31496062992125984, + "eval_accuracy": 0.7390396659707724, + "eval_loss": 0.5360159873962402, + "eval_runtime": 108.7252, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 20 + }, + { + "epoch": 0.33070866141732286, + "grad_norm": 4.060683727264404, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.5976, + "step": 21 + }, + { + "epoch": 0.3464566929133858, + "grad_norm": 5.9868621826171875, + "learning_rate": 1.7671957671957673e-05, + "loss": 0.7734, + "step": 22 + }, + { + "epoch": 0.36220472440944884, + "grad_norm": 3.4295496940612793, + "learning_rate": 1.7566137566137566e-05, + "loss": 0.5543, + "step": 23 + }, + { + "epoch": 0.3779527559055118, + "grad_norm": 4.587719917297363, + "learning_rate": 1.7460317460317463e-05, + "loss": 0.6497, + "step": 24 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 4.520890235900879, + "learning_rate": 1.7354497354497356e-05, + "loss": 0.6802, + "step": 25 + }, + { + "epoch": 0.3937007874015748, + "eval_accuracy": 0.7432150313152401, + "eval_loss": 0.5086582899093628, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 25 + }, + { + "epoch": 0.4094488188976378, + "grad_norm": 3.653116464614868, + "learning_rate": 1.724867724867725e-05, + "loss": 0.4888, + "step": 26 + }, + { + "epoch": 0.4251968503937008, + "grad_norm": 4.042315483093262, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6004, + "step": 27 + }, + { + "epoch": 0.4409448818897638, + "grad_norm": 5.317520618438721, + "learning_rate": 1.7037037037037038e-05, + "loss": 0.6253, + "step": 28 + }, + { + "epoch": 0.4566929133858268, + "grad_norm": 3.8642020225524902, + "learning_rate": 1.693121693121693e-05, + "loss": 0.5778, + "step": 29 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 2.3941361904144287, + "learning_rate": 1.6825396825396828e-05, + "loss": 0.3982, + "step": 30 + }, + { + "epoch": 0.47244094488188976, + "eval_accuracy": 0.7578288100208769, + "eval_loss": 0.4889708459377289, + "eval_runtime": 108.7183, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 30 + }, + { + "epoch": 0.4881889763779528, + "grad_norm": 4.1248650550842285, + "learning_rate": 1.671957671957672e-05, + "loss": 0.777, + "step": 31 + }, + { + "epoch": 0.5039370078740157, + "grad_norm": 3.369483470916748, + "learning_rate": 1.6613756613756614e-05, + "loss": 0.5675, + "step": 32 + }, + { + "epoch": 0.5196850393700787, + "grad_norm": 3.8457119464874268, + "learning_rate": 1.6507936507936507e-05, + "loss": 0.6227, + "step": 33 + }, + { + "epoch": 0.5354330708661418, + "grad_norm": 4.809354782104492, + "learning_rate": 1.6402116402116404e-05, + "loss": 0.7111, + "step": 34 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 2.84769868850708, + "learning_rate": 1.6296296296296297e-05, + "loss": 0.4555, + "step": 35 + }, + { + "epoch": 0.5511811023622047, + "eval_accuracy": 0.7599164926931107, + "eval_loss": 0.4774630665779114, + "eval_runtime": 108.7145, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 35 + }, + { + "epoch": 0.5669291338582677, + "grad_norm": 4.496406555175781, + "learning_rate": 1.6190476190476193e-05, + "loss": 0.6703, + "step": 36 + }, + { + "epoch": 0.5826771653543307, + "grad_norm": 5.721245288848877, + "learning_rate": 1.6084656084656086e-05, + "loss": 0.7066, + "step": 37 + }, + { + "epoch": 0.5984251968503937, + "grad_norm": 4.494580268859863, + "learning_rate": 1.597883597883598e-05, + "loss": 0.4907, + "step": 38 + }, + { + "epoch": 0.6141732283464567, + "grad_norm": 2.8905560970306396, + "learning_rate": 1.5873015873015872e-05, + "loss": 0.5501, + "step": 39 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 9.776362419128418, + "learning_rate": 1.576719576719577e-05, + "loss": 0.8838, + "step": 40 + }, + { + "epoch": 0.6299212598425197, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.46829721331596375, + "eval_runtime": 108.7189, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 40 + }, + { + "epoch": 0.6456692913385826, + "grad_norm": 3.8481881618499756, + "learning_rate": 1.5661375661375662e-05, + "loss": 0.5309, + "step": 41 + }, + { + "epoch": 0.6614173228346457, + "grad_norm": 6.0327839851379395, + "learning_rate": 1.555555555555556e-05, + "loss": 0.6414, + "step": 42 + }, + { + "epoch": 0.6771653543307087, + "grad_norm": 4.993657112121582, + "learning_rate": 1.544973544973545e-05, + "loss": 0.5727, + "step": 43 + }, + { + "epoch": 0.6929133858267716, + "grad_norm": 4.3265252113342285, + "learning_rate": 1.5343915343915344e-05, + "loss": 0.4913, + "step": 44 + }, + { + "epoch": 0.7086614173228346, + "grad_norm": 3.6012353897094727, + "learning_rate": 1.523809523809524e-05, + "loss": 0.4692, + "step": 45 + }, + { + "epoch": 0.7086614173228346, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4610559344291687, + "eval_runtime": 108.7229, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 45 + }, + { + "epoch": 0.7244094488188977, + "grad_norm": 4.319406509399414, + "learning_rate": 1.5132275132275134e-05, + "loss": 0.5203, + "step": 46 + }, + { + "epoch": 0.7401574803149606, + "grad_norm": 3.885263442993164, + "learning_rate": 1.5026455026455027e-05, + "loss": 0.5084, + "step": 47 + }, + { + "epoch": 0.7559055118110236, + "grad_norm": 3.547327995300293, + "learning_rate": 1.4920634920634922e-05, + "loss": 0.442, + "step": 48 + }, + { + "epoch": 0.7716535433070866, + "grad_norm": 3.8868982791900635, + "learning_rate": 1.4814814814814815e-05, + "loss": 0.5848, + "step": 49 + }, + { + "epoch": 0.7874015748031497, + "grad_norm": 2.222346544265747, + "learning_rate": 1.470899470899471e-05, + "loss": 0.5455, + "step": 50 + }, + { + "epoch": 0.7874015748031497, + "eval_accuracy": 0.7620041753653445, + "eval_loss": 0.4531377851963043, + "eval_runtime": 108.7528, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 50 + }, + { + "epoch": 0.8031496062992126, + "grad_norm": 3.129575252532959, + "learning_rate": 1.4603174603174603e-05, + "loss": 0.4861, + "step": 51 + }, + { + "epoch": 0.8188976377952756, + "grad_norm": 4.924710750579834, + "learning_rate": 1.44973544973545e-05, + "loss": 0.5782, + "step": 52 + }, + { + "epoch": 0.8346456692913385, + "grad_norm": 5.2157182693481445, + "learning_rate": 1.4391534391534392e-05, + "loss": 0.7203, + "step": 53 + }, + { + "epoch": 0.8503937007874016, + "grad_norm": 4.697371959686279, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.4261, + "step": 54 + }, + { + "epoch": 0.8661417322834646, + "grad_norm": 2.8899056911468506, + "learning_rate": 1.417989417989418e-05, + "loss": 0.5696, + "step": 55 + }, + { + "epoch": 0.8661417322834646, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4459321200847626, + "eval_runtime": 108.7951, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.551, + "step": 55 + }, + { + "epoch": 0.8818897637795275, + "grad_norm": 4.532041072845459, + "learning_rate": 1.4074074074074075e-05, + "loss": 0.5723, + "step": 56 + }, + { + "epoch": 0.8976377952755905, + "grad_norm": 2.3436343669891357, + "learning_rate": 1.3968253968253968e-05, + "loss": 0.3629, + "step": 57 + }, + { + "epoch": 0.9133858267716536, + "grad_norm": 3.333158493041992, + "learning_rate": 1.3862433862433865e-05, + "loss": 0.5433, + "step": 58 + }, + { + "epoch": 0.9291338582677166, + "grad_norm": 4.177884101867676, + "learning_rate": 1.3756613756613758e-05, + "loss": 0.3747, + "step": 59 + }, + { + "epoch": 0.9448818897637795, + "grad_norm": 5.238712310791016, + "learning_rate": 1.3650793650793652e-05, + "loss": 0.7453, + "step": 60 + }, + { + "epoch": 0.9448818897637795, + "eval_accuracy": 0.7766179540709812, + "eval_loss": 0.4413756728172302, + "eval_runtime": 108.7463, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 60 + } + ], + "logging_steps": 1.0, + "max_steps": 189, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..594d4dba0d053d5c7bfd636fe070834e9478eee4 --- /dev/null +++ b/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e8eb4d94fcecbd8ab0aa24ee61662b7a5da2eef5f366546a325524fc03e575 +size 5112 diff --git a/checkpoint-80/README.md b/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a53ea425933059074716d64973f94edb73a8eb80 --- /dev/null +++ b/checkpoint-80/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/checkpoint-80/adapter_config.json b/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e788c33bde47908737d80fe730414dabf5e14ea --- /dev/null +++ b/checkpoint-80/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj" + ], + "task_type": "SEQ_CLS", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-80/adapter_model.safetensors b/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3544b978cc372b91bddb3a07d46eb0f3fb9fbe9c --- /dev/null +++ b/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b865022bd7cd279c3b1cc8b775f708e5960734a5056a7aa18a945b93d219ad +size 57249936 diff --git a/checkpoint-80/optimizer.pt b/checkpoint-80/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bb99efbe5e22890dc4fee3abf3caa87d4092493 --- /dev/null +++ b/checkpoint-80/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ea0485d486c0f2ed4d46ca393745d2767a388dde142801d78639e2d9d93af4 +size 114624506 diff --git a/checkpoint-80/rng_state.pth b/checkpoint-80/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a3bdc6c1f093b63d182fde082a8d8f0de1faeb9f --- /dev/null +++ b/checkpoint-80/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7cd80289228b9601df25f5d53b0754887ffd4b15cf77e2f38e94b5fbac32bec +size 14244 diff --git a/checkpoint-80/scheduler.pt b/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f03969a034d8219bc117acfd60986b540a9a940 --- /dev/null +++ b/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610fecad621e4e5e38db6a84b6098b59532c13f6916096608747065e67ba5b13 +size 1064 diff --git a/checkpoint-80/special_tokens_map.json b/checkpoint-80/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/checkpoint-80/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-80/tokenizer.json b/checkpoint-80/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/checkpoint-80/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/checkpoint-80/tokenizer_config.json b/checkpoint-80/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..50e79ef5e39127ac3280c4f578d33786f5afbb5c --- /dev/null +++ b/checkpoint-80/tokenizer_config.json @@ -0,0 +1,1756 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/checkpoint-80/trainer_state.json b/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ccf254483f05dbbf21b8828bae0148252c6d8c18 --- /dev/null +++ b/checkpoint-80/trainer_state.json @@ -0,0 +1,737 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2598425196850394, + "eval_steps": 5, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015748031496062992, + "grad_norm": 5.510926246643066, + "learning_rate": 1.9894179894179895e-05, + "loss": 0.9249, + "step": 1 + }, + { + "epoch": 0.031496062992125984, + "grad_norm": 8.61505126953125, + "learning_rate": 1.978835978835979e-05, + "loss": 0.8445, + "step": 2 + }, + { + "epoch": 0.047244094488188976, + "grad_norm": 7.036591529846191, + "learning_rate": 1.9682539682539684e-05, + "loss": 0.9654, + "step": 3 + }, + { + "epoch": 0.06299212598425197, + "grad_norm": 5.803933143615723, + "learning_rate": 1.9576719576719577e-05, + "loss": 0.9276, + "step": 4 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 5.716428756713867, + "learning_rate": 1.947089947089947e-05, + "loss": 0.9241, + "step": 5 + }, + { + "epoch": 0.07874015748031496, + "eval_accuracy": 0.5678496868475992, + "eval_loss": 0.6996241807937622, + "eval_runtime": 108.7291, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 5 + }, + { + "epoch": 0.09448818897637795, + "grad_norm": 7.55866813659668, + "learning_rate": 1.9365079365079367e-05, + "loss": 0.9947, + "step": 6 + }, + { + "epoch": 0.11023622047244094, + "grad_norm": 6.801171779632568, + "learning_rate": 1.925925925925926e-05, + "loss": 0.972, + "step": 7 + }, + { + "epoch": 0.12598425196850394, + "grad_norm": 4.845946311950684, + "learning_rate": 1.9153439153439156e-05, + "loss": 0.6478, + "step": 8 + }, + { + "epoch": 0.14173228346456693, + "grad_norm": 10.487945556640625, + "learning_rate": 1.904761904761905e-05, + "loss": 0.8597, + "step": 9 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 5.452786445617676, + "learning_rate": 1.8941798941798943e-05, + "loss": 0.7708, + "step": 10 + }, + { + "epoch": 0.15748031496062992, + "eval_accuracy": 0.6659707724425887, + "eval_loss": 0.6283570528030396, + "eval_runtime": 108.7155, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 10 + }, + { + "epoch": 0.1732283464566929, + "grad_norm": 4.522532939910889, + "learning_rate": 1.8835978835978836e-05, + "loss": 0.6848, + "step": 11 + }, + { + "epoch": 0.1889763779527559, + "grad_norm": 6.4987688064575195, + "learning_rate": 1.8730158730158732e-05, + "loss": 0.6644, + "step": 12 + }, + { + "epoch": 0.2047244094488189, + "grad_norm": 4.2297682762146, + "learning_rate": 1.8624338624338625e-05, + "loss": 0.7227, + "step": 13 + }, + { + "epoch": 0.2204724409448819, + "grad_norm": 6.5658063888549805, + "learning_rate": 1.851851851851852e-05, + "loss": 0.6991, + "step": 14 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 6.549685001373291, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.7875, + "step": 15 + }, + { + "epoch": 0.23622047244094488, + "eval_accuracy": 0.7244258872651357, + "eval_loss": 0.5749094486236572, + "eval_runtime": 108.7121, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 15 + }, + { + "epoch": 0.25196850393700787, + "grad_norm": 3.6349198818206787, + "learning_rate": 1.8306878306878308e-05, + "loss": 0.5732, + "step": 16 + }, + { + "epoch": 0.2677165354330709, + "grad_norm": 4.741979598999023, + "learning_rate": 1.82010582010582e-05, + "loss": 0.5774, + "step": 17 + }, + { + "epoch": 0.28346456692913385, + "grad_norm": 4.751223087310791, + "learning_rate": 1.8095238095238097e-05, + "loss": 0.5738, + "step": 18 + }, + { + "epoch": 0.2992125984251969, + "grad_norm": 5.214819431304932, + "learning_rate": 1.798941798941799e-05, + "loss": 0.7182, + "step": 19 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 5.566962718963623, + "learning_rate": 1.7883597883597884e-05, + "loss": 0.6575, + "step": 20 + }, + { + "epoch": 0.31496062992125984, + "eval_accuracy": 0.7390396659707724, + "eval_loss": 0.5360159873962402, + "eval_runtime": 108.7252, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 20 + }, + { + "epoch": 0.33070866141732286, + "grad_norm": 4.060683727264404, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.5976, + "step": 21 + }, + { + "epoch": 0.3464566929133858, + "grad_norm": 5.9868621826171875, + "learning_rate": 1.7671957671957673e-05, + "loss": 0.7734, + "step": 22 + }, + { + "epoch": 0.36220472440944884, + "grad_norm": 3.4295496940612793, + "learning_rate": 1.7566137566137566e-05, + "loss": 0.5543, + "step": 23 + }, + { + "epoch": 0.3779527559055118, + "grad_norm": 4.587719917297363, + "learning_rate": 1.7460317460317463e-05, + "loss": 0.6497, + "step": 24 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 4.520890235900879, + "learning_rate": 1.7354497354497356e-05, + "loss": 0.6802, + "step": 25 + }, + { + "epoch": 0.3937007874015748, + "eval_accuracy": 0.7432150313152401, + "eval_loss": 0.5086582899093628, + "eval_runtime": 108.7194, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 25 + }, + { + "epoch": 0.4094488188976378, + "grad_norm": 3.653116464614868, + "learning_rate": 1.724867724867725e-05, + "loss": 0.4888, + "step": 26 + }, + { + "epoch": 0.4251968503937008, + "grad_norm": 4.042315483093262, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6004, + "step": 27 + }, + { + "epoch": 0.4409448818897638, + "grad_norm": 5.317520618438721, + "learning_rate": 1.7037037037037038e-05, + "loss": 0.6253, + "step": 28 + }, + { + "epoch": 0.4566929133858268, + "grad_norm": 3.8642020225524902, + "learning_rate": 1.693121693121693e-05, + "loss": 0.5778, + "step": 29 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 2.3941361904144287, + "learning_rate": 1.6825396825396828e-05, + "loss": 0.3982, + "step": 30 + }, + { + "epoch": 0.47244094488188976, + "eval_accuracy": 0.7578288100208769, + "eval_loss": 0.4889708459377289, + "eval_runtime": 108.7183, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 30 + }, + { + "epoch": 0.4881889763779528, + "grad_norm": 4.1248650550842285, + "learning_rate": 1.671957671957672e-05, + "loss": 0.777, + "step": 31 + }, + { + "epoch": 0.5039370078740157, + "grad_norm": 3.369483470916748, + "learning_rate": 1.6613756613756614e-05, + "loss": 0.5675, + "step": 32 + }, + { + "epoch": 0.5196850393700787, + "grad_norm": 3.8457119464874268, + "learning_rate": 1.6507936507936507e-05, + "loss": 0.6227, + "step": 33 + }, + { + "epoch": 0.5354330708661418, + "grad_norm": 4.809354782104492, + "learning_rate": 1.6402116402116404e-05, + "loss": 0.7111, + "step": 34 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 2.84769868850708, + "learning_rate": 1.6296296296296297e-05, + "loss": 0.4555, + "step": 35 + }, + { + "epoch": 0.5511811023622047, + "eval_accuracy": 0.7599164926931107, + "eval_loss": 0.4774630665779114, + "eval_runtime": 108.7145, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 35 + }, + { + "epoch": 0.5669291338582677, + "grad_norm": 4.496406555175781, + "learning_rate": 1.6190476190476193e-05, + "loss": 0.6703, + "step": 36 + }, + { + "epoch": 0.5826771653543307, + "grad_norm": 5.721245288848877, + "learning_rate": 1.6084656084656086e-05, + "loss": 0.7066, + "step": 37 + }, + { + "epoch": 0.5984251968503937, + "grad_norm": 4.494580268859863, + "learning_rate": 1.597883597883598e-05, + "loss": 0.4907, + "step": 38 + }, + { + "epoch": 0.6141732283464567, + "grad_norm": 2.8905560970306396, + "learning_rate": 1.5873015873015872e-05, + "loss": 0.5501, + "step": 39 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 9.776362419128418, + "learning_rate": 1.576719576719577e-05, + "loss": 0.8838, + "step": 40 + }, + { + "epoch": 0.6299212598425197, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.46829721331596375, + "eval_runtime": 108.7189, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 40 + }, + { + "epoch": 0.6456692913385826, + "grad_norm": 3.8481881618499756, + "learning_rate": 1.5661375661375662e-05, + "loss": 0.5309, + "step": 41 + }, + { + "epoch": 0.6614173228346457, + "grad_norm": 6.0327839851379395, + "learning_rate": 1.555555555555556e-05, + "loss": 0.6414, + "step": 42 + }, + { + "epoch": 0.6771653543307087, + "grad_norm": 4.993657112121582, + "learning_rate": 1.544973544973545e-05, + "loss": 0.5727, + "step": 43 + }, + { + "epoch": 0.6929133858267716, + "grad_norm": 4.3265252113342285, + "learning_rate": 1.5343915343915344e-05, + "loss": 0.4913, + "step": 44 + }, + { + "epoch": 0.7086614173228346, + "grad_norm": 3.6012353897094727, + "learning_rate": 1.523809523809524e-05, + "loss": 0.4692, + "step": 45 + }, + { + "epoch": 0.7086614173228346, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4610559344291687, + "eval_runtime": 108.7229, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 45 + }, + { + "epoch": 0.7244094488188977, + "grad_norm": 4.319406509399414, + "learning_rate": 1.5132275132275134e-05, + "loss": 0.5203, + "step": 46 + }, + { + "epoch": 0.7401574803149606, + "grad_norm": 3.885263442993164, + "learning_rate": 1.5026455026455027e-05, + "loss": 0.5084, + "step": 47 + }, + { + "epoch": 0.7559055118110236, + "grad_norm": 3.547327995300293, + "learning_rate": 1.4920634920634922e-05, + "loss": 0.442, + "step": 48 + }, + { + "epoch": 0.7716535433070866, + "grad_norm": 3.8868982791900635, + "learning_rate": 1.4814814814814815e-05, + "loss": 0.5848, + "step": 49 + }, + { + "epoch": 0.7874015748031497, + "grad_norm": 2.222346544265747, + "learning_rate": 1.470899470899471e-05, + "loss": 0.5455, + "step": 50 + }, + { + "epoch": 0.7874015748031497, + "eval_accuracy": 0.7620041753653445, + "eval_loss": 0.4531377851963043, + "eval_runtime": 108.7528, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 0.552, + "step": 50 + }, + { + "epoch": 0.8031496062992126, + "grad_norm": 3.129575252532959, + "learning_rate": 1.4603174603174603e-05, + "loss": 0.4861, + "step": 51 + }, + { + "epoch": 0.8188976377952756, + "grad_norm": 4.924710750579834, + "learning_rate": 1.44973544973545e-05, + "loss": 0.5782, + "step": 52 + }, + { + "epoch": 0.8346456692913385, + "grad_norm": 5.2157182693481445, + "learning_rate": 1.4391534391534392e-05, + "loss": 0.7203, + "step": 53 + }, + { + "epoch": 0.8503937007874016, + "grad_norm": 4.697371959686279, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.4261, + "step": 54 + }, + { + "epoch": 0.8661417322834646, + "grad_norm": 2.8899056911468506, + "learning_rate": 1.417989417989418e-05, + "loss": 0.5696, + "step": 55 + }, + { + "epoch": 0.8661417322834646, + "eval_accuracy": 0.7661795407098121, + "eval_loss": 0.4459321200847626, + "eval_runtime": 108.7951, + "eval_samples_per_second": 4.403, + "eval_steps_per_second": 0.551, + "step": 55 + }, + { + "epoch": 0.8818897637795275, + "grad_norm": 4.532041072845459, + "learning_rate": 1.4074074074074075e-05, + "loss": 0.5723, + "step": 56 + }, + { + "epoch": 0.8976377952755905, + "grad_norm": 2.3436343669891357, + "learning_rate": 1.3968253968253968e-05, + "loss": 0.3629, + "step": 57 + }, + { + "epoch": 0.9133858267716536, + "grad_norm": 3.333158493041992, + "learning_rate": 1.3862433862433865e-05, + "loss": 0.5433, + "step": 58 + }, + { + "epoch": 0.9291338582677166, + "grad_norm": 4.177884101867676, + "learning_rate": 1.3756613756613758e-05, + "loss": 0.3747, + "step": 59 + }, + { + "epoch": 0.9448818897637795, + "grad_norm": 5.238712310791016, + "learning_rate": 1.3650793650793652e-05, + "loss": 0.7453, + "step": 60 + }, + { + "epoch": 0.9448818897637795, + "eval_accuracy": 0.7766179540709812, + "eval_loss": 0.4413756728172302, + "eval_runtime": 108.7463, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 60 + }, + { + "epoch": 0.9606299212598425, + "grad_norm": 4.022979736328125, + "learning_rate": 1.3544973544973545e-05, + "loss": 0.6177, + "step": 61 + }, + { + "epoch": 0.9763779527559056, + "grad_norm": 2.0528969764709473, + "learning_rate": 1.343915343915344e-05, + "loss": 0.3505, + "step": 62 + }, + { + "epoch": 0.9921259842519685, + "grad_norm": 3.9705586433410645, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5858, + "step": 63 + }, + { + "epoch": 1.0078740157480315, + "grad_norm": 8.341585159301758, + "learning_rate": 1.322751322751323e-05, + "loss": 0.6721, + "step": 64 + }, + { + "epoch": 1.0236220472440944, + "grad_norm": 4.031370162963867, + "learning_rate": 1.3121693121693123e-05, + "loss": 0.5369, + "step": 65 + }, + { + "epoch": 1.0236220472440944, + "eval_accuracy": 0.7828810020876826, + "eval_loss": 0.43705105781555176, + "eval_runtime": 108.7278, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 65 + }, + { + "epoch": 1.0393700787401574, + "grad_norm": 2.898926019668579, + "learning_rate": 1.3015873015873018e-05, + "loss": 0.3628, + "step": 66 + }, + { + "epoch": 1.0551181102362204, + "grad_norm": 2.9200918674468994, + "learning_rate": 1.291005291005291e-05, + "loss": 0.3311, + "step": 67 + }, + { + "epoch": 1.0708661417322836, + "grad_norm": 4.506103992462158, + "learning_rate": 1.2804232804232805e-05, + "loss": 0.5813, + "step": 68 + }, + { + "epoch": 1.0866141732283465, + "grad_norm": 4.187809944152832, + "learning_rate": 1.2698412698412699e-05, + "loss": 0.4802, + "step": 69 + }, + { + "epoch": 1.1023622047244095, + "grad_norm": 3.5520920753479004, + "learning_rate": 1.2592592592592593e-05, + "loss": 0.3994, + "step": 70 + }, + { + "epoch": 1.1023622047244095, + "eval_accuracy": 0.7849686847599165, + "eval_loss": 0.43335652351379395, + "eval_runtime": 108.738, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 0.552, + "step": 70 + }, + { + "epoch": 1.1181102362204725, + "grad_norm": 3.6081998348236084, + "learning_rate": 1.2486772486772486e-05, + "loss": 0.5266, + "step": 71 + }, + { + "epoch": 1.1338582677165354, + "grad_norm": 3.6554276943206787, + "learning_rate": 1.2380952380952383e-05, + "loss": 0.5231, + "step": 72 + }, + { + "epoch": 1.1496062992125984, + "grad_norm": 3.551367998123169, + "learning_rate": 1.2275132275132276e-05, + "loss": 0.4538, + "step": 73 + }, + { + "epoch": 1.1653543307086613, + "grad_norm": 4.252958297729492, + "learning_rate": 1.216931216931217e-05, + "loss": 0.4688, + "step": 74 + }, + { + "epoch": 1.1811023622047245, + "grad_norm": 4.337672710418701, + "learning_rate": 1.2063492063492064e-05, + "loss": 0.4235, + "step": 75 + }, + { + "epoch": 1.1811023622047245, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.429808109998703, + "eval_runtime": 108.723, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 75 + }, + { + "epoch": 1.1968503937007875, + "grad_norm": 2.607356548309326, + "learning_rate": 1.1957671957671959e-05, + "loss": 0.3639, + "step": 76 + }, + { + "epoch": 1.2125984251968505, + "grad_norm": 3.198551654815674, + "learning_rate": 1.1851851851851852e-05, + "loss": 0.4066, + "step": 77 + }, + { + "epoch": 1.2283464566929134, + "grad_norm": 4.820532321929932, + "learning_rate": 1.1746031746031748e-05, + "loss": 0.5906, + "step": 78 + }, + { + "epoch": 1.2440944881889764, + "grad_norm": 3.5706419944763184, + "learning_rate": 1.1640211640211641e-05, + "loss": 0.5065, + "step": 79 + }, + { + "epoch": 1.2598425196850394, + "grad_norm": 4.763455867767334, + "learning_rate": 1.1534391534391536e-05, + "loss": 0.4811, + "step": 80 + }, + { + "epoch": 1.2598425196850394, + "eval_accuracy": 0.791231732776618, + "eval_loss": 0.4265703856945038, + "eval_runtime": 108.7158, + "eval_samples_per_second": 4.406, + "eval_steps_per_second": 0.552, + "step": 80 + } + ], + "logging_steps": 1.0, + "max_steps": 189, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-80/training_args.bin b/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..594d4dba0d053d5c7bfd636fe070834e9478eee4 --- /dev/null +++ b/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e8eb4d94fcecbd8ab0aa24ee61662b7a5da2eef5f366546a325524fc03e575 +size 5112 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6368f7e735fbe4781bf6e956b7c6ad0586df80 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..50e79ef5e39127ac3280c4f578d33786f5afbb5c --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,1756 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..594d4dba0d053d5c7bfd636fe070834e9478eee4 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e8eb4d94fcecbd8ab0aa24ee61662b7a5da2eef5f366546a325524fc03e575 +size 5112