diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
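As a rough, hedged illustration of what that estimate involves (hardware power draw × hours used × data-center PUE × grid carbon intensity), the sketch below uses placeholder figures only; none of the numbers are measured values for this training run.

```python
# Back-of-the-envelope CO2e estimate in the spirit of Lacoste et al. (2019):
# energy (kWh) = power draw (kW) x hours x PUE; emissions = energy x grid carbon intensity.
# Every number below is a placeholder, not a measured value for this run.
gpu_power_kw = 0.30          # assumed draw of one data-center GPU (~300 W)
hours_used = 10.0            # assumed training duration
pue = 1.1                    # assumed data-center power usage effectiveness
grid_kg_co2e_per_kwh = 0.4   # assumed carbon intensity of the compute region

energy_kwh = gpu_power_kw * hours_used * pue
emissions_kg = energy_kwh * grid_kg_co2e_per_kwh
print(f"~{energy_kwh:.1f} kWh, ~{emissions_kg:.2f} kg CO2e (placeholder inputs)")
```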
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4e15345d3d46641d2697b0e61529562fc393893 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:89546f1cfae05459838f8ecf9be2a4c4d6a451ed955044b1776f32db6abb292e +size 37803066 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c99cab23f21fab34aee46f385baed8969c88accd --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6658bb803792a8187620edfb12529ca0e583a5f5c3f4c2680804c9b8a8152cf +size 14244 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fe7e19aeb8fdf8353b8f45d5fc26d99589b03c2 --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5b5a8b0c711f0f2e32411bb3a51d421097b304182a799a979fcef82fd40741 +size 1064 diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3b173de37f6772e8e7aa3d81f609606532775b40 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,131 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.16326530612245, + "eval_steps": 20, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 2.5324407405674496e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git 
a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-120/README.md b/checkpoint-120/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-120/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-120/adapter_config.json b/checkpoint-120/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-120/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-120/adapter_model.safetensors b/checkpoint-120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-120/optimizer.pt b/checkpoint-120/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..66e58ed6fc075ba82a549db45faa2c9617893cf9 --- /dev/null +++ b/checkpoint-120/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fdf10e8bd7abae1cbe41e713657356fab124df9737617b40e6dbf8a20a97e6c +size 37803066 diff --git a/checkpoint-120/rng_state.pth b/checkpoint-120/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4f27cb6388e85e739bb7084f46d08ad03a158ac7 --- /dev/null +++ b/checkpoint-120/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3201b9328a8797a342d60e68ac1db3faf1e570a609d56a74a19418179c1013 +size 14244 diff --git a/checkpoint-120/scheduler.pt b/checkpoint-120/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d121de4f810bae45514079b2bc96c14a1d817a52 --- /dev/null +++ b/checkpoint-120/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b0293f0ef471ab336f010a06133785c4b0a8abde856102589019680f63c506b +size 1064 diff --git a/checkpoint-120/trainer_state.json b/checkpoint-120/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..5714af71b66de2e73b759de8ac44428dbc701a66 --- /dev/null +++ b/checkpoint-120/trainer_state.json @@ -0,0 +1,153 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.795918367346939, + "eval_steps": 20, + "global_step": 120, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 3.040578886267699e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-120/training_args.bin b/checkpoint-120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-140/README.md b/checkpoint-140/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null 
+++ b/checkpoint-140/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-140/adapter_config.json b/checkpoint-140/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-140/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-140/adapter_model.safetensors b/checkpoint-140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-140/optimizer.pt b/checkpoint-140/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0c9cdd5638e754159a6a7273388358dcc051911 --- /dev/null +++ b/checkpoint-140/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2507b3e5648f990de5bba05fe789ffa15356143e24e38c1084ce8c0970886314 +size 37803066 diff --git a/checkpoint-140/rng_state.pth b/checkpoint-140/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f405ec5b691b268e9f21cb3639c40fa286e223cb --- /dev/null +++ b/checkpoint-140/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d46991217c6d21a6618c7cd7c5d36c9b48595eb1380dec7899ffa770f517389 +size 14244 diff --git a/checkpoint-140/scheduler.pt b/checkpoint-140/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..032f42b08d956b20f4e55374f4815780f8b60f43 --- /dev/null +++ b/checkpoint-140/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf42dc04fd68d790bde68418c2c0cdd87da27fb120bf5bde1029d4bdb19665c +size 1064 diff --git a/checkpoint-140/trainer_state.json b/checkpoint-140/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..ef329f9e82e7c27681794d47244c0bb1a32ecf3b --- /dev/null +++ b/checkpoint-140/trainer_state.json @@ -0,0 +1,175 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.428571428571429, + "eval_steps": 20, + "global_step": 140, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 3.5458651842871296e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-140/training_args.bin b/checkpoint-140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- 
/dev/null +++ b/checkpoint-140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-160/README.md b/checkpoint-160/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-160/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-160/adapter_config.json b/checkpoint-160/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-160/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-160/adapter_model.safetensors b/checkpoint-160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-160/optimizer.pt b/checkpoint-160/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1618d75d50c79e40774cdab37dda941082f5cb78 --- /dev/null +++ b/checkpoint-160/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:890f55baf5e8c007117520504265367020a0a770a43606d7b93aa2b1f05cbfe4 +size 37803066 diff --git a/checkpoint-160/rng_state.pth b/checkpoint-160/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..44ce9809705a29adb4f2079d45a3ea5694d026a5 --- /dev/null +++ b/checkpoint-160/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c838e8cb08fdf3fa98e1eae71b7e8c486cc8c28282e66eaab882393e9d12c9ab +size 14244 diff --git a/checkpoint-160/scheduler.pt b/checkpoint-160/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1d51c8b3568f36e493dd517e930862f7dbdbce9 --- /dev/null +++ b/checkpoint-160/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b111cd45660eb1face1bcfa06f33e48dc20123917f55da0a1daffb00d391c810 +size 1064 diff --git a/checkpoint-160/trainer_state.json b/checkpoint-160/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..f5ce2de1eaf533fc36aacbbfd110c1dd20aa7582 --- /dev/null +++ b/checkpoint-160/trainer_state.json @@ -0,0 +1,197 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 13.061224489795919, + "eval_steps": 20, + "global_step": 160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + } + ], + "logging_steps": 10, + 
"max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 4.049929261871923e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-160/training_args.bin b/checkpoint-160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-180/README.md b/checkpoint-180/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-180/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-180/adapter_config.json b/checkpoint-180/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-180/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-180/adapter_model.safetensors b/checkpoint-180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-180/optimizer.pt b/checkpoint-180/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b09acde0f147803104931c41606de77fdde4553e --- /dev/null +++ b/checkpoint-180/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44a8aee4a9aebded4b15dca69d9b93da6b2cdd72e835b737c908fac001356e9 +size 37803066 diff --git a/checkpoint-180/rng_state.pth b/checkpoint-180/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2d32e810e43d8a76c7fd60bbbf468bfa941b6d39 --- /dev/null +++ b/checkpoint-180/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a617908633b9e4096bf3a04ee0c0565bf03438029712e268d2687e943ecd9747 +size 14244 diff --git a/checkpoint-180/scheduler.pt b/checkpoint-180/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ff63043aacccc559878882da165da5439d5efc0 --- /dev/null +++ b/checkpoint-180/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e2940a6b6f7aaca29b769bc58976a81b780ffd50504916097bd9d1c2b94c46 +size 1064 diff --git a/checkpoint-180/trainer_state.json b/checkpoint-180/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..afafe947b21657aa8e729eb21232e45e163165cb --- /dev/null +++ b/checkpoint-180/trainer_state.json @@ -0,0 +1,219 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 14.693877551020408, + "eval_steps": 20, + "global_step": 180, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 4.5597988865212416e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-180/training_args.bin b/checkpoint-180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-20/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-20/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-20/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a45a309e68eeef6107130cf121fa0135f6cd7f7a --- /dev/null +++ b/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c802c2cd6845072a759a7e0126fde304b27888c6bdb51b6b63a6ecfd11d175 +size 37803066 diff --git a/checkpoint-20/rng_state.pth b/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5702939a2838347c62e18bc55f52dd67e96a2904 --- /dev/null +++ b/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04cbff5311c7f9daf555dae1bb3c44151172e3d399ce87d0ce1da5a72fef680f +size 14244 diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..96ed9d6ad9bc0f128428b67ada791af6c178def7 --- /dev/null +++ b/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a50562abe9aa2ac417f3ee985d4bb2326b0bd2d9e6a5e360bc96423fce4c04d +size 1064 diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..018ce092c06a6eb54ff3f963e706301f90f2bbf7 --- 
/dev/null +++ b/checkpoint-20/trainer_state.json @@ -0,0 +1,43 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6326530612244898, + "eval_steps": 20, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 5.088511076204544e+16, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
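
The card above says "Use the code below" but provides no code, so here is a minimal, hedged sketch of how an adapter with this configuration could be loaded for inference. It assumes the LoRA adapter in this repository is loadable with PEFT (the `adapter_config.json` records PEFT-style LoRA on `tiiuae/falcon-7b` with `r=16`, `lora_alpha=32`, and target module `query_key_value`); the `adapter_path` below is a placeholder, not a confirmed repo id.

```python
# Hedged sketch: load the Falcon-7B base model and attach this LoRA adapter.
# "adapter_path" is a placeholder -- point it at this repository (local clone or Hub id).
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_id = "tiiuae/falcon-7b"     # base_model_name_or_path in adapter_config.json
adapter_path = "path/to/this-adapter"  # placeholder, not a confirmed repo id

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
base_model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")

# PeftModel.from_pretrained reads adapter_config.json (LoRA, r=16, lora_alpha=32,
# target module "query_key_value") and injects the adapter weights into the base model.
model = PeftModel.from_pretrained(base_model, adapter_path)
model.eval()

prompt = "Question: What does this adapter do?\nAnswer:"
inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

Before relying on this sketch, note what the checkpoints in this diff actually record: every `trainer_state.json` logs `eval_loss: NaN` (with `grad_norm` alternating between 0.0 and NaN), and the `adapter_model.safetensors` LFS pointers are only 48 bytes, so the adapter weights here may be placeholders from a diverged training run and should be verified first.
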
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec8cd0d1f947cff180f9bab66009fc3f18e548dd --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a5144d02de89f00a5ed53255221386d1574471c11b8525ececbfc8b11f67f25 +size 37803066 diff --git 
a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..23bfd5c6e87af8aab83bb4bc084f75b74bbe20e4 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:206992b4f04912343983da7b73ed83acbf14a47c43a14edee824120e0110ade8 +size 14244 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d56425b8efd27463fbd2d66bbf70b8c1d042b0f --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22379c268357b5568e48a9783f66f337f6ef8df705cc8deac592493ee705cfa9 +size 1064 diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3793823123219832e1d12e6193a64087d4e69b7e --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,241 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 16.3265306122449, + "eval_steps": 20, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + 
"eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + "grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 5.064066667511808e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-220/README.md b/checkpoint-220/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-220/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and 
limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-220/adapter_config.json b/checkpoint-220/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-220/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-220/adapter_model.safetensors b/checkpoint-220/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-220/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-220/optimizer.pt b/checkpoint-220/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..96c5c1f0666160c420e6c1d42a8552c7c9fa4bfc --- /dev/null +++ 
b/checkpoint-220/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d306a3845568e04d8df59750c9406e1958af777a44de5b6185535ee6fd453a3 +size 37803066 diff --git a/checkpoint-220/rng_state.pth b/checkpoint-220/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..763b29ade190b3a4110860c5b85e0ed8c47698a7 --- /dev/null +++ b/checkpoint-220/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b2948ea62585519f5708d1cda89ceec7dde3fef4b1fef2a8eb381601d6dac58 +size 14244 diff --git a/checkpoint-220/scheduler.pt b/checkpoint-220/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..55a7e80c1ed8420abbe6567923f51375f6dde10f --- /dev/null +++ b/checkpoint-220/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64f0d454a159d3ccd6e17566dd964acecdffd5b3b98622ab433d7ecbd307825 +size 1064 diff --git a/checkpoint-220/trainer_state.json b/checkpoint-220/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..58be1ee9bd788ab190bbae8a7068bc7db49d3c87 --- /dev/null +++ b/checkpoint-220/trainer_state.json @@ -0,0 +1,263 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 17.959183673469386, + "eval_steps": 20, + "global_step": 220, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + 
"learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + "grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 5.5727140717264896e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-220/training_args.bin b/checkpoint-220/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-220/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-240/README.md b/checkpoint-240/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-240/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information 
Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-240/adapter_config.json b/checkpoint-240/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-240/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-240/adapter_model.safetensors 
b/checkpoint-240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-240/optimizer.pt b/checkpoint-240/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9330c02a12e4617b2050c019eee9e4dfe7772b9 --- /dev/null +++ b/checkpoint-240/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:878097da1a566e084b88ca358b49b69ebb9a593ebaf880da3a83fbc47cb81891 +size 37803066 diff --git a/checkpoint-240/rng_state.pth b/checkpoint-240/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4e56450bc8566b3c22df43aaa2906c40290a0a72 --- /dev/null +++ b/checkpoint-240/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d7a544c50efce91ea881dbca7883174d82806d0c52ce0f3dbd064561545ebd +size 14244 diff --git a/checkpoint-240/scheduler.pt b/checkpoint-240/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf78e6a7f57da6296cfd3be3201b2ca2a66ed0e3 --- /dev/null +++ b/checkpoint-240/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:415a5a4189084652c317b9015c1e61100bae5bf7d254817316514404398a098b +size 1064 diff --git a/checkpoint-240/trainer_state.json b/checkpoint-240/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..957875d7c9835334ae7fc61d8c6cab7aeaa0503c --- /dev/null +++ b/checkpoint-240/trainer_state.json @@ -0,0 +1,285 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.591836734693878, + "eval_steps": 20, + "global_step": 240, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + 
"step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + "grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 6.077287407825715e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-240/training_args.bin b/checkpoint-240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- 
/dev/null +++ b/checkpoint-240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-260/README.md b/checkpoint-260/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-260/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-260/adapter_config.json b/checkpoint-260/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-260/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-260/adapter_model.safetensors b/checkpoint-260/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-260/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-260/optimizer.pt b/checkpoint-260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0db84c98857c3272417d79c3b9528b9723e3593 --- /dev/null +++ b/checkpoint-260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d8c936aaebfe7bea3a6d251d1ed60b6dbcf872e085abdc125d044d3739c49d +size 37803066 diff --git a/checkpoint-260/rng_state.pth b/checkpoint-260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e2019406184aa81b18078bb4783fd135eebc8028 --- /dev/null +++ b/checkpoint-260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b4e2ad2ca44f0600a3ab318a48e88eb803010feee67ee9407e71d2a6053320e +size 14244 diff --git a/checkpoint-260/scheduler.pt b/checkpoint-260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..29529e0a7b1b76e1b634628af5c756ad800e0f75 --- /dev/null +++ b/checkpoint-260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d895b63f41effe497355f0153f97ca5d1a6b3270d0d6f12268f3b54a24e96965 +size 1064 diff --git a/checkpoint-260/trainer_state.json b/checkpoint-260/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..2e9a04341d68f678b3d0a800e4b339f3f2c8f14c --- /dev/null +++ b/checkpoint-260/trainer_state.json @@ -0,0 +1,307 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 21.224489795918366, + "eval_steps": 20, + "global_step": 260, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 6.582471854142259e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-260/training_args.bin b/checkpoint-260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-280/README.md b/checkpoint-280/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-280/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### 
Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-280/adapter_config.json b/checkpoint-280/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-280/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-280/adapter_model.safetensors b/checkpoint-280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-280/optimizer.pt b/checkpoint-280/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-280/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-280/rng_state.pth b/checkpoint-280/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f0400429829fd97946daae6c21c8d25201c4489b --- /dev/null +++ b/checkpoint-280/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347d5792eabf9bd43326209329f1b3601a53b5bdddf0ef3a34ada6d8b1e92668 +size 14244 diff --git a/checkpoint-280/scheduler.pt b/checkpoint-280/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-280/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-280/trainer_state.json b/checkpoint-280/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ce176944640213d2f8a29610d9284aa5d9c5471e --- /dev/null +++ b/checkpoint-280/trainer_state.json @@ -0,0 +1,329 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 22.857142857142858, + "eval_steps": 20, + "global_step": 280, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + 
"eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + "grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + 
"eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 7.091221110059827e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-280/training_args.bin b/checkpoint-280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..efab25fa9cbfe618c376e514400f2a258f8715e5 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84981ca9295d5187672b2531b31e274c3af24f8163b14445136eb862d5aad801 +size 14244 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..107536bdd5a7099ae13a5e873667eb3d6e5804c6 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,351 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 24.489795918367346, + "eval_steps": 20, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 7.595081484238848e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-320/README.md b/checkpoint-320/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-320/README.md @@ -0,0 
+1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-320/adapter_config.json b/checkpoint-320/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-320/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-320/adapter_model.safetensors b/checkpoint-320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-320/optimizer.pt b/checkpoint-320/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-320/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-320/rng_state.pth b/checkpoint-320/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7507837ad1714721b8a0b2883f795fb1e37d7d38 --- /dev/null +++ b/checkpoint-320/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2c0909b48d1922ef3159223360263de274b6f03594f8da274fc6e8faf19769 +size 14244 diff --git a/checkpoint-320/scheduler.pt b/checkpoint-320/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-320/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-320/trainer_state.json b/checkpoint-320/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..c2eeada541a5ca88bece8e3e311674f8bf97899c --- /dev/null +++ b/checkpoint-320/trainer_state.json @@ -0,0 +1,373 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 26.122448979591837, + "eval_steps": 20, + "global_step": 320, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + }, + { + "epoch": 25.31, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 310 + }, + { + "epoch": 26.12, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": NaN, + "eval_runtime": 52.9285, + "eval_samples_per_second": 7.312, + "eval_steps_per_second": 0.926, + "step": 320 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 8.100978892475597e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-320/training_args.bin b/checkpoint-320/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ 
b/checkpoint-320/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-340/README.md b/checkpoint-340/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-340/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-340/adapter_config.json b/checkpoint-340/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-340/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-340/adapter_model.safetensors b/checkpoint-340/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-340/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-340/optimizer.pt b/checkpoint-340/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-340/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-340/rng_state.pth b/checkpoint-340/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1639449d25a7fd8ac6a85fecc1a77d0b0f29e9ca --- /dev/null +++ b/checkpoint-340/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a67c1b5e86d29d6e5aedc017f188aec4924938d02bd54972ff2e6342888a4f +size 14244 diff --git a/checkpoint-340/scheduler.pt b/checkpoint-340/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-340/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-340/trainer_state.json b/checkpoint-340/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..24ceab1311d9110198c12928ab4443fce0230cb6 --- /dev/null +++ b/checkpoint-340/trainer_state.json @@ -0,0 +1,395 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 27.755102040816325, + "eval_steps": 20, + "global_step": 340, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + }, + { + "epoch": 25.31, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 310 + }, + { + "epoch": 26.12, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": NaN, + "eval_runtime": 52.9285, + "eval_samples_per_second": 7.312, + "eval_steps_per_second": 0.926, + "step": 320 + }, + { + "epoch": 26.94, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 330 + }, + { + "epoch": 27.76, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": NaN, + "eval_runtime": 52.861, + "eval_samples_per_second": 7.321, + "eval_steps_per_second": 0.927, + "step": 340 + } + ], + "logging_steps": 10, + "max_steps": 400, + 
"num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 8.609117038175846e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-340/training_args.bin b/checkpoint-340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-360/README.md b/checkpoint-360/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-360/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-360/adapter_config.json b/checkpoint-360/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-360/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-360/adapter_model.safetensors b/checkpoint-360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-360/optimizer.pt b/checkpoint-360/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-360/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-360/rng_state.pth b/checkpoint-360/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5960a709d2b5428f5845e8a91791424bcdf2850a --- /dev/null +++ b/checkpoint-360/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9ed654c9415e99c93bc421376e33d7a7e0089249effa9c42e24c972d0597f47 +size 14244 diff --git a/checkpoint-360/scheduler.pt b/checkpoint-360/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-360/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-360/trainer_state.json b/checkpoint-360/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..5a336059eb1542005697c945e36058d5f1dbae9e --- /dev/null +++ b/checkpoint-360/trainer_state.json @@ -0,0 +1,417 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.387755102040817, + "eval_steps": 20, + "global_step": 360, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + }, + { + "epoch": 25.31, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 310 + }, + { + "epoch": 26.12, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": NaN, + "eval_runtime": 52.9285, + "eval_samples_per_second": 7.312, + "eval_steps_per_second": 0.926, + "step": 320 + }, + { + "epoch": 26.94, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 330 + }, + { + "epoch": 27.76, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": NaN, + "eval_runtime": 52.861, + "eval_samples_per_second": 7.321, + "eval_steps_per_second": 0.927, + "step": 340 + }, + { + "epoch": 28.57, + "grad_norm": NaN, + "learning_rate": 
0.00027299999999999997, + "loss": 0.0, + "step": 350 + }, + { + "epoch": 29.39, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 360 + }, + { + "epoch": 29.39, + "eval_loss": NaN, + "eval_runtime": 52.8275, + "eval_samples_per_second": 7.326, + "eval_steps_per_second": 0.928, + "step": 360 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 9.114505187898163e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-360/training_args.bin b/checkpoint-360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-380/README.md b/checkpoint-380/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-380/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-380/adapter_config.json b/checkpoint-380/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-380/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-380/adapter_model.safetensors b/checkpoint-380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-380/optimizer.pt b/checkpoint-380/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-380/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-380/rng_state.pth b/checkpoint-380/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f64c029745d86178c128ce71f06e7701e2575cc --- /dev/null +++ b/checkpoint-380/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0040a28e95823a97883f8629145d107bac014da68a310598606c72c8c453e0d +size 14244 diff --git a/checkpoint-380/scheduler.pt b/checkpoint-380/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-380/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-380/trainer_state.json b/checkpoint-380/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..3e2ed91823ce529056b87fe6dfb8a2d5be48a8ea --- /dev/null +++ b/checkpoint-380/trainer_state.json @@ -0,0 +1,439 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 31.020408163265305, + "eval_steps": 20, + "global_step": 380, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + }, + { + "epoch": 25.31, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 310 + }, + { + "epoch": 26.12, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": NaN, + "eval_runtime": 52.9285, + "eval_samples_per_second": 7.312, + "eval_steps_per_second": 0.926, + "step": 320 + }, + { + "epoch": 26.94, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 330 + }, + { + "epoch": 27.76, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": NaN, + "eval_runtime": 52.861, + "eval_samples_per_second": 7.321, + "eval_steps_per_second": 0.927, + "step": 340 + }, + { + "epoch": 28.57, + "grad_norm": NaN, + "learning_rate": 
0.00027299999999999997, + "loss": 0.0, + "step": 350 + }, + { + "epoch": 29.39, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 360 + }, + { + "epoch": 29.39, + "eval_loss": NaN, + "eval_runtime": 52.8275, + "eval_samples_per_second": 7.326, + "eval_steps_per_second": 0.928, + "step": 360 + }, + { + "epoch": 30.2, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 370 + }, + { + "epoch": 31.02, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 380 + }, + { + "epoch": 31.02, + "eval_loss": NaN, + "eval_runtime": 52.7782, + "eval_samples_per_second": 7.333, + "eval_steps_per_second": 0.928, + "step": 380 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 9.618365562077184e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-380/training_args.bin b/checkpoint-380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-40/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
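This repository ships only a LoRA adapter (PEFT 0.8.2, r=16, lora_alpha=32, dropout 0.05, targeting the fused `query_key_value` projection of `tiiuae/falcon-7b`), so getting started means loading the base model first and attaching the adapter on top. The following is a minimal sketch, not a documented recipe from this repo: the adapter directory `./checkpoint-380`, the bfloat16 precision, and the generation settings are all illustrative assumptions.

```python
# Sketch: load falcon-7b and attach this LoRA adapter with PEFT.
# The adapter path and generation settings below are assumptions, not repo-documented values.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "tiiuae/falcon-7b"
adapter_dir = "./checkpoint-380"  # any checkpoint directory containing adapter_config.json

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id,
    torch_dtype=torch.bfloat16,   # assumption: half precision to fit the 7B model on one GPU
    device_map="auto",            # older transformers versions may also need trust_remote_code=True
)

# Wrap the base model with the saved LoRA weights (query_key_value, r=16, alpha=32).
model = PeftModel.from_pretrained(base_model, adapter_dir)
model.eval()

prompt = "Falcon is"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```

For deployment, the adapter could optionally be folded into the base weights with `model = model.merge_and_unload()`, which removes the LoRA indirection at inference time.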
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-40/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-40/adapter_model.safetensors b/checkpoint-40/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-40/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..04d1c044b5cae3c57ee63f860832f8f7135ff1de --- /dev/null +++ b/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fc1ce9e737f6eb2492effc3f8bd1e5b1ede83b687876c18d19abdc07223a059 +size 37803066 diff --git 
a/checkpoint-40/rng_state.pth b/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a36461c010d695c9b49ddfee73e3da2cb7a7d4ef --- /dev/null +++ b/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d809d9991831c5cb9397cb0f02daa61ec7d67baa05fe74d2d00348d9bd993ae5 +size 14244 diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7134b6d9efcd50a1d2b2482a9d3881f5a16f7a6 --- /dev/null +++ b/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2e54836d3914918341c2962ce3c135a265c36faa44b54b23fe18e6f17f61e97 +size 1064 diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e4e38e0d9d4687713d981f77473c5b84f92a88bc --- /dev/null +++ b/checkpoint-40/trainer_state.json @@ -0,0 +1,65 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.2653061224489797, + "eval_steps": 20, + "global_step": 40, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 1.0133225920167936e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-400/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] 
+- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-400/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null 
+++ b/checkpoint-400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..655e614ac66b4ee2f350c7a464130c0b63480ad7 --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91a902094d22903dd53cb590eb65132e652bbbcf064e2753e482f1a209747e0e +size 14244 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5c727aa03a7172b45e2dcfc75576a169bf10900d --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,461 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 32.6530612244898, + "eval_steps": 20, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 
0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + "grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 
166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + }, + { + "epoch": 25.31, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 310 + }, + { + "epoch": 26.12, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": NaN, + "eval_runtime": 52.9285, + "eval_samples_per_second": 7.312, + "eval_steps_per_second": 0.926, + "step": 320 + }, + { + "epoch": 26.94, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 330 + }, + { + "epoch": 27.76, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": NaN, + "eval_runtime": 52.861, + "eval_samples_per_second": 7.321, + "eval_steps_per_second": 0.927, + "step": 340 + }, + { + "epoch": 28.57, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 350 + }, + { + "epoch": 29.39, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 360 + }, + { + "epoch": 29.39, + "eval_loss": NaN, + "eval_runtime": 52.8275, + "eval_samples_per_second": 7.326, + "eval_steps_per_second": 0.928, + "step": 360 + }, + { + "epoch": 30.2, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 370 + }, + { + "epoch": 31.02, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 380 + }, + { + "epoch": 31.02, + "eval_loss": NaN, + "eval_runtime": 52.7782, + "eval_samples_per_second": 7.333, + "eval_steps_per_second": 0.928, + "step": 380 + }, + { + "epoch": 31.84, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 390 + }, + { + "epoch": 32.65, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 400 + }, + { + "epoch": 32.65, + "eval_loss": NaN, + "eval_runtime": 52.8047, + "eval_samples_per_second": 7.329, + "eval_steps_per_second": 0.928, + "step": 400 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 1.012772592821207e+18, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-60/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model 
Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-60/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-60/adapter_model.safetensors b/checkpoint-60/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-60/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e224efa9cc2dc4484a3620ddcdc18d7ec1186846 --- /dev/null +++ b/checkpoint-60/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ada8633ef7c86b2adf3be6188403cdab4575c2607d102c13741b2b70251a4321 +size 37803066 diff --git a/checkpoint-60/rng_state.pth b/checkpoint-60/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a05638b0536ba7729dd3ffc025dc2802bfc68f3c --- /dev/null +++ b/checkpoint-60/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d86ca60a2accd028382a0334d38ca2215e70b45ea221f54ee0044467d365ce4 +size 14244 diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb705e8c5e90434d227198a8319ba1148a4dc212 --- /dev/null +++ b/checkpoint-60/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a04cbdf6466f68f256f2d4627a11d643556e5de365ab5de0f8cd920d5015bec +size 1064 diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b11216c2480d87e24b38d6bdfcb8bef1401d9787 --- 
/dev/null +++ b/checkpoint-60/trainer_state.json @@ -0,0 +1,87 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.8979591836734695, + "eval_steps": 20, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 1.5222755513401344e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-80/README.md b/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-80/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
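Every checkpoint directory carries an identical `adapter_config.json`. For reference, the same adapter shape could be declared in code when setting up a fresh fine-tuning run; the sketch below uses only the values actually present in those config files and assumes peft 0.8.x is installed.

```python
# Sketch: the LoRA setup recorded in adapter_config.json, expressed with peft.
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16,                                # rank "r" in adapter_config.json
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["query_key_value"],  # fused QKV projection in falcon-7b
    task_type="CAUSAL_LM",
)

# Precision / quantization choices would be configured here for a real run.
base_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-7b")
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()  # should report only the LoRA matrices as trainable
```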
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-80/adapter_config.json b/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-80/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-80/adapter_model.safetensors b/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-80/optimizer.pt b/checkpoint-80/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4735ae95aeefa27162c0b7368a9a9bf627f4b33 --- /dev/null +++ b/checkpoint-80/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9ec077894d123beb92117ce9810d801c8664f9ca8f0a13e8e54eca7955cb8937 +size 37803066 diff --git a/checkpoint-80/rng_state.pth b/checkpoint-80/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d4e8654f7377c989ad1cbe76e771ca6949ab8c9 --- /dev/null +++ b/checkpoint-80/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79810d05db487bc3a5c6b937f97b30085310c874ce60128ad0a7f81421eae126 +size 14244 diff --git a/checkpoint-80/scheduler.pt b/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b854514c47d3b1ee29f7f462ccaa6cc4209dd364 --- /dev/null +++ b/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ac1536a084330b7fd77c5f2e7beba8797b26586ff6389dba220facbfe4604d +size 1064 diff --git a/checkpoint-80/trainer_state.json b/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..52273d7e9945f053cbb7d6b4e988627258e704b5 --- /dev/null +++ b/checkpoint-80/trainer_state.json @@ -0,0 +1,109 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.530612244897959, + "eval_steps": 20, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 2.0272562942509056e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-80/training_args.bin b/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git 
a/runs/Feb21_07-51-56_nq0jxhxas9/events.out.tfevents.1708501919.nq0jxhxas9.728.0 b/runs/Feb21_07-51-56_nq0jxhxas9/events.out.tfevents.1708501919.nq0jxhxas9.728.0 new file mode 100644 index 0000000000000000000000000000000000000000..8b71cf219befa66380fd471bafd4c407a69a71ab --- /dev/null +++ b/runs/Feb21_07-51-56_nq0jxhxas9/events.out.tfevents.1708501919.nq0jxhxas9.728.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f7dadf7fc0c97d04129b450fdb4d4f790dbc9b9e7d48a9706c527a4a70f3721 +size 19676
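The `trainer_state.json` files above tell the same story at every checkpoint: `eval_loss` is NaN from the first evaluation at step 20, `grad_norm` is either 0.0 or NaN from step 30 onward, and after the spike to 166.52 at step 280 the training loss collapses to 0.0 while the logged learning rate stays frozen at 0.000273, so the recorded run appears to have diverged rather than converged. A small diagnostic sketch (the checkpoint path is an assumption) that scans one of these logs for those symptoms; note that Python's `json` module parses the bare `NaN` tokens in these files by default.

```python
# Sketch: scan a trainer_state.json log_history for NaN / collapsed-loss entries.
import json
import math

with open("checkpoint-400/trainer_state.json") as f:  # assumed local path
    state = json.load(f)  # json accepts the bare NaN literals used in these files

for entry in state["log_history"]:
    step = entry["step"]
    if "eval_loss" in entry and math.isnan(entry["eval_loss"]):
        print(f"step {step}: eval_loss is NaN")
    if math.isnan(entry.get("grad_norm", 0.0)):
        print(f"step {step}: grad_norm is NaN")
    if entry.get("loss") == 0.0:
        print(f"step {step}: training loss collapsed to 0.0")
```

The TensorBoard event file under `runs/` records the same metrics and, if TensorBoard is installed, can be browsed with `tensorboard --logdir runs`.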