diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
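As a rough, hedged illustration of what that estimate involves (hardware power draw × hours used × data-center PUE × grid carbon intensity), the sketch below uses placeholder figures only; none of the numbers are measured values for this training run.

```python
# Back-of-the-envelope CO2e estimate in the spirit of Lacoste et al. (2019):
# energy (kWh) = power draw (kW) x hours x PUE; emissions = energy x grid carbon intensity.
# Every number below is a placeholder, not a measured value for this run.
gpu_power_kw = 0.30          # assumed draw of one data-center GPU (~300 W)
hours_used = 10.0            # assumed training duration
pue = 1.1                    # assumed data-center power usage effectiveness
grid_kg_co2e_per_kwh = 0.4   # assumed carbon intensity of the compute region

energy_kwh = gpu_power_kw * hours_used * pue
emissions_kg = energy_kwh * grid_kg_co2e_per_kwh
print(f"~{energy_kwh:.1f} kWh, ~{emissions_kg:.2f} kg CO2e (placeholder inputs)")
```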
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4e15345d3d46641d2697b0e61529562fc393893 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:89546f1cfae05459838f8ecf9be2a4c4d6a451ed955044b1776f32db6abb292e +size 37803066 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c99cab23f21fab34aee46f385baed8969c88accd --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6658bb803792a8187620edfb12529ca0e583a5f5c3f4c2680804c9b8a8152cf +size 14244 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fe7e19aeb8fdf8353b8f45d5fc26d99589b03c2 --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5b5a8b0c711f0f2e32411bb3a51d421097b304182a799a979fcef82fd40741 +size 1064 diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3b173de37f6772e8e7aa3d81f609606532775b40 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,131 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.16326530612245, + "eval_steps": 20, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 2.5324407405674496e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git 
a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-120/README.md b/checkpoint-120/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-120/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-120/adapter_config.json b/checkpoint-120/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-120/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-120/adapter_model.safetensors b/checkpoint-120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-120/optimizer.pt b/checkpoint-120/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..66e58ed6fc075ba82a549db45faa2c9617893cf9 --- /dev/null +++ b/checkpoint-120/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fdf10e8bd7abae1cbe41e713657356fab124df9737617b40e6dbf8a20a97e6c +size 37803066 diff --git a/checkpoint-120/rng_state.pth b/checkpoint-120/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4f27cb6388e85e739bb7084f46d08ad03a158ac7 --- /dev/null +++ b/checkpoint-120/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3201b9328a8797a342d60e68ac1db3faf1e570a609d56a74a19418179c1013 +size 14244 diff --git a/checkpoint-120/scheduler.pt b/checkpoint-120/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d121de4f810bae45514079b2bc96c14a1d817a52 --- /dev/null +++ b/checkpoint-120/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b0293f0ef471ab336f010a06133785c4b0a8abde856102589019680f63c506b +size 1064 diff --git a/checkpoint-120/trainer_state.json b/checkpoint-120/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..5714af71b66de2e73b759de8ac44428dbc701a66 --- /dev/null +++ b/checkpoint-120/trainer_state.json @@ -0,0 +1,153 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.795918367346939, + "eval_steps": 20, + "global_step": 120, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 3.040578886267699e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-120/training_args.bin b/checkpoint-120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-140/README.md b/checkpoint-140/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null 
+++ b/checkpoint-140/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-140/adapter_config.json b/checkpoint-140/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-140/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-140/adapter_model.safetensors b/checkpoint-140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-140/optimizer.pt b/checkpoint-140/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0c9cdd5638e754159a6a7273388358dcc051911 --- /dev/null +++ b/checkpoint-140/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2507b3e5648f990de5bba05fe789ffa15356143e24e38c1084ce8c0970886314 +size 37803066 diff --git a/checkpoint-140/rng_state.pth b/checkpoint-140/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f405ec5b691b268e9f21cb3639c40fa286e223cb --- /dev/null +++ b/checkpoint-140/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d46991217c6d21a6618c7cd7c5d36c9b48595eb1380dec7899ffa770f517389 +size 14244 diff --git a/checkpoint-140/scheduler.pt b/checkpoint-140/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..032f42b08d956b20f4e55374f4815780f8b60f43 --- /dev/null +++ b/checkpoint-140/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf42dc04fd68d790bde68418c2c0cdd87da27fb120bf5bde1029d4bdb19665c +size 1064 diff --git a/checkpoint-140/trainer_state.json b/checkpoint-140/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..ef329f9e82e7c27681794d47244c0bb1a32ecf3b --- /dev/null +++ b/checkpoint-140/trainer_state.json @@ -0,0 +1,175 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.428571428571429, + "eval_steps": 20, + "global_step": 140, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 3.5458651842871296e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-140/training_args.bin b/checkpoint-140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- 
/dev/null +++ b/checkpoint-140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-160/README.md b/checkpoint-160/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-160/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-160/adapter_config.json b/checkpoint-160/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-160/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-160/adapter_model.safetensors b/checkpoint-160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-160/optimizer.pt b/checkpoint-160/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1618d75d50c79e40774cdab37dda941082f5cb78 --- /dev/null +++ b/checkpoint-160/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:890f55baf5e8c007117520504265367020a0a770a43606d7b93aa2b1f05cbfe4 +size 37803066 diff --git a/checkpoint-160/rng_state.pth b/checkpoint-160/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..44ce9809705a29adb4f2079d45a3ea5694d026a5 --- /dev/null +++ b/checkpoint-160/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c838e8cb08fdf3fa98e1eae71b7e8c486cc8c28282e66eaab882393e9d12c9ab +size 14244 diff --git a/checkpoint-160/scheduler.pt b/checkpoint-160/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1d51c8b3568f36e493dd517e930862f7dbdbce9 --- /dev/null +++ b/checkpoint-160/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b111cd45660eb1face1bcfa06f33e48dc20123917f55da0a1daffb00d391c810 +size 1064 diff --git a/checkpoint-160/trainer_state.json b/checkpoint-160/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..f5ce2de1eaf533fc36aacbbfd110c1dd20aa7582 --- /dev/null +++ b/checkpoint-160/trainer_state.json @@ -0,0 +1,197 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 13.061224489795919, + "eval_steps": 20, + "global_step": 160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + } + ], + "logging_steps": 10, + 
"max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 4.049929261871923e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-160/training_args.bin b/checkpoint-160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-180/README.md b/checkpoint-180/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-180/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-180/adapter_config.json b/checkpoint-180/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-180/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-180/adapter_model.safetensors b/checkpoint-180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-180/optimizer.pt b/checkpoint-180/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b09acde0f147803104931c41606de77fdde4553e --- /dev/null +++ b/checkpoint-180/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44a8aee4a9aebded4b15dca69d9b93da6b2cdd72e835b737c908fac001356e9 +size 37803066 diff --git a/checkpoint-180/rng_state.pth b/checkpoint-180/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2d32e810e43d8a76c7fd60bbbf468bfa941b6d39 --- /dev/null +++ b/checkpoint-180/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a617908633b9e4096bf3a04ee0c0565bf03438029712e268d2687e943ecd9747 +size 14244 diff --git a/checkpoint-180/scheduler.pt b/checkpoint-180/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ff63043aacccc559878882da165da5439d5efc0 --- /dev/null +++ b/checkpoint-180/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e2940a6b6f7aaca29b769bc58976a81b780ffd50504916097bd9d1c2b94c46 +size 1064 diff --git a/checkpoint-180/trainer_state.json b/checkpoint-180/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..afafe947b21657aa8e729eb21232e45e163165cb --- /dev/null +++ b/checkpoint-180/trainer_state.json @@ -0,0 +1,219 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 14.693877551020408, + "eval_steps": 20, + "global_step": 180, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 4.5597988865212416e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-180/training_args.bin b/checkpoint-180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-20/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-20/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-20/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a45a309e68eeef6107130cf121fa0135f6cd7f7a --- /dev/null +++ b/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c802c2cd6845072a759a7e0126fde304b27888c6bdb51b6b63a6ecfd11d175 +size 37803066 diff --git a/checkpoint-20/rng_state.pth b/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5702939a2838347c62e18bc55f52dd67e96a2904 --- /dev/null +++ b/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04cbff5311c7f9daf555dae1bb3c44151172e3d399ce87d0ce1da5a72fef680f +size 14244 diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..96ed9d6ad9bc0f128428b67ada791af6c178def7 --- /dev/null +++ b/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a50562abe9aa2ac417f3ee985d4bb2326b0bd2d9e6a5e360bc96423fce4c04d +size 1064 diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..018ce092c06a6eb54ff3f963e706301f90f2bbf7 --- 
/dev/null +++ b/checkpoint-20/trainer_state.json @@ -0,0 +1,43 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6326530612244898, + "eval_steps": 20, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 5.088511076204544e+16, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
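
The card above says "Use the code below" but provides no code, so here is a minimal, hedged sketch of how an adapter with this configuration could be loaded for inference. It assumes the LoRA adapter in this repository is loadable with PEFT (the `adapter_config.json` records PEFT-style LoRA on `tiiuae/falcon-7b` with `r=16`, `lora_alpha=32`, and target module `query_key_value`); the `adapter_path` below is a placeholder, not a confirmed repo id.

```python
# Hedged sketch: load the Falcon-7B base model and attach this LoRA adapter.
# "adapter_path" is a placeholder -- point it at this repository (local clone or Hub id).
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_id = "tiiuae/falcon-7b"     # base_model_name_or_path in adapter_config.json
adapter_path = "path/to/this-adapter"  # placeholder, not a confirmed repo id

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
base_model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")

# PeftModel.from_pretrained reads adapter_config.json (LoRA, r=16, lora_alpha=32,
# target module "query_key_value") and injects the adapter weights into the base model.
model = PeftModel.from_pretrained(base_model, adapter_path)
model.eval()

prompt = "Question: What does this adapter do?\nAnswer:"
inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

Before relying on this sketch, note what the checkpoints in this diff actually record: every `trainer_state.json` logs `eval_loss: NaN` (with `grad_norm` alternating between 0.0 and NaN), and the `adapter_model.safetensors` LFS pointers are only 48 bytes, so the adapter weights here may be placeholders from a diverged training run and should be verified first.
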
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec8cd0d1f947cff180f9bab66009fc3f18e548dd --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a5144d02de89f00a5ed53255221386d1574471c11b8525ececbfc8b11f67f25 +size 37803066 diff --git 
a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..23bfd5c6e87af8aab83bb4bc084f75b74bbe20e4 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:206992b4f04912343983da7b73ed83acbf14a47c43a14edee824120e0110ade8 +size 14244 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d56425b8efd27463fbd2d66bbf70b8c1d042b0f --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22379c268357b5568e48a9783f66f337f6ef8df705cc8deac592493ee705cfa9 +size 1064 diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3793823123219832e1d12e6193a64087d4e69b7e --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,241 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 16.3265306122449, + "eval_steps": 20, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + 
"eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + "grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 5.064066667511808e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-220/README.md b/checkpoint-220/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-220/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and 
limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-220/adapter_config.json b/checkpoint-220/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-220/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-220/adapter_model.safetensors b/checkpoint-220/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-220/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-220/optimizer.pt b/checkpoint-220/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..96c5c1f0666160c420e6c1d42a8552c7c9fa4bfc --- /dev/null +++ 
b/checkpoint-220/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d306a3845568e04d8df59750c9406e1958af777a44de5b6185535ee6fd453a3 +size 37803066 diff --git a/checkpoint-220/rng_state.pth b/checkpoint-220/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..763b29ade190b3a4110860c5b85e0ed8c47698a7 --- /dev/null +++ b/checkpoint-220/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b2948ea62585519f5708d1cda89ceec7dde3fef4b1fef2a8eb381601d6dac58 +size 14244 diff --git a/checkpoint-220/scheduler.pt b/checkpoint-220/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..55a7e80c1ed8420abbe6567923f51375f6dde10f --- /dev/null +++ b/checkpoint-220/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64f0d454a159d3ccd6e17566dd964acecdffd5b3b98622ab433d7ecbd307825 +size 1064 diff --git a/checkpoint-220/trainer_state.json b/checkpoint-220/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..58be1ee9bd788ab190bbae8a7068bc7db49d3c87 --- /dev/null +++ b/checkpoint-220/trainer_state.json @@ -0,0 +1,263 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 17.959183673469386, + "eval_steps": 20, + "global_step": 220, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + 
"learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + "grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 5.5727140717264896e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-220/training_args.bin b/checkpoint-220/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-220/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-240/README.md b/checkpoint-240/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-240/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information 
Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-240/adapter_config.json b/checkpoint-240/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-240/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-240/adapter_model.safetensors 
b/checkpoint-240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-240/optimizer.pt b/checkpoint-240/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9330c02a12e4617b2050c019eee9e4dfe7772b9 --- /dev/null +++ b/checkpoint-240/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:878097da1a566e084b88ca358b49b69ebb9a593ebaf880da3a83fbc47cb81891 +size 37803066 diff --git a/checkpoint-240/rng_state.pth b/checkpoint-240/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4e56450bc8566b3c22df43aaa2906c40290a0a72 --- /dev/null +++ b/checkpoint-240/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d7a544c50efce91ea881dbca7883174d82806d0c52ce0f3dbd064561545ebd +size 14244 diff --git a/checkpoint-240/scheduler.pt b/checkpoint-240/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf78e6a7f57da6296cfd3be3201b2ca2a66ed0e3 --- /dev/null +++ b/checkpoint-240/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:415a5a4189084652c317b9015c1e61100bae5bf7d254817316514404398a098b +size 1064 diff --git a/checkpoint-240/trainer_state.json b/checkpoint-240/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..957875d7c9835334ae7fc61d8c6cab7aeaa0503c --- /dev/null +++ b/checkpoint-240/trainer_state.json @@ -0,0 +1,285 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.591836734693878, + "eval_steps": 20, + "global_step": 240, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + 
"step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + "grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 6.077287407825715e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-240/training_args.bin b/checkpoint-240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- 
/dev/null +++ b/checkpoint-240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-260/README.md b/checkpoint-260/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-260/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-260/adapter_config.json b/checkpoint-260/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-260/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-260/adapter_model.safetensors b/checkpoint-260/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-260/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-260/optimizer.pt b/checkpoint-260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0db84c98857c3272417d79c3b9528b9723e3593 --- /dev/null +++ b/checkpoint-260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d8c936aaebfe7bea3a6d251d1ed60b6dbcf872e085abdc125d044d3739c49d +size 37803066 diff --git a/checkpoint-260/rng_state.pth b/checkpoint-260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e2019406184aa81b18078bb4783fd135eebc8028 --- /dev/null +++ b/checkpoint-260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b4e2ad2ca44f0600a3ab318a48e88eb803010feee67ee9407e71d2a6053320e +size 14244 diff --git a/checkpoint-260/scheduler.pt b/checkpoint-260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..29529e0a7b1b76e1b634628af5c756ad800e0f75 --- /dev/null +++ b/checkpoint-260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d895b63f41effe497355f0153f97ca5d1a6b3270d0d6f12268f3b54a24e96965 +size 1064 diff --git a/checkpoint-260/trainer_state.json b/checkpoint-260/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..2e9a04341d68f678b3d0a800e4b339f3f2c8f14c --- /dev/null +++ b/checkpoint-260/trainer_state.json @@ -0,0 +1,307 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 21.224489795918366, + "eval_steps": 20, + "global_step": 260, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 6.582471854142259e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-260/training_args.bin b/checkpoint-260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-280/README.md b/checkpoint-280/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-280/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### 
Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-280/adapter_config.json b/checkpoint-280/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-280/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-280/adapter_model.safetensors b/checkpoint-280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-280/optimizer.pt b/checkpoint-280/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-280/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-280/rng_state.pth b/checkpoint-280/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f0400429829fd97946daae6c21c8d25201c4489b --- /dev/null +++ b/checkpoint-280/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347d5792eabf9bd43326209329f1b3601a53b5bdddf0ef3a34ada6d8b1e92668 +size 14244 diff --git a/checkpoint-280/scheduler.pt b/checkpoint-280/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-280/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-280/trainer_state.json b/checkpoint-280/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ce176944640213d2f8a29610d9284aa5d9c5471e --- /dev/null +++ b/checkpoint-280/trainer_state.json @@ -0,0 +1,329 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 22.857142857142858, + "eval_steps": 20, + "global_step": 280, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + 
"eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + "grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + 
"eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 7.091221110059827e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-280/training_args.bin b/checkpoint-280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..efab25fa9cbfe618c376e514400f2a258f8715e5 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84981ca9295d5187672b2531b31e274c3af24f8163b14445136eb862d5aad801 +size 14244 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..107536bdd5a7099ae13a5e873667eb3d6e5804c6 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,351 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 24.489795918367346, + "eval_steps": 20, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 7.595081484238848e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-320/README.md b/checkpoint-320/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-320/README.md @@ -0,0 
+1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-320/adapter_config.json b/checkpoint-320/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-320/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-320/adapter_model.safetensors b/checkpoint-320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-320/optimizer.pt b/checkpoint-320/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-320/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-320/rng_state.pth b/checkpoint-320/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7507837ad1714721b8a0b2883f795fb1e37d7d38 --- /dev/null +++ b/checkpoint-320/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2c0909b48d1922ef3159223360263de274b6f03594f8da274fc6e8faf19769 +size 14244 diff --git a/checkpoint-320/scheduler.pt b/checkpoint-320/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-320/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-320/trainer_state.json b/checkpoint-320/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..c2eeada541a5ca88bece8e3e311674f8bf97899c --- /dev/null +++ b/checkpoint-320/trainer_state.json @@ -0,0 +1,373 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 26.122448979591837, + "eval_steps": 20, + "global_step": 320, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + }, + { + "epoch": 25.31, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 310 + }, + { + "epoch": 26.12, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": NaN, + "eval_runtime": 52.9285, + "eval_samples_per_second": 7.312, + "eval_steps_per_second": 0.926, + "step": 320 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 8.100978892475597e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-320/training_args.bin b/checkpoint-320/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ 
b/checkpoint-320/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-340/README.md b/checkpoint-340/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-340/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-340/adapter_config.json b/checkpoint-340/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-340/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-340/adapter_model.safetensors b/checkpoint-340/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-340/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-340/optimizer.pt b/checkpoint-340/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-340/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-340/rng_state.pth b/checkpoint-340/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1639449d25a7fd8ac6a85fecc1a77d0b0f29e9ca --- /dev/null +++ b/checkpoint-340/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a67c1b5e86d29d6e5aedc017f188aec4924938d02bd54972ff2e6342888a4f +size 14244 diff --git a/checkpoint-340/scheduler.pt b/checkpoint-340/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-340/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-340/trainer_state.json b/checkpoint-340/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..24ceab1311d9110198c12928ab4443fce0230cb6 --- /dev/null +++ b/checkpoint-340/trainer_state.json @@ -0,0 +1,395 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 27.755102040816325, + "eval_steps": 20, + "global_step": 340, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + }, + { + "epoch": 25.31, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 310 + }, + { + "epoch": 26.12, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": NaN, + "eval_runtime": 52.9285, + "eval_samples_per_second": 7.312, + "eval_steps_per_second": 0.926, + "step": 320 + }, + { + "epoch": 26.94, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 330 + }, + { + "epoch": 27.76, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": NaN, + "eval_runtime": 52.861, + "eval_samples_per_second": 7.321, + "eval_steps_per_second": 0.927, + "step": 340 + } + ], + "logging_steps": 10, + "max_steps": 400, + 
"num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 8.609117038175846e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-340/training_args.bin b/checkpoint-340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-360/README.md b/checkpoint-360/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-360/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-360/adapter_config.json b/checkpoint-360/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-360/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-360/adapter_model.safetensors b/checkpoint-360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-360/optimizer.pt b/checkpoint-360/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-360/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-360/rng_state.pth b/checkpoint-360/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5960a709d2b5428f5845e8a91791424bcdf2850a --- /dev/null +++ b/checkpoint-360/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9ed654c9415e99c93bc421376e33d7a7e0089249effa9c42e24c972d0597f47 +size 14244 diff --git a/checkpoint-360/scheduler.pt b/checkpoint-360/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-360/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-360/trainer_state.json b/checkpoint-360/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..5a336059eb1542005697c945e36058d5f1dbae9e --- /dev/null +++ b/checkpoint-360/trainer_state.json @@ -0,0 +1,417 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.387755102040817, + "eval_steps": 20, + "global_step": 360, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + }, + { + "epoch": 25.31, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 310 + }, + { + "epoch": 26.12, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": NaN, + "eval_runtime": 52.9285, + "eval_samples_per_second": 7.312, + "eval_steps_per_second": 0.926, + "step": 320 + }, + { + "epoch": 26.94, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 330 + }, + { + "epoch": 27.76, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": NaN, + "eval_runtime": 52.861, + "eval_samples_per_second": 7.321, + "eval_steps_per_second": 0.927, + "step": 340 + }, + { + "epoch": 28.57, + "grad_norm": NaN, + "learning_rate": 
0.00027299999999999997, + "loss": 0.0, + "step": 350 + }, + { + "epoch": 29.39, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 360 + }, + { + "epoch": 29.39, + "eval_loss": NaN, + "eval_runtime": 52.8275, + "eval_samples_per_second": 7.326, + "eval_steps_per_second": 0.928, + "step": 360 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 9.114505187898163e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-360/training_args.bin b/checkpoint-360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-380/README.md b/checkpoint-380/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-380/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-380/adapter_config.json b/checkpoint-380/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-380/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-380/adapter_model.safetensors b/checkpoint-380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-380/optimizer.pt b/checkpoint-380/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-380/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-380/rng_state.pth b/checkpoint-380/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f64c029745d86178c128ce71f06e7701e2575cc --- /dev/null +++ b/checkpoint-380/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0040a28e95823a97883f8629145d107bac014da68a310598606c72c8c453e0d +size 14244 diff --git a/checkpoint-380/scheduler.pt b/checkpoint-380/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-380/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-380/trainer_state.json b/checkpoint-380/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..3e2ed91823ce529056b87fe6dfb8a2d5be48a8ea --- /dev/null +++ b/checkpoint-380/trainer_state.json @@ -0,0 +1,439 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 31.020408163265305, + "eval_steps": 20, + "global_step": 380, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + 
"grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + }, + { + "epoch": 25.31, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 310 + }, + { + "epoch": 26.12, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": NaN, + "eval_runtime": 52.9285, + "eval_samples_per_second": 7.312, + "eval_steps_per_second": 0.926, + "step": 320 + }, + { + "epoch": 26.94, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 330 + }, + { + "epoch": 27.76, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": NaN, + "eval_runtime": 52.861, + "eval_samples_per_second": 7.321, + "eval_steps_per_second": 0.927, + "step": 340 + }, + { + "epoch": 28.57, + "grad_norm": NaN, + "learning_rate": 
0.00027299999999999997, + "loss": 0.0, + "step": 350 + }, + { + "epoch": 29.39, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 360 + }, + { + "epoch": 29.39, + "eval_loss": NaN, + "eval_runtime": 52.8275, + "eval_samples_per_second": 7.326, + "eval_steps_per_second": 0.928, + "step": 360 + }, + { + "epoch": 30.2, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 370 + }, + { + "epoch": 31.02, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 380 + }, + { + "epoch": 31.02, + "eval_loss": NaN, + "eval_runtime": 52.7782, + "eval_samples_per_second": 7.333, + "eval_steps_per_second": 0.928, + "step": 380 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 9.618365562077184e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-380/training_args.bin b/checkpoint-380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-40/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
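This repository ships only a LoRA adapter (PEFT 0.8.2, r=16, lora_alpha=32, dropout 0.05, targeting the fused `query_key_value` projection of `tiiuae/falcon-7b`), so getting started means loading the base model first and attaching the adapter on top. The following is a minimal sketch, not a documented recipe from this repo: the adapter directory `./checkpoint-380`, the bfloat16 precision, and the generation settings are all illustrative assumptions.

```python
# Sketch: load falcon-7b and attach this LoRA adapter with PEFT.
# The adapter path and generation settings below are assumptions, not repo-documented values.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "tiiuae/falcon-7b"
adapter_dir = "./checkpoint-380"  # any checkpoint directory containing adapter_config.json

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id,
    torch_dtype=torch.bfloat16,   # assumption: half precision to fit the 7B model on one GPU
    device_map="auto",            # older transformers versions may also need trust_remote_code=True
)

# Wrap the base model with the saved LoRA weights (query_key_value, r=16, alpha=32).
model = PeftModel.from_pretrained(base_model, adapter_dir)
model.eval()

prompt = "Falcon is"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```

For deployment, the adapter could optionally be folded into the base weights with `model = model.merge_and_unload()`, which removes the LoRA indirection at inference time.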
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-40/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-40/adapter_model.safetensors b/checkpoint-40/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-40/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..04d1c044b5cae3c57ee63f860832f8f7135ff1de --- /dev/null +++ b/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fc1ce9e737f6eb2492effc3f8bd1e5b1ede83b687876c18d19abdc07223a059 +size 37803066 diff --git 
a/checkpoint-40/rng_state.pth b/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a36461c010d695c9b49ddfee73e3da2cb7a7d4ef --- /dev/null +++ b/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d809d9991831c5cb9397cb0f02daa61ec7d67baa05fe74d2d00348d9bd993ae5 +size 14244 diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7134b6d9efcd50a1d2b2482a9d3881f5a16f7a6 --- /dev/null +++ b/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2e54836d3914918341c2962ce3c135a265c36faa44b54b23fe18e6f17f61e97 +size 1064 diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e4e38e0d9d4687713d981f77473c5b84f92a88bc --- /dev/null +++ b/checkpoint-40/trainer_state.json @@ -0,0 +1,65 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.2653061224489797, + "eval_steps": 20, + "global_step": 40, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 1.0133225920167936e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-400/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] 
+- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-400/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null 
+++ b/checkpoint-400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf351b54d0a3ba259752f00de0e29d3567aba818 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386540f5deed97f6013adfaf9a218226767bdedf44653d9e173c2d91e0e0d3b +size 37803066 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..655e614ac66b4ee2f350c7a464130c0b63480ad7 --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91a902094d22903dd53cb590eb65132e652bbbcf064e2753e482f1a209747e0e +size 14244 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e202b92de9c575940cb790284cb99887813847 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0124ea87bd9105ebf36fcf80ad5bfb73521162506c1387b3cad1f7fd8427f +size 1064 diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5c727aa03a7172b45e2dcfc75576a169bf10900d --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,461 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 32.6530612244898, + "eval_steps": 20, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + }, + { + "epoch": 7.35, + "grad_norm": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 4.1684, + "step": 90 + }, + { + "epoch": 8.16, + "grad_norm": 
0.0, + "learning_rate": 0.00014399999999999998, + "loss": 4.3018, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": NaN, + "eval_runtime": 53.6674, + "eval_samples_per_second": 7.211, + "eval_steps_per_second": 0.913, + "step": 100 + }, + { + "epoch": 8.98, + "grad_norm": NaN, + "learning_rate": 0.000162, + "loss": 4.8328, + "step": 110 + }, + { + "epoch": 9.8, + "grad_norm": NaN, + "learning_rate": 0.00017099999999999998, + "loss": 3.8634, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": NaN, + "eval_runtime": 53.8298, + "eval_samples_per_second": 7.189, + "eval_steps_per_second": 0.91, + "step": 120 + }, + { + "epoch": 10.61, + "grad_norm": 0.0, + "learning_rate": 0.00018299999999999998, + "loss": 4.8214, + "step": 130 + }, + { + "epoch": 11.43, + "grad_norm": 0.0, + "learning_rate": 0.000195, + "loss": 5.3188, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": NaN, + "eval_runtime": 53.6779, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 140 + }, + { + "epoch": 12.24, + "grad_norm": NaN, + "learning_rate": 0.00020999999999999998, + "loss": 3.932, + "step": 150 + }, + { + "epoch": 13.06, + "grad_norm": NaN, + "learning_rate": 0.00022799999999999999, + "loss": 4.5087, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": NaN, + "eval_runtime": 53.7126, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 160 + }, + { + "epoch": 13.88, + "grad_norm": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 4.1573, + "step": 170 + }, + { + "epoch": 14.69, + "grad_norm": NaN, + "learning_rate": 0.000249, + "loss": 3.9824, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": NaN, + "eval_runtime": 53.7148, + "eval_samples_per_second": 7.205, + "eval_steps_per_second": 0.912, + "step": 180 + }, + { + "epoch": 15.51, + "grad_norm": NaN, + "learning_rate": 0.000261, + "loss": 3.9201, + "step": 190 + }, + { + "epoch": 16.33, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 3.9784, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": NaN, + "eval_runtime": 53.6797, + "eval_samples_per_second": 7.209, + "eval_steps_per_second": 0.913, + "step": 200 + }, + { + "epoch": 17.14, + "grad_norm": 0.0, + "learning_rate": 0.0003, + "loss": 4.5915, + "step": 210 + }, + { + "epoch": 17.96, + "grad_norm": NaN, + "learning_rate": 0.00029699999999999996, + "loss": 4.8878, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": NaN, + "eval_runtime": 53.8508, + "eval_samples_per_second": 7.187, + "eval_steps_per_second": 0.91, + "step": 220 + }, + { + "epoch": 18.78, + "grad_norm": NaN, + "learning_rate": 0.000294, + "loss": 3.5563, + "step": 230 + }, + { + "epoch": 19.59, + "grad_norm": 0.0, + "learning_rate": 0.000289, + "loss": 4.5682, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": NaN, + "eval_runtime": 53.6594, + "eval_samples_per_second": 7.212, + "eval_steps_per_second": 0.913, + "step": 240 + }, + { + "epoch": 20.41, + "grad_norm": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 4.3472, + "step": 250 + }, + { + "epoch": 21.22, + "grad_norm": NaN, + "learning_rate": 0.00027699999999999996, + "loss": 4.911, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": NaN, + "eval_runtime": 53.6876, + "eval_samples_per_second": 7.208, + "eval_steps_per_second": 0.913, + "step": 260 + }, + { + "epoch": 22.04, + "grad_norm": NaN, + "learning_rate": 0.000274, + "loss": 4.5261, + "step": 270 + }, + { + "epoch": 22.86, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 
166.5225, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": NaN, + "eval_runtime": 52.9157, + "eval_samples_per_second": 7.314, + "eval_steps_per_second": 0.926, + "step": 280 + }, + { + "epoch": 23.67, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 290 + }, + { + "epoch": 24.49, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": NaN, + "eval_runtime": 52.9419, + "eval_samples_per_second": 7.31, + "eval_steps_per_second": 0.926, + "step": 300 + }, + { + "epoch": 25.31, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 310 + }, + { + "epoch": 26.12, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": NaN, + "eval_runtime": 52.9285, + "eval_samples_per_second": 7.312, + "eval_steps_per_second": 0.926, + "step": 320 + }, + { + "epoch": 26.94, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 330 + }, + { + "epoch": 27.76, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": NaN, + "eval_runtime": 52.861, + "eval_samples_per_second": 7.321, + "eval_steps_per_second": 0.927, + "step": 340 + }, + { + "epoch": 28.57, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 350 + }, + { + "epoch": 29.39, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 360 + }, + { + "epoch": 29.39, + "eval_loss": NaN, + "eval_runtime": 52.8275, + "eval_samples_per_second": 7.326, + "eval_steps_per_second": 0.928, + "step": 360 + }, + { + "epoch": 30.2, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 370 + }, + { + "epoch": 31.02, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 380 + }, + { + "epoch": 31.02, + "eval_loss": NaN, + "eval_runtime": 52.7782, + "eval_samples_per_second": 7.333, + "eval_steps_per_second": 0.928, + "step": 380 + }, + { + "epoch": 31.84, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 390 + }, + { + "epoch": 32.65, + "grad_norm": NaN, + "learning_rate": 0.00027299999999999997, + "loss": 0.0, + "step": 400 + }, + { + "epoch": 32.65, + "eval_loss": NaN, + "eval_runtime": 52.8047, + "eval_samples_per_second": 7.329, + "eval_steps_per_second": 0.928, + "step": 400 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 1.012772592821207e+18, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-60/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model 
Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-60/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-60/adapter_model.safetensors b/checkpoint-60/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-60/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e224efa9cc2dc4484a3620ddcdc18d7ec1186846 --- /dev/null +++ b/checkpoint-60/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ada8633ef7c86b2adf3be6188403cdab4575c2607d102c13741b2b70251a4321 +size 37803066 diff --git a/checkpoint-60/rng_state.pth b/checkpoint-60/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a05638b0536ba7729dd3ffc025dc2802bfc68f3c --- /dev/null +++ b/checkpoint-60/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d86ca60a2accd028382a0334d38ca2215e70b45ea221f54ee0044467d365ce4 +size 14244 diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb705e8c5e90434d227198a8319ba1148a4dc212 --- /dev/null +++ b/checkpoint-60/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a04cbdf6466f68f256f2d4627a11d643556e5de365ab5de0f8cd920d5015bec +size 1064 diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b11216c2480d87e24b38d6bdfcb8bef1401d9787 --- 
/dev/null +++ b/checkpoint-60/trainer_state.json @@ -0,0 +1,87 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.8979591836734695, + "eval_steps": 20, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 1.5222755513401344e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git a/checkpoint-80/README.md b/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4344ae048223e06fb1acf1bb88642a6c10e697d0 --- /dev/null +++ b/checkpoint-80/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: tiiuae/falcon-7b +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
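Every checkpoint directory carries an identical `adapter_config.json`. For reference, the same adapter shape could be declared in code when setting up a fresh fine-tuning run; the sketch below uses only the values actually present in those config files and assumes peft 0.8.x is installed.

```python
# Sketch: the LoRA setup recorded in adapter_config.json, expressed with peft.
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16,                                # rank "r" in adapter_config.json
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["query_key_value"],  # fused QKV projection in falcon-7b
    task_type="CAUSAL_LM",
)

# Precision / quantization choices would be configured here for a real run.
base_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-7b")
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()  # should report only the LoRA matrices as trainable
```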
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-80/adapter_config.json b/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2cbc690462b2312eb4258736158a8b1b030fa8 --- /dev/null +++ b/checkpoint-80/adapter_config.json @@ -0,0 +1,26 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "tiiuae/falcon-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-80/adapter_model.safetensors b/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-80/optimizer.pt b/checkpoint-80/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4735ae95aeefa27162c0b7368a9a9bf627f4b33 --- /dev/null +++ b/checkpoint-80/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9ec077894d123beb92117ce9810d801c8664f9ca8f0a13e8e54eca7955cb8937 +size 37803066 diff --git a/checkpoint-80/rng_state.pth b/checkpoint-80/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d4e8654f7377c989ad1cbe76e771ca6949ab8c9 --- /dev/null +++ b/checkpoint-80/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79810d05db487bc3a5c6b937f97b30085310c874ce60128ad0a7f81421eae126 +size 14244 diff --git a/checkpoint-80/scheduler.pt b/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b854514c47d3b1ee29f7f462ccaa6cc4209dd364 --- /dev/null +++ b/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ac1536a084330b7fd77c5f2e7beba8797b26586ff6389dba220facbfe4604d +size 1064 diff --git a/checkpoint-80/trainer_state.json b/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..52273d7e9945f053cbb7d6b4e988627258e704b5 --- /dev/null +++ b/checkpoint-80/trainer_state.json @@ -0,0 +1,109 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.530612244897959, + "eval_steps": 20, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "grad_norm": 0.1748005896806717, + "learning_rate": 2.1e-05, + "loss": 4.8309, + "step": 10 + }, + { + "epoch": 1.63, + "grad_norm": 0.19310609996318817, + "learning_rate": 3.2999999999999996e-05, + "loss": 5.0159, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": NaN, + "eval_runtime": 53.474, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 20 + }, + { + "epoch": 2.45, + "grad_norm": NaN, + "learning_rate": 4.2e-05, + "loss": 4.8025, + "step": 30 + }, + { + "epoch": 3.27, + "grad_norm": NaN, + "learning_rate": 5.399999999999999e-05, + "loss": 4.1014, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": NaN, + "eval_runtime": 53.4777, + "eval_samples_per_second": 7.237, + "eval_steps_per_second": 0.916, + "step": 40 + }, + { + "epoch": 4.08, + "grad_norm": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7645, + "step": 50 + }, + { + "epoch": 4.9, + "grad_norm": 0.0, + "learning_rate": 9.3e-05, + "loss": 4.3747, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": NaN, + "eval_runtime": 53.6778, + "eval_samples_per_second": 7.21, + "eval_steps_per_second": 0.913, + "step": 60 + }, + { + "epoch": 5.71, + "grad_norm": NaN, + "learning_rate": 0.000102, + "loss": 5.0924, + "step": 70 + }, + { + "epoch": 6.53, + "grad_norm": NaN, + "learning_rate": 0.00011099999999999999, + "loss": 4.3234, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": NaN, + "eval_runtime": 53.7614, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 0.911, + "step": 80 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 2.0272562942509056e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-80/training_args.bin b/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7de56e2eae9931479f20663b429b090f5e4257e5 --- /dev/null +++ b/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95f6bb506054796e9e714a7ca32503916b4a84a4d68384f001ba913152b2ec2 +size 4856 diff --git 
a/runs/Feb21_07-51-56_nq0jxhxas9/events.out.tfevents.1708501919.nq0jxhxas9.728.0 b/runs/Feb21_07-51-56_nq0jxhxas9/events.out.tfevents.1708501919.nq0jxhxas9.728.0 new file mode 100644 index 0000000000000000000000000000000000000000..8b71cf219befa66380fd471bafd4c407a69a71ab --- /dev/null +++ b/runs/Feb21_07-51-56_nq0jxhxas9/events.out.tfevents.1708501919.nq0jxhxas9.728.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f7dadf7fc0c97d04129b450fdb4d4f790dbc9b9e7d48a9706c527a4a70f3721 +size 19676
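The `trainer_state.json` files above tell the same story at every checkpoint: `eval_loss` is NaN from the first evaluation at step 20, `grad_norm` is either 0.0 or NaN from step 30 onward, and after the spike to 166.52 at step 280 the training loss collapses to 0.0 while the logged learning rate stays frozen at 0.000273, so the recorded run appears to have diverged rather than converged. A small diagnostic sketch (the checkpoint path is an assumption) that scans one of these logs for those symptoms; note that Python's `json` module parses the bare `NaN` tokens in these files by default.

```python
# Sketch: scan a trainer_state.json log_history for NaN / collapsed-loss entries.
import json
import math

with open("checkpoint-400/trainer_state.json") as f:  # assumed local path
    state = json.load(f)  # json accepts the bare NaN literals used in these files

for entry in state["log_history"]:
    step = entry["step"]
    if "eval_loss" in entry and math.isnan(entry["eval_loss"]):
        print(f"step {step}: eval_loss is NaN")
    if math.isnan(entry.get("grad_norm", 0.0)):
        print(f"step {step}: grad_norm is NaN")
    if entry.get("loss") == 0.0:
        print(f"step {step}: training loss collapsed to 0.0")
```

The TensorBoard event file under `runs/` records the same metrics and, if TensorBoard is installed, can be browsed with `tensorboard --logdir runs`.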