joseagmz committed on
Commit f59a43c (1 parent: 98aef55)

Upload folder using huggingface_hub

README.md ADDED
@@ -0,0 +1,131 @@
---
license: apache-2.0
base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
tags:
- generated_from_trainer
model-index:
- name: TinyLlama-PsychiatryCaseNotes-epochs-1-lr-0002
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
<details><summary>See axolotl config</summary>

axolotl version: `0.4.0`
```yaml
adapter: null
base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
bf16: auto
dataset_prepared_path: last_run_prepared
datasets:
- path: utrgvseniorproject/Tinybook
  type: completion
debug: null
deepspeed: null
early_stopping_patience: null
eval_sample_packing: false
eval_table_size: null
evals_per_epoch: 4
flash_attention: true
flash_attn_cross_entropy: false
flash_attn_fuse_mlp: true
flash_attn_fuse_qkv: false
flash_attn_rms_norm: true
fp16: null
fsdp: null
fsdp_config: null
gradient_accumulation_steps: 1
gradient_checkpointing: true
group_by_length: false
learning_rate: 0.0002
load_in_4bit: false
load_in_8bit: false
local_rank: null
logging_steps: 1
lora_alpha: null
lora_dropout: null
lora_fan_in_fan_out: null
lora_model_dir: null
lora_r: null
lora_target_linear: null
lr_scheduler: cosine
micro_batch_size: 1
model_type: LlamaForCausalLM
num_epochs: 1
optimizer: adamw_bnb_8bit
output_dir: ./TinyLlama-PsychiatryCaseNotes-epochs-1-lr-0002
pad_to_sequence_len: true
resume_from_checkpoint: null
sample_packing: true
saves_per_epoch: 1
sequence_len: 2048
special_tokens: null
strict: false
tf32: false
tokenizer_type: LlamaTokenizer
train_on_inputs: false
val_set_size: 0.05
wandb_entity: utrgvmedai
wandb_log_model: null
wandb_name: tinyLama_colab_test_2
wandb_project: TinyLlama-PsychiatryCaseNotes-epochs-1-lr-0002
wandb_watch: null
warmup_steps: 100
weight_decay: 0.1
xformers_attention: null

```

</details><br>

# TinyLlama-PsychiatryCaseNotes-epochs-1-lr-0002

This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on the utrgvseniorproject/Tinybook dataset (see the axolotl config above).
It achieves the following results on the evaluation set:
- Loss: 1.8020
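
A minimal inference sketch, assuming the checkpoint is published under the repo id `joseagmz/TinyLlama-PsychiatryCaseNotes-epochs-1-lr-0002` (a local directory containing the uploaded files works the same way):

```python
# Minimal inference sketch; the repo id is an assumption based on the uploader and
# model name above -- a local path to these files also works.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "joseagmz/TinyLlama-PsychiatryCaseNotes-epochs-1-lr-0002"  # assumed repo id

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)

# The dataset was trained as type: completion, so plain text continuation is the natural use.
prompt = "Psychiatry case note:"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=128)  # sampling defaults come from generation_config.json
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```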

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 0.0002
- train_batch_size: 1
- eval_batch_size: 1
- seed: 42
- optimizer: 8-bit AdamW (adamw_bnb_8bit) with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- lr_scheduler_warmup_steps: 100
- num_epochs: 1
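
Note that with `lr_scheduler_warmup_steps: 100` but only 23 optimizer steps in the whole run (see `checkpoint-23/trainer_state.json`), the cosine schedule never gets past its warmup ramp, so the learning rate peaks around 4.6e-05 rather than the configured 2e-4. A small illustrative sketch of that schedule shape using `get_cosine_schedule_with_warmup` (Axolotl builds its scheduler internally; this only reproduces the curve):

```python
# Sketch of the configured schedule: cosine decay with 100 warmup steps but only
# 23 total steps, so training ends while still on the linear warmup ramp.
import torch
from transformers import get_cosine_schedule_with_warmup

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.AdamW([param], lr=2e-4)  # peak learning rate from the config
scheduler = get_cosine_schedule_with_warmup(
    optimizer, num_warmup_steps=100, num_training_steps=23
)

for step in range(1, 24):
    optimizer.step()
    scheduler.step()
    if step in (1, 6, 12, 18, 23):
        print(step, scheduler.get_last_lr()[0])
# step 23 prints ~4.6e-05, matching the last learning_rate logged in trainer_state.json
```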

### Training results

| Training Loss | Epoch | Step | Validation Loss |
|:-------------:|:-----:|:----:|:---------------:|
| 1.7259        | 0.04  | 1    | 1.9138          |
| 1.8148        | 0.26  | 6    | 1.9011          |
| 1.8631        | 0.52  | 12   | 1.8659          |
| 1.8768        | 0.78  | 18   | 1.8020          |


### Framework versions

- Transformers 4.38.2
- Pytorch 2.1.2+cu121
- Datasets 2.18.0
- Tokenizers 0.15.0

checkpoint-23/config.json ADDED
@@ -0,0 +1,28 @@
{
  "_name_or_path": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5632,
  "max_position_embeddings": 2048,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 22,
  "num_key_value_heads": 4,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.38.2",
  "use_cache": false,
  "vocab_size": 32000
}

checkpoint-23/generation_config.json ADDED
@@ -0,0 +1,8 @@
{
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": 2,
  "max_length": 2048,
  "pad_token_id": 0,
  "transformers_version": "4.38.2"
}

checkpoint-23/model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ba30f671d999fe633d081c0421918318256694dc5e34fc9716fe553a3c955ed1
size 2200117448

checkpoint-23/optimizer.pt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a4ea382251858d90b652feb78299e4cfd871c4e0f5cc4ff0b8545c47c81d6669
size 2205132090

checkpoint-23/rng_state.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3
size 14244

checkpoint-23/scheduler.pt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d628143488a87bbd00a601b9e0c49fc74e98baeb714f6509744eab74f54d1813
size 1064

checkpoint-23/trainer_state.json ADDED
@@ -0,0 +1,214 @@
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 6,
  "global_step": 23,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "grad_norm": 6.0,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.7259,
      "step": 1
    },
    {
      "epoch": 0.04,
      "eval_loss": 1.913841724395752,
      "eval_runtime": 0.1057,
      "eval_samples_per_second": 18.922,
      "eval_steps_per_second": 18.922,
      "step": 1
    },
    {
      "epoch": 0.09,
      "grad_norm": 7.0,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.8344,
      "step": 2
    },
    {
      "epoch": 0.13,
      "grad_norm": 6.1875,
      "learning_rate": 6e-06,
      "loss": 1.8468,
      "step": 3
    },
    {
      "epoch": 0.17,
      "grad_norm": 6.3125,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.7955,
      "step": 4
    },
    {
      "epoch": 0.22,
      "grad_norm": 6.0625,
      "learning_rate": 1e-05,
      "loss": 1.8767,
      "step": 5
    },
    {
      "epoch": 0.26,
      "grad_norm": 6.6875,
      "learning_rate": 1.2e-05,
      "loss": 1.8148,
      "step": 6
    },
    {
      "epoch": 0.26,
      "eval_loss": 1.901075005531311,
      "eval_runtime": 0.1023,
      "eval_samples_per_second": 19.547,
      "eval_steps_per_second": 19.547,
      "step": 6
    },
    {
      "epoch": 0.3,
      "grad_norm": 6.9375,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 1.9312,
      "step": 7
    },
    {
      "epoch": 0.35,
      "grad_norm": 6.25,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 1.832,
      "step": 8
    },
    {
      "epoch": 0.39,
      "grad_norm": 6.375,
      "learning_rate": 1.8e-05,
      "loss": 1.8511,
      "step": 9
    },
    {
      "epoch": 0.43,
      "grad_norm": 6.5625,
      "learning_rate": 2e-05,
      "loss": 1.9536,
      "step": 10
    },
    {
      "epoch": 0.48,
      "grad_norm": 6.3125,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 1.9282,
      "step": 11
    },
    {
      "epoch": 0.52,
      "grad_norm": 6.125,
      "learning_rate": 2.4e-05,
      "loss": 1.8631,
      "step": 12
    },
    {
      "epoch": 0.52,
      "eval_loss": 1.8658802509307861,
      "eval_runtime": 0.1025,
      "eval_samples_per_second": 19.508,
      "eval_steps_per_second": 19.508,
      "step": 12
    },
    {
      "epoch": 0.57,
      "grad_norm": 6.15625,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 1.8138,
      "step": 13
    },
    {
      "epoch": 0.61,
      "grad_norm": 6.5625,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 1.7873,
      "step": 14
    },
    {
      "epoch": 0.65,
      "grad_norm": 12.875,
      "learning_rate": 3e-05,
      "loss": 1.9504,
      "step": 15
    },
    {
      "epoch": 0.7,
      "grad_norm": 6.53125,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 1.7767,
      "step": 16
    },
    {
      "epoch": 0.74,
      "grad_norm": 7.09375,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 1.9669,
      "step": 17
    },
    {
      "epoch": 0.78,
      "grad_norm": 6.96875,
      "learning_rate": 3.6e-05,
      "loss": 1.8768,
      "step": 18
    },
    {
      "epoch": 0.78,
      "eval_loss": 1.8019580841064453,
      "eval_runtime": 0.1033,
      "eval_samples_per_second": 19.362,
      "eval_steps_per_second": 19.362,
      "step": 18
    },
    {
      "epoch": 0.83,
      "grad_norm": 6.5625,
      "learning_rate": 3.8e-05,
      "loss": 1.6976,
      "step": 19
    },
    {
      "epoch": 0.87,
      "grad_norm": 5.90625,
      "learning_rate": 4e-05,
      "loss": 1.7206,
      "step": 20
    },
    {
      "epoch": 0.91,
      "grad_norm": 6.5,
      "learning_rate": 4.2e-05,
      "loss": 1.884,
      "step": 21
    },
    {
      "epoch": 0.96,
      "grad_norm": 6.53125,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 2.005,
      "step": 22
    },
    {
      "epoch": 1.0,
      "grad_norm": 6.09375,
      "learning_rate": 4.600000000000001e-05,
      "loss": 1.7929,
      "step": 23
    }
  ],
  "logging_steps": 1,
  "max_steps": 23,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 292378028015616.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
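
The `log_history` above is plain JSON, so the loss curve can be pulled out with the standard library alone; a small sketch, assuming the file has been downloaded to `checkpoint-23/trainer_state.json`:

```python
# Extract the training and eval loss curves from the checkpoint's trainer_state.json.
import json

with open("checkpoint-23/trainer_state.json") as f:
    state = json.load(f)

train_curve = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_curve = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print("train:", train_curve[:3], "...")
print("eval:", eval_curve)  # [(1, 1.9138...), (6, 1.9010...), (12, 1.8658...), (18, 1.8019...)]
```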
checkpoint-23/training_args.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4d9c490b52d0079996aa8fe2e1d328e7be40b73d158b23ea0b114134e1124760
size 5688

config.json ADDED
@@ -0,0 +1,28 @@
{
  "_name_or_path": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5632,
  "max_position_embeddings": 2048,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 22,
  "num_key_value_heads": 4,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.38.2",
  "use_cache": false,
  "vocab_size": 32000
}
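
As a quick sanity check, the fields above are enough to estimate the parameter count of this Llama-style model; the result, roughly 1.1B parameters, is consistent with the ~2.2 GB bfloat16 `model.safetensors` at 2 bytes per parameter. A rough sketch of the arithmetic:

```python
# Rough parameter count from config.json for a Llama architecture with grouped-query attention.
hidden, inter, layers = 2048, 5632, 22
heads, kv_heads, vocab = 32, 4, 32000

head_dim = hidden // heads                           # 64
kv_dim = kv_heads * head_dim                         # 256

attn = 2 * hidden * hidden + 2 * hidden * kv_dim     # q, o projections + k, v projections
mlp = 3 * hidden * inter                             # gate, up, down projections
norms = 2 * hidden                                   # two RMSNorm weights per layer
per_layer = attn + mlp + norms

embeddings = 2 * vocab * hidden                      # input embeddings + untied lm_head
total = layers * per_layer + embeddings + hidden     # plus the final RMSNorm

print(f"{total:,} parameters (~{total / 1e9:.2f}B)")  # ~1,100,048,384 (~1.10B)
```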
generation_config.json ADDED
@@ -0,0 +1,8 @@
{
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": 2,
  "max_length": 2048,
  "pad_token_id": 0,
  "transformers_version": "4.38.2"
}
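
These defaults (sampling enabled, `max_length` 2048, pad token id 0) are what `model.generate()` falls back to when no generation arguments are passed. A small sketch to inspect them, assuming the files are available in the current directory (a hub repo id works as well):

```python
# Inspect the shipped generation defaults from generation_config.json.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained(".")  # or the hub repo id
print(gen_config.do_sample, gen_config.max_length, gen_config.pad_token_id)
# True 2048 0
```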
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:67e8b25ed3014fa68dcc813285269f1b7af8cb41c3fc9fa1bdaeba70e2a85863
size 2200160278

special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "</s>",
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}

tokenizer.model ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
size 499723

tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
{
  "add_bos_token": true,
  "add_eos_token": false,
  "add_prefix_space": true,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": false,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "</s>",
  "padding_side": "right",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false,
  "use_fast": true
}
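
Two details of this tokenizer configuration are worth flagging: `pad_token` is mapped to `</s>` (the EOS token) rather than a dedicated padding token, and `padding_side` is `right`. A quick check, again assuming the files are available locally (or under the assumed repo id used earlier):

```python
# Load the tokenizer and confirm the special-token mapping defined in the files above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")  # or the hub repo id
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.unk_token, tokenizer.pad_token)
# <s> </s> <unk> </s>
print(tokenizer.padding_side)  # right
```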