Upload folder using huggingface_hub

Browse files

Files changed (16) hide show

README.md +58 -0
adapter_config.json +29 -0
adapter_model.safetensors +3 -0
added_tokens.json +5 -0
all_results.json +8 -0
merges.txt +0 -0
running_log.txt +162 -0
special_tokens_map.json +20 -0
tokenizer.json +0 -0
tokenizer_config.json +44 -0
train_results.json +8 -0
trainer_config.yaml +27 -0
trainer_log.jsonl +9 -0
trainer_state.json +86 -0
training_args.bin +3 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,58 @@

+---
+license: other
+library_name: peft
+tags:
+- llama-factory
+- lora
+- generated_from_trainer
+base_model: Qwen/Qwen1.5-0.5B
+model-index:
+- name: train_2024-05-10-16-53-38
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# train_2024-05-10-16-53-38
+This model is a fine-tuned version of [Qwen/Qwen1.5-0.5B](https://huggingface.co/Qwen/Qwen1.5-0.5B), trained on `joshcarp/evy-dataset`.
+## Model description
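+A LoRA adapter (rank 8, alpha 16, dropout 0, applied to the `q_proj` and `v_proj` modules) for the `Qwen/Qwen1.5-0.5B` causal language model, trained with LLaMA-Factory supervised fine-tuning (`stage: sft`). This repository contains only the adapter weights and tokenizer files, not the base model weights.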
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
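+Training used 235 examples from `joshcarp/evy-dataset`, formatted with the LLaMA-Factory `default` template and a cutoff length of 1024 tokens. No evaluation results were logged for this run.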
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 2
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 16
+- optimizer: AdamW (`adamw_torch`) with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- num_epochs: 3.0
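+### Training results
+Training finished after 42 optimization steps (2.85 epochs) in about 901 seconds, with a reported `train_loss` of 2.7561. The loss logged every 5 steps ranged from 3.1521 (step 5) down to 2.3657 (step 30) and was 2.9284 at the last logged step (step 40).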
+### Framework versions
+- PEFT 0.10.0
+- Transformers 4.40.2
+- Pytorch 2.3.0
+- Datasets 2.19.1
+- Tokenizers 0.19.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen1.5-0.5B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2365a768b7143e0bf54dd9d8e62f07e885ed00c1d173de27048c93a481fb0b27
+size 3158328

added_tokens.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644
+}

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.847457627118644,
+    "total_flos": 285618906316800.0,
+    "train_loss": 2.7561378251938593,
+    "train_runtime": 901.2566,
+    "train_samples_per_second": 0.782,
+    "train_steps_per_second": 0.047
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

running_log.txt ADDED Viewed

	@@ -0,0 +1,162 @@

+05/10/2024 16:54:21 - INFO - transformers.tokenization_utils_base - loading file vocab.json from cache at /Users/joshcarp/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/vocab.json
+05/10/2024 16:54:21 - INFO - transformers.tokenization_utils_base - loading file merges.txt from cache at /Users/joshcarp/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/merges.txt
+05/10/2024 16:54:21 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at /Users/joshcarp/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/tokenizer.json
+05/10/2024 16:54:21 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at None
+05/10/2024 16:54:21 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at None
+05/10/2024 16:54:21 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /Users/joshcarp/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/tokenizer_config.json
+05/10/2024 16:54:21 - WARNING - transformers.tokenization_utils_base - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+05/10/2024 16:54:21 - INFO - llmtuner.data.loader - Loading dataset joshcarp/evy-dataset...
+05/10/2024 16:54:23 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /Users/joshcarp/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/config.json
+05/10/2024 16:54:23 - INFO - transformers.configuration_utils - Model config Qwen2Config {
+  "_name_or_path": "Qwen/Qwen1.5-0.5B",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 2816,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 21,
+  "model_type": "qwen2",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 16,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.2",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
+05/10/2024 16:55:38 - INFO - transformers.modeling_utils - loading weights file model.safetensors from cache at /Users/joshcarp/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/model.safetensors
+05/10/2024 16:55:38 - INFO - transformers.modeling_utils - Instantiating Qwen2ForCausalLM model under default dtype torch.float32.
+05/10/2024 16:55:38 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "use_cache": false
+}
+05/10/2024 16:55:40 - INFO - transformers.modeling_utils - All model checkpoint weights were used when initializing Qwen2ForCausalLM.
+05/10/2024 16:55:40 - INFO - transformers.modeling_utils - All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen1.5-0.5B.
+If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.
+05/10/2024 16:55:40 - INFO - transformers.generation.configuration_utils - loading configuration file generation_config.json from cache at /Users/joshcarp/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/generation_config.json
+05/10/2024 16:55:40 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "max_new_tokens": 2048
+}
+05/10/2024 16:55:40 - INFO - llmtuner.model.utils.checkpointing - Gradient checkpointing enabled.
+05/10/2024 16:55:40 - INFO - llmtuner.model.utils.attention - Using torch SDPA for faster training and inference.
+05/10/2024 16:55:40 - INFO - llmtuner.model.adapter - Fine-tuning method: LoRA
+05/10/2024 16:55:40 - INFO - llmtuner.model.loader - trainable params: 786432 || all params: 464774144 || trainable%: 0.1692
+05/10/2024 16:55:40 - INFO - transformers.trainer - You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
+05/10/2024 16:55:41 - INFO - transformers.trainer - ***** Running training *****
+05/10/2024 16:55:41 - INFO - transformers.trainer -   Num examples = 235
+05/10/2024 16:55:41 - INFO - transformers.trainer -   Num Epochs = 3
+05/10/2024 16:55:41 - INFO - transformers.trainer -   Instantaneous batch size per device = 2
+05/10/2024 16:55:41 - INFO - transformers.trainer -   Total train batch size (w. parallel, distributed & accumulation) = 16
+05/10/2024 16:55:41 - INFO - transformers.trainer -   Gradient Accumulation steps = 8
+05/10/2024 16:55:41 - INFO - transformers.trainer -   Total optimization steps = 42
+05/10/2024 16:55:41 - INFO - transformers.trainer -   Number of trainable parameters = 786,432
+05/10/2024 16:56:35 - INFO - llmtuner.extras.callbacks - {'loss': 3.1521, 'learning_rate': 4.8272e-05, 'epoch': 0.34}
+05/10/2024 16:58:24 - INFO - llmtuner.extras.callbacks - {'loss': 2.7178, 'learning_rate': 4.3326e-05, 'epoch': 0.68}
+05/10/2024 17:00:15 - INFO - llmtuner.extras.callbacks - {'loss': 2.7124, 'learning_rate': 3.5847e-05, 'epoch': 1.02}
+05/10/2024 17:01:32 - INFO - llmtuner.extras.callbacks - {'loss': 3.0056, 'learning_rate': 2.6868e-05, 'epoch': 1.36}
+05/10/2024 17:03:03 - INFO - llmtuner.extras.callbacks - {'loss': 2.6855, 'learning_rate': 1.7631e-05, 'epoch': 1.69}
+05/10/2024 17:06:33 - INFO - llmtuner.extras.callbacks - {'loss': 2.3657, 'learning_rate': 9.4128e-06, 'epoch': 2.03}
+05/10/2024 17:08:15 - INFO - llmtuner.extras.callbacks - {'loss': 2.5569, 'learning_rate': 3.3494e-06, 'epoch': 2.37}
+05/10/2024 17:10:03 - INFO - llmtuner.extras.callbacks - {'loss': 2.9284, 'learning_rate': 2.7923e-07, 'epoch': 2.71}
+05/10/2024 17:10:42 - INFO - transformers.trainer -
+Training completed. Do not forget to share your model on huggingface.co/models =)
+05/10/2024 17:10:42 - INFO - transformers.trainer - Saving model checkpoint to saves/Qwen1.5-0.5B/lora/train_2024-05-10-16-53-38
+05/10/2024 17:10:42 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /Users/joshcarp/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/config.json
+05/10/2024 17:10:42 - INFO - transformers.configuration_utils - Model config Qwen2Config {
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 2816,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 21,
+  "model_type": "qwen2",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 16,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.2",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
+05/10/2024 17:10:42 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/Qwen1.5-0.5B/lora/train_2024-05-10-16-53-38/tokenizer_config.json
+05/10/2024 17:10:42 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/Qwen1.5-0.5B/lora/train_2024-05-10-16-53-38/special_tokens_map.json
+05/10/2024 17:10:42 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields:
+{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message + '\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\\nAssistant: ' }}{% elif message['role'] == 'assistant' %}{{ content + '<|endoftext|>' + '\\n' }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 32768,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.847457627118644,
+    "total_flos": 285618906316800.0,
+    "train_loss": 2.7561378251938593,
+    "train_runtime": 901.2566,
+    "train_samples_per_second": 0.782,
+    "train_steps_per_second": 0.047
+}

trainer_config.yaml ADDED Viewed

	@@ -0,0 +1,27 @@

+cutoff_len: 1024
+dataset: evy-dataset
+dataset_dir: data
+do_train: true
+finetuning_type: lora
+flash_attn: auto
+gradient_accumulation_steps: 8
+learning_rate: 5.0e-05
+logging_steps: 5
+lora_alpha: 16
+lora_dropout: 0
+lora_rank: 8
+lora_target: q_proj,v_proj
+lr_scheduler_type: cosine
+max_grad_norm: 1.0
+max_samples: 100000
+model_name_or_path: Qwen/Qwen1.5-0.5B
+num_train_epochs: 3.0
+optim: adamw_torch
+output_dir: saves/Qwen1.5-0.5B/lora/train_2024-05-10-16-53-38
+packing: false
+per_device_train_batch_size: 2
+report_to: none
+save_steps: 100
+stage: sft
+template: default
+warmup_steps: 0

trainer_log.jsonl ADDED Viewed

	@@ -0,0 +1,9 @@

+{"current_steps": 5, "total_steps": 42, "loss": 3.1521, "learning_rate": 4.827184371610511e-05, "epoch": 0.3389830508474576, "percentage": 11.9, "elapsed_time": "0:00:54", "remaining_time": "0:06:41"}
+{"current_steps": 10, "total_steps": 42, "loss": 2.7178, "learning_rate": 4.332629679574566e-05, "epoch": 0.6779661016949152, "percentage": 23.81, "elapsed_time": "0:02:43", "remaining_time": "0:08:42"}
+{"current_steps": 15, "total_steps": 42, "loss": 2.7124, "learning_rate": 3.5847093477938956e-05, "epoch": 1.0169491525423728, "percentage": 35.71, "elapsed_time": "0:04:34", "remaining_time": "0:08:13"}
+{"current_steps": 20, "total_steps": 42, "loss": 3.0056, "learning_rate": 2.686825233966061e-05, "epoch": 1.3559322033898304, "percentage": 47.62, "elapsed_time": "0:05:51", "remaining_time": "0:06:26"}
+{"current_steps": 25, "total_steps": 42, "loss": 2.6855, "learning_rate": 1.7631120639727393e-05, "epoch": 1.694915254237288, "percentage": 59.52, "elapsed_time": "0:07:22", "remaining_time": "0:05:00"}
+{"current_steps": 30, "total_steps": 42, "loss": 2.3657, "learning_rate": 9.412754953531663e-06, "epoch": 2.0338983050847457, "percentage": 71.43, "elapsed_time": "0:10:52", "remaining_time": "0:04:21"}
+{"current_steps": 35, "total_steps": 42, "loss": 2.5569, "learning_rate": 3.3493649053890326e-06, "epoch": 2.3728813559322033, "percentage": 83.33, "elapsed_time": "0:12:34", "remaining_time": "0:02:30"}
+{"current_steps": 40, "total_steps": 42, "loss": 2.9284, "learning_rate": 2.7922934437178695e-07, "epoch": 2.711864406779661, "percentage": 95.24, "elapsed_time": "0:14:22", "remaining_time": "0:00:43"}
+{"current_steps": 42, "total_steps": 42, "epoch": 2.847457627118644, "percentage": 100.0, "elapsed_time": "0:15:01", "remaining_time": "0:00:00"}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,86 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.847457627118644,
+  "eval_steps": 500,
+  "global_step": 42,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.3389830508474576,
+      "grad_norm": 1.8727585077285767,
+      "learning_rate": 4.827184371610511e-05,
+      "loss": 3.1521,
+      "step": 5
+    },
+    {
+      "epoch": 0.6779661016949152,
+      "grad_norm": 1.4997955560684204,
+      "learning_rate": 4.332629679574566e-05,
+      "loss": 2.7178,
+      "step": 10
+    },
+    {
+      "epoch": 1.0169491525423728,
+      "grad_norm": 1.1865301132202148,
+      "learning_rate": 3.5847093477938956e-05,
+      "loss": 2.7124,
+      "step": 15
+    },
+    {
+      "epoch": 1.3559322033898304,
+      "grad_norm": 1.8665063381195068,
+      "learning_rate": 2.686825233966061e-05,
+      "loss": 3.0056,
+      "step": 20
+    },
+    {
+      "epoch": 1.694915254237288,
+      "grad_norm": 1.1309840679168701,
+      "learning_rate": 1.7631120639727393e-05,
+      "loss": 2.6855,
+      "step": 25
+    },
+    {
+      "epoch": 2.0338983050847457,
+      "grad_norm": 0.8969791531562805,
+      "learning_rate": 9.412754953531663e-06,
+      "loss": 2.3657,
+      "step": 30
+    },
+    {
+      "epoch": 2.3728813559322033,
+      "grad_norm": 1.163950800895691,
+      "learning_rate": 3.3493649053890326e-06,
+      "loss": 2.5569,
+      "step": 35
+    },
+    {
+      "epoch": 2.711864406779661,
+      "grad_norm": 2.0787620544433594,
+      "learning_rate": 2.7922934437178695e-07,
+      "loss": 2.9284,
+      "step": 40
+    },
+    {
+      "epoch": 2.847457627118644,
+      "step": 42,
+      "total_flos": 285618906316800.0,
+      "train_loss": 2.7561378251938593,
+      "train_runtime": 901.2566,
+      "train_samples_per_second": 0.782,
+      "train_steps_per_second": 0.047
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 42,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 100,
+  "total_flos": 285618906316800.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:727b71cc8ae77598a66fb72b785a6060cac8a4554eefb1cb6b50b070e332e56b
+size 5176

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff