Flowersea37 committed
Commit d487691
1 Parent(s): a0efabe

Model save

README.md CHANGED
@@ -5,6 +5,7 @@ license: apache-2.0
 tags:
 - trl
 - sft
+- alignment-handbook
 - generated_from_trainer
 model-index:
 - name: Qwen-1.5b-sft-qlora
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "o_proj",
     "up_proj",
-    "down_proj",
-    "v_proj",
     "q_proj",
-    "gate_proj"
+    "gate_proj",
+    "v_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
all_results.json CHANGED
@@ -2,8 +2,8 @@
   "epoch": 1.0,
   "total_flos": 2.4013425932721193e+18,
   "train_loss": 0.0,
-  "train_runtime": 0.0528,
+  "train_runtime": 0.0617,
   "train_samples": 207864,
-  "train_samples_per_second": 3940185.324,
-  "train_steps_per_second": 328348.777
+  "train_samples_per_second": 3368693.42,
+  "train_steps_per_second": 280724.452
 }
config.json ADDED
@@ -0,0 +1,43 @@
+{
+  "_name_or_path": "Qwen/Qwen2-1.5B",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "hidden_act": "silu",
+  "hidden_size": 1536,
+  "initializer_range": 0.02,
+  "intermediate_size": 8960,
+  "max_position_embeddings": 131072,
+  "max_window_layers": 28,
+  "model_type": "qwen2",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 2,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.44.2",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
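
The added config.json embeds a bitsandbytes quantization_config (4-bit NF4, bfloat16 compute, no double quantization), so the base model is expected to be loaded in 4 bits before the adapter is attached. A minimal loading sketch under that assumption — the adapter repo id "Flowersea37/Qwen-1.5b-sft-qlora" is inferred from the model name and may differ:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Quantization settings mirror the quantization_config shown above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

base_id = "Qwen/Qwen2-1.5B"
adapter_id = "Flowersea37/Qwen-1.5b-sft-qlora"  # assumed repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(model, adapter_id)  # attach the QLoRA adapter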
runs/Sep26_13-57-34_interactive79915/events.out.tfevents.1727330275.interactive79915.116894.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca818c0b116da1c1348db5edf439b23c873dbfd428437cfd6c205f7ebea7ff50
+size 6880
train_results.json CHANGED
@@ -2,8 +2,8 @@
   "epoch": 1.0,
   "total_flos": 2.4013425932721193e+18,
   "train_loss": 0.0,
-  "train_runtime": 0.0528,
+  "train_runtime": 0.0617,
   "train_samples": 207864,
-  "train_samples_per_second": 3940185.324,
-  "train_steps_per_second": 328348.777
+  "train_samples_per_second": 3368693.42,
+  "train_steps_per_second": 280724.452
 }
trainer_state.json CHANGED
@@ -24268,9 +24268,9 @@
       "step": 17322,
       "total_flos": 2.4013425932721193e+18,
       "train_loss": 0.0,
-      "train_runtime": 0.0528,
-      "train_samples_per_second": 3940185.324,
-      "train_steps_per_second": 328348.777
+      "train_runtime": 0.0617,
+      "train_samples_per_second": 3368693.42,
+      "train_steps_per_second": 280724.452
     }
   ],
   "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d70fd547b854e0290ba245f13d741823cf38735fde2fbd11c3f6e63946fac002
+oid sha256:46f41662c700aa2b459e4260e58b5e312345f8e51ab835dd9f230cc31e2b1775
 size 6200