nolestock committed
Commit 8196c10
1 Parent(s): 5c57edf

Model save

last-checkpoint/README.md → README.md RENAMED
@@ -4,6 +4,17 @@ library_name: peft
 ## Training procedure
 
 
+The following `bitsandbytes` quantization config was used during training:
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: False
+- bnb_4bit_compute_dtype: float16
+
 The following `bitsandbytes` quantization config was used during training:
 - load_in_8bit: False
 - load_in_4bit: True
@@ -16,5 +27,6 @@ The following `bitsandbytes` quantization config was used during training:
 - bnb_4bit_compute_dtype: float16
 ### Framework versions
 
+- PEFT 0.4.0.dev0
 
 - PEFT 0.4.0.dev0
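
For reference: the settings recorded in this README correspond to a `transformers.BitsAndBytesConfig`. The training script itself is not part of this commit, so the sketch below only mirrors the listed values and is not the author's code:

```python
import torch
from transformers import BitsAndBytesConfig

# Sketch: BitsAndBytesConfig rebuilt from the values listed in the README.
# The actual training script is not included in this commit.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=False,
    load_in_4bit=True,                     # 4-bit quantization
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,  # compute in fp16, store in 4-bit
)
```

Passed as `quantization_config=bnb_config` to `AutoModelForCausalLM.from_pretrained`, this loads the base model in 4-bit NF4 with fp16 compute, which is the usual QLoRA setup.
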
last-checkpoint/adapter_config.json DELETED
@@ -1,22 +0,0 @@
-{
-  "base_model_name_or_path": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
-  "bias": "none",
-  "fan_in_fan_out": false,
-  "inference_mode": true,
-  "init_lora_weights": true,
-  "layers_pattern": null,
-  "layers_to_transform": null,
-  "lora_alpha": 16,
-  "lora_dropout": 0.1,
-  "modules_to_save": null,
-  "peft_type": "LORA",
-  "r": 64,
-  "revision": null,
-  "target_modules": [
-    "query_key_value",
-    "dense",
-    "dense_h_to_4h",
-    "dense_4h_to_h"
-  ],
-  "task_type": "CAUSAL_LM"
-}
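
The deleted adapter_config.json maps directly onto a `peft.LoraConfig`. A minimal sketch reconstructed from the JSON above (again an assumption; the author's training script is not in this commit):

```python
from peft import LoraConfig

# Sketch: LoraConfig equivalent to the deleted adapter_config.json.
lora_config = LoraConfig(
    base_model_name_or_path="togethercomputer/RedPajama-INCITE-Base-3B-v1",
    r=64,             # LoRA rank
    lora_alpha=16,    # scaling factor: alpha / r = 0.25
    lora_dropout=0.1,
    bias="none",
    fan_in_fan_out=False,
    target_modules=[  # all GPT-NeoX attention and MLP projections
        "query_key_value",
        "dense",
        "dense_h_to_4h",
        "dense_4h_to_h",
    ],
    task_type="CAUSAL_LM",
)
```

Targeting all four projection types at r=64 makes this a comparatively heavy adapter, consistent with the ~336 MB adapter_model.bin deleted below.
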
last-checkpoint/adapter_model.bin DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:18d9ad4f8dc933ad6e407f8768604fa8df673a51ff20e3fc2b2b72b58b96296c
-size 335638221
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c5645adc0e815427bee2525b551ef19301c3bd58feffe8f523abeea2acf1157d
-size 671246597
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f2a4a57d3543837f3d3fec93400bac8989273bbf35e51c30c34f8bfa85b258ee
-size 14575
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:216f76b8039f833c337db298c81f13b12082d5fd4f9d866cecd34b2ca7550b37
-size 627
last-checkpoint/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
-{
-  "bos_token": "<|endoftext|>",
-  "eos_token": "<|endoftext|>",
-  "pad_token": "<|endoftext|>",
-  "unk_token": "<|endoftext|>"
-}
last-checkpoint/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json DELETED
@@ -1,9 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": "<|endoftext|>",
-  "clean_up_tokenization_spaces": true,
-  "eos_token": "<|endoftext|>",
-  "model_max_length": 2048,
-  "tokenizer_class": "GPTNeoXTokenizer",
-  "unk_token": "<|endoftext|>"
-}
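
Taken together with special_tokens_map.json above, this describes the base model's GPT-NeoX tokenizer, with `<|endoftext|>` reused as BOS/EOS/PAD/UNK and a 2048-token context. A roughly equivalent setup (a sketch; the deleted checkpoint-local files may have diverged from the hub copy):

```python
from transformers import AutoTokenizer

# Sketch: approximate the deleted checkpoint tokenizer from the base model.
tokenizer = AutoTokenizer.from_pretrained(
    "togethercomputer/RedPajama-INCITE-Base-3B-v1",
    model_max_length=2048,
)
tokenizer.pad_token = tokenizer.eos_token  # both are "<|endoftext|>"
```
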
last-checkpoint/trainer_state.json DELETED
@@ -1,76 +0,0 @@
-{
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.6498781478472786,
-  "global_step": 100,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.06,
-      "learning_rate": 0.0002,
-      "loss": 1.6209,
-      "step": 10
-    },
-    {
-      "epoch": 0.13,
-      "learning_rate": 0.0002,
-      "loss": 1.5431,
-      "step": 20
-    },
-    {
-      "epoch": 0.19,
-      "learning_rate": 0.0002,
-      "loss": 1.5206,
-      "step": 30
-    },
-    {
-      "epoch": 0.26,
-      "learning_rate": 0.0002,
-      "loss": 1.5347,
-      "step": 40
-    },
-    {
-      "epoch": 0.32,
-      "learning_rate": 0.0002,
-      "loss": 1.5357,
-      "step": 50
-    },
-    {
-      "epoch": 0.39,
-      "learning_rate": 0.0002,
-      "loss": 1.485,
-      "step": 60
-    },
-    {
-      "epoch": 0.45,
-      "learning_rate": 0.0002,
-      "loss": 1.4854,
-      "step": 70
-    },
-    {
-      "epoch": 0.52,
-      "learning_rate": 0.0002,
-      "loss": 1.5324,
-      "step": 80
-    },
-    {
-      "epoch": 0.58,
-      "learning_rate": 0.0002,
-      "loss": 1.4887,
-      "step": 90
-    },
-    {
-      "epoch": 0.65,
-      "learning_rate": 0.0002,
-      "loss": 1.5132,
-      "step": 100
-    }
-  ],
-  "max_steps": 100,
-  "num_train_epochs": 1,
-  "total_flos": 5.7895286734848e+16,
-  "trial_name": null,
-  "trial_params": null
-}
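
The deleted state records a single 100-step run at a constant learning rate of 2e-4, with the loss drifting from 1.62 down to roughly 1.51 over 0.65 epochs. A sketch for pulling that curve out of any such file (the path below is illustrative):

```python
import json

# Sketch: extract the (step, loss) curve from a Hugging Face Trainer
# state file like the one deleted above. The path is illustrative.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "loss" in entry:  # skip entries without a training loss
        print(entry["step"], entry["loss"])
```
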
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a937e9698d9377e313f6374c2abd56957ca2182371fcd2fc70c7320f2a02f75a
-size 3963
runs/Jul04_00-42-28_06a0078a8833/events.out.tfevents.1688431447.06a0078a8833.315.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:234d3567338b4b818eea4a3bd8f27e320b5f47d2eccf3c8fa4319bb780be8fbc
-size 5965
+oid sha256:b8c1541b2c430463344f4374106318cf6177e954bf008dd11b70985c3df3a83e
+size 6313