codegood committed on
Commit
05f731a
1 Parent(s): 5bb3e15

End of training

Browse files
README.md CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/phi-1_5](https://huggingface.co/microsoft/phi-1_5) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.6032
22
 
23
  ## Model description
24
 
@@ -38,23 +38,23 @@ More information needed
38
 
39
  The following hyperparameters were used during training:
40
  - learning_rate: 0.0002
41
- - train_batch_size: 7
42
- - eval_batch_size: 7
43
  - seed: 42
44
  - gradient_accumulation_steps: 3
45
- - total_train_batch_size: 21
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: cosine
48
  - lr_scheduler_warmup_ratio: 0.05
49
- - num_epochs: 2
50
 
51
  ### Training results
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-----:|:----:|:---------------:|
55
- | 0.9816 | 0.59 | 300 | 0.6548 |
56
- | 0.8223 | 1.18 | 600 | 0.6201 |
57
- | 0.7223 | 1.78 | 900 | 0.6032 |
58
 
59
 
60
  ### Framework versions
 
18
 
19
  This model is a fine-tuned version of [microsoft/phi-1_5](https://huggingface.co/microsoft/phi-1_5) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.4678
22
 
23
  ## Model description
24
 
 
38
 
39
  The following hyperparameters were used during training:
40
  - learning_rate: 0.0002
41
+ - train_batch_size: 8
42
+ - eval_batch_size: 8
43
  - seed: 42
44
  - gradient_accumulation_steps: 3
45
+ - total_train_batch_size: 24
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: cosine
48
  - lr_scheduler_warmup_ratio: 0.05
49
+ - num_epochs: 5
50
 
51
  ### Training results
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-----:|:----:|:---------------:|
55
+ | 0.2819 | 1.39 | 300 | 0.4803 |
56
+ | 0.2312 | 2.79 | 600 | 0.4819 |
57
+ | 0.2005 | 4.18 | 900 | 0.4678 |
58
 
59
 
60
  ### Framework versions
adapter_config.json CHANGED
@@ -19,11 +19,11 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "Wqkv",
23
  "out_proj",
24
- "fc2",
25
  "linear",
26
- "fc1"
 
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_rslora": false
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "fc1",
23
  "out_proj",
 
24
  "linear",
25
+ "Wqkv",
26
+ "fc2"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:669affdb816b5482b82bc9551558f5ff3fcd6869e634628da5d36281683759a2
3
  size 53764520
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:753e7bfc1b0b5b95674493b1a72e7d095ef12542482a6529d4642be60587e74d
3
  size 53764520
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 256,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:953e9ce9b8f402e54bc6f7c49fdddff21f4aa74e3786b6159b8b19ef03d4da1f
3
  size 4219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:865f5284d663e02e4c82d01e5ef41853179fa7038eda6296ccfc6095b8fefccf
3
  size 4219