hossay committed on
Commit
35e862d
·
1 Parent(s): 27791c9

🍻 cheers

Browse files
README.md CHANGED
@@ -24,10 +24,10 @@ model-index:
24
  metrics:
25
  - name: Accuracy
26
  type: accuracy
27
- value: 0.8589626933575978
28
  - name: F1
29
  type: f1
30
- value: 0.8116646415552855
31
  ---
32
 
33
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -37,12 +37,12 @@ should probably proofread and complete it, then remove this comment. -->
37
 
38
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the stool-image dataset.
39
  It achieves the following results on the evaluation set:
40
- - Loss: 0.3651
41
- - Auroc: 0.9090
42
- - Accuracy: 0.8590
43
- - Sensitivity: 0.7422
44
- - Specificity: 0.9399
45
- - F1: 0.8117
46
 
47
  ## Model description
48
 
@@ -67,15 +67,19 @@ The following hyperparameters were used during training:
67
  - seed: 42
68
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
69
  - lr_scheduler_type: linear
70
- - num_epochs: 2
71
  - mixed_precision_training: Native AMP
72
 
73
  ### Training results
74
 
75
  | Training Loss | Epoch | Step | Validation Loss | Auroc | Accuracy | Sensitivity | Specificity | F1 |
76
  |:-------------:|:-----:|:----:|:---------------:|:------:|:--------:|:-----------:|:----------:|:------:|
77
- | 0.3775 | 0.98 | 100 | 0.4743 | 0.9005 | 0.7863 | 0.5899 | 0.9602 | 0.7216 |
78
- | 0.3238 | 1.96 | 200 | 0.4048 | 0.8973 | 0.8232 | 0.7360 | 0.9005 | 0.7964 |
 
 
 
 
79
 
80
 
81
  ### Framework versions
 
24
  metrics:
25
  - name: Accuracy
26
  type: accuracy
27
+ value: 0.8171064604185623
28
  - name: F1
29
  type: f1
30
+ value: 0.7841031149301826
31
  ---
32
 
33
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
37
 
38
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the stool-image dataset.
39
  It achieves the following results on the evaluation set:
40
+ - Loss: 0.4538
41
+ - Auroc: 0.8897
42
+ - Accuracy: 0.8171
43
+ - Sensitivity: 0.8111
44
+ - Specificity: 0.8213
45
+ - F1: 0.7841
46
 
47
  ## Model description
48
 
 
67
  - seed: 42
68
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
69
  - lr_scheduler_type: linear
70
+ - num_epochs: 100
71
  - mixed_precision_training: Native AMP
72
 
73
  ### Training results
74
 
75
  | Training Loss | Epoch | Step | Validation Loss | Auroc | Accuracy | Sensitivity | Specificity | F1 |
76
  |:-------------:|:-----:|:----:|:---------------:|:------:|:--------:|:-----------:|:----------:|:------:|
77
+ | 0.5303 | 0.98 | 100 | 0.4327 | 0.8826 | 0.7942 | 0.7191 | 0.8607 | 0.7665 |
78
+ | 0.3909 | 1.96 | 200 | 0.5196 | 0.8675 | 0.8047 | 0.8539 | 0.7612 | 0.8042 |
79
+ | 0.5328 | 2.94 | 300 | 0.4421 | 0.8864 | 0.8074 | 0.7528 | 0.8557 | 0.7859 |
80
+ | 0.4834 | 3.92 | 400 | 0.4721 | 0.8596 | 0.7757 | 0.7135 | 0.8308 | 0.7493 |
81
+ | 0.4209 | 4.9 | 500 | 0.4797 | 0.8625 | 0.7863 | 0.6798 | 0.8806 | 0.7492 |
82
+ | 0.4567 | 5.88 | 600 | 0.5150 | 0.8688 | 0.7942 | 0.6011 | 0.9652 | 0.7329 |
83
 
84
 
85
  ### Framework versions
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 2.0,
3
- "eval_accuracy": 0.8589626933575978,
4
- "eval_auroc": 0.9090378359869886,
5
- "eval_f1": 0.8116646415552855,
6
- "eval_loss": 0.3651178777217865,
7
- "eval_runtime": 64.3389,
8
- "eval_samples_per_second": 17.081,
9
- "eval_sensitivity": 0.7422222222222222,
10
- "eval_specificty": 0.9399075500770416,
11
- "eval_steps_per_second": 2.145,
12
- "train_loss": 0.45571306347846985,
13
- "train_runtime": 150.3892,
14
- "train_samples_per_second": 21.611,
15
- "train_steps_per_second": 1.356
16
  }
 
1
  {
2
+ "epoch": 5.88,
3
+ "eval_accuracy": 0.8171064604185623,
4
+ "eval_auroc": 0.8896644410203732,
5
+ "eval_f1": 0.7841031149301826,
6
+ "eval_loss": 0.45378199219703674,
7
+ "eval_runtime": 63.9212,
8
+ "eval_samples_per_second": 17.193,
9
+ "eval_sensitivity": 0.8111111111111111,
10
+ "eval_specificty": 0.8212634822804314,
11
+ "eval_steps_per_second": 2.159,
12
+ "train_loss": 0.4638279656569163,
13
+ "train_runtime": 457.0802,
14
+ "train_samples_per_second": 355.517,
15
+ "train_steps_per_second": 22.316
16
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 2.0,
3
- "eval_accuracy": 0.8589626933575978,
4
- "eval_auroc": 0.9090378359869886,
5
- "eval_f1": 0.8116646415552855,
6
- "eval_loss": 0.3651178777217865,
7
- "eval_runtime": 64.3389,
8
- "eval_samples_per_second": 17.081,
9
- "eval_sensitivity": 0.7422222222222222,
10
- "eval_specificty": 0.9399075500770416,
11
- "eval_steps_per_second": 2.145
12
  }
 
1
  {
2
+ "epoch": 5.88,
3
+ "eval_accuracy": 0.8171064604185623,
4
+ "eval_auroc": 0.8896644410203732,
5
+ "eval_f1": 0.7841031149301826,
6
+ "eval_loss": 0.45378199219703674,
7
+ "eval_runtime": 63.9212,
8
+ "eval_samples_per_second": 17.193,
9
+ "eval_sensitivity": 0.8111111111111111,
10
+ "eval_specificty": 0.8212634822804314,
11
+ "eval_steps_per_second": 2.159
12
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32661c373043abb2ce39e6889c503c41730dea1672b9a35afda19445c74b08b2
3
  size 343223968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bce917709538915003c0c8fe4efef00dae13bd7d04517b013edb5b0d28d71e17
3
  size 343223968
runs/Jan05_17-18-52_DESKTOP-BDBS5RV/events.out.tfevents.1704442947.DESKTOP-BDBS5RV CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6239e87a7bb3ca5ba9ae979e06daaaabe2a3717fd6757d5bd06120dc7faef59a
3
- size 40
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3fe79f35723ce861fee77287d519e1c18d951a60862b85645fcd34c53f49b1e
3
+ size 567
runs/Jan05_17-26-06_DESKTOP-BDBS5RV/events.out.tfevents.1704443167.DESKTOP-BDBS5RV ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf8bb6f30306e3c6d48bd767fb74682b740f846ee095bd0a651196b77c48999
3
+ size 17243
runs/Jan05_17-26-06_DESKTOP-BDBS5RV/events.out.tfevents.1704443688.DESKTOP-BDBS5RV ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09b62303b6c67ec8004a1373b7d8162f85881f93156735bc5dc4c83946ad955c
3
+ size 40
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 2.0,
3
- "train_loss": 0.45571306347846985,
4
- "train_runtime": 150.3892,
5
- "train_samples_per_second": 21.611,
6
- "train_steps_per_second": 1.356
7
  }
 
1
  {
2
+ "epoch": 5.88,
3
+ "train_loss": 0.4638279656569163,
4
+ "train_runtime": 457.0802,
5
+ "train_samples_per_second": 355.517,
6
+ "train_steps_per_second": 22.316
7
  }
trainer_state.json CHANGED
@@ -1,175 +1,467 @@
1
  {
2
- "best_metric": 0.9004555872323774,
3
- "best_model_checkpoint": "./stool-condition-classification\\checkpoint-100",
4
- "epoch": 2.0,
5
  "eval_steps": 100,
6
- "global_step": 204,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.1,
13
- "learning_rate": 0.00019019607843137254,
14
- "loss": 0.615,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.2,
19
- "learning_rate": 0.0001803921568627451,
20
- "loss": 0.527,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.29,
25
- "learning_rate": 0.00017058823529411766,
26
- "loss": 0.5331,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.39,
31
- "learning_rate": 0.00016078431372549022,
32
- "loss": 0.5381,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.49,
37
- "learning_rate": 0.00015098039215686275,
38
- "loss": 0.4821,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.59,
43
- "learning_rate": 0.0001411764705882353,
44
- "loss": 0.5245,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.69,
49
- "learning_rate": 0.00013137254901960784,
50
- "loss": 0.5182,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.78,
55
- "learning_rate": 0.00012156862745098039,
56
- "loss": 0.4576,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.88,
61
- "learning_rate": 0.00011176470588235294,
62
- "loss": 0.4357,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.98,
67
- "learning_rate": 0.00010196078431372549,
68
- "loss": 0.3775,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.98,
73
- "eval_accuracy": 0.7862796833773087,
74
- "eval_auroc": 0.9004555872323774,
75
- "eval_f1": 0.7216494845360824,
76
- "eval_loss": 0.4742838144302368,
77
- "eval_runtime": 18.5519,
78
- "eval_samples_per_second": 20.429,
79
- "eval_sensitivity": 0.5898876404494382,
80
- "eval_specificty": 0.9601990049751243,
81
- "eval_steps_per_second": 2.587,
82
  "step": 100
83
  },
84
  {
85
  "epoch": 1.08,
86
- "learning_rate": 9.215686274509804e-05,
87
- "loss": 0.5825,
88
  "step": 110
89
  },
90
  {
91
  "epoch": 1.18,
92
- "learning_rate": 8.23529411764706e-05,
93
- "loss": 0.3861,
94
  "step": 120
95
  },
96
  {
97
  "epoch": 1.27,
98
- "learning_rate": 7.254901960784314e-05,
99
- "loss": 0.4775,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 1.37,
104
- "learning_rate": 6.274509803921569e-05,
105
- "loss": 0.4094,
106
  "step": 140
107
  },
108
  {
109
  "epoch": 1.47,
110
- "learning_rate": 5.294117647058824e-05,
111
- "loss": 0.4128,
112
  "step": 150
113
  },
114
  {
115
  "epoch": 1.57,
116
- "learning_rate": 4.313725490196079e-05,
117
- "loss": 0.4352,
118
  "step": 160
119
  },
120
  {
121
  "epoch": 1.67,
122
- "learning_rate": 3.3333333333333335e-05,
123
- "loss": 0.4043,
124
  "step": 170
125
  },
126
  {
127
  "epoch": 1.76,
128
- "learning_rate": 2.3529411764705884e-05,
129
- "loss": 0.3687,
130
  "step": 180
131
  },
132
  {
133
  "epoch": 1.86,
134
- "learning_rate": 1.3725490196078432e-05,
135
- "loss": 0.3775,
136
  "step": 190
137
  },
138
  {
139
  "epoch": 1.96,
140
- "learning_rate": 3.92156862745098e-06,
141
- "loss": 0.3238,
142
  "step": 200
143
  },
144
  {
145
  "epoch": 1.96,
146
- "eval_accuracy": 0.8232189973614775,
147
- "eval_auroc": 0.8972832466879087,
148
- "eval_f1": 0.7963525835866261,
149
- "eval_loss": 0.4048151969909668,
150
- "eval_runtime": 18.6017,
151
- "eval_samples_per_second": 20.374,
152
- "eval_sensitivity": 0.7359550561797753,
153
- "eval_specificty": 0.900497512437811,
154
- "eval_steps_per_second": 2.58,
155
  "step": 200
156
  },
157
  {
158
- "epoch": 2.0,
159
- "step": 204,
160
- "total_flos": 2.51848966247424e+17,
161
- "train_loss": 0.45571306347846985,
162
- "train_runtime": 150.3892,
163
- "train_samples_per_second": 21.611,
164
- "train_steps_per_second": 1.356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  }
166
  ],
167
  "logging_steps": 10,
168
- "max_steps": 204,
169
  "num_input_tokens_seen": 0,
170
- "num_train_epochs": 2,
171
  "save_steps": 100,
172
- "total_flos": 2.51848966247424e+17,
173
  "train_batch_size": 16,
174
  "trial_name": null,
175
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8863966683436749,
3
+ "best_model_checkpoint": "./stool-condition-classification\\checkpoint-300",
4
+ "epoch": 5.882352941176471,
5
  "eval_steps": 100,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.1,
13
+ "learning_rate": 0.00019980392156862745,
14
+ "loss": 0.6109,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.2,
19
+ "learning_rate": 0.00019960784313725492,
20
+ "loss": 0.5518,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.29,
25
+ "learning_rate": 0.00019941176470588236,
26
+ "loss": 0.4854,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.39,
31
+ "learning_rate": 0.0001992156862745098,
32
+ "loss": 0.5984,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.49,
37
+ "learning_rate": 0.00019901960784313727,
38
+ "loss": 0.5832,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.59,
43
+ "learning_rate": 0.00019882352941176472,
44
+ "loss": 0.5199,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.69,
49
+ "learning_rate": 0.00019862745098039218,
50
+ "loss": 0.5347,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.78,
55
+ "learning_rate": 0.00019843137254901963,
56
+ "loss": 0.5159,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.88,
61
+ "learning_rate": 0.00019823529411764707,
62
+ "loss": 0.5069,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.98,
67
+ "learning_rate": 0.00019803921568627454,
68
+ "loss": 0.5303,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.98,
73
+ "eval_accuracy": 0.7941952506596306,
74
+ "eval_auroc": 0.8826094247861815,
75
+ "eval_f1": 0.7664670658682635,
76
+ "eval_loss": 0.43268465995788574,
77
+ "eval_runtime": 19.5335,
78
+ "eval_samples_per_second": 19.403,
79
+ "eval_sensitivity": 0.7191011235955056,
80
+ "eval_specificty": 0.8606965174129353,
81
+ "eval_steps_per_second": 2.457,
82
  "step": 100
83
  },
84
  {
85
  "epoch": 1.08,
86
+ "learning_rate": 0.00019784313725490198,
87
+ "loss": 0.514,
88
  "step": 110
89
  },
90
  {
91
  "epoch": 1.18,
92
+ "learning_rate": 0.00019764705882352942,
93
+ "loss": 0.3919,
94
  "step": 120
95
  },
96
  {
97
  "epoch": 1.27,
98
+ "learning_rate": 0.0001974509803921569,
99
+ "loss": 0.5246,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 1.37,
104
+ "learning_rate": 0.00019725490196078433,
105
+ "loss": 0.5262,
106
  "step": 140
107
  },
108
  {
109
  "epoch": 1.47,
110
+ "learning_rate": 0.00019705882352941177,
111
+ "loss": 0.4092,
112
  "step": 150
113
  },
114
  {
115
  "epoch": 1.57,
116
+ "learning_rate": 0.00019686274509803922,
117
+ "loss": 0.6038,
118
  "step": 160
119
  },
120
  {
121
  "epoch": 1.67,
122
+ "learning_rate": 0.00019666666666666666,
123
+ "loss": 0.5079,
124
  "step": 170
125
  },
126
  {
127
  "epoch": 1.76,
128
+ "learning_rate": 0.00019647058823529413,
129
+ "loss": 0.4992,
130
  "step": 180
131
  },
132
  {
133
  "epoch": 1.86,
134
+ "learning_rate": 0.00019627450980392157,
135
+ "loss": 0.4788,
136
  "step": 190
137
  },
138
  {
139
  "epoch": 1.96,
140
+ "learning_rate": 0.000196078431372549,
141
+ "loss": 0.3909,
142
  "step": 200
143
  },
144
  {
145
  "epoch": 1.96,
146
+ "eval_accuracy": 0.8047493403693932,
147
+ "eval_auroc": 0.8674604505562076,
148
+ "eval_f1": 0.8042328042328041,
149
+ "eval_loss": 0.5196136832237244,
150
+ "eval_runtime": 19.2864,
151
+ "eval_samples_per_second": 19.651,
152
+ "eval_sensitivity": 0.8539325842696629,
153
+ "eval_specificty": 0.7611940298507462,
154
+ "eval_steps_per_second": 2.489,
155
  "step": 200
156
  },
157
  {
158
+ "epoch": 2.06,
159
+ "learning_rate": 0.00019588235294117648,
160
+ "loss": 0.4421,
161
+ "step": 210
162
+ },
163
+ {
164
+ "epoch": 2.16,
165
+ "learning_rate": 0.00019568627450980392,
166
+ "loss": 0.4479,
167
+ "step": 220
168
+ },
169
+ {
170
+ "epoch": 2.25,
171
+ "learning_rate": 0.00019549019607843136,
172
+ "loss": 0.4655,
173
+ "step": 230
174
+ },
175
+ {
176
+ "epoch": 2.35,
177
+ "learning_rate": 0.00019529411764705883,
178
+ "loss": 0.4479,
179
+ "step": 240
180
+ },
181
+ {
182
+ "epoch": 2.45,
183
+ "learning_rate": 0.00019509803921568628,
184
+ "loss": 0.3894,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 2.55,
189
+ "learning_rate": 0.00019490196078431372,
190
+ "loss": 0.4315,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 2.65,
195
+ "learning_rate": 0.0001947058823529412,
196
+ "loss": 0.4783,
197
+ "step": 270
198
+ },
199
+ {
200
+ "epoch": 2.75,
201
+ "learning_rate": 0.00019450980392156863,
202
+ "loss": 0.3792,
203
+ "step": 280
204
+ },
205
+ {
206
+ "epoch": 2.84,
207
+ "learning_rate": 0.0001943137254901961,
208
+ "loss": 0.5301,
209
+ "step": 290
210
+ },
211
+ {
212
+ "epoch": 2.94,
213
+ "learning_rate": 0.00019411764705882354,
214
+ "loss": 0.5328,
215
+ "step": 300
216
+ },
217
+ {
218
+ "epoch": 2.94,
219
+ "eval_accuracy": 0.8073878627968337,
220
+ "eval_auroc": 0.8863966683436749,
221
+ "eval_f1": 0.7859237536656891,
222
+ "eval_loss": 0.4421083927154541,
223
+ "eval_runtime": 19.1397,
224
+ "eval_samples_per_second": 19.802,
225
+ "eval_sensitivity": 0.7528089887640449,
226
+ "eval_specificty": 0.8557213930348259,
227
+ "eval_steps_per_second": 2.508,
228
+ "step": 300
229
+ },
230
+ {
231
+ "epoch": 3.04,
232
+ "learning_rate": 0.00019392156862745098,
233
+ "loss": 0.4437,
234
+ "step": 310
235
+ },
236
+ {
237
+ "epoch": 3.14,
238
+ "learning_rate": 0.00019372549019607845,
239
+ "loss": 0.3859,
240
+ "step": 320
241
+ },
242
+ {
243
+ "epoch": 3.24,
244
+ "learning_rate": 0.0001935294117647059,
245
+ "loss": 0.4995,
246
+ "step": 330
247
+ },
248
+ {
249
+ "epoch": 3.33,
250
+ "learning_rate": 0.00019333333333333333,
251
+ "loss": 0.4282,
252
+ "step": 340
253
+ },
254
+ {
255
+ "epoch": 3.43,
256
+ "learning_rate": 0.0001931372549019608,
257
+ "loss": 0.4742,
258
+ "step": 350
259
+ },
260
+ {
261
+ "epoch": 3.53,
262
+ "learning_rate": 0.00019294117647058825,
263
+ "loss": 0.4219,
264
+ "step": 360
265
+ },
266
+ {
267
+ "epoch": 3.63,
268
+ "learning_rate": 0.0001927450980392157,
269
+ "loss": 0.4531,
270
+ "step": 370
271
+ },
272
+ {
273
+ "epoch": 3.73,
274
+ "learning_rate": 0.00019254901960784316,
275
+ "loss": 0.545,
276
+ "step": 380
277
+ },
278
+ {
279
+ "epoch": 3.82,
280
+ "learning_rate": 0.0001923529411764706,
281
+ "loss": 0.4216,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 3.92,
286
+ "learning_rate": 0.00019215686274509807,
287
+ "loss": 0.4834,
288
+ "step": 400
289
+ },
290
+ {
291
+ "epoch": 3.92,
292
+ "eval_accuracy": 0.7757255936675461,
293
+ "eval_auroc": 0.8596483872770976,
294
+ "eval_f1": 0.7492625368731564,
295
+ "eval_loss": 0.4720683693885803,
296
+ "eval_runtime": 19.0669,
297
+ "eval_samples_per_second": 19.877,
298
+ "eval_sensitivity": 0.7134831460674157,
299
+ "eval_specificty": 0.8308457711442786,
300
+ "eval_steps_per_second": 2.517,
301
+ "step": 400
302
+ },
303
+ {
304
+ "epoch": 4.02,
305
+ "learning_rate": 0.0001919607843137255,
306
+ "loss": 0.3281,
307
+ "step": 410
308
+ },
309
+ {
310
+ "epoch": 4.12,
311
+ "learning_rate": 0.00019176470588235295,
312
+ "loss": 0.5168,
313
+ "step": 420
314
+ },
315
+ {
316
+ "epoch": 4.22,
317
+ "learning_rate": 0.00019156862745098042,
318
+ "loss": 0.5219,
319
+ "step": 430
320
+ },
321
+ {
322
+ "epoch": 4.31,
323
+ "learning_rate": 0.00019137254901960786,
324
+ "loss": 0.4131,
325
+ "step": 440
326
+ },
327
+ {
328
+ "epoch": 4.41,
329
+ "learning_rate": 0.0001911764705882353,
330
+ "loss": 0.4494,
331
+ "step": 450
332
+ },
333
+ {
334
+ "epoch": 4.51,
335
+ "learning_rate": 0.00019098039215686277,
336
+ "loss": 0.4206,
337
+ "step": 460
338
+ },
339
+ {
340
+ "epoch": 4.61,
341
+ "learning_rate": 0.00019078431372549022,
342
+ "loss": 0.3932,
343
+ "step": 470
344
+ },
345
+ {
346
+ "epoch": 4.71,
347
+ "learning_rate": 0.00019058823529411766,
348
+ "loss": 0.4607,
349
+ "step": 480
350
+ },
351
+ {
352
+ "epoch": 4.8,
353
+ "learning_rate": 0.0001903921568627451,
354
+ "loss": 0.3892,
355
+ "step": 490
356
+ },
357
+ {
358
+ "epoch": 4.9,
359
+ "learning_rate": 0.00019019607843137254,
360
+ "loss": 0.4209,
361
+ "step": 500
362
+ },
363
+ {
364
+ "epoch": 4.9,
365
+ "eval_accuracy": 0.7862796833773087,
366
+ "eval_auroc": 0.8624853261780983,
367
+ "eval_f1": 0.7492260061919503,
368
+ "eval_loss": 0.47969380021095276,
369
+ "eval_runtime": 18.951,
370
+ "eval_samples_per_second": 19.999,
371
+ "eval_sensitivity": 0.6797752808988764,
372
+ "eval_specificty": 0.8805970149253731,
373
+ "eval_steps_per_second": 2.533,
374
+ "step": 500
375
+ },
376
+ {
377
+ "epoch": 5.0,
378
+ "learning_rate": 0.00019,
379
+ "loss": 0.3681,
380
+ "step": 510
381
+ },
382
+ {
383
+ "epoch": 5.1,
384
+ "learning_rate": 0.00018980392156862745,
385
+ "loss": 0.397,
386
+ "step": 520
387
+ },
388
+ {
389
+ "epoch": 5.2,
390
+ "learning_rate": 0.0001896078431372549,
391
+ "loss": 0.3833,
392
+ "step": 530
393
+ },
394
+ {
395
+ "epoch": 5.29,
396
+ "learning_rate": 0.00018941176470588236,
397
+ "loss": 0.4202,
398
+ "step": 540
399
+ },
400
+ {
401
+ "epoch": 5.39,
402
+ "learning_rate": 0.0001892156862745098,
403
+ "loss": 0.4304,
404
+ "step": 550
405
+ },
406
+ {
407
+ "epoch": 5.49,
408
+ "learning_rate": 0.00018901960784313725,
409
+ "loss": 0.3883,
410
+ "step": 560
411
+ },
412
+ {
413
+ "epoch": 5.59,
414
+ "learning_rate": 0.00018882352941176472,
415
+ "loss": 0.4737,
416
+ "step": 570
417
+ },
418
+ {
419
+ "epoch": 5.69,
420
+ "learning_rate": 0.00018862745098039216,
421
+ "loss": 0.4661,
422
+ "step": 580
423
+ },
424
+ {
425
+ "epoch": 5.78,
426
+ "learning_rate": 0.0001884313725490196,
427
+ "loss": 0.3469,
428
+ "step": 590
429
+ },
430
+ {
431
+ "epoch": 5.88,
432
+ "learning_rate": 0.00018823529411764707,
433
+ "loss": 0.4567,
434
+ "step": 600
435
+ },
436
+ {
437
+ "epoch": 5.88,
438
+ "eval_accuracy": 0.7941952506596306,
439
+ "eval_auroc": 0.8687741069931243,
440
+ "eval_f1": 0.7328767123287672,
441
+ "eval_loss": 0.514951765537262,
442
+ "eval_runtime": 18.7983,
443
+ "eval_samples_per_second": 20.161,
444
+ "eval_sensitivity": 0.601123595505618,
445
+ "eval_specificty": 0.9651741293532339,
446
+ "eval_steps_per_second": 2.553,
447
+ "step": 600
448
+ },
449
+ {
450
+ "epoch": 5.88,
451
+ "step": 600,
452
+ "total_flos": 7.412108806635725e+17,
453
+ "train_loss": 0.4638279656569163,
454
+ "train_runtime": 457.0802,
455
+ "train_samples_per_second": 355.517,
456
+ "train_steps_per_second": 22.316
457
  }
458
  ],
459
  "logging_steps": 10,
460
+ "max_steps": 10200,
461
  "num_input_tokens_seen": 0,
462
+ "num_train_epochs": 100,
463
  "save_steps": 100,
464
+ "total_flos": 7.412108806635725e+17,
465
  "train_batch_size": 16,
466
  "trial_name": null,
467
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc54caf773d8dfee9791bdd5766a05bc8c92e8f4edfbd648c35360165e253b04
3
  size 4283
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00021469e2fc2da6e352c64ba5c1eff722ad7949ad69e804b54677deb2c22f06
3
  size 4283