groderg committed on
Commit
f59385d
1 Parent(s): 9cbd8dd

Evaluation on the test set completed on 2024_10_31.

Browse files
README.md CHANGED
@@ -16,12 +16,12 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [facebook/dinov2-large](https://huggingface.co/facebook/dinov2-large) on an unspecified dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.6333
20
- - Rmse: 0.3468
21
- - Mae: 0.3060
22
- - R2: -1.9752
23
- - Explained Variance: 0.1029
24
- - Learning Rate: 0.0000
25
 
26
  ## Model description
27
 
@@ -53,66 +53,46 @@ The following hyperparameters were used during training:
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Rmse | Mae | R2 | Explained Variance | Learning Rate |
55
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:--------:|:------------------:|:------:|
56
- | No log | 1.0 | 2 | 0.7005 | 0.3966 | 0.3705 | -21.5069 | 0.0684 | 0.001 |
57
- | No log | 2.0 | 4 | 0.7249 | 0.4021 | 0.3746 | -26.3836 | 0.0822 | 0.001 |
58
- | No log | 3.0 | 6 | 0.7532 | 0.4114 | 0.3816 | -29.6868 | 0.1178 | 0.001 |
59
- | No log | 4.0 | 8 | 0.7681 | 0.4186 | 0.3850 | -29.0398 | 0.0566 | 0.001 |
60
- | No log | 5.0 | 10 | 0.7665 | 0.4178 | 0.3827 | -26.6101 | 0.0116 | 0.001 |
61
- | No log | 6.0 | 12 | 0.7594 | 0.4152 | 0.3779 | -24.2590 | -0.0414 | 0.001 |
62
- | No log | 7.0 | 14 | 0.7494 | 0.4108 | 0.3715 | -22.3016 | -0.1878 | 0.001 |
63
- | No log | 8.0 | 16 | 0.7214 | 0.3992 | 0.3610 | -20.1630 | -0.1876 | 0.0001 |
64
- | No log | 9.0 | 18 | 0.7013 | 0.3905 | 0.3530 | -18.6708 | -0.1643 | 0.0001 |
65
- | No log | 10.0 | 20 | 0.6869 | 0.3836 | 0.3467 | -17.4192 | -0.1505 | 0.0001 |
66
- | No log | 11.0 | 22 | 0.6764 | 0.3787 | 0.3425 | -16.5076 | -0.1282 | 0.0001 |
67
- | No log | 12.0 | 24 | 0.6669 | 0.3740 | 0.3384 | -16.0072 | -0.1085 | 0.0001 |
68
- | No log | 13.0 | 26 | 0.6617 | 0.3712 | 0.3358 | -15.5612 | -0.0882 | 0.0001 |
69
- | No log | 14.0 | 28 | 0.6557 | 0.3683 | 0.3332 | -14.8471 | -0.0399 | 0.0001 |
70
- | No log | 15.0 | 30 | 0.6517 | 0.3661 | 0.3313 | -14.3744 | -0.0149 | 0.0001 |
71
- | No log | 16.0 | 32 | 0.6494 | 0.3650 | 0.3302 | -14.0923 | 0.0009 | 0.0001 |
72
- | No log | 17.0 | 34 | 0.6469 | 0.3634 | 0.3284 | -14.0430 | 0.0076 | 0.0001 |
73
- | No log | 18.0 | 36 | 0.6455 | 0.3626 | 0.3275 | -13.8481 | 0.0275 | 0.0001 |
74
- | No log | 19.0 | 38 | 0.6437 | 0.3617 | 0.3270 | -13.7294 | 0.0458 | 0.0001 |
75
- | No log | 20.0 | 40 | 0.6426 | 0.3611 | 0.3265 | -13.4695 | 0.0571 | 0.0001 |
76
- | No log | 21.0 | 42 | 0.6414 | 0.3605 | 0.3256 | -13.4449 | 0.0581 | 0.0001 |
77
- | No log | 22.0 | 44 | 0.6422 | 0.3605 | 0.3257 | -13.3180 | 0.0542 | 0.0001 |
78
- | No log | 23.0 | 46 | 0.6407 | 0.3593 | 0.3246 | -13.2487 | 0.0755 | 0.0001 |
79
- | No log | 24.0 | 48 | 0.6375 | 0.3576 | 0.3230 | -13.2495 | 0.0741 | 0.0001 |
80
- | No log | 25.0 | 50 | 0.6332 | 0.3551 | 0.3205 | -12.9650 | 0.0843 | 0.0001 |
81
- | No log | 26.0 | 52 | 0.6316 | 0.3540 | 0.3191 | -12.7124 | 0.0903 | 0.0001 |
82
- | No log | 27.0 | 54 | 0.6298 | 0.3527 | 0.3176 | -12.5315 | 0.0972 | 0.0001 |
83
- | No log | 28.0 | 56 | 0.6287 | 0.3519 | 0.3168 | -12.3934 | 0.1010 | 0.0001 |
84
- | No log | 29.0 | 58 | 0.6279 | 0.3514 | 0.3163 | -12.3234 | 0.1064 | 0.0001 |
85
- | No log | 30.0 | 60 | 0.6246 | 0.3494 | 0.3141 | -12.2314 | 0.1160 | 0.0001 |
86
- | No log | 31.0 | 62 | 0.6211 | 0.3475 | 0.3123 | -12.0643 | 0.1264 | 0.0001 |
87
- | No log | 32.0 | 64 | 0.6218 | 0.3477 | 0.3125 | -11.9670 | 0.1294 | 0.0001 |
88
- | No log | 33.0 | 66 | 0.6202 | 0.3470 | 0.3120 | -11.7550 | 0.1365 | 0.0001 |
89
- | No log | 34.0 | 68 | 0.6191 | 0.3463 | 0.3111 | -11.6145 | 0.1364 | 0.0001 |
90
- | No log | 35.0 | 70 | 0.6174 | 0.3455 | 0.3105 | -11.5861 | 0.1400 | 0.0001 |
91
- | No log | 36.0 | 72 | 0.6195 | 0.3462 | 0.3109 | -11.7605 | 0.1398 | 0.0001 |
92
- | No log | 37.0 | 74 | 0.6210 | 0.3470 | 0.3114 | -11.7035 | 0.1367 | 0.0001 |
93
- | No log | 38.0 | 76 | 0.6201 | 0.3463 | 0.3107 | -11.6608 | 0.1387 | 0.0001 |
94
- | No log | 39.0 | 78 | 0.6195 | 0.3461 | 0.3106 | -11.6294 | 0.1362 | 0.0001 |
95
- | No log | 40.0 | 80 | 0.6195 | 0.3459 | 0.3101 | -11.6709 | 0.1279 | 0.0001 |
96
- | No log | 41.0 | 82 | 0.6196 | 0.3456 | 0.3095 | -11.4656 | 0.1154 | 0.0001 |
97
- | No log | 42.0 | 84 | 0.6185 | 0.3453 | 0.3096 | -11.4190 | 0.1220 | 1e-05 |
98
- | No log | 43.0 | 86 | 0.6196 | 0.3457 | 0.3099 | -11.4211 | 0.1224 | 1e-05 |
99
- | No log | 44.0 | 88 | 0.6175 | 0.3448 | 0.3091 | -11.3422 | 0.1252 | 1e-05 |
100
- | No log | 45.0 | 90 | 0.6148 | 0.3435 | 0.3079 | -11.2377 | 0.1267 | 1e-05 |
101
- | No log | 46.0 | 92 | 0.6156 | 0.3439 | 0.3081 | -11.2161 | 0.1232 | 1e-05 |
102
- | No log | 47.0 | 94 | 0.6162 | 0.3442 | 0.3084 | -11.2359 | 0.1219 | 1e-05 |
103
- | No log | 48.0 | 96 | 0.6153 | 0.3438 | 0.3079 | -11.1407 | 0.1218 | 1e-05 |
104
- | No log | 49.0 | 98 | 0.6142 | 0.3434 | 0.3075 | -11.0878 | 0.1259 | 1e-05 |
105
- | No log | 50.0 | 100 | 0.6125 | 0.3427 | 0.3071 | -11.1648 | 0.1241 | 1e-05 |
106
- | No log | 51.0 | 102 | 0.6131 | 0.3430 | 0.3072 | -11.2371 | 0.1274 | 1e-05 |
107
- | No log | 52.0 | 104 | 0.6137 | 0.3434 | 0.3077 | -11.3909 | 0.1274 | 1e-05 |
108
- | No log | 53.0 | 106 | 0.6139 | 0.3434 | 0.3077 | -11.5018 | 0.1224 | 1e-05 |
109
- | No log | 54.0 | 108 | 0.6157 | 0.3445 | 0.3089 | -11.6674 | 0.1222 | 1e-05 |
110
- | No log | 55.0 | 110 | 0.6168 | 0.3448 | 0.3090 | -11.6467 | 0.1222 | 1e-05 |
111
- | No log | 56.0 | 112 | 0.6140 | 0.3434 | 0.3077 | -11.4968 | 0.1250 | 1e-05 |
112
- | No log | 57.0 | 114 | 0.6133 | 0.3430 | 0.3071 | -11.5002 | 0.1216 | 0.0000 |
113
- | No log | 58.0 | 116 | 0.6130 | 0.3428 | 0.3070 | -11.4475 | 0.1210 | 0.0000 |
114
- | No log | 59.0 | 118 | 0.6150 | 0.3441 | 0.3083 | -11.5562 | 0.1178 | 0.0000 |
115
- | No log | 60.0 | 120 | 0.6167 | 0.3450 | 0.3092 | -11.4676 | 0.1243 | 0.0000 |
116
 
117
 
118
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [facebook/dinov2-large](https://huggingface.co/facebook/dinov2-large) on an unspecified dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.6195
20
+ - Rmse: 0.3419
21
+ - Mae: 0.3068
22
+ - R2: -1.6131
23
+ - Explained Variance: 0.2071
24
+ - Learning Rate: 1e-05
25
 
26
  ## Model description
27
 
 
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Rmse | Mae | R2 | Explained Variance | Learning Rate |
55
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:--------:|:------------------:|:------:|
56
+ | No log | 1.0 | 2 | 0.7150 | 0.4100 | 0.3849 | -20.2909 | 0.0364 | 0.001 |
57
+ | No log | 2.0 | 4 | 0.7314 | 0.4163 | 0.3895 | -21.2182 | 0.0241 | 0.001 |
58
+ | No log | 3.0 | 6 | 0.7726 | 0.4321 | 0.4041 | -24.8224 | -0.0469 | 0.001 |
59
+ | No log | 4.0 | 8 | 0.7917 | 0.4380 | 0.4095 | -26.5816 | -0.0667 | 0.001 |
60
+ | No log | 5.0 | 10 | 0.7853 | 0.4318 | 0.4021 | -26.9559 | -0.1362 | 0.001 |
61
+ | No log | 6.0 | 12 | 0.7648 | 0.4224 | 0.3905 | -24.4015 | -0.1297 | 0.001 |
62
+ | No log | 7.0 | 14 | 0.7392 | 0.4103 | 0.3760 | -22.5579 | -0.1098 | 0.001 |
63
+ | No log | 8.0 | 16 | 0.7115 | 0.3983 | 0.3639 | -20.0674 | -0.1054 | 0.0001 |
64
+ | No log | 9.0 | 18 | 0.6897 | 0.3879 | 0.3535 | -18.1665 | -0.0925 | 0.0001 |
65
+ | No log | 10.0 | 20 | 0.6777 | 0.3818 | 0.3468 | -16.9447 | -0.1029 | 0.0001 |
66
+ | No log | 11.0 | 22 | 0.6702 | 0.3780 | 0.3424 | -16.0375 | -0.1169 | 0.0001 |
67
+ | No log | 12.0 | 24 | 0.6639 | 0.3744 | 0.3389 | -15.6052 | -0.1121 | 0.0001 |
68
+ | No log | 13.0 | 26 | 0.6565 | 0.3703 | 0.3346 | -14.8051 | -0.1065 | 0.0001 |
69
+ | No log | 14.0 | 28 | 0.6501 | 0.3668 | 0.3310 | -14.2312 | -0.0958 | 0.0001 |
70
+ | No log | 15.0 | 30 | 0.6468 | 0.3648 | 0.3289 | -14.0799 | -0.0855 | 0.0001 |
71
+ | No log | 16.0 | 32 | 0.6471 | 0.3650 | 0.3289 | -14.2557 | -0.0823 | 0.0001 |
72
+ | No log | 17.0 | 34 | 0.6435 | 0.3631 | 0.3268 | -14.0598 | -0.0810 | 0.0001 |
73
+ | No log | 18.0 | 36 | 0.6438 | 0.3634 | 0.3270 | -14.0369 | -0.0799 | 0.0001 |
74
+ | No log | 19.0 | 38 | 0.6400 | 0.3614 | 0.3250 | -13.8152 | -0.0888 | 0.0001 |
75
+ | No log | 20.0 | 40 | 0.6392 | 0.3609 | 0.3246 | -13.7104 | -0.0935 | 0.0001 |
76
+ | No log | 21.0 | 42 | 0.6387 | 0.3606 | 0.3246 | -13.8099 | -0.0993 | 0.0001 |
77
+ | No log | 22.0 | 44 | 0.6388 | 0.3606 | 0.3243 | -13.8497 | -0.1056 | 0.0001 |
78
+ | No log | 23.0 | 46 | 0.6362 | 0.3590 | 0.3228 | -13.5622 | -0.1035 | 0.0001 |
79
+ | No log | 24.0 | 48 | 0.6354 | 0.3585 | 0.3223 | -13.6453 | -0.1058 | 0.0001 |
80
+ | No log | 25.0 | 50 | 0.6345 | 0.3578 | 0.3214 | -13.6023 | -0.1036 | 0.0001 |
81
+ | No log | 26.0 | 52 | 0.6349 | 0.3581 | 0.3212 | -13.6304 | -0.1173 | 0.0001 |
82
+ | No log | 27.0 | 54 | 0.6333 | 0.3571 | 0.3201 | -13.5613 | -0.1148 | 0.0001 |
83
+ | No log | 28.0 | 56 | 0.6295 | 0.3548 | 0.3177 | -13.2331 | -0.1083 | 0.0001 |
84
+ | No log | 29.0 | 58 | 0.6285 | 0.3543 | 0.3173 | -13.1623 | -0.1047 | 0.0001 |
85
+ | No log | 30.0 | 60 | 0.6263 | 0.3532 | 0.3163 | -12.7132 | -0.0926 | 0.0001 |
86
+ | No log | 31.0 | 62 | 0.6273 | 0.3538 | 0.3167 | -12.8739 | -0.0893 | 0.0001 |
87
+ | No log | 32.0 | 64 | 0.6294 | 0.3550 | 0.3181 | -12.9355 | -0.0790 | 0.0001 |
88
+ | No log | 33.0 | 66 | 0.6299 | 0.3554 | 0.3185 | -12.9352 | -0.0752 | 0.0001 |
89
+ | No log | 34.0 | 68 | 0.6321 | 0.3564 | 0.3193 | -13.2672 | -0.0702 | 0.0001 |
90
+ | No log | 35.0 | 70 | 0.6279 | 0.3541 | 0.3175 | -12.9995 | -0.0487 | 0.0001 |
91
+ | No log | 36.0 | 72 | 0.6280 | 0.3541 | 0.3174 | -13.0074 | -0.0466 | 0.0001 |
92
+ | No log | 37.0 | 74 | 0.6304 | 0.3554 | 0.3187 | -13.2310 | -0.0494 | 1e-05 |
93
+ | No log | 38.0 | 76 | 0.6297 | 0.3551 | 0.3183 | -12.9830 | -0.0439 | 1e-05 |
94
+ | No log | 39.0 | 78 | 0.6308 | 0.3558 | 0.3193 | -13.1598 | -0.0430 | 1e-05 |
95
+ | No log | 40.0 | 80 | 0.6292 | 0.3548 | 0.3183 | -13.0698 | -0.0435 | 1e-05 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
 
98
  ### Framework versions
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 60.0,
3
- "eval_explained_variance": 0.10288698971271515,
4
- "eval_loss": 0.633324384689331,
5
- "eval_mae": 0.30597275495529175,
6
- "eval_r2": -1.975172758102417,
7
- "eval_rmse": 0.3468049466609955,
8
- "eval_runtime": 1.0485,
9
- "eval_samples_per_second": 47.685,
10
- "eval_steps_per_second": 1.907,
11
- "learning_rate": 1.0000000000000002e-06,
12
- "total_flos": 4.4402778184752e+17,
13
- "train_loss": 0.6329069137573242,
14
- "train_runtime": 375.0945,
15
- "train_samples_per_second": 19.995,
16
- "train_steps_per_second": 0.8
17
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "eval_explained_variance": 0.20714369416236877,
4
+ "eval_loss": 0.6195399761199951,
5
+ "eval_mae": 0.3067561388015747,
6
+ "eval_r2": -1.6130945682525635,
7
+ "eval_rmse": 0.34193673729896545,
8
+ "eval_runtime": 1.099,
9
+ "eval_samples_per_second": 45.496,
10
+ "eval_steps_per_second": 1.82,
11
+ "learning_rate": 1e-05,
12
+ "total_flos": 2.9601852123168e+17,
13
+ "train_loss": 0.64580397605896,
14
+ "train_runtime": 275.9938,
15
+ "train_samples_per_second": 27.175,
16
+ "train_steps_per_second": 1.087
17
  }
config.json CHANGED
@@ -1,107 +1,91 @@
1
  {
2
- "_name_or_path": "facebook/dinov2-large",
3
- "apply_layernorm": true,
4
- "architectures": [
5
- "Dinov2ForImageClassification"
6
- ],
7
- "attention_probs_dropout_prob": 0.0,
8
- "drop_path_rate": 0.0,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.0,
11
- "hidden_size": 1024,
12
- "id2label": {
13
- "0": "Acropore_branched",
14
- "1": "Acropore_digitised",
15
- "2": "Acropore_tabular",
16
- "3": "Algae",
17
- "4": "Dead_coral",
18
- "5": "Fish",
19
- "6": "Millepore",
20
- "7": "No_acropore_encrusting",
21
- "8": "No_acropore_massive",
22
- "9": "No_acropore_sub_massive",
23
- "10": "Rock",
24
- "11": "Rubble",
25
- "12": "Sand"
26
- },
27
- "image_size": 164,
28
- "initializer_range": 0.02,
29
- "label2id": {
30
- "Acropore_branched": 0,
31
- "Acropore_digitised": 1,
32
- "Acropore_tabular": 2,
33
- "Algae": 3,
34
- "Dead_coral": 4,
35
- "Fish": 5,
36
- "Millepore": 6,
37
- "No_acropore_encrusting": 7,
38
- "No_acropore_massive": 8,
39
- "No_acropore_sub_massive": 9,
40
- "Rock": 10,
41
- "Rubble": 11,
42
- "Sand": 12
43
- },
44
- "layer_norm_eps": 1e-06,
45
- "layerscale_value": 1.0,
46
- "mlp_ratio": 4,
47
- "model_type": "dinov2",
48
- "num_attention_heads": 16,
49
- "num_channels": 3,
50
- "num_hidden_layers": 24,
51
- "out_features": [
52
- "stage24"
53
- ],
54
- "out_indices": [
55
- 24
56
- ],
57
- "patch_size": 14,
58
- "problem_type": "multi_label_classification",
59
- "qkv_bias": true,
60
- "reshape_hidden_states": true,
61
- "stage_names": [
62
- "stem",
63
- "stage1",
64
- "stage2",
65
- "stage3",
66
- "stage4",
67
- "stage5",
68
- "stage6",
69
- "stage7",
70
- "stage8",
71
- "stage9",
72
- "stage10",
73
- "stage11",
74
- "stage12",
75
- "stage13",
76
- "stage14",
77
- "stage15",
78
- "stage16",
79
- "stage17",
80
- "stage18",
81
- "stage19",
82
- "stage20",
83
- "stage21",
84
- "stage22",
85
- "stage23",
86
- "stage24"
87
- ],
88
- "torch_dtype": "float32",
89
- "transformers_version": "4.44.2",
90
- "use_swiglu_ffn": false,
91
- "initial_learning_rate": 0.001,
92
- "train_batch_size": 32,
93
- "eval_batch_size": 32,
94
- "optimizer": {
95
- "type": "Adam"
96
- },
97
- "lr_scheduler_type": {
98
- "type": "ReduceLROnPlateau"
99
- },
100
- "patience_lr_scheduler": 5,
101
- "factor_lr_scheduler": 0.1,
102
- "weight_decay": 0.0001,
103
- "early_stopping_patience": 10,
104
- "freeze_encoder": true,
105
- "data_augmentation": true,
106
- "num_epochs": 150
107
- }
 
1
  {
2
+ "_name_or_path": "facebook/dinov2-large",
3
+ "apply_layernorm": true,
4
+ "architectures": [
5
+ "Dinov2ForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "drop_path_rate": 0.0,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.0,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "Acropore_branched",
14
+ "1": "Acropore_digitised",
15
+ "2": "Acropore_tabular",
16
+ "3": "Algae",
17
+ "4": "Dead_coral",
18
+ "5": "Fish",
19
+ "6": "Millepore",
20
+ "7": "No_acropore_encrusting",
21
+ "8": "No_acropore_massive",
22
+ "9": "No_acropore_sub_massive",
23
+ "10": "Rock",
24
+ "11": "Rubble",
25
+ "12": "Sand"
26
+ },
27
+ "image_size": 164,
28
+ "initializer_range": 0.02,
29
+ "label2id": {
30
+ "Acropore_branched": 0,
31
+ "Acropore_digitised": 1,
32
+ "Acropore_tabular": 2,
33
+ "Algae": 3,
34
+ "Dead_coral": 4,
35
+ "Fish": 5,
36
+ "Millepore": 6,
37
+ "No_acropore_encrusting": 7,
38
+ "No_acropore_massive": 8,
39
+ "No_acropore_sub_massive": 9,
40
+ "Rock": 10,
41
+ "Rubble": 11,
42
+ "Sand": 12
43
+ },
44
+ "layer_norm_eps": 1e-06,
45
+ "layerscale_value": 1.0,
46
+ "mlp_ratio": 4,
47
+ "model_type": "dinov2",
48
+ "num_attention_heads": 16,
49
+ "num_channels": 3,
50
+ "num_hidden_layers": 24,
51
+ "out_features": [
52
+ "stage24"
53
+ ],
54
+ "out_indices": [
55
+ 24
56
+ ],
57
+ "patch_size": 14,
58
+ "problem_type": "multi_label_classification",
59
+ "qkv_bias": true,
60
+ "reshape_hidden_states": true,
61
+ "stage_names": [
62
+ "stem",
63
+ "stage1",
64
+ "stage2",
65
+ "stage3",
66
+ "stage4",
67
+ "stage5",
68
+ "stage6",
69
+ "stage7",
70
+ "stage8",
71
+ "stage9",
72
+ "stage10",
73
+ "stage11",
74
+ "stage12",
75
+ "stage13",
76
+ "stage14",
77
+ "stage15",
78
+ "stage16",
79
+ "stage17",
80
+ "stage18",
81
+ "stage19",
82
+ "stage20",
83
+ "stage21",
84
+ "stage22",
85
+ "stage23",
86
+ "stage24"
87
+ ],
88
+ "torch_dtype": "float32",
89
+ "transformers_version": "4.44.2",
90
+ "use_swiglu_ffn": false
91
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/events.out.tfevents.1730353344.datavisu3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:599619c4bf040e2e2108488a096b7ab47b8d33f71ba2deaec16843b577491876
3
- size 40
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da2ac5ad39216a587fed6417c22c3f0c9fbe120eb5f2fda537c771f1c00547eb
3
+ size 562
logs/events.out.tfevents.1730354208.datavisu4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f09a86ceb3ba5902bb455494bc444ad34068b80a03b84a1af738deee197d3aa9
3
+ size 27578
logs/events.out.tfevents.1730354489.datavisu4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d595ee38637c40e64c90e923415ce5643091f9cce108836d577f42de7b55d900
3
+ size 40
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ec42591f36bdfb80f97bb92cd3915ad2b8ba59617c4a381b145ce5e3de2f726
3
  size 1222958756
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb8018d733bef1a8345ef794b9f74c454fe90a64f664c4f475de6f2ce48c64d
3
  size 1222958756
runs/events.out.tfevents.1730352882.datavisu3 CHANGED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f4f2c5da543c6ae5472319e39eb74aed35ce6a0508f4bfe799213764e16657e
3
+ size 40
runs/events.out.tfevents.1730354197.datavisu4 ADDED
File without changes
test_results.json CHANGED
@@ -1 +1,12 @@
1
- {"test_loss": 0.6322654485702515, "test_model_preparation_time": 0.0067, "test_rmse": 0.34622713923454285, "test_mae": 0.30555403232574463, "test_r2": -1.965573787689209, "test_explained_variance": 0.10564449429512024, "test_mse_per_class": [0.26136571168899536, 0.29893758893013, 0.36430591344833374, 0.42200082540512085, 0.3574856221675873, 0.26048266887664795, 0.434121310710907, 0.4173690378665924, 0.40536266565322876, 0.2731235921382904, 0.3835618793964386, 0.2693272531032562, 0.3535088896751404], "test_f1_micro": 0.9515717926932881, "test_f1_macro": 0.9342119494403047, "test_accuracy": 0.3, "test_f1_per_class": [0.9397590361445783, 0.6511627906976745, 1.0, 1.0, 0.8, 0.8674698795180723, 1.0, 1.0, 1.0, 0.8863636363636364, 1.0, 1.0, 1.0], "test_runtime": 0.5432, "test_samples_per_second": 92.049, "test_steps_per_second": 12.887}
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 40.0,
3
+ "eval_explained_variance": 0.20714369416236877,
4
+ "eval_loss": 0.6195399761199951,
5
+ "eval_mae": 0.3067561388015747,
6
+ "eval_r2": -1.6130945682525635,
7
+ "eval_rmse": 0.34193673729896545,
8
+ "eval_runtime": 1.099,
9
+ "eval_samples_per_second": 45.496,
10
+ "eval_steps_per_second": 1.82,
11
+ "learning_rate": 1e-05
12
+ }
threshold.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Acropore_branched": 0.085, "Acropore_digitised": 0.431, "Acropore_tabular": 0.0, "Algae": 0.0, "Dead_coral": 0.17, "Fish": 0.009, "Millepore": 0.0, "No_acropore_encrusting": 0.0, "No_acropore_massive": 0.0, "No_acropore_sub_massive": 0.077, "Rock": 0.0, "Rubble": 0.0, "Sand": 0.0}
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 60.0,
3
- "learning_rate": 1.0000000000000002e-06,
4
- "total_flos": 4.4402778184752e+17,
5
- "train_loss": 0.6329069137573242,
6
- "train_runtime": 375.0945,
7
- "train_samples_per_second": 19.995,
8
- "train_steps_per_second": 0.8
9
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "learning_rate": 1e-05,
4
+ "total_flos": 2.9601852123168e+17,
5
+ "train_loss": 0.64580397605896,
6
+ "train_runtime": 275.9938,
7
+ "train_samples_per_second": 27.175,
8
+ "train_steps_per_second": 1.087
9
  }
trainer_state.json CHANGED
@@ -1,802 +1,542 @@
1
  {
2
- "best_metric": 0.6125256419181824,
3
- "best_model_checkpoint": "/home1/datahome/villien/project_hub/DinoVdeau/models/Ziboiai-large-2024_10_31-prova_batch-size32_freeze_probs/checkpoint-100",
4
- "epoch": 60.0,
5
  "eval_steps": 500,
6
- "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_explained_variance": 0.06840751320123672,
14
- "eval_loss": 0.7004616260528564,
15
- "eval_mae": 0.37051576375961304,
16
- "eval_r2": -21.506893157958984,
17
- "eval_rmse": 0.3965546190738678,
18
- "eval_runtime": 1.2359,
19
- "eval_samples_per_second": 40.458,
20
- "eval_steps_per_second": 1.618,
21
  "learning_rate": 0.001,
22
  "step": 2
23
  },
24
  {
25
  "epoch": 2.0,
26
- "eval_explained_variance": 0.08221305161714554,
27
- "eval_loss": 0.7249369621276855,
28
- "eval_mae": 0.37463027238845825,
29
- "eval_r2": -26.38355827331543,
30
- "eval_rmse": 0.40212786197662354,
31
- "eval_runtime": 0.3187,
32
- "eval_samples_per_second": 156.871,
33
- "eval_steps_per_second": 6.275,
34
  "learning_rate": 0.001,
35
  "step": 4
36
  },
37
  {
38
  "epoch": 3.0,
39
- "eval_explained_variance": 0.11782943457365036,
40
- "eval_loss": 0.7532215118408203,
41
- "eval_mae": 0.3815617859363556,
42
- "eval_r2": -29.68680763244629,
43
- "eval_rmse": 0.4113939702510834,
44
- "eval_runtime": 0.3018,
45
- "eval_samples_per_second": 165.696,
46
- "eval_steps_per_second": 6.628,
47
  "learning_rate": 0.001,
48
  "step": 6
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_explained_variance": 0.05660984292626381,
53
- "eval_loss": 0.7681224942207336,
54
- "eval_mae": 0.384958416223526,
55
- "eval_r2": -29.039794921875,
56
- "eval_rmse": 0.4185601472854614,
57
- "eval_runtime": 0.3198,
58
- "eval_samples_per_second": 156.355,
59
- "eval_steps_per_second": 6.254,
60
  "learning_rate": 0.001,
61
  "step": 8
62
  },
63
  {
64
  "epoch": 5.0,
65
- "eval_explained_variance": 0.011602365411818027,
66
- "eval_loss": 0.7665389776229858,
67
- "eval_mae": 0.3826686143875122,
68
- "eval_r2": -26.610107421875,
69
- "eval_rmse": 0.4177513122558594,
70
- "eval_runtime": 0.2997,
71
- "eval_samples_per_second": 166.846,
72
- "eval_steps_per_second": 6.674,
73
  "learning_rate": 0.001,
74
  "step": 10
75
  },
76
  {
77
  "epoch": 6.0,
78
- "eval_explained_variance": -0.041439518332481384,
79
- "eval_loss": 0.7593734860420227,
80
- "eval_mae": 0.37791818380355835,
81
- "eval_r2": -24.258955001831055,
82
- "eval_rmse": 0.41521450877189636,
83
- "eval_runtime": 0.3036,
84
- "eval_samples_per_second": 164.675,
85
- "eval_steps_per_second": 6.587,
86
  "learning_rate": 0.001,
87
  "step": 12
88
  },
89
  {
90
  "epoch": 7.0,
91
- "eval_explained_variance": -0.18780630826950073,
92
- "eval_loss": 0.7493597269058228,
93
- "eval_mae": 0.3714950978755951,
94
- "eval_r2": -22.30160903930664,
95
- "eval_rmse": 0.41082921624183655,
96
- "eval_runtime": 0.3045,
97
- "eval_samples_per_second": 164.187,
98
- "eval_steps_per_second": 6.567,
99
  "learning_rate": 0.001,
100
  "step": 14
101
  },
102
  {
103
  "epoch": 8.0,
104
- "eval_explained_variance": -0.1876310557126999,
105
- "eval_loss": 0.7214367389678955,
106
- "eval_mae": 0.36099693179130554,
107
- "eval_r2": -20.16297149658203,
108
- "eval_rmse": 0.3992151916027069,
109
- "eval_runtime": 0.325,
110
- "eval_samples_per_second": 153.846,
111
- "eval_steps_per_second": 6.154,
112
  "learning_rate": 0.0001,
113
  "step": 16
114
  },
115
  {
116
  "epoch": 9.0,
117
- "eval_explained_variance": -0.164337158203125,
118
- "eval_loss": 0.7012718915939331,
119
- "eval_mae": 0.352971613407135,
120
- "eval_r2": -18.67078971862793,
121
- "eval_rmse": 0.3905433118343353,
122
- "eval_runtime": 0.3097,
123
- "eval_samples_per_second": 161.468,
124
- "eval_steps_per_second": 6.459,
125
  "learning_rate": 0.0001,
126
  "step": 18
127
  },
128
  {
129
  "epoch": 10.0,
130
- "eval_explained_variance": -0.15046803653240204,
131
- "eval_loss": 0.6868528723716736,
132
- "eval_mae": 0.346741646528244,
133
- "eval_r2": -17.41924476623535,
134
- "eval_rmse": 0.3835551142692566,
135
- "eval_runtime": 0.3073,
136
- "eval_samples_per_second": 162.696,
137
- "eval_steps_per_second": 6.508,
138
  "learning_rate": 0.0001,
139
  "step": 20
140
  },
141
  {
142
  "epoch": 11.0,
143
- "eval_explained_variance": -0.12824733555316925,
144
- "eval_loss": 0.6764166951179504,
145
- "eval_mae": 0.34248146414756775,
146
- "eval_r2": -16.507604598999023,
147
- "eval_rmse": 0.3787185549736023,
148
- "eval_runtime": 0.3242,
149
- "eval_samples_per_second": 154.219,
150
- "eval_steps_per_second": 6.169,
151
  "learning_rate": 0.0001,
152
  "step": 22
153
  },
154
  {
155
  "epoch": 12.0,
156
- "eval_explained_variance": -0.10852393507957458,
157
- "eval_loss": 0.6669396758079529,
158
- "eval_mae": 0.33840715885162354,
159
- "eval_r2": -16.007225036621094,
160
- "eval_rmse": 0.3739539384841919,
161
- "eval_runtime": 0.3298,
162
- "eval_samples_per_second": 151.614,
163
- "eval_steps_per_second": 6.065,
164
  "learning_rate": 0.0001,
165
  "step": 24
166
  },
167
  {
168
  "epoch": 13.0,
169
- "eval_explained_variance": -0.08818976581096649,
170
- "eval_loss": 0.661744236946106,
171
- "eval_mae": 0.3357750177383423,
172
- "eval_r2": -15.561150550842285,
173
- "eval_rmse": 0.3711845278739929,
174
- "eval_runtime": 0.3053,
175
- "eval_samples_per_second": 163.757,
176
- "eval_steps_per_second": 6.55,
177
  "learning_rate": 0.0001,
178
  "step": 26
179
  },
180
  {
181
  "epoch": 14.0,
182
- "eval_explained_variance": -0.03988352045416832,
183
- "eval_loss": 0.6557328701019287,
184
- "eval_mae": 0.33316293358802795,
185
- "eval_r2": -14.847132682800293,
186
- "eval_rmse": 0.3683193624019623,
187
- "eval_runtime": 0.3055,
188
- "eval_samples_per_second": 163.645,
189
- "eval_steps_per_second": 6.546,
190
  "learning_rate": 0.0001,
191
  "step": 28
192
  },
193
  {
194
  "epoch": 15.0,
195
- "eval_explained_variance": -0.014903265982866287,
196
- "eval_loss": 0.6516609787940979,
197
- "eval_mae": 0.33125975728034973,
198
- "eval_r2": -14.374401092529297,
199
- "eval_rmse": 0.36609962582588196,
200
- "eval_runtime": 0.3245,
201
- "eval_samples_per_second": 154.102,
202
- "eval_steps_per_second": 6.164,
203
  "learning_rate": 0.0001,
204
  "step": 30
205
  },
206
  {
207
  "epoch": 16.0,
208
- "eval_explained_variance": 0.0008760415366850793,
209
- "eval_loss": 0.6493728756904602,
210
- "eval_mae": 0.33018919825553894,
211
- "eval_r2": -14.09227180480957,
212
- "eval_rmse": 0.36498352885246277,
213
- "eval_runtime": 0.2973,
214
- "eval_samples_per_second": 168.196,
215
- "eval_steps_per_second": 6.728,
216
  "learning_rate": 0.0001,
217
  "step": 32
218
  },
219
  {
220
  "epoch": 17.0,
221
- "eval_explained_variance": 0.007616698741912842,
222
- "eval_loss": 0.6469070315361023,
223
- "eval_mae": 0.3284085690975189,
224
- "eval_r2": -14.042997360229492,
225
- "eval_rmse": 0.3634098768234253,
226
- "eval_runtime": 0.2968,
227
- "eval_samples_per_second": 168.439,
228
- "eval_steps_per_second": 6.738,
229
  "learning_rate": 0.0001,
230
  "step": 34
231
  },
232
  {
233
  "epoch": 18.0,
234
- "eval_explained_variance": 0.027522683143615723,
235
- "eval_loss": 0.6455032825469971,
236
- "eval_mae": 0.327472984790802,
237
- "eval_r2": -13.848074913024902,
238
- "eval_rmse": 0.3625529706478119,
239
- "eval_runtime": 0.3023,
240
- "eval_samples_per_second": 165.395,
241
- "eval_steps_per_second": 6.616,
242
  "learning_rate": 0.0001,
243
  "step": 36
244
  },
245
  {
246
  "epoch": 19.0,
247
- "eval_explained_variance": 0.04580416530370712,
248
- "eval_loss": 0.64373379945755,
249
- "eval_mae": 0.3270127475261688,
250
- "eval_r2": -13.729372024536133,
251
- "eval_rmse": 0.36171066761016846,
252
- "eval_runtime": 0.2975,
253
- "eval_samples_per_second": 168.069,
254
- "eval_steps_per_second": 6.723,
255
  "learning_rate": 0.0001,
256
  "step": 38
257
  },
258
  {
259
  "epoch": 20.0,
260
- "eval_explained_variance": 0.05705893412232399,
261
- "eval_loss": 0.6426065564155579,
262
- "eval_mae": 0.3264869153499603,
263
- "eval_r2": -13.46953296661377,
264
- "eval_rmse": 0.3610925078392029,
265
- "eval_runtime": 0.303,
266
- "eval_samples_per_second": 164.994,
267
- "eval_steps_per_second": 6.6,
268
  "learning_rate": 0.0001,
269
  "step": 40
270
  },
271
  {
272
  "epoch": 21.0,
273
- "eval_explained_variance": 0.058054760098457336,
274
- "eval_loss": 0.6413648128509521,
275
- "eval_mae": 0.32564398646354675,
276
- "eval_r2": -13.444860458374023,
277
- "eval_rmse": 0.36047738790512085,
278
- "eval_runtime": 0.2971,
279
- "eval_samples_per_second": 168.316,
280
- "eval_steps_per_second": 6.733,
281
  "learning_rate": 0.0001,
282
  "step": 42
283
  },
284
  {
285
  "epoch": 22.0,
286
- "eval_explained_variance": 0.05415903031826019,
287
- "eval_loss": 0.6421814560890198,
288
- "eval_mae": 0.32565081119537354,
289
- "eval_r2": -13.317983627319336,
290
- "eval_rmse": 0.360535204410553,
291
- "eval_runtime": 0.2965,
292
- "eval_samples_per_second": 168.653,
293
- "eval_steps_per_second": 6.746,
294
  "learning_rate": 0.0001,
295
  "step": 44
296
  },
297
  {
298
  "epoch": 23.0,
299
- "eval_explained_variance": 0.07554426789283752,
300
- "eval_loss": 0.6406619548797607,
301
- "eval_mae": 0.3246156871318817,
302
- "eval_r2": -13.248732566833496,
303
- "eval_rmse": 0.35933929681777954,
304
- "eval_runtime": 0.2972,
305
- "eval_samples_per_second": 168.22,
306
- "eval_steps_per_second": 6.729,
307
  "learning_rate": 0.0001,
308
  "step": 46
309
  },
310
  {
311
  "epoch": 24.0,
312
- "eval_explained_variance": 0.07407143712043762,
313
- "eval_loss": 0.637528121471405,
314
- "eval_mae": 0.32300955057144165,
315
- "eval_r2": -13.249483108520508,
316
- "eval_rmse": 0.35758209228515625,
317
- "eval_runtime": 0.3004,
318
- "eval_samples_per_second": 166.463,
319
- "eval_steps_per_second": 6.659,
320
  "learning_rate": 0.0001,
321
  "step": 48
322
  },
323
  {
324
  "epoch": 25.0,
325
- "eval_explained_variance": 0.08428207039833069,
326
- "eval_loss": 0.6331558227539062,
327
- "eval_mae": 0.3205055892467499,
328
- "eval_r2": -12.964966773986816,
329
- "eval_rmse": 0.35511815547943115,
330
- "eval_runtime": 0.2968,
331
- "eval_samples_per_second": 168.439,
332
- "eval_steps_per_second": 6.738,
333
  "learning_rate": 0.0001,
334
  "step": 50
335
  },
336
  {
337
  "epoch": 26.0,
338
- "eval_explained_variance": 0.0902879610657692,
339
- "eval_loss": 0.6315688490867615,
340
- "eval_mae": 0.3191172182559967,
341
- "eval_r2": -12.712376594543457,
342
- "eval_rmse": 0.3540325164794922,
343
- "eval_runtime": 0.3046,
344
- "eval_samples_per_second": 164.139,
345
- "eval_steps_per_second": 6.566,
346
  "learning_rate": 0.0001,
347
  "step": 52
348
  },
349
  {
350
  "epoch": 27.0,
351
- "eval_explained_variance": 0.09721990674734116,
352
- "eval_loss": 0.6297861337661743,
353
- "eval_mae": 0.3175807297229767,
354
- "eval_r2": -12.53150749206543,
355
- "eval_rmse": 0.3526574671268463,
356
- "eval_runtime": 0.2981,
357
- "eval_samples_per_second": 167.733,
358
- "eval_steps_per_second": 6.709,
359
  "learning_rate": 0.0001,
360
  "step": 54
361
  },
362
  {
363
  "epoch": 28.0,
364
- "eval_explained_variance": 0.10100533068180084,
365
- "eval_loss": 0.6287277340888977,
366
- "eval_mae": 0.3168259263038635,
367
- "eval_r2": -12.393423080444336,
368
- "eval_rmse": 0.35192248225212097,
369
- "eval_runtime": 0.305,
370
- "eval_samples_per_second": 163.956,
371
- "eval_steps_per_second": 6.558,
372
  "learning_rate": 0.0001,
373
  "step": 56
374
  },
375
  {
376
  "epoch": 29.0,
377
- "eval_explained_variance": 0.1064082533121109,
378
- "eval_loss": 0.627882719039917,
379
- "eval_mae": 0.3163486123085022,
380
- "eval_r2": -12.323440551757812,
381
- "eval_rmse": 0.35138675570487976,
382
- "eval_runtime": 0.2979,
383
- "eval_samples_per_second": 167.823,
384
- "eval_steps_per_second": 6.713,
385
  "learning_rate": 0.0001,
386
  "step": 58
387
  },
388
  {
389
  "epoch": 30.0,
390
- "eval_explained_variance": 0.1159624308347702,
391
- "eval_loss": 0.6246291995048523,
392
- "eval_mae": 0.3141040503978729,
393
- "eval_r2": -12.231378555297852,
394
- "eval_rmse": 0.3494300842285156,
395
- "eval_runtime": 0.2969,
396
- "eval_samples_per_second": 168.407,
397
- "eval_steps_per_second": 6.736,
398
  "learning_rate": 0.0001,
399
  "step": 60
400
  },
401
  {
402
  "epoch": 31.0,
403
- "eval_explained_variance": 0.1264413446187973,
404
- "eval_loss": 0.6210973262786865,
405
- "eval_mae": 0.31232017278671265,
406
- "eval_r2": -12.064345359802246,
407
- "eval_rmse": 0.34745490550994873,
408
- "eval_runtime": 0.2987,
409
- "eval_samples_per_second": 167.391,
410
- "eval_steps_per_second": 6.696,
411
  "learning_rate": 0.0001,
412
  "step": 62
413
  },
414
  {
415
  "epoch": 32.0,
416
- "eval_explained_variance": 0.12937316298484802,
417
- "eval_loss": 0.6217814087867737,
418
- "eval_mae": 0.31251102685928345,
419
- "eval_r2": -11.966958045959473,
420
- "eval_rmse": 0.3476860225200653,
421
- "eval_runtime": 0.2966,
422
- "eval_samples_per_second": 168.6,
423
- "eval_steps_per_second": 6.744,
424
  "learning_rate": 0.0001,
425
  "step": 64
426
  },
427
  {
428
  "epoch": 33.0,
429
- "eval_explained_variance": 0.13646358251571655,
430
- "eval_loss": 0.6202294230461121,
431
- "eval_mae": 0.3120400309562683,
432
- "eval_r2": -11.754964828491211,
433
- "eval_rmse": 0.3470361530780792,
434
- "eval_runtime": 0.2983,
435
- "eval_samples_per_second": 167.594,
436
- "eval_steps_per_second": 6.704,
437
  "learning_rate": 0.0001,
438
  "step": 66
439
  },
440
  {
441
  "epoch": 34.0,
442
- "eval_explained_variance": 0.13642433285713196,
443
- "eval_loss": 0.6191384792327881,
444
- "eval_mae": 0.31109973788261414,
445
- "eval_r2": -11.614458084106445,
446
- "eval_rmse": 0.34630581736564636,
447
- "eval_runtime": 0.2968,
448
- "eval_samples_per_second": 168.491,
449
- "eval_steps_per_second": 6.74,
450
  "learning_rate": 0.0001,
451
  "step": 68
452
  },
453
  {
454
  "epoch": 35.0,
455
- "eval_explained_variance": 0.1399742215871811,
456
- "eval_loss": 0.617404043674469,
457
- "eval_mae": 0.31046897172927856,
458
- "eval_r2": -11.586063385009766,
459
- "eval_rmse": 0.3454545736312866,
460
- "eval_runtime": 0.3043,
461
- "eval_samples_per_second": 164.292,
462
- "eval_steps_per_second": 6.572,
463
  "learning_rate": 0.0001,
464
  "step": 70
465
  },
466
  {
467
  "epoch": 36.0,
468
- "eval_explained_variance": 0.13982802629470825,
469
- "eval_loss": 0.6194512248039246,
470
- "eval_mae": 0.31094375252723694,
471
- "eval_r2": -11.760537147521973,
472
- "eval_rmse": 0.34623825550079346,
473
- "eval_runtime": 0.3044,
474
- "eval_samples_per_second": 164.269,
475
- "eval_steps_per_second": 6.571,
476
  "learning_rate": 0.0001,
477
  "step": 72
478
  },
479
  {
480
  "epoch": 37.0,
481
- "eval_explained_variance": 0.13672398030757904,
482
- "eval_loss": 0.6209845542907715,
483
- "eval_mae": 0.3114151358604431,
484
- "eval_r2": -11.703511238098145,
485
- "eval_rmse": 0.34697073698043823,
486
- "eval_runtime": 0.2966,
487
- "eval_samples_per_second": 168.568,
488
- "eval_steps_per_second": 6.743,
489
- "learning_rate": 0.0001,
490
  "step": 74
491
  },
492
  {
493
  "epoch": 38.0,
494
- "eval_explained_variance": 0.1387355923652649,
495
- "eval_loss": 0.6201078295707703,
496
- "eval_mae": 0.31073129177093506,
497
- "eval_r2": -11.660818099975586,
498
- "eval_rmse": 0.3463174104690552,
499
- "eval_runtime": 0.3051,
500
- "eval_samples_per_second": 163.878,
501
- "eval_steps_per_second": 6.555,
502
- "learning_rate": 0.0001,
503
  "step": 76
504
  },
505
  {
506
  "epoch": 39.0,
507
- "eval_explained_variance": 0.1361588090658188,
508
- "eval_loss": 0.6195101141929626,
509
- "eval_mae": 0.31057408452033997,
510
- "eval_r2": -11.629423141479492,
511
- "eval_rmse": 0.34605538845062256,
512
- "eval_runtime": 0.3059,
513
- "eval_samples_per_second": 163.478,
514
- "eval_steps_per_second": 6.539,
515
- "learning_rate": 0.0001,
516
  "step": 78
517
  },
518
  {
519
  "epoch": 40.0,
520
- "eval_explained_variance": 0.12785865366458893,
521
- "eval_loss": 0.619523823261261,
522
- "eval_mae": 0.3101230263710022,
523
- "eval_r2": -11.670927047729492,
524
- "eval_rmse": 0.34588271379470825,
525
- "eval_runtime": 0.2984,
526
- "eval_samples_per_second": 167.532,
527
- "eval_steps_per_second": 6.701,
528
- "learning_rate": 0.0001,
529
- "step": 80
530
- },
531
- {
532
- "epoch": 41.0,
533
- "eval_explained_variance": 0.11544161289930344,
534
- "eval_loss": 0.6195899844169617,
535
- "eval_mae": 0.30954211950302124,
536
- "eval_r2": -11.46561050415039,
537
- "eval_rmse": 0.34560662508010864,
538
- "eval_runtime": 0.3049,
539
- "eval_samples_per_second": 163.969,
540
- "eval_steps_per_second": 6.559,
541
- "learning_rate": 0.0001,
542
- "step": 82
543
- },
544
- {
545
- "epoch": 42.0,
546
- "eval_explained_variance": 0.12202569097280502,
547
- "eval_loss": 0.6184768676757812,
548
- "eval_mae": 0.3095985949039459,
549
- "eval_r2": -11.41903305053711,
550
- "eval_rmse": 0.3453018367290497,
551
- "eval_runtime": 0.3052,
552
- "eval_samples_per_second": 163.826,
553
- "eval_steps_per_second": 6.553,
554
- "learning_rate": 1e-05,
555
- "step": 84
556
- },
557
- {
558
- "epoch": 43.0,
559
- "eval_explained_variance": 0.12239150702953339,
560
- "eval_loss": 0.619608998298645,
561
- "eval_mae": 0.3099028468132019,
562
- "eval_r2": -11.421133995056152,
563
- "eval_rmse": 0.3457002341747284,
564
- "eval_runtime": 0.2987,
565
- "eval_samples_per_second": 167.405,
566
- "eval_steps_per_second": 6.696,
567
- "learning_rate": 1e-05,
568
- "step": 86
569
- },
570
- {
571
- "epoch": 44.0,
572
- "eval_explained_variance": 0.12524205446243286,
573
- "eval_loss": 0.6175010204315186,
574
- "eval_mae": 0.3090519905090332,
575
- "eval_r2": -11.342177391052246,
576
- "eval_rmse": 0.34480178356170654,
577
- "eval_runtime": 0.3287,
578
- "eval_samples_per_second": 152.124,
579
- "eval_steps_per_second": 6.085,
580
- "learning_rate": 1e-05,
581
- "step": 88
582
- },
583
- {
584
- "epoch": 45.0,
585
- "eval_explained_variance": 0.12674368917942047,
586
- "eval_loss": 0.6147510409355164,
587
- "eval_mae": 0.307899534702301,
588
- "eval_r2": -11.237728118896484,
589
- "eval_rmse": 0.3434869647026062,
590
- "eval_runtime": 0.3235,
591
- "eval_samples_per_second": 154.557,
592
- "eval_steps_per_second": 6.182,
593
- "learning_rate": 1e-05,
594
- "step": 90
595
- },
596
- {
597
- "epoch": 46.0,
598
- "eval_explained_variance": 0.12321905046701431,
599
- "eval_loss": 0.6155768036842346,
600
- "eval_mae": 0.3080970048904419,
601
- "eval_r2": -11.216095924377441,
602
- "eval_rmse": 0.3438577651977539,
603
- "eval_runtime": 0.299,
604
- "eval_samples_per_second": 167.246,
605
- "eval_steps_per_second": 6.69,
606
- "learning_rate": 1e-05,
607
- "step": 92
608
- },
609
- {
610
- "epoch": 47.0,
611
- "eval_explained_variance": 0.12190257012844086,
612
- "eval_loss": 0.616154670715332,
613
- "eval_mae": 0.30844491720199585,
614
- "eval_r2": -11.23585319519043,
615
- "eval_rmse": 0.3442190885543823,
616
- "eval_runtime": 0.299,
617
- "eval_samples_per_second": 167.245,
618
- "eval_steps_per_second": 6.69,
619
- "learning_rate": 1e-05,
620
- "step": 94
621
- },
622
- {
623
- "epoch": 48.0,
624
- "eval_explained_variance": 0.12182028591632843,
625
- "eval_loss": 0.6153092980384827,
626
- "eval_mae": 0.3079487085342407,
627
- "eval_r2": -11.14069652557373,
628
- "eval_rmse": 0.34381839632987976,
629
- "eval_runtime": 0.3282,
630
- "eval_samples_per_second": 152.339,
631
- "eval_steps_per_second": 6.094,
632
- "learning_rate": 1e-05,
633
- "step": 96
634
- },
635
- {
636
- "epoch": 49.0,
637
- "eval_explained_variance": 0.12591353058815002,
638
- "eval_loss": 0.6142339110374451,
639
- "eval_mae": 0.3075259327888489,
640
- "eval_r2": -11.087849617004395,
641
- "eval_rmse": 0.3433670699596405,
642
- "eval_runtime": 0.3232,
643
- "eval_samples_per_second": 154.7,
644
- "eval_steps_per_second": 6.188,
645
  "learning_rate": 1e-05,
646
- "step": 98
647
- },
648
- {
649
- "epoch": 50.0,
650
- "eval_explained_variance": 0.12409819662570953,
651
- "eval_loss": 0.6125256419181824,
652
- "eval_mae": 0.3070906400680542,
653
- "eval_r2": -11.164841651916504,
654
- "eval_rmse": 0.34268108010292053,
655
- "eval_runtime": 0.323,
656
- "eval_samples_per_second": 154.8,
657
- "eval_steps_per_second": 6.192,
658
- "learning_rate": 1e-05,
659
- "step": 100
660
  },
661
  {
662
- "epoch": 51.0,
663
- "eval_explained_variance": 0.12737669050693512,
664
- "eval_loss": 0.6131001114845276,
665
- "eval_mae": 0.3071759045124054,
666
- "eval_r2": -11.23709487915039,
667
- "eval_rmse": 0.34303510189056396,
668
- "eval_runtime": 0.3251,
669
- "eval_samples_per_second": 153.791,
670
- "eval_steps_per_second": 6.152,
671
- "learning_rate": 1e-05,
672
- "step": 102
673
- },
674
- {
675
- "epoch": 52.0,
676
- "eval_explained_variance": 0.12739494442939758,
677
- "eval_loss": 0.6136941313743591,
678
- "eval_mae": 0.30773404240608215,
679
- "eval_r2": -11.390901565551758,
680
- "eval_rmse": 0.34343937039375305,
681
- "eval_runtime": 0.3154,
682
- "eval_samples_per_second": 158.551,
683
- "eval_steps_per_second": 6.342,
684
- "learning_rate": 1e-05,
685
- "step": 104
686
- },
687
- {
688
- "epoch": 53.0,
689
- "eval_explained_variance": 0.12241682410240173,
690
- "eval_loss": 0.613895058631897,
691
- "eval_mae": 0.3076512813568115,
692
- "eval_r2": -11.501760482788086,
693
- "eval_rmse": 0.3434452712535858,
694
- "eval_runtime": 0.3001,
695
- "eval_samples_per_second": 166.603,
696
- "eval_steps_per_second": 6.664,
697
- "learning_rate": 1e-05,
698
- "step": 106
699
- },
700
- {
701
- "epoch": 54.0,
702
- "eval_explained_variance": 0.1222328469157219,
703
- "eval_loss": 0.6156599521636963,
704
- "eval_mae": 0.30890733003616333,
705
- "eval_r2": -11.667372703552246,
706
- "eval_rmse": 0.3445097804069519,
707
- "eval_runtime": 0.3773,
708
- "eval_samples_per_second": 132.51,
709
- "eval_steps_per_second": 5.3,
710
- "learning_rate": 1e-05,
711
- "step": 108
712
- },
713
- {
714
- "epoch": 55.0,
715
- "eval_explained_variance": 0.12218669801950455,
716
- "eval_loss": 0.6167553067207336,
717
- "eval_mae": 0.30896657705307007,
718
- "eval_r2": -11.646692276000977,
719
- "eval_rmse": 0.34482434391975403,
720
- "eval_runtime": 0.3779,
721
- "eval_samples_per_second": 132.317,
722
- "eval_steps_per_second": 5.293,
723
- "learning_rate": 1e-05,
724
- "step": 110
725
- },
726
- {
727
- "epoch": 56.0,
728
- "eval_explained_variance": 0.12501628696918488,
729
- "eval_loss": 0.6139956116676331,
730
- "eval_mae": 0.3077108561992645,
731
- "eval_r2": -11.49679946899414,
732
- "eval_rmse": 0.3434431850910187,
733
- "eval_runtime": 0.2984,
734
- "eval_samples_per_second": 167.575,
735
- "eval_steps_per_second": 6.703,
736
  "learning_rate": 1e-05,
737
- "step": 112
738
- },
739
- {
740
- "epoch": 57.0,
741
- "eval_explained_variance": 0.12155170738697052,
742
- "eval_loss": 0.6132706999778748,
743
- "eval_mae": 0.3070979416370392,
744
- "eval_r2": -11.500228881835938,
745
- "eval_rmse": 0.342957466840744,
746
- "eval_runtime": 0.3776,
747
- "eval_samples_per_second": 132.398,
748
- "eval_steps_per_second": 5.296,
749
- "learning_rate": 1.0000000000000002e-06,
750
- "step": 114
751
- },
752
- {
753
- "epoch": 58.0,
754
- "eval_explained_variance": 0.12097452580928802,
755
- "eval_loss": 0.6130067706108093,
756
- "eval_mae": 0.30698391795158386,
757
- "eval_r2": -11.447521209716797,
758
- "eval_rmse": 0.3428483307361603,
759
- "eval_runtime": 0.3786,
760
- "eval_samples_per_second": 132.082,
761
- "eval_steps_per_second": 5.283,
762
- "learning_rate": 1.0000000000000002e-06,
763
- "step": 116
764
- },
765
- {
766
- "epoch": 59.0,
767
- "eval_explained_variance": 0.11784002184867859,
768
- "eval_loss": 0.61500084400177,
769
- "eval_mae": 0.30830222368240356,
770
- "eval_r2": -11.556161880493164,
771
- "eval_rmse": 0.34413495659828186,
772
- "eval_runtime": 0.3056,
773
- "eval_samples_per_second": 163.604,
774
- "eval_steps_per_second": 6.544,
775
- "learning_rate": 1.0000000000000002e-06,
776
- "step": 118
777
- },
778
- {
779
- "epoch": 60.0,
780
- "eval_explained_variance": 0.12427856028079987,
781
- "eval_loss": 0.6166698336601257,
782
- "eval_mae": 0.30922549962997437,
783
- "eval_r2": -11.467646598815918,
784
- "eval_rmse": 0.34501928091049194,
785
- "eval_runtime": 0.3778,
786
- "eval_samples_per_second": 132.332,
787
- "eval_steps_per_second": 5.293,
788
- "learning_rate": 1.0000000000000002e-06,
789
- "step": 120
790
- },
791
- {
792
- "epoch": 60.0,
793
- "learning_rate": 1.0000000000000002e-06,
794
- "step": 120,
795
- "total_flos": 4.4402778184752e+17,
796
- "train_loss": 0.6329069137573242,
797
- "train_runtime": 375.0945,
798
- "train_samples_per_second": 19.995,
799
- "train_steps_per_second": 0.8
800
  }
801
  ],
802
  "logging_steps": 500,
@@ -825,7 +565,7 @@
825
  "attributes": {}
826
  }
827
  },
828
- "total_flos": 4.4402778184752e+17,
829
  "train_batch_size": 32,
830
  "trial_name": null,
831
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6263097524642944,
3
+ "best_model_checkpoint": "/home1/datahome/villien/project_hub/DinoVdeau/models/Ziboiai-large-2024_10_31-prova_batch-size32_freeze_probs/checkpoint-60",
4
+ "epoch": 40.0,
5
  "eval_steps": 500,
6
+ "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_explained_variance": 0.03638218343257904,
14
+ "eval_loss": 0.7150455713272095,
15
+ "eval_mae": 0.3848940134048462,
16
+ "eval_r2": -20.29086685180664,
17
+ "eval_rmse": 0.40997111797332764,
18
+ "eval_runtime": 1.32,
19
+ "eval_samples_per_second": 37.88,
20
+ "eval_steps_per_second": 1.515,
21
  "learning_rate": 0.001,
22
  "step": 2
23
  },
24
  {
25
  "epoch": 2.0,
26
+ "eval_explained_variance": 0.0240942370146513,
27
+ "eval_loss": 0.7314126491546631,
28
+ "eval_mae": 0.3895121216773987,
29
+ "eval_r2": -21.218204498291016,
30
+ "eval_rmse": 0.4163060486316681,
31
+ "eval_runtime": 0.3125,
32
+ "eval_samples_per_second": 160.002,
33
+ "eval_steps_per_second": 6.4,
34
  "learning_rate": 0.001,
35
  "step": 4
36
  },
37
  {
38
  "epoch": 3.0,
39
+ "eval_explained_variance": -0.04694412648677826,
40
+ "eval_loss": 0.7726277112960815,
41
+ "eval_mae": 0.40413352847099304,
42
+ "eval_r2": -24.822391510009766,
43
+ "eval_rmse": 0.4320966601371765,
44
+ "eval_runtime": 0.3275,
45
+ "eval_samples_per_second": 152.667,
46
+ "eval_steps_per_second": 6.107,
47
  "learning_rate": 0.001,
48
  "step": 6
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_explained_variance": -0.06671242415904999,
53
+ "eval_loss": 0.7917326092720032,
54
+ "eval_mae": 0.4094983637332916,
55
+ "eval_r2": -26.581586837768555,
56
+ "eval_rmse": 0.4379725754261017,
57
+ "eval_runtime": 0.3153,
58
+ "eval_samples_per_second": 158.574,
59
+ "eval_steps_per_second": 6.343,
60
  "learning_rate": 0.001,
61
  "step": 8
62
  },
63
  {
64
  "epoch": 5.0,
65
+ "eval_explained_variance": -0.13621382415294647,
66
+ "eval_loss": 0.7852649092674255,
67
+ "eval_mae": 0.402120441198349,
68
+ "eval_r2": -26.95589256286621,
69
+ "eval_rmse": 0.43184274435043335,
70
+ "eval_runtime": 0.3123,
71
+ "eval_samples_per_second": 160.086,
72
+ "eval_steps_per_second": 6.403,
73
  "learning_rate": 0.001,
74
  "step": 10
75
  },
76
  {
77
  "epoch": 6.0,
78
+ "eval_explained_variance": -0.12974193692207336,
79
+ "eval_loss": 0.7647674679756165,
80
+ "eval_mae": 0.3905399441719055,
81
+ "eval_r2": -24.40153694152832,
82
+ "eval_rmse": 0.42244094610214233,
83
+ "eval_runtime": 0.3317,
84
+ "eval_samples_per_second": 150.759,
85
+ "eval_steps_per_second": 6.03,
86
  "learning_rate": 0.001,
87
  "step": 12
88
  },
89
  {
90
  "epoch": 7.0,
91
+ "eval_explained_variance": -0.10977767407894135,
92
+ "eval_loss": 0.7391812205314636,
93
+ "eval_mae": 0.376028835773468,
94
+ "eval_r2": -22.557889938354492,
95
+ "eval_rmse": 0.41028541326522827,
96
+ "eval_runtime": 0.3205,
97
+ "eval_samples_per_second": 155.989,
98
+ "eval_steps_per_second": 6.24,
99
  "learning_rate": 0.001,
100
  "step": 14
101
  },
102
  {
103
  "epoch": 8.0,
104
+ "eval_explained_variance": -0.10544480383396149,
105
+ "eval_loss": 0.7115270495414734,
106
+ "eval_mae": 0.36385056376457214,
107
+ "eval_r2": -20.067392349243164,
108
+ "eval_rmse": 0.39825379848480225,
109
+ "eval_runtime": 0.3104,
110
+ "eval_samples_per_second": 161.103,
111
+ "eval_steps_per_second": 6.444,
112
  "learning_rate": 0.0001,
113
  "step": 16
114
  },
115
  {
116
  "epoch": 9.0,
117
+ "eval_explained_variance": -0.09249210357666016,
118
+ "eval_loss": 0.6896975040435791,
119
+ "eval_mae": 0.35347798466682434,
120
+ "eval_r2": -18.16646385192871,
121
+ "eval_rmse": 0.3878582715988159,
122
+ "eval_runtime": 0.3226,
123
+ "eval_samples_per_second": 155.006,
124
+ "eval_steps_per_second": 6.2,
125
  "learning_rate": 0.0001,
126
  "step": 18
127
  },
128
  {
129
  "epoch": 10.0,
130
+ "eval_explained_variance": -0.10285507887601852,
131
+ "eval_loss": 0.6777035593986511,
132
+ "eval_mae": 0.34683120250701904,
133
+ "eval_r2": -16.94469451904297,
134
+ "eval_rmse": 0.3818005323410034,
135
+ "eval_runtime": 0.3016,
136
+ "eval_samples_per_second": 165.76,
137
+ "eval_steps_per_second": 6.63,
138
  "learning_rate": 0.0001,
139
  "step": 20
140
  },
141
  {
142
  "epoch": 11.0,
143
+ "eval_explained_variance": -0.11687294393777847,
144
+ "eval_loss": 0.6701759099960327,
145
+ "eval_mae": 0.3423532247543335,
146
+ "eval_r2": -16.037521362304688,
147
+ "eval_rmse": 0.3779585659503937,
148
+ "eval_runtime": 0.3107,
149
+ "eval_samples_per_second": 160.916,
150
+ "eval_steps_per_second": 6.437,
151
  "learning_rate": 0.0001,
152
  "step": 22
153
  },
154
  {
155
  "epoch": 12.0,
156
+ "eval_explained_variance": -0.11208173632621765,
157
+ "eval_loss": 0.663905918598175,
158
+ "eval_mae": 0.3388546407222748,
159
+ "eval_r2": -15.605177879333496,
160
+ "eval_rmse": 0.37438222765922546,
161
+ "eval_runtime": 0.3308,
162
+ "eval_samples_per_second": 151.17,
163
+ "eval_steps_per_second": 6.047,
164
  "learning_rate": 0.0001,
165
  "step": 24
166
  },
167
  {
168
  "epoch": 13.0,
169
+ "eval_explained_variance": -0.10647904872894287,
170
+ "eval_loss": 0.656491219997406,
171
+ "eval_mae": 0.3345881700515747,
172
+ "eval_r2": -14.805088996887207,
173
+ "eval_rmse": 0.3702985942363739,
174
+ "eval_runtime": 0.3222,
175
+ "eval_samples_per_second": 155.166,
176
+ "eval_steps_per_second": 6.207,
177
  "learning_rate": 0.0001,
178
  "step": 26
179
  },
180
  {
181
  "epoch": 14.0,
182
+ "eval_explained_variance": -0.0958017110824585,
183
+ "eval_loss": 0.6501385569572449,
184
+ "eval_mae": 0.33100754022598267,
185
+ "eval_r2": -14.231175422668457,
186
+ "eval_rmse": 0.3668138384819031,
187
+ "eval_runtime": 0.3293,
188
+ "eval_samples_per_second": 151.853,
189
+ "eval_steps_per_second": 6.074,
190
  "learning_rate": 0.0001,
191
  "step": 28
192
  },
193
  {
194
  "epoch": 15.0,
195
+ "eval_explained_variance": -0.08547426015138626,
196
+ "eval_loss": 0.6467865705490112,
197
+ "eval_mae": 0.32885220646858215,
198
+ "eval_r2": -14.07986831665039,
199
+ "eval_rmse": 0.36475783586502075,
200
+ "eval_runtime": 0.3253,
201
+ "eval_samples_per_second": 153.717,
202
+ "eval_steps_per_second": 6.149,
203
  "learning_rate": 0.0001,
204
  "step": 30
205
  },
206
  {
207
  "epoch": 16.0,
208
+ "eval_explained_variance": -0.08231981098651886,
209
+ "eval_loss": 0.6471170783042908,
210
+ "eval_mae": 0.3288896679878235,
211
+ "eval_r2": -14.255745887756348,
212
+ "eval_rmse": 0.3650059998035431,
213
+ "eval_runtime": 0.305,
214
+ "eval_samples_per_second": 163.945,
215
+ "eval_steps_per_second": 6.558,
216
  "learning_rate": 0.0001,
217
  "step": 32
218
  },
219
  {
220
  "epoch": 17.0,
221
+ "eval_explained_variance": -0.08097466081380844,
222
+ "eval_loss": 0.6435126662254333,
223
+ "eval_mae": 0.3268200755119324,
224
+ "eval_r2": -14.059813499450684,
225
+ "eval_rmse": 0.36310678720474243,
226
+ "eval_runtime": 0.3322,
227
+ "eval_samples_per_second": 150.492,
228
+ "eval_steps_per_second": 6.02,
229
  "learning_rate": 0.0001,
230
  "step": 34
231
  },
232
  {
233
  "epoch": 18.0,
234
+ "eval_explained_variance": -0.07994352281093597,
235
+ "eval_loss": 0.6437923908233643,
236
+ "eval_mae": 0.3269612491130829,
237
+ "eval_r2": -14.036934852600098,
238
+ "eval_rmse": 0.36342939734458923,
239
+ "eval_runtime": 0.3107,
240
+ "eval_samples_per_second": 160.922,
241
+ "eval_steps_per_second": 6.437,
242
  "learning_rate": 0.0001,
243
  "step": 36
244
  },
245
  {
246
  "epoch": 19.0,
247
+ "eval_explained_variance": -0.08883289247751236,
248
+ "eval_loss": 0.6399621367454529,
249
+ "eval_mae": 0.3249860107898712,
250
+ "eval_r2": -13.81522274017334,
251
+ "eval_rmse": 0.36136963963508606,
252
+ "eval_runtime": 0.3104,
253
+ "eval_samples_per_second": 161.092,
254
+ "eval_steps_per_second": 6.444,
255
  "learning_rate": 0.0001,
256
  "step": 38
257
  },
258
  {
259
  "epoch": 20.0,
260
+ "eval_explained_variance": -0.09353505074977875,
261
+ "eval_loss": 0.6391971707344055,
262
+ "eval_mae": 0.3246455192565918,
263
+ "eval_r2": -13.710391998291016,
264
+ "eval_rmse": 0.3608955144882202,
265
+ "eval_runtime": 0.3119,
266
+ "eval_samples_per_second": 160.306,
267
+ "eval_steps_per_second": 6.412,
268
  "learning_rate": 0.0001,
269
  "step": 40
270
  },
271
  {
272
  "epoch": 21.0,
273
+ "eval_explained_variance": -0.09930111467838287,
274
+ "eval_loss": 0.6386714577674866,
275
+ "eval_mae": 0.32462170720100403,
276
+ "eval_r2": -13.809860229492188,
277
+ "eval_rmse": 0.3606450855731964,
278
+ "eval_runtime": 0.3149,
279
+ "eval_samples_per_second": 158.8,
280
+ "eval_steps_per_second": 6.352,
281
  "learning_rate": 0.0001,
282
  "step": 42
283
  },
284
  {
285
  "epoch": 22.0,
286
+ "eval_explained_variance": -0.10561199486255646,
287
+ "eval_loss": 0.6388444304466248,
288
+ "eval_mae": 0.3243348002433777,
289
+ "eval_r2": -13.849721908569336,
290
+ "eval_rmse": 0.36056435108184814,
291
+ "eval_runtime": 0.3094,
292
+ "eval_samples_per_second": 161.607,
293
+ "eval_steps_per_second": 6.464,
294
  "learning_rate": 0.0001,
295
  "step": 44
296
  },
297
  {
298
  "epoch": 23.0,
299
+ "eval_explained_variance": -0.1035044863820076,
300
+ "eval_loss": 0.6361631155014038,
301
+ "eval_mae": 0.3227779269218445,
302
+ "eval_r2": -13.562189102172852,
303
+ "eval_rmse": 0.35895633697509766,
304
+ "eval_runtime": 0.3094,
305
+ "eval_samples_per_second": 161.581,
306
+ "eval_steps_per_second": 6.463,
307
  "learning_rate": 0.0001,
308
  "step": 46
309
  },
310
  {
311
  "epoch": 24.0,
312
+ "eval_explained_variance": -0.10584529489278793,
313
+ "eval_loss": 0.635435163974762,
314
+ "eval_mae": 0.3223152160644531,
315
+ "eval_r2": -13.645319938659668,
316
+ "eval_rmse": 0.35847193002700806,
317
+ "eval_runtime": 0.3094,
318
+ "eval_samples_per_second": 161.602,
319
+ "eval_steps_per_second": 6.464,
320
  "learning_rate": 0.0001,
321
  "step": 48
322
  },
323
  {
324
  "epoch": 25.0,
325
+ "eval_explained_variance": -0.1035505086183548,
326
+ "eval_loss": 0.6344550848007202,
327
+ "eval_mae": 0.32144099473953247,
328
+ "eval_r2": -13.602314949035645,
329
+ "eval_rmse": 0.35783687233924866,
330
+ "eval_runtime": 0.3092,
331
+ "eval_samples_per_second": 161.704,
332
+ "eval_steps_per_second": 6.468,
333
  "learning_rate": 0.0001,
334
  "step": 50
335
  },
336
  {
337
  "epoch": 26.0,
338
+ "eval_explained_variance": -0.11728200316429138,
339
+ "eval_loss": 0.6348865628242493,
340
+ "eval_mae": 0.3211889863014221,
341
+ "eval_r2": -13.630416870117188,
342
+ "eval_rmse": 0.3580625355243683,
343
+ "eval_runtime": 0.331,
344
+ "eval_samples_per_second": 151.064,
345
+ "eval_steps_per_second": 6.043,
346
  "learning_rate": 0.0001,
347
  "step": 52
348
  },
349
  {
350
  "epoch": 27.0,
351
+ "eval_explained_variance": -0.11483900249004364,
352
+ "eval_loss": 0.6332749724388123,
353
+ "eval_mae": 0.32009246945381165,
354
+ "eval_r2": -13.561347007751465,
355
+ "eval_rmse": 0.3570806384086609,
356
+ "eval_runtime": 0.3173,
357
+ "eval_samples_per_second": 157.565,
358
+ "eval_steps_per_second": 6.303,
359
  "learning_rate": 0.0001,
360
  "step": 54
361
  },
362
  {
363
  "epoch": 28.0,
364
+ "eval_explained_variance": -0.10828801989555359,
365
+ "eval_loss": 0.6295092701911926,
366
+ "eval_mae": 0.31767499446868896,
367
+ "eval_r2": -13.23308277130127,
368
+ "eval_rmse": 0.35479238629341125,
369
+ "eval_runtime": 0.3087,
370
+ "eval_samples_per_second": 161.989,
371
+ "eval_steps_per_second": 6.48,
372
  "learning_rate": 0.0001,
373
  "step": 56
374
  },
375
  {
376
  "epoch": 29.0,
377
+ "eval_explained_variance": -0.1047045886516571,
378
+ "eval_loss": 0.6285346746444702,
379
+ "eval_mae": 0.3173280954360962,
380
+ "eval_r2": -13.162256240844727,
381
+ "eval_rmse": 0.35434553027153015,
382
+ "eval_runtime": 0.3277,
383
+ "eval_samples_per_second": 152.596,
384
+ "eval_steps_per_second": 6.104,
385
  "learning_rate": 0.0001,
386
  "step": 58
387
  },
388
  {
389
  "epoch": 30.0,
390
+ "eval_explained_variance": -0.09264782071113586,
391
+ "eval_loss": 0.6263097524642944,
392
+ "eval_mae": 0.31627562642097473,
393
+ "eval_r2": -12.713174819946289,
394
+ "eval_rmse": 0.3532228171825409,
395
+ "eval_runtime": 0.3523,
396
+ "eval_samples_per_second": 141.931,
397
+ "eval_steps_per_second": 5.677,
398
  "learning_rate": 0.0001,
399
  "step": 60
400
  },
401
  {
402
  "epoch": 31.0,
403
+ "eval_explained_variance": -0.08934260159730911,
404
+ "eval_loss": 0.6272528767585754,
405
+ "eval_mae": 0.316723495721817,
406
+ "eval_r2": -12.873921394348145,
407
+ "eval_rmse": 0.35376670956611633,
408
+ "eval_runtime": 0.3073,
409
+ "eval_samples_per_second": 162.723,
410
+ "eval_steps_per_second": 6.509,
411
  "learning_rate": 0.0001,
412
  "step": 62
413
  },
414
  {
415
  "epoch": 32.0,
416
+ "eval_explained_variance": -0.07898036390542984,
417
+ "eval_loss": 0.6294133067131042,
418
+ "eval_mae": 0.31807586550712585,
419
+ "eval_r2": -12.935453414916992,
420
+ "eval_rmse": 0.3550169765949249,
421
+ "eval_runtime": 0.3094,
422
+ "eval_samples_per_second": 161.626,
423
+ "eval_steps_per_second": 6.465,
424
  "learning_rate": 0.0001,
425
  "step": 64
426
  },
427
  {
428
  "epoch": 33.0,
429
+ "eval_explained_variance": -0.07519607990980148,
430
+ "eval_loss": 0.6299176216125488,
431
+ "eval_mae": 0.3185364603996277,
432
+ "eval_r2": -12.93520736694336,
433
+ "eval_rmse": 0.35538923740386963,
434
+ "eval_runtime": 0.3097,
435
+ "eval_samples_per_second": 161.472,
436
+ "eval_steps_per_second": 6.459,
437
  "learning_rate": 0.0001,
438
  "step": 66
439
  },
440
  {
441
  "epoch": 34.0,
442
+ "eval_explained_variance": -0.07019602507352829,
443
+ "eval_loss": 0.6320692300796509,
444
+ "eval_mae": 0.3193182349205017,
445
+ "eval_r2": -13.267191886901855,
446
+ "eval_rmse": 0.35644862055778503,
447
+ "eval_runtime": 0.3161,
448
+ "eval_samples_per_second": 158.177,
449
+ "eval_steps_per_second": 6.327,
450
  "learning_rate": 0.0001,
451
  "step": 68
452
  },
453
  {
454
  "epoch": 35.0,
455
+ "eval_explained_variance": -0.04873532056808472,
456
+ "eval_loss": 0.6279481649398804,
457
+ "eval_mae": 0.31752488017082214,
458
+ "eval_r2": -12.99951171875,
459
+ "eval_rmse": 0.3541102707386017,
460
+ "eval_runtime": 0.3124,
461
+ "eval_samples_per_second": 160.036,
462
+ "eval_steps_per_second": 6.401,
463
  "learning_rate": 0.0001,
464
  "step": 70
465
  },
466
  {
467
  "epoch": 36.0,
468
+ "eval_explained_variance": -0.04663123935461044,
469
+ "eval_loss": 0.6280075907707214,
470
+ "eval_mae": 0.31736499071121216,
471
+ "eval_r2": -13.00741195678711,
472
+ "eval_rmse": 0.35407301783561707,
473
+ "eval_runtime": 0.3095,
474
+ "eval_samples_per_second": 161.554,
475
+ "eval_steps_per_second": 6.462,
476
  "learning_rate": 0.0001,
477
  "step": 72
478
  },
479
  {
480
  "epoch": 37.0,
481
+ "eval_explained_variance": -0.04936327785253525,
482
+ "eval_loss": 0.6303659081459045,
483
+ "eval_mae": 0.3187006115913391,
484
+ "eval_r2": -13.230977058410645,
485
+ "eval_rmse": 0.35543760657310486,
486
+ "eval_runtime": 0.3251,
487
+ "eval_samples_per_second": 153.806,
488
+ "eval_steps_per_second": 6.152,
489
+ "learning_rate": 1e-05,
490
  "step": 74
491
  },
492
  {
493
  "epoch": 38.0,
494
+ "eval_explained_variance": -0.04394898936152458,
495
+ "eval_loss": 0.6297122836112976,
496
+ "eval_mae": 0.31833118200302124,
497
+ "eval_r2": -12.983016967773438,
498
+ "eval_rmse": 0.3550592064857483,
499
+ "eval_runtime": 0.3087,
500
+ "eval_samples_per_second": 161.995,
501
+ "eval_steps_per_second": 6.48,
502
+ "learning_rate": 1e-05,
503
  "step": 76
504
  },
505
  {
506
  "epoch": 39.0,
507
+ "eval_explained_variance": -0.04296223446726799,
508
+ "eval_loss": 0.630845308303833,
509
+ "eval_mae": 0.3193325996398926,
510
+ "eval_r2": -13.159842491149902,
511
+ "eval_rmse": 0.35580796003341675,
512
+ "eval_runtime": 0.3097,
513
+ "eval_samples_per_second": 161.465,
514
+ "eval_steps_per_second": 6.459,
515
+ "learning_rate": 1e-05,
516
  "step": 78
517
  },
518
  {
519
  "epoch": 40.0,
520
+ "eval_explained_variance": -0.04348618537187576,
521
+ "eval_loss": 0.6291573643684387,
522
+ "eval_mae": 0.3182610869407654,
523
+ "eval_r2": -13.069788932800293,
524
+ "eval_rmse": 0.3547934889793396,
525
+ "eval_runtime": 0.3206,
526
+ "eval_samples_per_second": 155.938,
527
+ "eval_steps_per_second": 6.238,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  "learning_rate": 1e-05,
529
+ "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
530
  },
531
  {
532
+ "epoch": 40.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
533
  "learning_rate": 1e-05,
534
+ "step": 80,
535
+ "total_flos": 2.9601852123168e+17,
536
+ "train_loss": 0.64580397605896,
537
+ "train_runtime": 275.9938,
538
+ "train_samples_per_second": 27.175,
539
+ "train_steps_per_second": 1.087
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
  }
541
  ],
542
  "logging_steps": 500,
 
565
  "attributes": {}
566
  }
567
  },
568
+ "total_flos": 2.9601852123168e+17,
569
  "train_batch_size": 32,
570
  "trial_name": null,
571
  "trial_params": null