tiagoblima committed on
Commit
73caed2
1 Parent(s): ace833b

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,8 @@ license: mit
3
  base_model: unicamp-dl/ptt5-small-t5-vocab
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: debug_t5-small_squad
8
  results: []
@@ -13,7 +15,9 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # debug_t5-small_squad
15
 
16
- This model is a fine-tuned version of [unicamp-dl/ptt5-small-t5-vocab](https://huggingface.co/unicamp-dl/ptt5-small-t5-vocab) on an unknown dataset.
 
 
17
 
18
  ## Model description
19
 
 
3
  base_model: unicamp-dl/ptt5-small-t5-vocab
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - tiagoblima/qg_squad_v1_pt
8
  model-index:
9
  - name: debug_t5-small_squad
10
  results: []
 
15
 
16
  # debug_t5-small_squad
17
 
18
+ This model is a fine-tuned version of [unicamp-dl/ptt5-small-t5-vocab](https://huggingface.co/unicamp-dl/ptt5-small-t5-vocab) on the tiagoblima/qg_squad_v1_pt dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 1.5534
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 2.0,
3
  "eval_loss": 1.553363561630249,
4
- "eval_runtime": 136.5092,
5
  "eval_samples": 14945,
6
- "eval_samples_per_second": 109.48,
7
- "eval_steps_per_second": 13.691,
8
  "train_loss": 1.505635230187694,
9
- "train_runtime": 4170.6739,
10
  "train_samples": 87599,
11
- "train_samples_per_second": 42.007,
12
- "train_steps_per_second": 0.656
13
  }
 
1
  {
2
  "epoch": 2.0,
3
  "eval_loss": 1.553363561630249,
4
+ "eval_runtime": 128.6101,
5
  "eval_samples": 14945,
6
+ "eval_samples_per_second": 116.204,
7
+ "eval_steps_per_second": 14.532,
8
  "train_loss": 1.505635230187694,
9
+ "train_runtime": 4543.9022,
10
  "train_samples": 87599,
11
+ "train_samples_per_second": 38.557,
12
+ "train_steps_per_second": 0.603
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
  "eval_loss": 1.553363561630249,
4
- "eval_runtime": 136.5092,
5
  "eval_samples": 14945,
6
- "eval_samples_per_second": 109.48,
7
- "eval_steps_per_second": 13.691
8
  }
 
1
  {
2
  "epoch": 2.0,
3
  "eval_loss": 1.553363561630249,
4
+ "eval_runtime": 128.6101,
5
  "eval_samples": 14945,
6
+ "eval_samples_per_second": 116.204,
7
+ "eval_steps_per_second": 14.532
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
  "train_loss": 1.505635230187694,
4
- "train_runtime": 4170.6739,
5
  "train_samples": 87599,
6
- "train_samples_per_second": 42.007,
7
- "train_steps_per_second": 0.656
8
  }
 
1
  {
2
  "epoch": 2.0,
3
  "train_loss": 1.505635230187694,
4
+ "train_runtime": 4543.9022,
5
  "train_samples": 87599,
6
+ "train_samples_per_second": 38.557,
7
+ "train_steps_per_second": 0.603
8
  }
trainer_state.json CHANGED
@@ -14,38 +14,78 @@
14
  "loss": 1.8198,
15
  "step": 500
16
  },
 
 
 
 
 
 
 
 
17
  {
18
  "epoch": 0.73,
19
  "learning_rate": 6.347699050401752e-05,
20
  "loss": 1.5114,
21
  "step": 1000
22
  },
 
 
 
 
 
 
 
 
23
  {
24
  "epoch": 1.1,
25
  "learning_rate": 4.52154857560263e-05,
26
  "loss": 1.4445,
27
  "step": 1500
28
  },
 
 
 
 
 
 
 
 
29
  {
30
  "epoch": 1.46,
31
  "learning_rate": 2.695398100803506e-05,
32
  "loss": 1.4118,
33
  "step": 2000
34
  },
 
 
 
 
 
 
 
 
35
  {
36
  "epoch": 1.83,
37
  "learning_rate": 8.692476260043827e-06,
38
  "loss": 1.3941,
39
  "step": 2500
40
  },
 
 
 
 
 
 
 
 
41
  {
42
  "epoch": 2.0,
43
  "step": 2738,
44
  "total_flos": 1.7783709700718592e+16,
45
  "train_loss": 1.505635230187694,
46
- "train_runtime": 4170.6739,
47
- "train_samples_per_second": 42.007,
48
- "train_steps_per_second": 0.656
49
  }
50
  ],
51
  "logging_steps": 500,
 
14
  "loss": 1.8198,
15
  "step": 500
16
  },
17
+ {
18
+ "None_loss": 1.661426305770874,
19
+ "None_runtime": 129.2092,
20
+ "None_samples_per_second": 115.665,
21
+ "None_steps_per_second": 14.465,
22
+ "epoch": 0.37,
23
+ "step": 500
24
+ },
25
  {
26
  "epoch": 0.73,
27
  "learning_rate": 6.347699050401752e-05,
28
  "loss": 1.5114,
29
  "step": 1000
30
  },
31
+ {
32
+ "None_loss": 1.5952973365783691,
33
+ "None_runtime": 129.3057,
34
+ "None_samples_per_second": 115.579,
35
+ "None_steps_per_second": 14.454,
36
+ "epoch": 0.73,
37
+ "step": 1000
38
+ },
39
  {
40
  "epoch": 1.1,
41
  "learning_rate": 4.52154857560263e-05,
42
  "loss": 1.4445,
43
  "step": 1500
44
  },
45
+ {
46
+ "None_loss": 1.5740190744400024,
47
+ "None_runtime": 129.646,
48
+ "None_samples_per_second": 115.275,
49
+ "None_steps_per_second": 14.416,
50
+ "epoch": 1.1,
51
+ "step": 1500
52
+ },
53
  {
54
  "epoch": 1.46,
55
  "learning_rate": 2.695398100803506e-05,
56
  "loss": 1.4118,
57
  "step": 2000
58
  },
59
+ {
60
+ "None_loss": 1.5628818273544312,
61
+ "None_runtime": 129.6751,
62
+ "None_samples_per_second": 115.25,
63
+ "None_steps_per_second": 14.413,
64
+ "epoch": 1.46,
65
+ "step": 2000
66
+ },
67
  {
68
  "epoch": 1.83,
69
  "learning_rate": 8.692476260043827e-06,
70
  "loss": 1.3941,
71
  "step": 2500
72
  },
73
+ {
74
+ "None_loss": 1.5557925701141357,
75
+ "None_runtime": 128.5071,
76
+ "None_samples_per_second": 116.297,
77
+ "None_steps_per_second": 14.544,
78
+ "epoch": 1.83,
79
+ "step": 2500
80
+ },
81
  {
82
  "epoch": 2.0,
83
  "step": 2738,
84
  "total_flos": 1.7783709700718592e+16,
85
  "train_loss": 1.505635230187694,
86
+ "train_runtime": 4543.9022,
87
+ "train_samples_per_second": 38.557,
88
+ "train_steps_per_second": 0.603
89
  }
90
  ],
91
  "logging_steps": 500,