tiagoblima committed on
Commit
d238c7a
1 Parent(s): 8accba7

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -1
  2. all_results.json +8 -8
  3. eval_results.json +4 -4
  4. train_results.json +4 -4
  5. trainer_state.json +14 -14
README.md CHANGED
@@ -3,6 +3,8 @@ license: mit
3
  base_model: unicamp-dl/ptt5-small-t5-vocab
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: debug_t5-small_squad
8
  results: []
@@ -13,7 +15,9 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # debug_t5-small_squad
15
 
16
- This model is a fine-tuned version of [unicamp-dl/ptt5-small-t5-vocab](https://huggingface.co/unicamp-dl/ptt5-small-t5-vocab) on an unknown dataset.
 
 
17
 
18
  ## Model description
19
 
 
3
  base_model: unicamp-dl/ptt5-small-t5-vocab
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - tiagoblima/qg_squad_v1_pt
8
  model-index:
9
  - name: debug_t5-small_squad
10
  results: []
 
15
 
16
  # debug_t5-small_squad
17
 
18
+ This model is a fine-tuned version of [unicamp-dl/ptt5-small-t5-vocab](https://huggingface.co/unicamp-dl/ptt5-small-t5-vocab) on the tiagoblima/qg_squad_v1_pt dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 1.5534
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_loss": 1.5792150497436523,
4
- "eval_runtime": 130.2194,
5
  "eval_samples": 14945,
6
- "eval_samples_per_second": 114.768,
7
- "eval_steps_per_second": 14.353,
8
- "train_loss": 1.4037482273500275,
9
- "train_runtime": 3969.1352,
10
  "train_samples": 87599,
11
- "train_samples_per_second": 44.14,
12
- "train_steps_per_second": 0.69
13
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "eval_loss": 1.553363561630249,
4
+ "eval_runtime": 136.5092,
5
  "eval_samples": 14945,
6
+ "eval_samples_per_second": 109.48,
7
+ "eval_steps_per_second": 13.691,
8
+ "train_loss": 1.505635230187694,
9
+ "train_runtime": 4170.6739,
10
  "train_samples": 87599,
11
+ "train_samples_per_second": 42.007,
12
+ "train_steps_per_second": 0.656
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_loss": 1.5792150497436523,
4
- "eval_runtime": 130.2194,
5
  "eval_samples": 14945,
6
- "eval_samples_per_second": 114.768,
7
- "eval_steps_per_second": 14.353
8
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "eval_loss": 1.553363561630249,
4
+ "eval_runtime": 136.5092,
5
  "eval_samples": 14945,
6
+ "eval_samples_per_second": 109.48,
7
+ "eval_steps_per_second": 13.691
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "train_loss": 1.4037482273500275,
4
- "train_runtime": 3969.1352,
5
  "train_samples": 87599,
6
- "train_samples_per_second": 44.14,
7
- "train_steps_per_second": 0.69
8
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "train_loss": 1.505635230187694,
4
+ "train_runtime": 4170.6739,
5
  "train_samples": 87599,
6
+ "train_samples_per_second": 42.007,
7
+ "train_steps_per_second": 0.656
8
  }
trainer_state.json CHANGED
@@ -10,42 +10,42 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.37,
13
- "learning_rate": 8.692476260043827e-06,
14
- "loss": 1.2919,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.73,
19
- "learning_rate": 0.0,
20
- "loss": 1.4435,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 1.1,
25
- "learning_rate": 0.0,
26
- "loss": 1.4318,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 1.46,
31
- "learning_rate": 0.0,
32
- "loss": 1.4235,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 1.83,
37
- "learning_rate": 0.0,
38
- "loss": 1.4192,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 2.0,
43
  "step": 2738,
44
  "total_flos": 1.7783709700718592e+16,
45
- "train_loss": 1.4037482273500275,
46
- "train_runtime": 3969.1352,
47
- "train_samples_per_second": 44.14,
48
- "train_steps_per_second": 0.69
49
  }
50
  ],
51
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.37,
13
+ "learning_rate": 8.173849525200878e-05,
14
+ "loss": 1.8198,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.73,
19
+ "learning_rate": 6.347699050401752e-05,
20
+ "loss": 1.5114,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 1.1,
25
+ "learning_rate": 4.52154857560263e-05,
26
+ "loss": 1.4445,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 1.46,
31
+ "learning_rate": 2.695398100803506e-05,
32
+ "loss": 1.4118,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 1.83,
37
+ "learning_rate": 8.692476260043827e-06,
38
+ "loss": 1.3941,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 2.0,
43
  "step": 2738,
44
  "total_flos": 1.7783709700718592e+16,
45
+ "train_loss": 1.505635230187694,
46
+ "train_runtime": 4170.6739,
47
+ "train_samples_per_second": 42.007,
48
+ "train_steps_per_second": 0.656
49
  }
50
  ],
51
  "logging_steps": 500,