DylanJHJ committed on
Commit
7b27ed3
1 Parent(s): bc218c3

add fidmrg

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +17 -0
  2. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/config.json +62 -0
  3. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/generation_config.json +7 -0
  4. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/optimizer.pt +3 -0
  5. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/pytorch_model.bin +3 -0
  6. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/rng_state.pth +3 -0
  7. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/scheduler.pt +3 -0
  8. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/special_tokens_map.json +107 -0
  9. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/spiece.model +3 -0
  10. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/tokenizer.json +0 -0
  11. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/tokenizer_config.json +112 -0
  12. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/trainer_state.json +299 -0
  13. fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/training_args.bin +3 -0
  14. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/config.json +62 -0
  15. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/generation_config.json +7 -0
  16. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/optimizer.pt +3 -0
  17. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/pytorch_model.bin +3 -0
  18. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/rng_state.pth +3 -0
  19. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/scheduler.pt +3 -0
  20. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/special_tokens_map.json +107 -0
  21. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/spiece.model +3 -0
  22. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/tokenizer.json +0 -0
  23. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/tokenizer_config.json +112 -0
  24. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/trainer_state.json +159 -0
  25. fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/training_args.bin +3 -0
  26. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/config.json +62 -0
  27. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/generation_config.json +7 -0
  28. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/optimizer.pt +3 -0
  29. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/pytorch_model.bin +3 -0
  30. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/rng_state.pth +3 -0
  31. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/scheduler.pt +3 -0
  32. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/special_tokens_map.json +107 -0
  33. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/spiece.model +3 -0
  34. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/tokenizer.json +0 -0
  35. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/tokenizer_config.json +112 -0
  36. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/trainer_state.json +299 -0
  37. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/training_args.bin +3 -0
  38. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/config.json +62 -0
  39. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/generation_config.json +7 -0
  40. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/optimizer.pt +3 -0
  41. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/pytorch_model.bin +3 -0
  42. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/rng_state.pth +3 -0
  43. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/scheduler.pt +3 -0
  44. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/special_tokens_map.json +107 -0
  45. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/spiece.model +3 -0
  46. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/tokenizer.json +0 -0
  47. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/tokenizer_config.json +112 -0
  48. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/trainer_state.json +159 -0
  49. fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/training_args.bin +3 -0
  50. fidmrg_random.bm25.fidcqg-w/checkpoint-10000/config.json +62 -0
.gitattributes CHANGED
@@ -33,3 +33,20 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ */*/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
37
+ fidmrg_random.bm25.fidcqg-w/checkpoint-10000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
38
+ fidmrg_random.bm25.fidcqg-w/checkpoint-5000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
39
+ fidmrg_random.bm25.fidcqg/checkpoint-10000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
40
+ fidmrg_random.bm25.fidcqg/checkpoint-5000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
41
+ fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
42
+ fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
43
+ fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
44
+ fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
45
+ fidmrg_random.bm25.fidcqg/checkpoint-10000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
46
+ fidmrg_random.bm25.fidcqg/checkpoint-5000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
47
+ fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
48
+ fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
49
+ fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
50
+ fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
51
+ fidmrg_random.bm25.fidcqg-w/checkpoint-10000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
52
+ fidmrg_random.bm25.fidcqg-w/checkpoint-5000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "FiDT5"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "tie_word_embeddings": false,
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.33.1",
60
+ "use_cache": true,
61
+ "vocab_size": 32128
62
+ }
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.33.1"
7
+ }
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a9289bad16b8a763d35664e88bca3a333d8548754e4554fe732c63c7079b36b
3
+ size 1980864314
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:087f380635736f9eb5b8fbd098aa7157aa88ac3bc5099b1ffaa6993969b11d81
3
+ size 990411190
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01bffb6fd7cce7b2e1cf3044180ca98a74bce41013a965361396b448a436701a
3
+ size 14244
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07356cb668b8efc440ddb2f7f81f7da3152060e81d0229922fa3b30df3313f82
3
+ size 1064
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "model_max_length": 512,
108
+ "pad_token": "<pad>",
109
+ "sp_model_kwargs": {},
110
+ "tokenizer_class": "T5Tokenizer",
111
+ "unk_token": "<unk>"
112
+ }
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/trainer_state.json ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.2597631645250693,
5
+ "eval_steps": 500,
6
+ "global_step": 10000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06,
13
+ "learning_rate": 9.5e-05,
14
+ "loss": 57.2732,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.06,
19
+ "eval_loss": 51.029930114746094,
20
+ "eval_runtime": 3.1759,
21
+ "eval_samples_per_second": 31.487,
22
+ "eval_steps_per_second": 4.093,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 0.13,
27
+ "learning_rate": 9e-05,
28
+ "loss": 56.0705,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 0.13,
33
+ "eval_loss": 49.898155212402344,
34
+ "eval_runtime": 4.4269,
35
+ "eval_samples_per_second": 22.589,
36
+ "eval_steps_per_second": 2.937,
37
+ "step": 1000
38
+ },
39
+ {
40
+ "epoch": 0.19,
41
+ "learning_rate": 8.5e-05,
42
+ "loss": 55.3144,
43
+ "step": 1500
44
+ },
45
+ {
46
+ "epoch": 0.19,
47
+ "eval_loss": 49.3874397277832,
48
+ "eval_runtime": 4.228,
49
+ "eval_samples_per_second": 23.652,
50
+ "eval_steps_per_second": 3.075,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 0.25,
55
+ "learning_rate": 8e-05,
56
+ "loss": 54.301,
57
+ "step": 2000
58
+ },
59
+ {
60
+ "epoch": 0.25,
61
+ "eval_loss": 48.694358825683594,
62
+ "eval_runtime": 3.1949,
63
+ "eval_samples_per_second": 31.299,
64
+ "eval_steps_per_second": 4.069,
65
+ "step": 2000
66
+ },
67
+ {
68
+ "epoch": 0.31,
69
+ "learning_rate": 7.500000000000001e-05,
70
+ "loss": 54.8258,
71
+ "step": 2500
72
+ },
73
+ {
74
+ "epoch": 0.31,
75
+ "eval_loss": 49.120418548583984,
76
+ "eval_runtime": 4.0702,
77
+ "eval_samples_per_second": 24.569,
78
+ "eval_steps_per_second": 3.194,
79
+ "step": 2500
80
+ },
81
+ {
82
+ "epoch": 0.38,
83
+ "learning_rate": 7e-05,
84
+ "loss": 55.0078,
85
+ "step": 3000
86
+ },
87
+ {
88
+ "epoch": 0.38,
89
+ "eval_loss": 48.479034423828125,
90
+ "eval_runtime": 3.9143,
91
+ "eval_samples_per_second": 25.547,
92
+ "eval_steps_per_second": 3.321,
93
+ "step": 3000
94
+ },
95
+ {
96
+ "epoch": 0.44,
97
+ "learning_rate": 6.500000000000001e-05,
98
+ "loss": 55.0228,
99
+ "step": 3500
100
+ },
101
+ {
102
+ "epoch": 0.44,
103
+ "eval_loss": 48.023162841796875,
104
+ "eval_runtime": 3.9112,
105
+ "eval_samples_per_second": 25.568,
106
+ "eval_steps_per_second": 3.324,
107
+ "step": 3500
108
+ },
109
+ {
110
+ "epoch": 0.5,
111
+ "learning_rate": 6e-05,
112
+ "loss": 53.2773,
113
+ "step": 4000
114
+ },
115
+ {
116
+ "epoch": 0.5,
117
+ "eval_loss": 47.419307708740234,
118
+ "eval_runtime": 4.7859,
119
+ "eval_samples_per_second": 20.895,
120
+ "eval_steps_per_second": 2.716,
121
+ "step": 4000
122
+ },
123
+ {
124
+ "epoch": 0.57,
125
+ "learning_rate": 5.500000000000001e-05,
126
+ "loss": 54.9082,
127
+ "step": 4500
128
+ },
129
+ {
130
+ "epoch": 0.57,
131
+ "eval_loss": 46.841548919677734,
132
+ "eval_runtime": 4.8111,
133
+ "eval_samples_per_second": 20.785,
134
+ "eval_steps_per_second": 2.702,
135
+ "step": 4500
136
+ },
137
+ {
138
+ "epoch": 0.63,
139
+ "learning_rate": 5e-05,
140
+ "loss": 53.3684,
141
+ "step": 5000
142
+ },
143
+ {
144
+ "epoch": 0.63,
145
+ "eval_loss": 46.429256439208984,
146
+ "eval_runtime": 4.525,
147
+ "eval_samples_per_second": 22.099,
148
+ "eval_steps_per_second": 2.873,
149
+ "step": 5000
150
+ },
151
+ {
152
+ "epoch": 0.69,
153
+ "learning_rate": 4.5e-05,
154
+ "loss": 54.4457,
155
+ "step": 5500
156
+ },
157
+ {
158
+ "epoch": 0.69,
159
+ "eval_loss": 46.04310607910156,
160
+ "eval_runtime": 4.6243,
161
+ "eval_samples_per_second": 21.625,
162
+ "eval_steps_per_second": 2.811,
163
+ "step": 5500
164
+ },
165
+ {
166
+ "epoch": 0.76,
167
+ "learning_rate": 4e-05,
168
+ "loss": 53.9792,
169
+ "step": 6000
170
+ },
171
+ {
172
+ "epoch": 0.76,
173
+ "eval_loss": 45.82972717285156,
174
+ "eval_runtime": 3.6612,
175
+ "eval_samples_per_second": 27.314,
176
+ "eval_steps_per_second": 3.551,
177
+ "step": 6000
178
+ },
179
+ {
180
+ "epoch": 0.82,
181
+ "learning_rate": 3.5e-05,
182
+ "loss": 53.924,
183
+ "step": 6500
184
+ },
185
+ {
186
+ "epoch": 0.82,
187
+ "eval_loss": 45.8700065612793,
188
+ "eval_runtime": 4.1117,
189
+ "eval_samples_per_second": 24.321,
190
+ "eval_steps_per_second": 3.162,
191
+ "step": 6500
192
+ },
193
+ {
194
+ "epoch": 0.88,
195
+ "learning_rate": 3e-05,
196
+ "loss": 54.2944,
197
+ "step": 7000
198
+ },
199
+ {
200
+ "epoch": 0.88,
201
+ "eval_loss": 45.5869140625,
202
+ "eval_runtime": 4.5188,
203
+ "eval_samples_per_second": 22.13,
204
+ "eval_steps_per_second": 2.877,
205
+ "step": 7000
206
+ },
207
+ {
208
+ "epoch": 0.94,
209
+ "learning_rate": 2.5e-05,
210
+ "loss": 53.9664,
211
+ "step": 7500
212
+ },
213
+ {
214
+ "epoch": 0.94,
215
+ "eval_loss": 45.52823257446289,
216
+ "eval_runtime": 4.5014,
217
+ "eval_samples_per_second": 22.215,
218
+ "eval_steps_per_second": 2.888,
219
+ "step": 7500
220
+ },
221
+ {
222
+ "epoch": 1.01,
223
+ "learning_rate": 2e-05,
224
+ "loss": 52.0975,
225
+ "step": 8000
226
+ },
227
+ {
228
+ "epoch": 1.01,
229
+ "eval_loss": 45.37757873535156,
230
+ "eval_runtime": 4.4906,
231
+ "eval_samples_per_second": 22.269,
232
+ "eval_steps_per_second": 2.895,
233
+ "step": 8000
234
+ },
235
+ {
236
+ "epoch": 1.07,
237
+ "learning_rate": 1.5e-05,
238
+ "loss": 50.0576,
239
+ "step": 8500
240
+ },
241
+ {
242
+ "epoch": 1.07,
243
+ "eval_loss": 45.175968170166016,
244
+ "eval_runtime": 4.5314,
245
+ "eval_samples_per_second": 22.068,
246
+ "eval_steps_per_second": 2.869,
247
+ "step": 8500
248
+ },
249
+ {
250
+ "epoch": 1.13,
251
+ "learning_rate": 1e-05,
252
+ "loss": 50.6407,
253
+ "step": 9000
254
+ },
255
+ {
256
+ "epoch": 1.13,
257
+ "eval_loss": 45.02992630004883,
258
+ "eval_runtime": 4.196,
259
+ "eval_samples_per_second": 23.832,
260
+ "eval_steps_per_second": 3.098,
261
+ "step": 9000
262
+ },
263
+ {
264
+ "epoch": 1.2,
265
+ "learning_rate": 5e-06,
266
+ "loss": 50.3118,
267
+ "step": 9500
268
+ },
269
+ {
270
+ "epoch": 1.2,
271
+ "eval_loss": 44.964359283447266,
272
+ "eval_runtime": 4.7953,
273
+ "eval_samples_per_second": 20.854,
274
+ "eval_steps_per_second": 2.711,
275
+ "step": 9500
276
+ },
277
+ {
278
+ "epoch": 1.26,
279
+ "learning_rate": 0.0,
280
+ "loss": 48.9218,
281
+ "step": 10000
282
+ },
283
+ {
284
+ "epoch": 1.26,
285
+ "eval_loss": 44.92063903808594,
286
+ "eval_runtime": 4.609,
287
+ "eval_samples_per_second": 21.697,
288
+ "eval_steps_per_second": 2.821,
289
+ "step": 10000
290
+ }
291
+ ],
292
+ "logging_steps": 500,
293
+ "max_steps": 10000,
294
+ "num_train_epochs": 2,
295
+ "save_steps": 5000,
296
+ "total_flos": 1.3694633480945664e+17,
297
+ "trial_name": null,
298
+ "trial_params": null
299
+ }
fidmrg_answer.bm25.fidcqg-w/checkpoint-10000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd5596d549c770f38076b75ce0924f1801d992e81220f6d8bc270e00f65960db
3
+ size 4472
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "FiDT5"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "tie_word_embeddings": false,
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.33.1",
60
+ "use_cache": true,
61
+ "vocab_size": 32128
62
+ }
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.33.1"
7
+ }
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20f60a3295b74a07b267d61c25c68b7d4aa25172ac5daf91ff43784aa899aa5
3
+ size 1980864314
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5f7db81aba54158bf048357fd142642374958f03cb6fb473a3b92cb6ad681e
3
+ size 990411190
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64e0d46eb43cded34db6d2c2e318774f14fbb9a206f56f7984381e69049a4151
3
+ size 14244
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a54271c874708bb67c02c881532d3a933ce5fa5776358051b2434fd912493cc
3
+ size 1064
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "model_max_length": 512,
108
+ "pad_token": "<pad>",
109
+ "sp_model_kwargs": {},
110
+ "tokenizer_class": "T5Tokenizer",
111
+ "unk_token": "<unk>"
112
+ }
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/trainer_state.json ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.6298815822625347,
5
+ "eval_steps": 500,
6
+ "global_step": 5000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06,
13
+ "learning_rate": 9.5e-05,
14
+ "loss": 57.2732,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.06,
19
+ "eval_loss": 51.029930114746094,
20
+ "eval_runtime": 3.1759,
21
+ "eval_samples_per_second": 31.487,
22
+ "eval_steps_per_second": 4.093,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 0.13,
27
+ "learning_rate": 9e-05,
28
+ "loss": 56.0705,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 0.13,
33
+ "eval_loss": 49.898155212402344,
34
+ "eval_runtime": 4.4269,
35
+ "eval_samples_per_second": 22.589,
36
+ "eval_steps_per_second": 2.937,
37
+ "step": 1000
38
+ },
39
+ {
40
+ "epoch": 0.19,
41
+ "learning_rate": 8.5e-05,
42
+ "loss": 55.3144,
43
+ "step": 1500
44
+ },
45
+ {
46
+ "epoch": 0.19,
47
+ "eval_loss": 49.3874397277832,
48
+ "eval_runtime": 4.228,
49
+ "eval_samples_per_second": 23.652,
50
+ "eval_steps_per_second": 3.075,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 0.25,
55
+ "learning_rate": 8e-05,
56
+ "loss": 54.301,
57
+ "step": 2000
58
+ },
59
+ {
60
+ "epoch": 0.25,
61
+ "eval_loss": 48.694358825683594,
62
+ "eval_runtime": 3.1949,
63
+ "eval_samples_per_second": 31.299,
64
+ "eval_steps_per_second": 4.069,
65
+ "step": 2000
66
+ },
67
+ {
68
+ "epoch": 0.31,
69
+ "learning_rate": 7.500000000000001e-05,
70
+ "loss": 54.8258,
71
+ "step": 2500
72
+ },
73
+ {
74
+ "epoch": 0.31,
75
+ "eval_loss": 49.120418548583984,
76
+ "eval_runtime": 4.0702,
77
+ "eval_samples_per_second": 24.569,
78
+ "eval_steps_per_second": 3.194,
79
+ "step": 2500
80
+ },
81
+ {
82
+ "epoch": 0.38,
83
+ "learning_rate": 7e-05,
84
+ "loss": 55.0078,
85
+ "step": 3000
86
+ },
87
+ {
88
+ "epoch": 0.38,
89
+ "eval_loss": 48.479034423828125,
90
+ "eval_runtime": 3.9143,
91
+ "eval_samples_per_second": 25.547,
92
+ "eval_steps_per_second": 3.321,
93
+ "step": 3000
94
+ },
95
+ {
96
+ "epoch": 0.44,
97
+ "learning_rate": 6.500000000000001e-05,
98
+ "loss": 55.0228,
99
+ "step": 3500
100
+ },
101
+ {
102
+ "epoch": 0.44,
103
+ "eval_loss": 48.023162841796875,
104
+ "eval_runtime": 3.9112,
105
+ "eval_samples_per_second": 25.568,
106
+ "eval_steps_per_second": 3.324,
107
+ "step": 3500
108
+ },
109
+ {
110
+ "epoch": 0.5,
111
+ "learning_rate": 6e-05,
112
+ "loss": 53.2773,
113
+ "step": 4000
114
+ },
115
+ {
116
+ "epoch": 0.5,
117
+ "eval_loss": 47.419307708740234,
118
+ "eval_runtime": 4.7859,
119
+ "eval_samples_per_second": 20.895,
120
+ "eval_steps_per_second": 2.716,
121
+ "step": 4000
122
+ },
123
+ {
124
+ "epoch": 0.57,
125
+ "learning_rate": 5.500000000000001e-05,
126
+ "loss": 54.9082,
127
+ "step": 4500
128
+ },
129
+ {
130
+ "epoch": 0.57,
131
+ "eval_loss": 46.841548919677734,
132
+ "eval_runtime": 4.8111,
133
+ "eval_samples_per_second": 20.785,
134
+ "eval_steps_per_second": 2.702,
135
+ "step": 4500
136
+ },
137
+ {
138
+ "epoch": 0.63,
139
+ "learning_rate": 5e-05,
140
+ "loss": 53.3684,
141
+ "step": 5000
142
+ },
143
+ {
144
+ "epoch": 0.63,
145
+ "eval_loss": 46.429256439208984,
146
+ "eval_runtime": 4.525,
147
+ "eval_samples_per_second": 22.099,
148
+ "eval_steps_per_second": 2.873,
149
+ "step": 5000
150
+ }
151
+ ],
152
+ "logging_steps": 500,
153
+ "max_steps": 10000,
154
+ "num_train_epochs": 2,
155
+ "save_steps": 5000,
156
+ "total_flos": 6.84757352448e+16,
157
+ "trial_name": null,
158
+ "trial_params": null
159
+ }
fidmrg_answer.bm25.fidcqg-w/checkpoint-5000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd5596d549c770f38076b75ce0924f1801d992e81220f6d8bc270e00f65960db
3
+ size 4472
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "FiDT5"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "tie_word_embeddings": false,
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.33.1",
60
+ "use_cache": true,
61
+ "vocab_size": 32128
62
+ }
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.33.1"
7
+ }
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd17bcfe3aacca1b46ca6bcc5b27e83d895f034b414db1a22ea522ad9d2edb60
3
+ size 1980864314
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32583cd56b50201a41a61cc96ab9b90f5657883caeece052e9789534a13fbf4a
3
+ size 990411190
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:603aba05bfe4e59fa0c1c9b0281c6f463fe8d68cd35d50ee6a73070cbba623d3
3
+ size 14244
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07356cb668b8efc440ddb2f7f81f7da3152060e81d0229922fa3b30df3313f82
3
+ size 1064
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "model_max_length": 512,
108
+ "pad_token": "<pad>",
109
+ "sp_model_kwargs": {},
110
+ "tokenizer_class": "T5Tokenizer",
111
+ "unk_token": "<unk>"
112
+ }
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/trainer_state.json ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.6298815822625347,
5
+ "eval_steps": 500,
6
+ "global_step": 10000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 9.5e-05,
14
+ "loss": 37.4397,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.03,
19
+ "eval_loss": 31.211898803710938,
20
+ "eval_runtime": 6.944,
21
+ "eval_samples_per_second": 14.401,
22
+ "eval_steps_per_second": 3.6,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "learning_rate": 9e-05,
28
+ "loss": 33.7011,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 0.06,
33
+ "eval_loss": 30.628690719604492,
34
+ "eval_runtime": 8.8362,
35
+ "eval_samples_per_second": 11.317,
36
+ "eval_steps_per_second": 2.829,
37
+ "step": 1000
38
+ },
39
+ {
40
+ "epoch": 0.09,
41
+ "learning_rate": 8.5e-05,
42
+ "loss": 34.3949,
43
+ "step": 1500
44
+ },
45
+ {
46
+ "epoch": 0.09,
47
+ "eval_loss": 30.2351016998291,
48
+ "eval_runtime": 9.7911,
49
+ "eval_samples_per_second": 10.213,
50
+ "eval_steps_per_second": 2.553,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 0.13,
55
+ "learning_rate": 8e-05,
56
+ "loss": 33.7534,
57
+ "step": 2000
58
+ },
59
+ {
60
+ "epoch": 0.13,
61
+ "eval_loss": 30.151206970214844,
62
+ "eval_runtime": 9.0893,
63
+ "eval_samples_per_second": 11.002,
64
+ "eval_steps_per_second": 2.75,
65
+ "step": 2000
66
+ },
67
+ {
68
+ "epoch": 0.16,
69
+ "learning_rate": 7.500000000000001e-05,
70
+ "loss": 33.9418,
71
+ "step": 2500
72
+ },
73
+ {
74
+ "epoch": 0.16,
75
+ "eval_loss": 30.117929458618164,
76
+ "eval_runtime": 7.9747,
77
+ "eval_samples_per_second": 12.54,
78
+ "eval_steps_per_second": 3.135,
79
+ "step": 2500
80
+ },
81
+ {
82
+ "epoch": 0.19,
83
+ "learning_rate": 7e-05,
84
+ "loss": 33.3573,
85
+ "step": 3000
86
+ },
87
+ {
88
+ "epoch": 0.19,
89
+ "eval_loss": 29.591096878051758,
90
+ "eval_runtime": 7.5459,
91
+ "eval_samples_per_second": 13.252,
92
+ "eval_steps_per_second": 3.313,
93
+ "step": 3000
94
+ },
95
+ {
96
+ "epoch": 0.22,
97
+ "learning_rate": 6.500000000000001e-05,
98
+ "loss": 33.3372,
99
+ "step": 3500
100
+ },
101
+ {
102
+ "epoch": 0.22,
103
+ "eval_loss": 29.37584686279297,
104
+ "eval_runtime": 9.1309,
105
+ "eval_samples_per_second": 10.952,
106
+ "eval_steps_per_second": 2.738,
107
+ "step": 3500
108
+ },
109
+ {
110
+ "epoch": 0.25,
111
+ "learning_rate": 6e-05,
112
+ "loss": 32.5095,
113
+ "step": 4000
114
+ },
115
+ {
116
+ "epoch": 0.25,
117
+ "eval_loss": 29.21722412109375,
118
+ "eval_runtime": 8.935,
119
+ "eval_samples_per_second": 11.192,
120
+ "eval_steps_per_second": 2.798,
121
+ "step": 4000
122
+ },
123
+ {
124
+ "epoch": 0.28,
125
+ "learning_rate": 5.500000000000001e-05,
126
+ "loss": 32.6192,
127
+ "step": 4500
128
+ },
129
+ {
130
+ "epoch": 0.28,
131
+ "eval_loss": 28.85555648803711,
132
+ "eval_runtime": 8.0464,
133
+ "eval_samples_per_second": 12.428,
134
+ "eval_steps_per_second": 3.107,
135
+ "step": 4500
136
+ },
137
+ {
138
+ "epoch": 0.31,
139
+ "learning_rate": 5e-05,
140
+ "loss": 32.902,
141
+ "step": 5000
142
+ },
143
+ {
144
+ "epoch": 0.31,
145
+ "eval_loss": 28.798521041870117,
146
+ "eval_runtime": 8.3773,
147
+ "eval_samples_per_second": 11.937,
148
+ "eval_steps_per_second": 2.984,
149
+ "step": 5000
150
+ },
151
+ {
152
+ "epoch": 0.35,
153
+ "learning_rate": 4.5e-05,
154
+ "loss": 33.2756,
155
+ "step": 5500
156
+ },
157
+ {
158
+ "epoch": 0.35,
159
+ "eval_loss": 28.811630249023438,
160
+ "eval_runtime": 8.4098,
161
+ "eval_samples_per_second": 11.891,
162
+ "eval_steps_per_second": 2.973,
163
+ "step": 5500
164
+ },
165
+ {
166
+ "epoch": 0.38,
167
+ "learning_rate": 4e-05,
168
+ "loss": 32.3762,
169
+ "step": 6000
170
+ },
171
+ {
172
+ "epoch": 0.38,
173
+ "eval_loss": 28.51260757446289,
174
+ "eval_runtime": 8.6441,
175
+ "eval_samples_per_second": 11.569,
176
+ "eval_steps_per_second": 2.892,
177
+ "step": 6000
178
+ },
179
+ {
180
+ "epoch": 0.41,
181
+ "learning_rate": 3.5e-05,
182
+ "loss": 32.5452,
183
+ "step": 6500
184
+ },
185
+ {
186
+ "epoch": 0.41,
187
+ "eval_loss": 28.600557327270508,
188
+ "eval_runtime": 7.2739,
189
+ "eval_samples_per_second": 13.748,
190
+ "eval_steps_per_second": 3.437,
191
+ "step": 6500
192
+ },
193
+ {
194
+ "epoch": 0.44,
195
+ "learning_rate": 3e-05,
196
+ "loss": 33.0182,
197
+ "step": 7000
198
+ },
199
+ {
200
+ "epoch": 0.44,
201
+ "eval_loss": 28.495351791381836,
202
+ "eval_runtime": 6.2937,
203
+ "eval_samples_per_second": 15.889,
204
+ "eval_steps_per_second": 3.972,
205
+ "step": 7000
206
+ },
207
+ {
208
+ "epoch": 0.47,
209
+ "learning_rate": 2.5e-05,
210
+ "loss": 31.8452,
211
+ "step": 7500
212
+ },
213
+ {
214
+ "epoch": 0.47,
215
+ "eval_loss": 28.361299514770508,
216
+ "eval_runtime": 8.0596,
217
+ "eval_samples_per_second": 12.407,
218
+ "eval_steps_per_second": 3.102,
219
+ "step": 7500
220
+ },
221
+ {
222
+ "epoch": 0.5,
223
+ "learning_rate": 2e-05,
224
+ "loss": 32.0009,
225
+ "step": 8000
226
+ },
227
+ {
228
+ "epoch": 0.5,
229
+ "eval_loss": 28.153642654418945,
230
+ "eval_runtime": 6.3145,
231
+ "eval_samples_per_second": 15.837,
232
+ "eval_steps_per_second": 3.959,
233
+ "step": 8000
234
+ },
235
+ {
236
+ "epoch": 0.54,
237
+ "learning_rate": 1.5e-05,
238
+ "loss": 33.4938,
239
+ "step": 8500
240
+ },
241
+ {
242
+ "epoch": 0.54,
243
+ "eval_loss": 28.087358474731445,
244
+ "eval_runtime": 6.3508,
245
+ "eval_samples_per_second": 15.746,
246
+ "eval_steps_per_second": 3.937,
247
+ "step": 8500
248
+ },
249
+ {
250
+ "epoch": 0.57,
251
+ "learning_rate": 1e-05,
252
+ "loss": 31.9753,
253
+ "step": 9000
254
+ },
255
+ {
256
+ "epoch": 0.57,
257
+ "eval_loss": 28.113698959350586,
258
+ "eval_runtime": 9.8066,
259
+ "eval_samples_per_second": 10.197,
260
+ "eval_steps_per_second": 2.549,
261
+ "step": 9000
262
+ },
263
+ {
264
+ "epoch": 0.6,
265
+ "learning_rate": 5e-06,
266
+ "loss": 31.9571,
267
+ "step": 9500
268
+ },
269
+ {
270
+ "epoch": 0.6,
271
+ "eval_loss": 28.042316436767578,
272
+ "eval_runtime": 9.4344,
273
+ "eval_samples_per_second": 10.6,
274
+ "eval_steps_per_second": 2.65,
275
+ "step": 9500
276
+ },
277
+ {
278
+ "epoch": 0.63,
279
+ "learning_rate": 0.0,
280
+ "loss": 31.8931,
281
+ "step": 10000
282
+ },
283
+ {
284
+ "epoch": 0.63,
285
+ "eval_loss": 28.017412185668945,
286
+ "eval_runtime": 9.3747,
287
+ "eval_samples_per_second": 10.667,
288
+ "eval_steps_per_second": 2.667,
289
+ "step": 10000
290
+ }
291
+ ],
292
+ "logging_steps": 500,
293
+ "max_steps": 10000,
294
+ "num_train_epochs": 1,
295
+ "save_steps": 5000,
296
+ "total_flos": 1.369514704896e+17,
297
+ "trial_name": null,
298
+ "trial_params": null
299
+ }
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-10000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6788bb6ce57287043165ad1f0e3348866b7c4ba15f6c60397df9721eb75d80a3
3
+ size 4472
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "FiDT5"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "tie_word_embeddings": false,
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.33.1",
60
+ "use_cache": true,
61
+ "vocab_size": 32128
62
+ }
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.33.1"
7
+ }
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7877738917a82a130f1ca62fb594f7bf0f13c8d687263977828ff69018c7f93
3
+ size 1980864314
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c25b2e6e086b34f050d9d56521d164ba99b3e7707dbd655cf6e4af8ef02d2d4
3
+ size 990411190
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01d4f44fd881db9d73886c213b534385398ac892b510ce9a9c6d21385fee986f
3
+ size 14244
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a54271c874708bb67c02c881532d3a933ce5fa5776358051b2434fd912493cc
3
+ size 1064
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "model_max_length": 512,
108
+ "pad_token": "<pad>",
109
+ "sp_model_kwargs": {},
110
+ "tokenizer_class": "T5Tokenizer",
111
+ "unk_token": "<unk>"
112
+ }
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/trainer_state.json ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.31494079113126733,
5
+ "eval_steps": 500,
6
+ "global_step": 5000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 9.5e-05,
14
+ "loss": 37.4397,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.03,
19
+ "eval_loss": 31.211898803710938,
20
+ "eval_runtime": 6.944,
21
+ "eval_samples_per_second": 14.401,
22
+ "eval_steps_per_second": 3.6,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "learning_rate": 9e-05,
28
+ "loss": 33.7011,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 0.06,
33
+ "eval_loss": 30.628690719604492,
34
+ "eval_runtime": 8.8362,
35
+ "eval_samples_per_second": 11.317,
36
+ "eval_steps_per_second": 2.829,
37
+ "step": 1000
38
+ },
39
+ {
40
+ "epoch": 0.09,
41
+ "learning_rate": 8.5e-05,
42
+ "loss": 34.3949,
43
+ "step": 1500
44
+ },
45
+ {
46
+ "epoch": 0.09,
47
+ "eval_loss": 30.2351016998291,
48
+ "eval_runtime": 9.7911,
49
+ "eval_samples_per_second": 10.213,
50
+ "eval_steps_per_second": 2.553,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 0.13,
55
+ "learning_rate": 8e-05,
56
+ "loss": 33.7534,
57
+ "step": 2000
58
+ },
59
+ {
60
+ "epoch": 0.13,
61
+ "eval_loss": 30.151206970214844,
62
+ "eval_runtime": 9.0893,
63
+ "eval_samples_per_second": 11.002,
64
+ "eval_steps_per_second": 2.75,
65
+ "step": 2000
66
+ },
67
+ {
68
+ "epoch": 0.16,
69
+ "learning_rate": 7.500000000000001e-05,
70
+ "loss": 33.9418,
71
+ "step": 2500
72
+ },
73
+ {
74
+ "epoch": 0.16,
75
+ "eval_loss": 30.117929458618164,
76
+ "eval_runtime": 7.9747,
77
+ "eval_samples_per_second": 12.54,
78
+ "eval_steps_per_second": 3.135,
79
+ "step": 2500
80
+ },
81
+ {
82
+ "epoch": 0.19,
83
+ "learning_rate": 7e-05,
84
+ "loss": 33.3573,
85
+ "step": 3000
86
+ },
87
+ {
88
+ "epoch": 0.19,
89
+ "eval_loss": 29.591096878051758,
90
+ "eval_runtime": 7.5459,
91
+ "eval_samples_per_second": 13.252,
92
+ "eval_steps_per_second": 3.313,
93
+ "step": 3000
94
+ },
95
+ {
96
+ "epoch": 0.22,
97
+ "learning_rate": 6.500000000000001e-05,
98
+ "loss": 33.3372,
99
+ "step": 3500
100
+ },
101
+ {
102
+ "epoch": 0.22,
103
+ "eval_loss": 29.37584686279297,
104
+ "eval_runtime": 9.1309,
105
+ "eval_samples_per_second": 10.952,
106
+ "eval_steps_per_second": 2.738,
107
+ "step": 3500
108
+ },
109
+ {
110
+ "epoch": 0.25,
111
+ "learning_rate": 6e-05,
112
+ "loss": 32.5095,
113
+ "step": 4000
114
+ },
115
+ {
116
+ "epoch": 0.25,
117
+ "eval_loss": 29.21722412109375,
118
+ "eval_runtime": 8.935,
119
+ "eval_samples_per_second": 11.192,
120
+ "eval_steps_per_second": 2.798,
121
+ "step": 4000
122
+ },
123
+ {
124
+ "epoch": 0.28,
125
+ "learning_rate": 5.500000000000001e-05,
126
+ "loss": 32.6192,
127
+ "step": 4500
128
+ },
129
+ {
130
+ "epoch": 0.28,
131
+ "eval_loss": 28.85555648803711,
132
+ "eval_runtime": 8.0464,
133
+ "eval_samples_per_second": 12.428,
134
+ "eval_steps_per_second": 3.107,
135
+ "step": 4500
136
+ },
137
+ {
138
+ "epoch": 0.31,
139
+ "learning_rate": 5e-05,
140
+ "loss": 32.902,
141
+ "step": 5000
142
+ },
143
+ {
144
+ "epoch": 0.31,
145
+ "eval_loss": 28.798521041870117,
146
+ "eval_runtime": 8.3773,
147
+ "eval_samples_per_second": 11.937,
148
+ "eval_steps_per_second": 2.984,
149
+ "step": 5000
150
+ }
151
+ ],
152
+ "logging_steps": 500,
153
+ "max_steps": 10000,
154
+ "num_train_epochs": 1,
155
+ "save_steps": 5000,
156
+ "total_flos": 6.84757352448e+16,
157
+ "trial_name": null,
158
+ "trial_params": null
159
+ }
fidmrg_enumerated.bm25.fidcqg-w/checkpoint-5000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6788bb6ce57287043165ad1f0e3348866b7c4ba15f6c60397df9721eb75d80a3
3
+ size 4472
fidmrg_random.bm25.fidcqg-w/checkpoint-10000/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "FiDT5"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "tie_word_embeddings": false,
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.33.1",
60
+ "use_cache": true,
61
+ "vocab_size": 32128
62
+ }