DylanJHJ
/

flan-t5-readqg.msmarco-harg-neg

Model card | Files and versions | Community

3v324v23 committed on Jun 2

Commit

f4bdf0f

•

1 Parent(s): ec62f1c

add models

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +10 -0
README.md +3 -3
baseline_bs4_top1.log +0 -0
baseline_bs4_top1/checkpoint-10000/config.json +62 -0
baseline_bs4_top1/checkpoint-10000/generation_config.json +7 -0
baseline_bs4_top1/checkpoint-10000/optimizer.pt +3 -0
baseline_bs4_top1/checkpoint-10000/pytorch_model.bin +3 -0
baseline_bs4_top1/checkpoint-10000/rng_state.pth +3 -0
baseline_bs4_top1/checkpoint-10000/scheduler.pt +3 -0
baseline_bs4_top1/checkpoint-10000/special_tokens_map.json +107 -0
baseline_bs4_top1/checkpoint-10000/spiece.model +3 -0
baseline_bs4_top1/checkpoint-10000/tokenizer.json +0 -0
baseline_bs4_top1/checkpoint-10000/tokenizer_config.json +112 -0
baseline_bs4_top1/checkpoint-10000/trainer_state.json +139 -0
baseline_bs4_top1/checkpoint-10000/training_args.bin +3 -0
baseline_bs4_top1/checkpoint-20000/config.json +62 -0
baseline_bs4_top1/checkpoint-20000/generation_config.json +7 -0
baseline_bs4_top1/checkpoint-20000/optimizer.pt +3 -0
baseline_bs4_top1/checkpoint-20000/pytorch_model.bin +3 -0
baseline_bs4_top1/checkpoint-20000/rng_state.pth +3 -0
baseline_bs4_top1/checkpoint-20000/scheduler.pt +3 -0
baseline_bs4_top1/checkpoint-20000/special_tokens_map.json +107 -0
baseline_bs4_top1/checkpoint-20000/spiece.model +3 -0
baseline_bs4_top1/checkpoint-20000/tokenizer.json +0 -0
baseline_bs4_top1/checkpoint-20000/tokenizer_config.json +112 -0
baseline_bs4_top1/checkpoint-20000/trainer_state.json +259 -0
baseline_bs4_top1/checkpoint-20000/training_args.bin +3 -0
baseline_bs4_top1/data_config.json +1 -0
baseline_bs4_top1/hfmodel_config.json +1 -0
baseline_bs4_top1/model_config.json +1 -0
baseline_bs4_top1/train_config.json +1 -0
baseline_bs4_top2.log +0 -0
baseline_bs4_top2/checkpoint-10000/config.json +62 -0
baseline_bs4_top2/checkpoint-10000/generation_config.json +7 -0
baseline_bs4_top2/checkpoint-10000/optimizer.pt +3 -0
baseline_bs4_top2/checkpoint-10000/pytorch_model.bin +3 -0
baseline_bs4_top2/checkpoint-10000/rng_state.pth +3 -0
baseline_bs4_top2/checkpoint-10000/scheduler.pt +3 -0
baseline_bs4_top2/checkpoint-10000/special_tokens_map.json +107 -0
baseline_bs4_top2/checkpoint-10000/spiece.model +3 -0
baseline_bs4_top2/checkpoint-10000/tokenizer.json +0 -0
baseline_bs4_top2/checkpoint-10000/tokenizer_config.json +112 -0
baseline_bs4_top2/checkpoint-10000/trainer_state.json +139 -0
baseline_bs4_top2/checkpoint-10000/training_args.bin +3 -0
baseline_bs4_top2/checkpoint-20000/config.json +62 -0
baseline_bs4_top2/checkpoint-20000/generation_config.json +7 -0
baseline_bs4_top2/checkpoint-20000/optimizer.pt +3 -0
baseline_bs4_top2/checkpoint-20000/pytorch_model.bin +3 -0
baseline_bs4_top2/checkpoint-20000/rng_state.pth +3 -0
baseline_bs4_top2/checkpoint-20000/scheduler.pt +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+baseline_bs4_top4/checkpoint-10000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+calibrate_margin_ibn_dd/checkpoint-20000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+calibrate_rank_ibn_dd/checkpoint-10000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+baseline_bs4_top1/checkpoint-10000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+baseline_bs4_top1/checkpoint-20000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+baseline_bs4_top2/checkpoint-10000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+baseline_bs4_top2/checkpoint-20000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+baseline_bs4_top4/checkpoint-20000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+calibrate_margin_ibn_dd/checkpoint-10000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+calibrate_rank_ibn_dd/checkpoint-20000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,3 @@
----
-license: apache-2.0
----

+---
+license: apache-2.0
+---

baseline_bs4_top1.log ADDED Viewed

The diff for this file is too large to render. See raw diff

baseline_bs4_top1/checkpoint-10000/config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "_name_or_path": "google/flan-t5-base",
+  "architectures": [
+    "SoftRelPromptFlanT5"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.33.1",
+  "use_cache": true,
+  "vocab_size": 32128
+}

baseline_bs4_top1/checkpoint-10000/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.33.1"
+}

baseline_bs4_top1/checkpoint-10000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09c0ceab0ed46acb0ec16d626a5fcc26798ea2d7a8eddcbfe151546635d969fb
+size 144545

baseline_bs4_top1/checkpoint-10000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e12a403413d9634b6e7804e3e2e1979f6566c15d89f196db0bc292bd6885c61
+size 990480513

baseline_bs4_top1/checkpoint-10000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de4c87fc2dfbf5a627f8c2a0575b0effa1f233623d0165ebcd993a60952af24b
+size 14575

baseline_bs4_top1/checkpoint-10000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89744af0d534dd9add5a42ebd997c43178aeb78f0f65e79af8379d8a5c11b73a
+size 627

baseline_bs4_top1/checkpoint-10000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

baseline_bs4_top1/checkpoint-10000/spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+size 791656

baseline_bs4_top1/checkpoint-10000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

baseline_bs4_top1/checkpoint-10000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "extra_ids": 100,
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}

baseline_bs4_top1/checkpoint-10000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,139 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.008527584604298755,
+  "eval_steps": 500,
+  "global_step": 10000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.6476,
+      "step": 500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5631,
+      "step": 1000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5854,
+      "step": 1500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5557,
+      "step": 2000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5653,
+      "step": 2500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5457,
+      "step": 3000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5552,
+      "step": 3500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5559,
+      "step": 4000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5465,
+      "step": 4500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5481,
+      "step": 5000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5311,
+      "step": 5500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5356,
+      "step": 6000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5502,
+      "step": 6500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.527,
+      "step": 7000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5383,
+      "step": 7500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5064,
+      "step": 8000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5271,
+      "step": 8500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5295,
+      "step": 9000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5098,
+      "step": 9500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.53,
+      "step": 10000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 20000,
+  "num_train_epochs": 1,
+  "save_steps": 10000,
+  "total_flos": 4.829257277256499e+16,
+  "trial_name": null,
+  "trial_params": null
+}

baseline_bs4_top1/checkpoint-10000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09ab173dc6fc36e5747aa5255939206d9c965bbe4469338c8b96de7a0faed00a
+size 4539

baseline_bs4_top1/checkpoint-20000/config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "_name_or_path": "google/flan-t5-base",
+  "architectures": [
+    "SoftRelPromptFlanT5"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.33.1",
+  "use_cache": true,
+  "vocab_size": 32128
+}

baseline_bs4_top1/checkpoint-20000/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.33.1"
+}

baseline_bs4_top1/checkpoint-20000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d89bdef1fa8581fbf366465c2c48a742068cc56363ee861230021037b25a7a53
+size 144545

baseline_bs4_top1/checkpoint-20000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:979953b9f2b37eb669d4d0a5cdaf0a0fa69b4432d7bf17322cc56e064d696559
+size 990480513

baseline_bs4_top1/checkpoint-20000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ee65fd173e43a5bb96f2f07bf1e86b7666cd24f1ff7c2f132f19e39ccc7b2b9
+size 14575

baseline_bs4_top1/checkpoint-20000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3ddfbcd33fc0d81c222807ca3e42cd9654f7e531f573941ed9599b1e07e0373
+size 627

baseline_bs4_top1/checkpoint-20000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

baseline_bs4_top1/checkpoint-20000/spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+size 791656

baseline_bs4_top1/checkpoint-20000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

baseline_bs4_top1/checkpoint-20000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "extra_ids": 100,
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}

baseline_bs4_top1/checkpoint-20000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,259 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.01705516920859751,
+  "eval_steps": 500,
+  "global_step": 20000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.6476,
+      "step": 500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5631,
+      "step": 1000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5854,
+      "step": 1500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5557,
+      "step": 2000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5653,
+      "step": 2500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5457,
+      "step": 3000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5552,
+      "step": 3500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5559,
+      "step": 4000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5465,
+      "step": 4500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5481,
+      "step": 5000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5311,
+      "step": 5500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5356,
+      "step": 6000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5502,
+      "step": 6500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.527,
+      "step": 7000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5383,
+      "step": 7500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5064,
+      "step": 8000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5271,
+      "step": 8500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5295,
+      "step": 9000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5098,
+      "step": 9500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.53,
+      "step": 10000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5387,
+      "step": 10500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5176,
+      "step": 11000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5296,
+      "step": 11500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5416,
+      "step": 12000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.514,
+      "step": 12500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.4975,
+      "step": 13000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5488,
+      "step": 13500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.4987,
+      "step": 14000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.4859,
+      "step": 14500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5495,
+      "step": 15000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5347,
+      "step": 15500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5225,
+      "step": 16000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.537,
+      "step": 16500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.512,
+      "step": 17000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5263,
+      "step": 17500
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.01,
+      "loss": 1.5188,
+      "step": 18000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.01,
+      "loss": 1.5163,
+      "step": 18500
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.01,
+      "loss": 1.5372,
+      "step": 19000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.01,
+      "loss": 1.5225,
+      "step": 19500
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 0.01,
+      "loss": 1.5114,
+      "step": 20000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 20000,
+  "num_train_epochs": 1,
+  "save_steps": 10000,
+  "total_flos": 9.652535293088563e+16,
+  "trial_name": null,
+  "trial_params": null
+}

baseline_bs4_top1/checkpoint-20000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09ab173dc6fc36e5747aa5255939206d9c965bbe4469338c8b96de7a0faed00a
+size 4539

baseline_bs4_top1/data_config.json ADDED Viewed

	@@ -0,0 +1 @@


+ {"dataset_config_name": null, "overwrite_cache": false, "preprocessing_num_workers": null, "train_file": "/home/jhju/datasets/nils.sentence.transformers/ce.minilm.hardneg.vL.jsonl", "eval_file": null, "max_p_length": 128, "max_q_length": 16, "m_negative_per_example": 4, "m_positive_per_example": 4, "random_corrupt_rate": 0.0}

baseline_bs4_top1/hfmodel_config.json ADDED Viewed

	@@ -0,0 +1 @@


+ {"model_name_or_path": "google/flan-t5-base", "config_name": "google/flan-t5-base", "tokenizer_name": "google/flan-t5-base", "cache_dir": null, "use_fast_tokenizer": true, "use_auth_token": false}

baseline_bs4_top1/model_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"add_classification_head": false, "baseline_prefix": "{1}", "instruction_prompt": "Generate a question for the passage with relevance label: ", "instruction_prompt_idx": [6939, 2206, 3, 9, 822, 21, 8, 5454, 28, 20208, 3783, 10, 3], "pos_neg_prompt": null, "pos_neg_prompt_idx": null, "relevant_prompt": "true true true true true", "relevant_prompt_idx": [1176, 1176, 1176, 1176, 1176], "irrelevant_prompt": "false false false false false", "irrelevant_prompt_idx": [6136, 6136, 6136, 6136, 6136], "head_size": 64, "pooling": "mean", "activation": "sigmoid", "latent_size": 128, "activate_prompt_attention": true}

baseline_bs4_top1/train_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"output_dir": "/work/jhju/readqg-baseline//baseline_bs4_top1", "overwrite_output_dir": true, "do_train": true, "do_eval": false, "do_predict": false, "evaluation_strategy": "no", "prediction_loss_only": false, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, "gradient_accumulation_steps": 1, "eval_accumulation_steps": null, "eval_delay": 0, "learning_rate": 0.01, "weight_decay": 0.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1.0, "num_train_epochs": 3.0, "max_steps": 20000, "lr_scheduler_type": "constant", "warmup_ratio": 0.0, "warmup_steps": 0, "log_level": "passive", "log_level_replica": "warning", "log_on_each_node": true, "logging_dir": "./logs", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 500, "logging_nan_inf_filter": true, "save_strategy": "steps", "save_steps": 10000, "save_total_limit": 5, "save_safetensors": false, "save_on_each_node": false, "no_cuda": false, "use_cpu": false, "use_mps_device": false, "seed": 42, "data_seed": null, "jit_mode_eval": false, "use_ipex": false, "bf16": false, "fp16": false, "fp16_opt_level": "O1", "half_precision_backend": "auto", "bf16_full_eval": false, "fp16_full_eval": false, "tf32": null, "local_rank": 0, "ddp_backend": null, "tpu_num_cores": null, "tpu_metrics_debug": false, "debug": [], "dataloader_drop_last": false, "eval_steps": 500, "dataloader_num_workers": 0, "past_index": -1, "run_name": "prompt=5_batch=4_sample=top1", "disable_tqdm": false, "remove_unused_columns": false, "label_names": null, "load_best_model_at_end": false, "metric_for_best_model": null, "greater_is_better": null, "ignore_data_skip": false, "sharded_ddp": [], "fsdp": [], "fsdp_min_num_params": 0, "fsdp_config": {"min_num_params": 0, "xla": false, "xla_fsdp_grad_ckpt": false}, "fsdp_transformer_layer_cls_to_wrap": null, "deepspeed": null, "label_smoothing_factor": 0.0, "optim": 
"adamw_torch", "optim_args": null, "adafactor": false, "group_by_length": false, "length_column_name": "length", "report_to": ["wandb"], "ddp_find_unused_parameters": null, "ddp_bucket_cap_mb": null, "ddp_broadcast_buffers": null, "dataloader_pin_memory": true, "skip_memory_metrics": true, "use_legacy_prediction_loop": false, "push_to_hub": false, "resume_from_checkpoint": null, "hub_model_id": null, "hub_strategy": "every_save", "hub_token": null, "hub_private_repo": false, "hub_always_push": false, "gradient_checkpointing": true, "include_inputs_for_metrics": false, "fp16_backend": "auto", "push_to_hub_model_id": null, "push_to_hub_organization": null, "push_to_hub_token": null, "_n_gpu": 1, "mp_parameters": "", "auto_find_batch_size": false, "full_determinism": false, "torchdynamo": null, "ray_scope": "last", "ddp_timeout": 1800, "torch_compile": false, "torch_compile_backend": null, "torch_compile_mode": null, "dispatch_batches": null, "sortish_sampler": false, "predict_with_generate": false, "generation_max_length": null, "generation_num_beams": null, "generation_config": null, "random_init": false, "enable_unlikelihood": false, "enable_calibration": null, "calibration_margin_ngrams": null, "gamma": 1.0, "enable_similarity_loss": null, "document_wise_contrastive": false, "relevance_wise_contrastive": false, "tau": 1.0, "sample_random": true, "sample_topk": 1, "enable_vae_loss": false, "k": 0.0025, "x0": 2500, "annealing_fn": "logistic"}

baseline_bs4_top2.log ADDED Viewed

The diff for this file is too large to render. See raw diff

baseline_bs4_top2/checkpoint-10000/config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "_name_or_path": "google/flan-t5-base",
+  "architectures": [
+    "SoftRelPromptFlanT5"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.33.1",
+  "use_cache": true,
+  "vocab_size": 32128
+}

baseline_bs4_top2/checkpoint-10000/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.33.1"
+}

baseline_bs4_top2/checkpoint-10000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc67f5c5dc22f7c375b0c2101d705d270abdc690e52970bb6e6d499ca53cc6a7
+size 144545

baseline_bs4_top2/checkpoint-10000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d366a12d59545d20daa0dbcde33cbac47216dd084289a717889298622bb0e2ae
+size 990480513

baseline_bs4_top2/checkpoint-10000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89428fc2bed9ce2a97559ad926183a8a8fb059a55491935c6cdb5773685812f4
+size 14511

baseline_bs4_top2/checkpoint-10000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89744af0d534dd9add5a42ebd997c43178aeb78f0f65e79af8379d8a5c11b73a
+size 627

baseline_bs4_top2/checkpoint-10000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

baseline_bs4_top2/checkpoint-10000/spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+size 791656

baseline_bs4_top2/checkpoint-10000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

baseline_bs4_top2/checkpoint-10000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "extra_ids": 100,
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}

baseline_bs4_top2/checkpoint-10000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,139 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.008527584604298755,
+  "eval_steps": 500,
+  "global_step": 10000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.6449,
+      "step": 500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5651,
+      "step": 1000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5875,
+      "step": 1500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5539,
+      "step": 2000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5524,
+      "step": 2500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5449,
+      "step": 3000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.549,
+      "step": 3500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5671,
+      "step": 4000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5251,
+      "step": 4500
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5532,
+      "step": 5000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 0.01,
+      "loss": 1.5262,
+      "step": 5500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5306,
+      "step": 6000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5273,
+      "step": 6500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.519,
+      "step": 7000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5305,
+      "step": 7500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.507,
+      "step": 8000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5255,
+      "step": 8500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5106,
+      "step": 9000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5224,
+      "step": 9500
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 0.01,
+      "loss": 1.5241,
+      "step": 10000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 20000,
+  "num_train_epochs": 1,
+  "save_steps": 10000,
+  "total_flos": 4.829257277256499e+16,
+  "trial_name": null,
+  "trial_params": null
+}

baseline_bs4_top2/checkpoint-10000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af2adb9b227cf0f18c45853aa1db09565dc63c29547902da6f50352111a9a5e7
+size 4539

baseline_bs4_top2/checkpoint-20000/config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "_name_or_path": "google/flan-t5-base",
+  "architectures": [
+    "SoftRelPromptFlanT5"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.33.1",
+  "use_cache": true,
+  "vocab_size": 32128
+}

baseline_bs4_top2/checkpoint-20000/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.33.1"
+}

baseline_bs4_top2/checkpoint-20000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a4e916e263fcf89b5d721173eee18cb5b06899e27ec18f26d0a22ed9e2016282
+size 144545

baseline_bs4_top2/checkpoint-20000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3835ee505ebe813e921bacd0eea2ed25ed36c4ff4e46de9431631321049a3a53
+size 990480513

baseline_bs4_top2/checkpoint-20000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:973f9fb61da573e339acd801c6477b8cc4497eed865244e05b84a0eedbe74768
+size 14511

baseline_bs4_top2/checkpoint-20000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3ddfbcd33fc0d81c222807ca3e42cd9654f7e531f573941ed9599b1e07e0373
+size 627