DylanJHJ
/

flan-t5-readqg.msmarco-harg-neg

Model card Files Files and versions Community

3v324v23 committed on Jun 2

Commit

bcba58a

•

1 Parent(s): f4bdf0f

remove unused checkpoints

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

baseline_bs4_top1.log +0 -0
baseline_bs4_top1/checkpoint-10000/config.json +0 -62
baseline_bs4_top1/checkpoint-10000/generation_config.json +0 -7
baseline_bs4_top1/checkpoint-10000/optimizer.pt +0 -3
baseline_bs4_top1/checkpoint-10000/pytorch_model.bin +0 -3
baseline_bs4_top1/checkpoint-10000/rng_state.pth +0 -3
baseline_bs4_top1/checkpoint-10000/scheduler.pt +0 -3
baseline_bs4_top1/checkpoint-10000/special_tokens_map.json +0 -107
baseline_bs4_top1/checkpoint-10000/spiece.model +0 -3
baseline_bs4_top1/checkpoint-10000/tokenizer.json +0 -0
baseline_bs4_top1/checkpoint-10000/tokenizer_config.json +0 -112
baseline_bs4_top1/checkpoint-10000/trainer_state.json +0 -139
baseline_bs4_top1/checkpoint-10000/training_args.bin +0 -3
baseline_bs4_top1/checkpoint-20000/config.json +0 -62
baseline_bs4_top1/checkpoint-20000/generation_config.json +0 -7
baseline_bs4_top1/checkpoint-20000/optimizer.pt +0 -3
baseline_bs4_top1/checkpoint-20000/pytorch_model.bin +0 -3
baseline_bs4_top1/checkpoint-20000/rng_state.pth +0 -3
baseline_bs4_top1/checkpoint-20000/scheduler.pt +0 -3
baseline_bs4_top1/checkpoint-20000/special_tokens_map.json +0 -107
baseline_bs4_top1/checkpoint-20000/spiece.model +0 -3
baseline_bs4_top1/checkpoint-20000/tokenizer.json +0 -0
baseline_bs4_top1/checkpoint-20000/tokenizer_config.json +0 -112
baseline_bs4_top1/checkpoint-20000/trainer_state.json +0 -259
baseline_bs4_top1/checkpoint-20000/training_args.bin +0 -3
baseline_bs4_top1/data_config.json +0 -1
baseline_bs4_top1/hfmodel_config.json +0 -1
baseline_bs4_top1/model_config.json +0 -1
baseline_bs4_top1/train_config.json +0 -1
baseline_bs4_top2.log +0 -0
baseline_bs4_top2/checkpoint-10000/config.json +0 -62
baseline_bs4_top2/checkpoint-10000/generation_config.json +0 -7
baseline_bs4_top2/checkpoint-10000/optimizer.pt +0 -3
baseline_bs4_top2/checkpoint-10000/pytorch_model.bin +0 -3
baseline_bs4_top2/checkpoint-10000/rng_state.pth +0 -3
baseline_bs4_top2/checkpoint-10000/scheduler.pt +0 -3
baseline_bs4_top2/checkpoint-10000/special_tokens_map.json +0 -107
baseline_bs4_top2/checkpoint-10000/spiece.model +0 -3
baseline_bs4_top2/checkpoint-10000/tokenizer.json +0 -0
baseline_bs4_top2/checkpoint-10000/tokenizer_config.json +0 -112
baseline_bs4_top2/checkpoint-10000/trainer_state.json +0 -139
baseline_bs4_top2/checkpoint-10000/training_args.bin +0 -3
baseline_bs4_top2/checkpoint-20000/config.json +0 -62
baseline_bs4_top2/checkpoint-20000/generation_config.json +0 -7
baseline_bs4_top2/checkpoint-20000/optimizer.pt +0 -3
baseline_bs4_top2/checkpoint-20000/pytorch_model.bin +0 -3
baseline_bs4_top2/checkpoint-20000/rng_state.pth +0 -3
baseline_bs4_top2/checkpoint-20000/scheduler.pt +0 -3
baseline_bs4_top2/checkpoint-20000/special_tokens_map.json +0 -107
baseline_bs4_top2/checkpoint-20000/spiece.model +0 -3

baseline_bs4_top1.log DELETED Viewed

The diff for this file is too large to render. See raw diff

baseline_bs4_top1/checkpoint-10000/config.json DELETED Viewed

@@ -1,62 +0,0 @@
-{
-  "_name_or_path": "google/flan-t5-base",
-  "architectures": [
-    "SoftRelPromptFlanT5"
-  ],
-  "classifier_dropout": 0.0,
-  "d_ff": 2048,
-  "d_kv": 64,
-  "d_model": 768,
-  "decoder_start_token_id": 0,
-  "dense_act_fn": "gelu_new",
-  "dropout_rate": 0.1,
-  "eos_token_id": 1,
-  "feed_forward_proj": "gated-gelu",
-  "initializer_factor": 1.0,
-  "is_encoder_decoder": true,
-  "is_gated_act": true,
-  "layer_norm_epsilon": 1e-06,
-  "model_type": "t5",
-  "n_positions": 512,
-  "num_decoder_layers": 12,
-  "num_heads": 12,
-  "num_layers": 12,
-  "output_past": true,
-  "pad_token_id": 0,
-  "relative_attention_max_distance": 128,
-  "relative_attention_num_buckets": 32,
-  "task_specific_params": {
-    "summarization": {
-      "early_stopping": true,
-      "length_penalty": 2.0,
-      "max_length": 200,
-      "min_length": 30,
-      "no_repeat_ngram_size": 3,
-      "num_beams": 4,
-      "prefix": "summarize: "
-    },
-    "translation_en_to_de": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to German: "
-    },
-    "translation_en_to_fr": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to French: "
-    },
-    "translation_en_to_ro": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to Romanian: "
-    }
-  },
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.33.1",
-  "use_cache": true,
-  "vocab_size": 32128
-}

baseline_bs4_top1/checkpoint-10000/generation_config.json DELETED Viewed

@@ -1,7 +0,0 @@
-{
-  "_from_model_config": true,
-  "decoder_start_token_id": 0,
-  "eos_token_id": 1,
-  "pad_token_id": 0,
-  "transformers_version": "4.33.1"
-}

baseline_bs4_top1/checkpoint-10000/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:09c0ceab0ed46acb0ec16d626a5fcc26798ea2d7a8eddcbfe151546635d969fb
-size 144545

baseline_bs4_top1/checkpoint-10000/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5e12a403413d9634b6e7804e3e2e1979f6566c15d89f196db0bc292bd6885c61
-size 990480513

baseline_bs4_top1/checkpoint-10000/rng_state.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:de4c87fc2dfbf5a627f8c2a0575b0effa1f233623d0165ebcd993a60952af24b
-size 14575

baseline_bs4_top1/checkpoint-10000/scheduler.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:89744af0d534dd9add5a42ebd997c43178aeb78f0f65e79af8379d8a5c11b73a
-size 627

baseline_bs4_top1/checkpoint-10000/special_tokens_map.json DELETED Viewed

@@ -1,107 +0,0 @@
-{
-  "additional_special_tokens": [
-    "<extra_id_0>",
-    "<extra_id_1>",
-    "<extra_id_2>",
-    "<extra_id_3>",
-    "<extra_id_4>",
-    "<extra_id_5>",
-    "<extra_id_6>",
-    "<extra_id_7>",
-    "<extra_id_8>",
-    "<extra_id_9>",
-    "<extra_id_10>",
-    "<extra_id_11>",
-    "<extra_id_12>",
-    "<extra_id_13>",
-    "<extra_id_14>",
-    "<extra_id_15>",
-    "<extra_id_16>",
-    "<extra_id_17>",
-    "<extra_id_18>",
-    "<extra_id_19>",
-    "<extra_id_20>",
-    "<extra_id_21>",
-    "<extra_id_22>",
-    "<extra_id_23>",
-    "<extra_id_24>",
-    "<extra_id_25>",
-    "<extra_id_26>",
-    "<extra_id_27>",
-    "<extra_id_28>",
-    "<extra_id_29>",
-    "<extra_id_30>",
-    "<extra_id_31>",
-    "<extra_id_32>",
-    "<extra_id_33>",
-    "<extra_id_34>",
-    "<extra_id_35>",
-    "<extra_id_36>",
-    "<extra_id_37>",
-    "<extra_id_38>",
-    "<extra_id_39>",
-    "<extra_id_40>",
-    "<extra_id_41>",
-    "<extra_id_42>",
-    "<extra_id_43>",
-    "<extra_id_44>",
-    "<extra_id_45>",
-    "<extra_id_46>",
-    "<extra_id_47>",
-    "<extra_id_48>",
-    "<extra_id_49>",
-    "<extra_id_50>",
-    "<extra_id_51>",
-    "<extra_id_52>",
-    "<extra_id_53>",
-    "<extra_id_54>",
-    "<extra_id_55>",
-    "<extra_id_56>",
-    "<extra_id_57>",
-    "<extra_id_58>",
-    "<extra_id_59>",
-    "<extra_id_60>",
-    "<extra_id_61>",
-    "<extra_id_62>",
-    "<extra_id_63>",
-    "<extra_id_64>",
-    "<extra_id_65>",
-    "<extra_id_66>",
-    "<extra_id_67>",
-    "<extra_id_68>",
-    "<extra_id_69>",
-    "<extra_id_70>",
-    "<extra_id_71>",
-    "<extra_id_72>",
-    "<extra_id_73>",
-    "<extra_id_74>",
-    "<extra_id_75>",
-    "<extra_id_76>",
-    "<extra_id_77>",
-    "<extra_id_78>",
-    "<extra_id_79>",
-    "<extra_id_80>",
-    "<extra_id_81>",
-    "<extra_id_82>",
-    "<extra_id_83>",
-    "<extra_id_84>",
-    "<extra_id_85>",
-    "<extra_id_86>",
-    "<extra_id_87>",
-    "<extra_id_88>",
-    "<extra_id_89>",
-    "<extra_id_90>",
-    "<extra_id_91>",
-    "<extra_id_92>",
-    "<extra_id_93>",
-    "<extra_id_94>",
-    "<extra_id_95>",
-    "<extra_id_96>",
-    "<extra_id_97>",
-    "<extra_id_98>",
-    "<extra_id_99>"
-  ],
-  "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
-}

baseline_bs4_top1/checkpoint-10000/spiece.model DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
-size 791656

baseline_bs4_top1/checkpoint-10000/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

baseline_bs4_top1/checkpoint-10000/tokenizer_config.json DELETED Viewed

@@ -1,112 +0,0 @@
-{
-  "additional_special_tokens": [
-    "<extra_id_0>",
-    "<extra_id_1>",
-    "<extra_id_2>",
-    "<extra_id_3>",
-    "<extra_id_4>",
-    "<extra_id_5>",
-    "<extra_id_6>",
-    "<extra_id_7>",
-    "<extra_id_8>",
-    "<extra_id_9>",
-    "<extra_id_10>",
-    "<extra_id_11>",
-    "<extra_id_12>",
-    "<extra_id_13>",
-    "<extra_id_14>",
-    "<extra_id_15>",
-    "<extra_id_16>",
-    "<extra_id_17>",
-    "<extra_id_18>",
-    "<extra_id_19>",
-    "<extra_id_20>",
-    "<extra_id_21>",
-    "<extra_id_22>",
-    "<extra_id_23>",
-    "<extra_id_24>",
-    "<extra_id_25>",
-    "<extra_id_26>",
-    "<extra_id_27>",
-    "<extra_id_28>",
-    "<extra_id_29>",
-    "<extra_id_30>",
-    "<extra_id_31>",
-    "<extra_id_32>",
-    "<extra_id_33>",
-    "<extra_id_34>",
-    "<extra_id_35>",
-    "<extra_id_36>",
-    "<extra_id_37>",
-    "<extra_id_38>",
-    "<extra_id_39>",
-    "<extra_id_40>",
-    "<extra_id_41>",
-    "<extra_id_42>",
-    "<extra_id_43>",
-    "<extra_id_44>",
-    "<extra_id_45>",
-    "<extra_id_46>",
-    "<extra_id_47>",
-    "<extra_id_48>",
-    "<extra_id_49>",
-    "<extra_id_50>",
-    "<extra_id_51>",
-    "<extra_id_52>",
-    "<extra_id_53>",
-    "<extra_id_54>",
-    "<extra_id_55>",
-    "<extra_id_56>",
-    "<extra_id_57>",
-    "<extra_id_58>",
-    "<extra_id_59>",
-    "<extra_id_60>",
-    "<extra_id_61>",
-    "<extra_id_62>",
-    "<extra_id_63>",
-    "<extra_id_64>",
-    "<extra_id_65>",
-    "<extra_id_66>",
-    "<extra_id_67>",
-    "<extra_id_68>",
-    "<extra_id_69>",
-    "<extra_id_70>",
-    "<extra_id_71>",
-    "<extra_id_72>",
-    "<extra_id_73>",
-    "<extra_id_74>",
-    "<extra_id_75>",
-    "<extra_id_76>",
-    "<extra_id_77>",
-    "<extra_id_78>",
-    "<extra_id_79>",
-    "<extra_id_80>",
-    "<extra_id_81>",
-    "<extra_id_82>",
-    "<extra_id_83>",
-    "<extra_id_84>",
-    "<extra_id_85>",
-    "<extra_id_86>",
-    "<extra_id_87>",
-    "<extra_id_88>",
-    "<extra_id_89>",
-    "<extra_id_90>",
-    "<extra_id_91>",
-    "<extra_id_92>",
-    "<extra_id_93>",
-    "<extra_id_94>",
-    "<extra_id_95>",
-    "<extra_id_96>",
-    "<extra_id_97>",
-    "<extra_id_98>",
-    "<extra_id_99>"
-  ],
-  "clean_up_tokenization_spaces": true,
-  "eos_token": "</s>",
-  "extra_ids": 100,
-  "model_max_length": 512,
-  "pad_token": "<pad>",
-  "sp_model_kwargs": {},
-  "tokenizer_class": "T5Tokenizer",
-  "unk_token": "<unk>"
-}

baseline_bs4_top1/checkpoint-10000/trainer_state.json DELETED Viewed

@@ -1,139 +0,0 @@
-{
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.008527584604298755,
-  "eval_steps": 500,
-  "global_step": 10000,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.6476,
-      "step": 500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5631,
-      "step": 1000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5854,
-      "step": 1500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5557,
-      "step": 2000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5653,
-      "step": 2500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5457,
-      "step": 3000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5552,
-      "step": 3500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5559,
-      "step": 4000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5465,
-      "step": 4500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5481,
-      "step": 5000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5311,
-      "step": 5500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5356,
-      "step": 6000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5502,
-      "step": 6500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.527,
-      "step": 7000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5383,
-      "step": 7500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5064,
-      "step": 8000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5271,
-      "step": 8500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5295,
-      "step": 9000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5098,
-      "step": 9500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.53,
-      "step": 10000
-    }
-  ],
-  "logging_steps": 500,
-  "max_steps": 20000,
-  "num_train_epochs": 1,
-  "save_steps": 10000,
-  "total_flos": 4.829257277256499e+16,
-  "trial_name": null,
-  "trial_params": null
-}

baseline_bs4_top1/checkpoint-10000/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:09ab173dc6fc36e5747aa5255939206d9c965bbe4469338c8b96de7a0faed00a
-size 4539

baseline_bs4_top1/checkpoint-20000/config.json DELETED Viewed

@@ -1,62 +0,0 @@
-{
-  "_name_or_path": "google/flan-t5-base",
-  "architectures": [
-    "SoftRelPromptFlanT5"
-  ],
-  "classifier_dropout": 0.0,
-  "d_ff": 2048,
-  "d_kv": 64,
-  "d_model": 768,
-  "decoder_start_token_id": 0,
-  "dense_act_fn": "gelu_new",
-  "dropout_rate": 0.1,
-  "eos_token_id": 1,
-  "feed_forward_proj": "gated-gelu",
-  "initializer_factor": 1.0,
-  "is_encoder_decoder": true,
-  "is_gated_act": true,
-  "layer_norm_epsilon": 1e-06,
-  "model_type": "t5",
-  "n_positions": 512,
-  "num_decoder_layers": 12,
-  "num_heads": 12,
-  "num_layers": 12,
-  "output_past": true,
-  "pad_token_id": 0,
-  "relative_attention_max_distance": 128,
-  "relative_attention_num_buckets": 32,
-  "task_specific_params": {
-    "summarization": {
-      "early_stopping": true,
-      "length_penalty": 2.0,
-      "max_length": 200,
-      "min_length": 30,
-      "no_repeat_ngram_size": 3,
-      "num_beams": 4,
-      "prefix": "summarize: "
-    },
-    "translation_en_to_de": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to German: "
-    },
-    "translation_en_to_fr": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to French: "
-    },
-    "translation_en_to_ro": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to Romanian: "
-    }
-  },
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.33.1",
-  "use_cache": true,
-  "vocab_size": 32128
-}

baseline_bs4_top1/checkpoint-20000/generation_config.json DELETED Viewed

@@ -1,7 +0,0 @@
-{
-  "_from_model_config": true,
-  "decoder_start_token_id": 0,
-  "eos_token_id": 1,
-  "pad_token_id": 0,
-  "transformers_version": "4.33.1"
-}

baseline_bs4_top1/checkpoint-20000/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d89bdef1fa8581fbf366465c2c48a742068cc56363ee861230021037b25a7a53
-size 144545

baseline_bs4_top1/checkpoint-20000/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:979953b9f2b37eb669d4d0a5cdaf0a0fa69b4432d7bf17322cc56e064d696559
-size 990480513

baseline_bs4_top1/checkpoint-20000/rng_state.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9ee65fd173e43a5bb96f2f07bf1e86b7666cd24f1ff7c2f132f19e39ccc7b2b9
-size 14575

baseline_bs4_top1/checkpoint-20000/scheduler.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a3ddfbcd33fc0d81c222807ca3e42cd9654f7e531f573941ed9599b1e07e0373
-size 627

baseline_bs4_top1/checkpoint-20000/special_tokens_map.json DELETED Viewed

@@ -1,107 +0,0 @@
-{
-  "additional_special_tokens": [
-    "<extra_id_0>",
-    "<extra_id_1>",
-    "<extra_id_2>",
-    "<extra_id_3>",
-    "<extra_id_4>",
-    "<extra_id_5>",
-    "<extra_id_6>",
-    "<extra_id_7>",
-    "<extra_id_8>",
-    "<extra_id_9>",
-    "<extra_id_10>",
-    "<extra_id_11>",
-    "<extra_id_12>",
-    "<extra_id_13>",
-    "<extra_id_14>",
-    "<extra_id_15>",
-    "<extra_id_16>",
-    "<extra_id_17>",
-    "<extra_id_18>",
-    "<extra_id_19>",
-    "<extra_id_20>",
-    "<extra_id_21>",
-    "<extra_id_22>",
-    "<extra_id_23>",
-    "<extra_id_24>",
-    "<extra_id_25>",
-    "<extra_id_26>",
-    "<extra_id_27>",
-    "<extra_id_28>",
-    "<extra_id_29>",
-    "<extra_id_30>",
-    "<extra_id_31>",
-    "<extra_id_32>",
-    "<extra_id_33>",
-    "<extra_id_34>",
-    "<extra_id_35>",
-    "<extra_id_36>",
-    "<extra_id_37>",
-    "<extra_id_38>",
-    "<extra_id_39>",
-    "<extra_id_40>",
-    "<extra_id_41>",
-    "<extra_id_42>",
-    "<extra_id_43>",
-    "<extra_id_44>",
-    "<extra_id_45>",
-    "<extra_id_46>",
-    "<extra_id_47>",
-    "<extra_id_48>",
-    "<extra_id_49>",
-    "<extra_id_50>",
-    "<extra_id_51>",
-    "<extra_id_52>",
-    "<extra_id_53>",
-    "<extra_id_54>",
-    "<extra_id_55>",
-    "<extra_id_56>",
-    "<extra_id_57>",
-    "<extra_id_58>",
-    "<extra_id_59>",
-    "<extra_id_60>",
-    "<extra_id_61>",
-    "<extra_id_62>",
-    "<extra_id_63>",
-    "<extra_id_64>",
-    "<extra_id_65>",
-    "<extra_id_66>",
-    "<extra_id_67>",
-    "<extra_id_68>",
-    "<extra_id_69>",
-    "<extra_id_70>",
-    "<extra_id_71>",
-    "<extra_id_72>",
-    "<extra_id_73>",
-    "<extra_id_74>",
-    "<extra_id_75>",
-    "<extra_id_76>",
-    "<extra_id_77>",
-    "<extra_id_78>",
-    "<extra_id_79>",
-    "<extra_id_80>",
-    "<extra_id_81>",
-    "<extra_id_82>",
-    "<extra_id_83>",
-    "<extra_id_84>",
-    "<extra_id_85>",
-    "<extra_id_86>",
-    "<extra_id_87>",
-    "<extra_id_88>",
-    "<extra_id_89>",
-    "<extra_id_90>",
-    "<extra_id_91>",
-    "<extra_id_92>",
-    "<extra_id_93>",
-    "<extra_id_94>",
-    "<extra_id_95>",
-    "<extra_id_96>",
-    "<extra_id_97>",
-    "<extra_id_98>",
-    "<extra_id_99>"
-  ],
-  "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
-}

baseline_bs4_top1/checkpoint-20000/spiece.model DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
-size 791656

baseline_bs4_top1/checkpoint-20000/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

baseline_bs4_top1/checkpoint-20000/tokenizer_config.json DELETED Viewed

@@ -1,112 +0,0 @@
-{
-  "additional_special_tokens": [
-    "<extra_id_0>",
-    "<extra_id_1>",
-    "<extra_id_2>",
-    "<extra_id_3>",
-    "<extra_id_4>",
-    "<extra_id_5>",
-    "<extra_id_6>",
-    "<extra_id_7>",
-    "<extra_id_8>",
-    "<extra_id_9>",
-    "<extra_id_10>",
-    "<extra_id_11>",
-    "<extra_id_12>",
-    "<extra_id_13>",
-    "<extra_id_14>",
-    "<extra_id_15>",
-    "<extra_id_16>",
-    "<extra_id_17>",
-    "<extra_id_18>",
-    "<extra_id_19>",
-    "<extra_id_20>",
-    "<extra_id_21>",
-    "<extra_id_22>",
-    "<extra_id_23>",
-    "<extra_id_24>",
-    "<extra_id_25>",
-    "<extra_id_26>",
-    "<extra_id_27>",
-    "<extra_id_28>",
-    "<extra_id_29>",
-    "<extra_id_30>",
-    "<extra_id_31>",
-    "<extra_id_32>",
-    "<extra_id_33>",
-    "<extra_id_34>",
-    "<extra_id_35>",
-    "<extra_id_36>",
-    "<extra_id_37>",
-    "<extra_id_38>",
-    "<extra_id_39>",
-    "<extra_id_40>",
-    "<extra_id_41>",
-    "<extra_id_42>",
-    "<extra_id_43>",
-    "<extra_id_44>",
-    "<extra_id_45>",
-    "<extra_id_46>",
-    "<extra_id_47>",
-    "<extra_id_48>",
-    "<extra_id_49>",
-    "<extra_id_50>",
-    "<extra_id_51>",
-    "<extra_id_52>",
-    "<extra_id_53>",
-    "<extra_id_54>",
-    "<extra_id_55>",
-    "<extra_id_56>",
-    "<extra_id_57>",
-    "<extra_id_58>",
-    "<extra_id_59>",
-    "<extra_id_60>",
-    "<extra_id_61>",
-    "<extra_id_62>",
-    "<extra_id_63>",
-    "<extra_id_64>",
-    "<extra_id_65>",
-    "<extra_id_66>",
-    "<extra_id_67>",
-    "<extra_id_68>",
-    "<extra_id_69>",
-    "<extra_id_70>",
-    "<extra_id_71>",
-    "<extra_id_72>",
-    "<extra_id_73>",
-    "<extra_id_74>",
-    "<extra_id_75>",
-    "<extra_id_76>",
-    "<extra_id_77>",
-    "<extra_id_78>",
-    "<extra_id_79>",
-    "<extra_id_80>",
-    "<extra_id_81>",
-    "<extra_id_82>",
-    "<extra_id_83>",
-    "<extra_id_84>",
-    "<extra_id_85>",
-    "<extra_id_86>",
-    "<extra_id_87>",
-    "<extra_id_88>",
-    "<extra_id_89>",
-    "<extra_id_90>",
-    "<extra_id_91>",
-    "<extra_id_92>",
-    "<extra_id_93>",
-    "<extra_id_94>",
-    "<extra_id_95>",
-    "<extra_id_96>",
-    "<extra_id_97>",
-    "<extra_id_98>",
-    "<extra_id_99>"
-  ],
-  "clean_up_tokenization_spaces": true,
-  "eos_token": "</s>",
-  "extra_ids": 100,
-  "model_max_length": 512,
-  "pad_token": "<pad>",
-  "sp_model_kwargs": {},
-  "tokenizer_class": "T5Tokenizer",
-  "unk_token": "<unk>"
-}

baseline_bs4_top1/checkpoint-20000/trainer_state.json DELETED Viewed

@@ -1,259 +0,0 @@
-{
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.01705516920859751,
-  "eval_steps": 500,
-  "global_step": 20000,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.6476,
-      "step": 500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5631,
-      "step": 1000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5854,
-      "step": 1500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5557,
-      "step": 2000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5653,
-      "step": 2500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5457,
-      "step": 3000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5552,
-      "step": 3500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5559,
-      "step": 4000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5465,
-      "step": 4500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5481,
-      "step": 5000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5311,
-      "step": 5500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5356,
-      "step": 6000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5502,
-      "step": 6500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.527,
-      "step": 7000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5383,
-      "step": 7500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5064,
-      "step": 8000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5271,
-      "step": 8500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5295,
-      "step": 9000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5098,
-      "step": 9500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.53,
-      "step": 10000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5387,
-      "step": 10500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5176,
-      "step": 11000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5296,
-      "step": 11500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5416,
-      "step": 12000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.514,
-      "step": 12500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.4975,
-      "step": 13000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5488,
-      "step": 13500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.4987,
-      "step": 14000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.4859,
-      "step": 14500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5495,
-      "step": 15000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5347,
-      "step": 15500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5225,
-      "step": 16000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.537,
-      "step": 16500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.512,
-      "step": 17000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5263,
-      "step": 17500
-    },
-    {
-      "epoch": 0.02,
-      "learning_rate": 0.01,
-      "loss": 1.5188,
-      "step": 18000
-    },
-    {
-      "epoch": 0.02,
-      "learning_rate": 0.01,
-      "loss": 1.5163,
-      "step": 18500
-    },
-    {
-      "epoch": 0.02,
-      "learning_rate": 0.01,
-      "loss": 1.5372,
-      "step": 19000
-    },
-    {
-      "epoch": 0.02,
-      "learning_rate": 0.01,
-      "loss": 1.5225,
-      "step": 19500
-    },
-    {
-      "epoch": 0.02,
-      "learning_rate": 0.01,
-      "loss": 1.5114,
-      "step": 20000
-    }
-  ],
-  "logging_steps": 500,
-  "max_steps": 20000,
-  "num_train_epochs": 1,
-  "save_steps": 10000,
-  "total_flos": 9.652535293088563e+16,
-  "trial_name": null,
-  "trial_params": null
-}

baseline_bs4_top1/checkpoint-20000/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:09ab173dc6fc36e5747aa5255939206d9c965bbe4469338c8b96de7a0faed00a
-size 4539

baseline_bs4_top1/data_config.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"dataset_config_name": null, "overwrite_cache": false, "preprocessing_num_workers": null, "train_file": "/home/jhju/datasets/nils.sentence.transformers/ce.minilm.hardneg.vL.jsonl", "eval_file": null, "max_p_length": 128, "max_q_length": 16, "m_negative_per_example": 4, "m_positive_per_example": 4, "random_corrupt_rate": 0.0}

baseline_bs4_top1/hfmodel_config.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"model_name_or_path": "google/flan-t5-base", "config_name": "google/flan-t5-base", "tokenizer_name": "google/flan-t5-base", "cache_dir": null, "use_fast_tokenizer": true, "use_auth_token": false}

baseline_bs4_top1/model_config.json DELETED Viewed

@@ -1 +0,0 @@

- {"add_classification_head": false, "baseline_prefix": "{1}", "instruction_prompt": "Generate a question for the passage with relevance label: ", "instruction_prompt_idx": [6939, 2206, 3, 9, 822, 21, 8, 5454, 28, 20208, 3783, 10, 3], "pos_neg_prompt": null, "pos_neg_prompt_idx": null, "relevant_prompt": "true true true true true", "relevant_prompt_idx": [1176, 1176, 1176, 1176, 1176], "irrelevant_prompt": "false false false false false", "irrelevant_prompt_idx": [6136, 6136, 6136, 6136, 6136], "head_size": 64, "pooling": "mean", "activation": "sigmoid", "latent_size": 128, "activate_prompt_attention": true}

baseline_bs4_top1/train_config.json DELETED Viewed

@@ -1 +0,0 @@

- {"output_dir": "/work/jhju/readqg-baseline//baseline_bs4_top1", "overwrite_output_dir": true, "do_train": true, "do_eval": false, "do_predict": false, "evaluation_strategy": "no", "prediction_loss_only": false, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, "gradient_accumulation_steps": 1, "eval_accumulation_steps": null, "eval_delay": 0, "learning_rate": 0.01, "weight_decay": 0.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1.0, "num_train_epochs": 3.0, "max_steps": 20000, "lr_scheduler_type": "constant", "warmup_ratio": 0.0, "warmup_steps": 0, "log_level": "passive", "log_level_replica": "warning", "log_on_each_node": true, "logging_dir": "./logs", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 500, "logging_nan_inf_filter": true, "save_strategy": "steps", "save_steps": 10000, "save_total_limit": 5, "save_safetensors": false, "save_on_each_node": false, "no_cuda": false, "use_cpu": false, "use_mps_device": false, "seed": 42, "data_seed": null, "jit_mode_eval": false, "use_ipex": false, "bf16": false, "fp16": false, "fp16_opt_level": "O1", "half_precision_backend": "auto", "bf16_full_eval": false, "fp16_full_eval": false, "tf32": null, "local_rank": 0, "ddp_backend": null, "tpu_num_cores": null, "tpu_metrics_debug": false, "debug": [], "dataloader_drop_last": false, "eval_steps": 500, "dataloader_num_workers": 0, "past_index": -1, "run_name": "prompt=5_batch=4_sample=top1", "disable_tqdm": false, "remove_unused_columns": false, "label_names": null, "load_best_model_at_end": false, "metric_for_best_model": null, "greater_is_better": null, "ignore_data_skip": false, "sharded_ddp": [], "fsdp": [], "fsdp_min_num_params": 0, "fsdp_config": {"min_num_params": 0, "xla": false, "xla_fsdp_grad_ckpt": false}, "fsdp_transformer_layer_cls_to_wrap": null, "deepspeed": null, "label_smoothing_factor": 0.0, "optim": 
"adamw_torch", "optim_args": null, "adafactor": false, "group_by_length": false, "length_column_name": "length", "report_to": ["wandb"], "ddp_find_unused_parameters": null, "ddp_bucket_cap_mb": null, "ddp_broadcast_buffers": null, "dataloader_pin_memory": true, "skip_memory_metrics": true, "use_legacy_prediction_loop": false, "push_to_hub": false, "resume_from_checkpoint": null, "hub_model_id": null, "hub_strategy": "every_save", "hub_token": null, "hub_private_repo": false, "hub_always_push": false, "gradient_checkpointing": true, "include_inputs_for_metrics": false, "fp16_backend": "auto", "push_to_hub_model_id": null, "push_to_hub_organization": null, "push_to_hub_token": null, "_n_gpu": 1, "mp_parameters": "", "auto_find_batch_size": false, "full_determinism": false, "torchdynamo": null, "ray_scope": "last", "ddp_timeout": 1800, "torch_compile": false, "torch_compile_backend": null, "torch_compile_mode": null, "dispatch_batches": null, "sortish_sampler": false, "predict_with_generate": false, "generation_max_length": null, "generation_num_beams": null, "generation_config": null, "random_init": false, "enable_unlikelihood": false, "enable_calibration": null, "calibration_margin_ngrams": null, "gamma": 1.0, "enable_similarity_loss": null, "document_wise_contrastive": false, "relevance_wise_contrastive": false, "tau": 1.0, "sample_random": true, "sample_topk": 1, "enable_vae_loss": false, "k": 0.0025, "x0": 2500, "annealing_fn": "logistic"}

baseline_bs4_top2.log DELETED Viewed

The diff for this file is too large to render. See raw diff

baseline_bs4_top2/checkpoint-10000/config.json DELETED Viewed

@@ -1,62 +0,0 @@
-{
-  "_name_or_path": "google/flan-t5-base",
-  "architectures": [
-    "SoftRelPromptFlanT5"
-  ],
-  "classifier_dropout": 0.0,
-  "d_ff": 2048,
-  "d_kv": 64,
-  "d_model": 768,
-  "decoder_start_token_id": 0,
-  "dense_act_fn": "gelu_new",
-  "dropout_rate": 0.1,
-  "eos_token_id": 1,
-  "feed_forward_proj": "gated-gelu",
-  "initializer_factor": 1.0,
-  "is_encoder_decoder": true,
-  "is_gated_act": true,
-  "layer_norm_epsilon": 1e-06,
-  "model_type": "t5",
-  "n_positions": 512,
-  "num_decoder_layers": 12,
-  "num_heads": 12,
-  "num_layers": 12,
-  "output_past": true,
-  "pad_token_id": 0,
-  "relative_attention_max_distance": 128,
-  "relative_attention_num_buckets": 32,
-  "task_specific_params": {
-    "summarization": {
-      "early_stopping": true,
-      "length_penalty": 2.0,
-      "max_length": 200,
-      "min_length": 30,
-      "no_repeat_ngram_size": 3,
-      "num_beams": 4,
-      "prefix": "summarize: "
-    },
-    "translation_en_to_de": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to German: "
-    },
-    "translation_en_to_fr": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to French: "
-    },
-    "translation_en_to_ro": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to Romanian: "
-    }
-  },
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.33.1",
-  "use_cache": true,
-  "vocab_size": 32128
-}

baseline_bs4_top2/checkpoint-10000/generation_config.json DELETED Viewed

@@ -1,7 +0,0 @@
-{
-  "_from_model_config": true,
-  "decoder_start_token_id": 0,
-  "eos_token_id": 1,
-  "pad_token_id": 0,
-  "transformers_version": "4.33.1"
-}

baseline_bs4_top2/checkpoint-10000/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:bc67f5c5dc22f7c375b0c2101d705d270abdc690e52970bb6e6d499ca53cc6a7
-size 144545

baseline_bs4_top2/checkpoint-10000/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d366a12d59545d20daa0dbcde33cbac47216dd084289a717889298622bb0e2ae
-size 990480513

baseline_bs4_top2/checkpoint-10000/rng_state.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:89428fc2bed9ce2a97559ad926183a8a8fb059a55491935c6cdb5773685812f4
-size 14511

baseline_bs4_top2/checkpoint-10000/scheduler.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:89744af0d534dd9add5a42ebd997c43178aeb78f0f65e79af8379d8a5c11b73a
-size 627

baseline_bs4_top2/checkpoint-10000/special_tokens_map.json DELETED Viewed

@@ -1,107 +0,0 @@
-{
-  "additional_special_tokens": [
-    "<extra_id_0>",
-    "<extra_id_1>",
-    "<extra_id_2>",
-    "<extra_id_3>",
-    "<extra_id_4>",
-    "<extra_id_5>",
-    "<extra_id_6>",
-    "<extra_id_7>",
-    "<extra_id_8>",
-    "<extra_id_9>",
-    "<extra_id_10>",
-    "<extra_id_11>",
-    "<extra_id_12>",
-    "<extra_id_13>",
-    "<extra_id_14>",
-    "<extra_id_15>",
-    "<extra_id_16>",
-    "<extra_id_17>",
-    "<extra_id_18>",
-    "<extra_id_19>",
-    "<extra_id_20>",
-    "<extra_id_21>",
-    "<extra_id_22>",
-    "<extra_id_23>",
-    "<extra_id_24>",
-    "<extra_id_25>",
-    "<extra_id_26>",
-    "<extra_id_27>",
-    "<extra_id_28>",
-    "<extra_id_29>",
-    "<extra_id_30>",
-    "<extra_id_31>",
-    "<extra_id_32>",
-    "<extra_id_33>",
-    "<extra_id_34>",
-    "<extra_id_35>",
-    "<extra_id_36>",
-    "<extra_id_37>",
-    "<extra_id_38>",
-    "<extra_id_39>",
-    "<extra_id_40>",
-    "<extra_id_41>",
-    "<extra_id_42>",
-    "<extra_id_43>",
-    "<extra_id_44>",
-    "<extra_id_45>",
-    "<extra_id_46>",
-    "<extra_id_47>",
-    "<extra_id_48>",
-    "<extra_id_49>",
-    "<extra_id_50>",
-    "<extra_id_51>",
-    "<extra_id_52>",
-    "<extra_id_53>",
-    "<extra_id_54>",
-    "<extra_id_55>",
-    "<extra_id_56>",
-    "<extra_id_57>",
-    "<extra_id_58>",
-    "<extra_id_59>",
-    "<extra_id_60>",
-    "<extra_id_61>",
-    "<extra_id_62>",
-    "<extra_id_63>",
-    "<extra_id_64>",
-    "<extra_id_65>",
-    "<extra_id_66>",
-    "<extra_id_67>",
-    "<extra_id_68>",
-    "<extra_id_69>",
-    "<extra_id_70>",
-    "<extra_id_71>",
-    "<extra_id_72>",
-    "<extra_id_73>",
-    "<extra_id_74>",
-    "<extra_id_75>",
-    "<extra_id_76>",
-    "<extra_id_77>",
-    "<extra_id_78>",
-    "<extra_id_79>",
-    "<extra_id_80>",
-    "<extra_id_81>",
-    "<extra_id_82>",
-    "<extra_id_83>",
-    "<extra_id_84>",
-    "<extra_id_85>",
-    "<extra_id_86>",
-    "<extra_id_87>",
-    "<extra_id_88>",
-    "<extra_id_89>",
-    "<extra_id_90>",
-    "<extra_id_91>",
-    "<extra_id_92>",
-    "<extra_id_93>",
-    "<extra_id_94>",
-    "<extra_id_95>",
-    "<extra_id_96>",
-    "<extra_id_97>",
-    "<extra_id_98>",
-    "<extra_id_99>"
-  ],
-  "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
-}

baseline_bs4_top2/checkpoint-10000/spiece.model DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
-size 791656

baseline_bs4_top2/checkpoint-10000/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

baseline_bs4_top2/checkpoint-10000/tokenizer_config.json DELETED Viewed

@@ -1,112 +0,0 @@
-{
-  "additional_special_tokens": [
-    "<extra_id_0>",
-    "<extra_id_1>",
-    "<extra_id_2>",
-    "<extra_id_3>",
-    "<extra_id_4>",
-    "<extra_id_5>",
-    "<extra_id_6>",
-    "<extra_id_7>",
-    "<extra_id_8>",
-    "<extra_id_9>",
-    "<extra_id_10>",
-    "<extra_id_11>",
-    "<extra_id_12>",
-    "<extra_id_13>",
-    "<extra_id_14>",
-    "<extra_id_15>",
-    "<extra_id_16>",
-    "<extra_id_17>",
-    "<extra_id_18>",
-    "<extra_id_19>",
-    "<extra_id_20>",
-    "<extra_id_21>",
-    "<extra_id_22>",
-    "<extra_id_23>",
-    "<extra_id_24>",
-    "<extra_id_25>",
-    "<extra_id_26>",
-    "<extra_id_27>",
-    "<extra_id_28>",
-    "<extra_id_29>",
-    "<extra_id_30>",
-    "<extra_id_31>",
-    "<extra_id_32>",
-    "<extra_id_33>",
-    "<extra_id_34>",
-    "<extra_id_35>",
-    "<extra_id_36>",
-    "<extra_id_37>",
-    "<extra_id_38>",
-    "<extra_id_39>",
-    "<extra_id_40>",
-    "<extra_id_41>",
-    "<extra_id_42>",
-    "<extra_id_43>",
-    "<extra_id_44>",
-    "<extra_id_45>",
-    "<extra_id_46>",
-    "<extra_id_47>",
-    "<extra_id_48>",
-    "<extra_id_49>",
-    "<extra_id_50>",
-    "<extra_id_51>",
-    "<extra_id_52>",
-    "<extra_id_53>",
-    "<extra_id_54>",
-    "<extra_id_55>",
-    "<extra_id_56>",
-    "<extra_id_57>",
-    "<extra_id_58>",
-    "<extra_id_59>",
-    "<extra_id_60>",
-    "<extra_id_61>",
-    "<extra_id_62>",
-    "<extra_id_63>",
-    "<extra_id_64>",
-    "<extra_id_65>",
-    "<extra_id_66>",
-    "<extra_id_67>",
-    "<extra_id_68>",
-    "<extra_id_69>",
-    "<extra_id_70>",
-    "<extra_id_71>",
-    "<extra_id_72>",
-    "<extra_id_73>",
-    "<extra_id_74>",
-    "<extra_id_75>",
-    "<extra_id_76>",
-    "<extra_id_77>",
-    "<extra_id_78>",
-    "<extra_id_79>",
-    "<extra_id_80>",
-    "<extra_id_81>",
-    "<extra_id_82>",
-    "<extra_id_83>",
-    "<extra_id_84>",
-    "<extra_id_85>",
-    "<extra_id_86>",
-    "<extra_id_87>",
-    "<extra_id_88>",
-    "<extra_id_89>",
-    "<extra_id_90>",
-    "<extra_id_91>",
-    "<extra_id_92>",
-    "<extra_id_93>",
-    "<extra_id_94>",
-    "<extra_id_95>",
-    "<extra_id_96>",
-    "<extra_id_97>",
-    "<extra_id_98>",
-    "<extra_id_99>"
-  ],
-  "clean_up_tokenization_spaces": true,
-  "eos_token": "</s>",
-  "extra_ids": 100,
-  "model_max_length": 512,
-  "pad_token": "<pad>",
-  "sp_model_kwargs": {},
-  "tokenizer_class": "T5Tokenizer",
-  "unk_token": "<unk>"
-}

baseline_bs4_top2/checkpoint-10000/trainer_state.json DELETED Viewed

@@ -1,139 +0,0 @@
-{
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.008527584604298755,
-  "eval_steps": 500,
-  "global_step": 10000,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.6449,
-      "step": 500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5651,
-      "step": 1000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5875,
-      "step": 1500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5539,
-      "step": 2000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5524,
-      "step": 2500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5449,
-      "step": 3000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.549,
-      "step": 3500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5671,
-      "step": 4000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5251,
-      "step": 4500
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5532,
-      "step": 5000
-    },
-    {
-      "epoch": 0.0,
-      "learning_rate": 0.01,
-      "loss": 1.5262,
-      "step": 5500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5306,
-      "step": 6000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5273,
-      "step": 6500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.519,
-      "step": 7000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5305,
-      "step": 7500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.507,
-      "step": 8000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5255,
-      "step": 8500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5106,
-      "step": 9000
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5224,
-      "step": 9500
-    },
-    {
-      "epoch": 0.01,
-      "learning_rate": 0.01,
-      "loss": 1.5241,
-      "step": 10000
-    }
-  ],
-  "logging_steps": 500,
-  "max_steps": 20000,
-  "num_train_epochs": 1,
-  "save_steps": 10000,
-  "total_flos": 4.829257277256499e+16,
-  "trial_name": null,
-  "trial_params": null
-}

baseline_bs4_top2/checkpoint-10000/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:af2adb9b227cf0f18c45853aa1db09565dc63c29547902da6f50352111a9a5e7
-size 4539

baseline_bs4_top2/checkpoint-20000/config.json DELETED Viewed

@@ -1,62 +0,0 @@
-{
-  "_name_or_path": "google/flan-t5-base",
-  "architectures": [
-    "SoftRelPromptFlanT5"
-  ],
-  "classifier_dropout": 0.0,
-  "d_ff": 2048,
-  "d_kv": 64,
-  "d_model": 768,
-  "decoder_start_token_id": 0,
-  "dense_act_fn": "gelu_new",
-  "dropout_rate": 0.1,
-  "eos_token_id": 1,
-  "feed_forward_proj": "gated-gelu",
-  "initializer_factor": 1.0,
-  "is_encoder_decoder": true,
-  "is_gated_act": true,
-  "layer_norm_epsilon": 1e-06,
-  "model_type": "t5",
-  "n_positions": 512,
-  "num_decoder_layers": 12,
-  "num_heads": 12,
-  "num_layers": 12,
-  "output_past": true,
-  "pad_token_id": 0,
-  "relative_attention_max_distance": 128,
-  "relative_attention_num_buckets": 32,
-  "task_specific_params": {
-    "summarization": {
-      "early_stopping": true,
-      "length_penalty": 2.0,
-      "max_length": 200,
-      "min_length": 30,
-      "no_repeat_ngram_size": 3,
-      "num_beams": 4,
-      "prefix": "summarize: "
-    },
-    "translation_en_to_de": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to German: "
-    },
-    "translation_en_to_fr": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to French: "
-    },
-    "translation_en_to_ro": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to Romanian: "
-    }
-  },
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.33.1",
-  "use_cache": true,
-  "vocab_size": 32128
-}

baseline_bs4_top2/checkpoint-20000/generation_config.json DELETED Viewed

@@ -1,7 +0,0 @@
-{
-  "_from_model_config": true,
-  "decoder_start_token_id": 0,
-  "eos_token_id": 1,
-  "pad_token_id": 0,
-  "transformers_version": "4.33.1"
-}

baseline_bs4_top2/checkpoint-20000/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a4e916e263fcf89b5d721173eee18cb5b06899e27ec18f26d0a22ed9e2016282
-size 144545

baseline_bs4_top2/checkpoint-20000/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3835ee505ebe813e921bacd0eea2ed25ed36c4ff4e46de9431631321049a3a53
-size 990480513

baseline_bs4_top2/checkpoint-20000/rng_state.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:973f9fb61da573e339acd801c6477b8cc4497eed865244e05b84a0eedbe74768
-size 14511

baseline_bs4_top2/checkpoint-20000/scheduler.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a3ddfbcd33fc0d81c222807ca3e42cd9654f7e531f573941ed9599b1e07e0373
-size 627

baseline_bs4_top2/checkpoint-20000/special_tokens_map.json DELETED Viewed

@@ -1,107 +0,0 @@
-{
-  "additional_special_tokens": [
-    "<extra_id_0>",
-    "<extra_id_1>",
-    "<extra_id_2>",
-    "<extra_id_3>",
-    "<extra_id_4>",
-    "<extra_id_5>",
-    "<extra_id_6>",
-    "<extra_id_7>",
-    "<extra_id_8>",
-    "<extra_id_9>",
-    "<extra_id_10>",
-    "<extra_id_11>",
-    "<extra_id_12>",
-    "<extra_id_13>",
-    "<extra_id_14>",
-    "<extra_id_15>",
-    "<extra_id_16>",
-    "<extra_id_17>",
-    "<extra_id_18>",
-    "<extra_id_19>",
-    "<extra_id_20>",
-    "<extra_id_21>",
-    "<extra_id_22>",
-    "<extra_id_23>",
-    "<extra_id_24>",
-    "<extra_id_25>",
-    "<extra_id_26>",
-    "<extra_id_27>",
-    "<extra_id_28>",
-    "<extra_id_29>",
-    "<extra_id_30>",
-    "<extra_id_31>",
-    "<extra_id_32>",
-    "<extra_id_33>",
-    "<extra_id_34>",
-    "<extra_id_35>",
-    "<extra_id_36>",
-    "<extra_id_37>",
-    "<extra_id_38>",
-    "<extra_id_39>",
-    "<extra_id_40>",
-    "<extra_id_41>",
-    "<extra_id_42>",
-    "<extra_id_43>",
-    "<extra_id_44>",
-    "<extra_id_45>",
-    "<extra_id_46>",
-    "<extra_id_47>",
-    "<extra_id_48>",
-    "<extra_id_49>",
-    "<extra_id_50>",
-    "<extra_id_51>",
-    "<extra_id_52>",
-    "<extra_id_53>",
-    "<extra_id_54>",
-    "<extra_id_55>",
-    "<extra_id_56>",
-    "<extra_id_57>",
-    "<extra_id_58>",
-    "<extra_id_59>",
-    "<extra_id_60>",
-    "<extra_id_61>",
-    "<extra_id_62>",
-    "<extra_id_63>",
-    "<extra_id_64>",
-    "<extra_id_65>",
-    "<extra_id_66>",
-    "<extra_id_67>",
-    "<extra_id_68>",
-    "<extra_id_69>",
-    "<extra_id_70>",
-    "<extra_id_71>",
-    "<extra_id_72>",
-    "<extra_id_73>",
-    "<extra_id_74>",
-    "<extra_id_75>",
-    "<extra_id_76>",
-    "<extra_id_77>",
-    "<extra_id_78>",
-    "<extra_id_79>",
-    "<extra_id_80>",
-    "<extra_id_81>",
-    "<extra_id_82>",
-    "<extra_id_83>",
-    "<extra_id_84>",
-    "<extra_id_85>",
-    "<extra_id_86>",
-    "<extra_id_87>",
-    "<extra_id_88>",
-    "<extra_id_89>",
-    "<extra_id_90>",
-    "<extra_id_91>",
-    "<extra_id_92>",
-    "<extra_id_93>",
-    "<extra_id_94>",
-    "<extra_id_95>",
-    "<extra_id_96>",
-    "<extra_id_97>",
-    "<extra_id_98>",
-    "<extra_id_99>"
-  ],
-  "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
-}

baseline_bs4_top2/checkpoint-20000/spiece.model DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
-size 791656