howard-hou committed
Commit 87f587a
1 Parent(s): 18a2239

Upload RankingPrompterForPreTraining

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "D://huggingface_model/RankingPrompterForPreTraining-small",
+  "_name_or_path": "..\\..\\..\\huggingface_model\\RankingPrompterForPreTraining-small\\",
   "architectures": [
     "RankingPrompterForPreTraining"
   ],
@@ -22,6 +22,7 @@
   "layer_norm_epsilon": 1e-06,
   "max_new_tokens": 64,
   "model_type": "umt5",
+  "num_answer_query": 128,
   "num_decoder_layers": 8,
   "num_heads": 6,
   "num_layers": 8,
@@ -32,7 +33,7 @@
   "tie_word_embeddings": false,
   "tokenizer_class": "T5Tokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.32.0",
+  "transformers_version": "4.32.1",
   "use_cache": true,
   "vocab_size": 256384
 }
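The updated config.json adds the "num_answer_query": 128 entry, which is exposed as an attribute once the config is loaded. A minimal sketch of reading it back, assuming the repository registers the custom config class via auto_map and using a placeholder repo id (not confirmed by this commit):

# Sketch only: repo id is a placeholder; trust_remote_code is needed because
# RankingPrompterConfig is a custom class shipped with the repository.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "howard-hou/RankingPrompterForPreTraining-small",  # hypothetical repo id
    trust_remote_code=True,
)
print(config.num_answer_query)  # 128, as set in the updated config.json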
configuration_rankingprompter.py CHANGED
@@ -14,6 +14,7 @@ class RankingPrompterConfig(PretrainedConfig):
         num_heads=6,
         relative_attention_num_buckets=32,
         relative_attention_max_distance=128,
+        num_answer_query=128,
         dropout_rate=0.1,
         layer_norm_epsilon=1e-6,
         initializer_factor=1.0,
@@ -48,6 +49,7 @@ class RankingPrompterConfig(PretrainedConfig):
         self.num_heads = num_heads
         self.relative_attention_num_buckets = relative_attention_num_buckets
         self.relative_attention_max_distance = relative_attention_max_distance
+        self.num_answer_query = num_answer_query
         self.dropout_rate = dropout_rate
         self.classifier_dropout = classifier_dropout
         self.layer_norm_epsilon = layer_norm_epsilon
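On the Python side, the change simply accepts num_answer_query as a constructor keyword and stores it on the config object. A minimal sketch of constructing and saving the config directly with the new field (output path is illustrative):

# Sketch: instantiate the custom config with the new field and persist it.
# Assumes configuration_rankingprompter.py is importable from the working directory.
from configuration_rankingprompter import RankingPrompterConfig

config = RankingPrompterConfig(num_answer_query=128)
config.save_pretrained("./RankingPrompterForPreTraining-small")  # writes config.json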
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b90ef8ceeeffc7b033e65dfc28f3adf8d82cbdad204df0677ae0c0f45f4f0c24
+oid sha256:aab6831f798bedd1b458b5cd3b77c941d58564bb70941bcc2aed16ad8cdee75d
 size 701403585