jccervera1069 committed on
Commit 3275c13
1 Parent(s): ba60a3d

Upload TFT5ForConditionalGeneration

Files changed (4)
  1. README.md +11 -4
  2. config.json +8 -1
  3. generation_config.json +13 -0
  4. tf_model.h5 +2 -2
README.md CHANGED
@@ -1,5 +1,6 @@
 ---
 license: apache-2.0
+base_model: t5-small
 tags:
 - generated_from_keras_callback
 model-index:
@@ -14,7 +15,10 @@ probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on an unknown dataset.
 It achieves the following results on the evaluation set:
-
+- Train Loss: 2.9206
+- Validation Loss: 2.5902
+- Train Rougel: tf.Tensor(0.18895291, shape=(), dtype=float32)
+- Epoch: 0
 
 ## Model description
 
@@ -33,16 +37,19 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- optimizer: None
+- optimizer: {'name': 'Adam', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': True, 'is_legacy_optimizer': False, 'learning_rate': 2e-05, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}
 - training_precision: float32
 
 ### Training results
 
+| Train Loss | Validation Loss | Train Rougel | Epoch |
+|:----------:|:---------------:|:----------------------------------------------:|:-----:|
+| 2.9206 | 2.5902 | tf.Tensor(0.18895291, shape=(), dtype=float32) | 0 |
 
 
 ### Framework versions
 
-- Transformers 4.20.0
+- Transformers 4.31.0
 - TensorFlow 2.13.0
 - Datasets 2.10.0
-- Tokenizers 0.12.1
+- Tokenizers 0.13.3
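The optimizer entry added above is the serialized Keras Adam config logged by the `generated_from_keras_callback` flow. As a rough illustration only (the commit does not include a training script, so everything beyond the listed hyperparameters is an assumption), this is how those settings map onto `tf.keras` and `TFT5ForConditionalGeneration`:

```python
# Hypothetical sketch -- not the author's actual training script.
# It only reconstructs the optimizer settings listed in the README.
import tensorflow as tf
from transformers import TFT5ForConditionalGeneration

model = TFT5ForConditionalGeneration.from_pretrained("t5-small")

# learning_rate, betas, epsilon and amsgrad come from the logged config;
# the logged dict also sets jit_compile=True and EMA options, omitted here.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=2e-05,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=False,
)

# Transformers TF models can be compiled without an explicit loss; the built-in
# seq2seq LM loss is then used during fit().
model.compile(optimizer=optimizer)
```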
config.json CHANGED
@@ -9,19 +9,26 @@
   "decoder_start_token_id": 0,
   "dense_act_fn": "relu",
   "dropout_rate": 0.1,
+  "early_stopping": true,
   "eos_token_id": 1,
   "feed_forward_proj": "relu",
   "initializer_factor": 1.0,
   "is_encoder_decoder": true,
   "is_gated_act": false,
   "layer_norm_epsilon": 1e-06,
+  "length_penalty": 2.0,
+  "max_length": 200,
+  "min_length": 30,
   "model_type": "t5",
   "n_positions": 512,
+  "no_repeat_ngram_size": 3,
+  "num_beams": 4,
   "num_decoder_layers": 6,
   "num_heads": 8,
   "num_layers": 6,
   "output_past": true,
   "pad_token_id": 0,
+  "prefix": "summarize: ",
   "relative_attention_max_distance": 128,
   "relative_attention_num_buckets": 32,
   "task_specific_params": {
@@ -53,7 +60,7 @@
       "prefix": "translate English to Romanian: "
     }
   },
-  "transformers_version": "4.20.0",
+  "transformers_version": "4.31.0",
   "use_cache": true,
   "vocab_size": 32128
 }
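The keys added here (`early_stopping`, `length_penalty`, `max_length`, `min_length`, `no_repeat_ngram_size`, `num_beams`, `prefix`) are beam-search summarization defaults that `generate()` falls back to when no overrides are passed. A minimal usage sketch, assuming the checkpoint has been downloaded locally; the model directory and input text below are placeholders, and the tokenizer is taken from the base `t5-small` since this commit uploads only model files:

```python
# Rough usage sketch; MODEL_DIR and the article text are placeholders.
from transformers import AutoTokenizer, TFT5ForConditionalGeneration

MODEL_DIR = "path/to/this/checkpoint"  # e.g. a local clone of this repo
tokenizer = AutoTokenizer.from_pretrained("t5-small")  # base-model tokenizer
model = TFT5ForConditionalGeneration.from_pretrained(MODEL_DIR)

article = "..."  # any article text; the "summarize: " prefix matches config.json
inputs = tokenizer("summarize: " + article, return_tensors="tf")

# With nothing beyond the inputs, generate() uses the stored defaults:
# num_beams=4, max_length=200, min_length=30, length_penalty=2.0, ...
summary_ids = model.generate(inputs["input_ids"], attention_mask=inputs["attention_mask"])
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
```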
generation_config.json ADDED
@@ -0,0 +1,13 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "early_stopping": true,
+  "eos_token_id": 1,
+  "length_penalty": 2.0,
+  "max_length": 200,
+  "min_length": 30,
+  "no_repeat_ngram_size": 3,
+  "num_beams": 4,
+  "pad_token_id": 0,
+  "transformers_version": "4.31.0"
+}
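`generation_config.json` is where newer Transformers releases keep decoding defaults that used to live in `config.json`; the `_from_model_config` flag indicates these values were derived from the model config. A small sketch, assuming a local clone of the repo, of inspecting and overriding those defaults:

```python
# Sketch: read the decoding defaults from a local copy of this repository.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("path/to/local/clone")
print(gen_config.num_beams, gen_config.max_length, gen_config.length_penalty)  # 4 200 2.0

# Per-call arguments override the file's defaults, e.g. forcing greedy decoding:
# model.generate(input_ids, generation_config=gen_config, num_beams=1)
```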
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9b6345cc5a50ef6f44dab75fbbd1d17d7e10b9d551759efdb6a4969488a645a
-size 242301696
+oid sha256:f71a27850a9bc15dbc3cfda75fd9fc8c29432024236198cd7dcd4dfadfb7d82d
+size 373902664
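What git stores for `tf_model.h5` is only a Git LFS pointer: `oid` is the SHA-256 of the real checkpoint and `size` is its byte count. A quick integrity check after `git lfs pull`, as a sketch:

```python
# Sketch: verify a pulled tf_model.h5 against the new LFS pointer above.
import hashlib
import os

EXPECTED_OID = "f71a27850a9bc15dbc3cfda75fd9fc8c29432024236198cd7dcd4dfadfb7d82d"
EXPECTED_SIZE = 373902664  # bytes

path = "tf_model.h5"
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
assert sha.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("tf_model.h5 matches the LFS pointer")
```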