pjbhaumik/km2-cross-encoder2

Files changed (4) hide show

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: cross-encoder/ms-marco-MiniLM-L-6-v2
 tags:
 - generated_from_trainer
 model-index:
@@ -13,9 +13,9 @@ should probably proofread and complete it, then remove this comment. -->
 # crossencoder-km1
-This model is a fine-tuned version of [cross-encoder/ms-marco-MiniLM-L-6-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-6-v2) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0889
 ## Model description
@@ -35,7 +35,7 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
-- train_batch_size: 80
 - eval_batch_size: 80
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
@@ -47,16 +47,16 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 99.5928       | 1.0   | 13   | 102.9756        |
-| 98.0949       | 2.0   | 26   | 99.8849         |
-| 95.3147       | 3.0   | 39   | 92.5186         |
-| 76.7659       | 4.0   | 52   | 69.9192         |
-| 54.0033       | 5.0   | 65   | 21.3535         |
-| 20.7192       | 6.0   | 78   | 7.2049          |
-| 3.2855        | 7.0   | 91   | 2.0372          |
-| 0.9629        | 8.0   | 104  | 0.3949          |
-| 0.4633        | 9.0   | 117  | 0.1386          |
-| 0.2156        | 10.0  | 130  | 0.0975          |
 ### Framework versions

 ---
 license: apache-2.0
+base_model: cross-encoder/stsb-TinyBERT-L-4
 tags:
 - generated_from_trainer
 model-index:
 # crossencoder-km1
+This model is a fine-tuned version of [cross-encoder/stsb-TinyBERT-L-4](https://huggingface.co/cross-encoder/stsb-TinyBERT-L-4) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.0119
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
+- train_batch_size: 100
 - eval_batch_size: 80
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 7.4105        | 1.0   | 20   | 7.0307          |
+| 3.9992        | 2.0   | 40   | 3.3007          |
+| 1.1734        | 3.0   | 60   | 0.9568          |
+| 0.2736        | 4.0   | 80   | 0.2017          |
+| 0.1073        | 5.0   | 100  | 0.0679          |
+| 0.0364        | 6.0   | 120  | 0.0288          |
+| 0.0219        | 7.0   | 140  | 0.0221          |
+| 0.0129        | 8.0   | 160  | 0.0140          |
+| 0.0096        | 9.0   | 180  | 0.0118          |
+| 0.0090        | 10.0  | 200  | 0.0108          |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "cross-encoder/ms-marco-MiniLM-L-6-v2",
   "architectures": [
     "BertForSequenceClassification"
   ],
@@ -8,12 +8,12 @@
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size": 384,
   "id2label": {
     "0": "LABEL_0"
   },
   "initializer_range": 0.02,
-  "intermediate_size": 1536,
   "label2id": {
     "LABEL_0": 0
   },
@@ -21,11 +21,10 @@
   "max_position_embeddings": 512,
   "model_type": "bert",
   "num_attention_heads": 12,
-  "num_hidden_layers": 6,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "regression",
-  "sbert_ce_default_activation_function": "torch.nn.modules.linear.Identity",
   "torch_dtype": "float32",
   "transformers_version": "4.37.2",
   "type_vocab_size": 2,

 {
+  "_name_or_path": "cross-encoder/stsb-TinyBERT-L-4",
   "architectures": [
     "BertForSequenceClassification"
   ],
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
+  "hidden_size": 312,
   "id2label": {
     "0": "LABEL_0"
   },
   "initializer_range": 0.02,
+  "intermediate_size": 1200,
   "label2id": {
     "LABEL_0": 0
   },
   "max_position_embeddings": 512,
   "model_type": "bert",
   "num_attention_heads": 12,
+  "num_hidden_layers": 4,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "regression",
   "torch_dtype": "float32",
   "transformers_version": "4.37.2",
   "type_vocab_size": 2,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98d4c651e4369c3d33de5e192606c39264aa45b3e9d9c2d055d3c8d3656b0ab5
-size 90866412

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3a0b56782cd84c8bc3c4c977b387109502129debe8bf1921ee0982103cdb4b3
+size 57410556

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:deafebc61456310c26a138c83b25911d8768755817dc0fc13ca717640fe240f9
 size 4155

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf8fa5b1057725a4bc8e0d36487737d0d0d0698fd98931692572c0b5b1b59b8c
 size 4155