Areepatw committed on
Commit
6b78fab
1 Parent(s): 4de4aea

Training in progress, epoch 1

Browse files
README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: bert-base-multilingual-uncased
5
+ tags:
6
+ - generated_from_trainer
7
+ datasets:
8
+ - super_glue
9
+ metrics:
10
+ - accuracy
11
+ - f1
12
+ model-index:
13
+ - name: mbert-multirc
14
+ results:
15
+ - task:
16
+ name: Text Classification
17
+ type: text-classification
18
+ dataset:
19
+ name: super_glue
20
+ type: super_glue
21
+ config: multirc
22
+ split: validation
23
+ args: multirc
24
+ metrics:
25
+ - name: Accuracy
26
+ type: accuracy
27
+ value: 0.5759075907590759
28
+ - name: F1
29
+ type: f1
30
+ value: 0.5048127206005825
31
+ ---
32
+
33
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
34
+ should probably proofread and complete it, then remove this comment. -->
35
+
36
+ # mbert-multirc
37
+
38
+ This model is a fine-tuned version of [bert-base-multilingual-uncased](https://huggingface.co/bert-base-multilingual-uncased) on the MultiRC configuration (`multirc`) of the super_glue dataset.
39
+ It achieves the following results on the evaluation set:
40
+ - Loss: 0.6812
41
+ - Accuracy: 0.5759
42
+ - F1: 0.5048
43
+
44
+ ## Model description
45
+
46
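+ More information needed. NOTE(review): the accompanying `config.json` records `_name_or_path: bert-base-uncased` with `vocab_size: 30522`, which does not match the `bert-base-multilingual-uncased` base model stated in this card — confirm which checkpoint was actually fine-tuned.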
47
+
48
+ ## Intended uses & limitations
49
+
50
+ More information needed
51
+
52
+ ## Training and evaluation data
53
+
54
+ More information needed
55
+
56
+ ## Training procedure
57
+
58
+ ### Training hyperparameters
59
+
60
+ The following hyperparameters were used during training:
61
+ - learning_rate: 1e-05
62
+ - train_batch_size: 16
63
+ - eval_batch_size: 16
64
+ - seed: 42
65
+ - optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
66
+ - lr_scheduler_type: linear
67
+ - lr_scheduler_warmup_ratio: 0.1
68
+ - num_epochs: 1
69
+
70
+ ### Training results
71
+
72
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 |
73
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|
74
+ | 0.6862 | 1.0 | 1703 | 0.6812 | 0.5759 | 0.5048 |
75
+
76
+
77
+ ### Framework versions
78
+
79
+ - Transformers 4.46.2
80
+ - Pytorch 2.5.1+cu121
81
+ - Datasets 3.1.0
82
+ - Tokenizers 0.20.3
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "problem_type": "single_label_classification",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.46.2",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 30522
27
+ }
eval_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "bert-base-multilingual-uncased",
3
+ "dataset_name": "super_glue",
4
+ "subset_name": "multirc",
5
+ "eval_results": {
6
+ "eval_loss": 0.6812450289726257,
7
+ "eval_accuracy": 0.5759075907590759,
8
+ "eval_f1": 0.5048127206005825,
9
+ "eval_runtime": 73.5474,
10
+ "eval_samples_per_second": 65.917,
11
+ "eval_steps_per_second": 4.12,
12
+ "epoch": 1.0
13
+ },
14
+ "accuracy": 0.5759075907590759,
15
+ "f1": 0.5048127206005825
16
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5e3cd03fe611f22d691e680611402cd351a8368e02ea7c68129024e0e2953f6
3
+ size 437958648
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "BertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eec0bcdb75d6f7cf1875b56e45baf915e577cbcc1c1a410027465f58e71b8f64
3
+ size 5240
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff