akiFQC committed on
Commit
5f88309
1 Parent(s): d2ee7b8

add weights

CEF1Evaluator_NLI-validation_results.csv ADDED
@@ -0,0 +1,46 @@
+ epoch,steps,Macro F1 score,Micro F1 score,Weighted F1 score
+ 0,2000,0.8329982997836369,0.8375893769152196,0.8397220994618456
+ 0,4000,0.8748631523421083,0.877170582226762,0.8781035228038937
+ 0,6000,0.8820278411731026,0.8845760980592441,0.8863110758809742
+ 0,8000,0.9044091342906085,0.9057711950970377,0.9062963985693472
+ 0,10000,0.9107679086815503,0.9116445352400409,0.9119410291654654
+ 0,12000,0.9115665127927763,0.9126659856996936,0.9129618799913063
+ 0,14000,0.918864685145166,0.9195607763023493,0.919858180337296
+ 0,16000,0.9216040314629798,0.9218590398365679,0.9219611176600517
+ 0,-1,0.9100076248301333,0.9113891726251276,0.9120817353487438
+ 1,2000,0.9194390956139591,0.9200715015321757,0.9205939217972634
+ 1,4000,0.9185142962179245,0.9195607763023493,0.920105679030943
+ 1,6000,0.9208509751453576,0.9218590398365679,0.9223203005095948
+ 1,8000,0.9237640789148914,0.9244126659856997,0.9247183179540795
+ 1,10000,0.9280237475657899,0.9287538304392237,0.9290009296396736
+ 1,12000,0.9290262560148626,0.9300306435137896,0.9303821739012206
+ 1,14000,0.9278119404085364,0.9284984678243106,0.9285618797839071
+ 1,16000,0.928274634560662,0.9287538304392237,0.9288574304035064
+ 1,-1,0.9292224965106985,0.9297752808988764,0.9298854364884377
+ 2,2000,0.9317667502094175,0.9323289070480082,0.9326311979688516
+ 2,4000,0.9316300064298523,0.932073544433095,0.9321788837608004
+ 2,6000,0.9339594129221932,0.9343718079673136,0.9345856616322523
+ 2,8000,0.9344685355631314,0.93488253319714,0.9351301368674186
+ 2,10000,0.9328423146776924,0.933605720122574,0.9340127392617862
+ 2,12000,0.9285830287395637,0.9290091930541369,0.9292837395999622
+ 2,14000,0.931181352354283,0.9315628192032687,0.9317430287548752
+ 2,16000,0.9353571804783342,0.9359039836567926,0.9361475549571838
+ 2,-1,0.9323081476150016,0.9330949948927477,0.9334404916701219
+ 3,2000,0.9348641756711965,0.9356486210418795,0.9360014132732422
+ 3,4000,0.9300953744501271,0.9307967313585291,0.9310883640460593
+ 3,6000,0.9281845389858999,0.9284984678243106,0.9286734388514554
+ 3,8000,0.9324754511309039,0.9330949948927477,0.9333753646312469
+ 3,10000,0.9329047150272097,0.9333503575076609,0.9336977325814476
+ 3,12000,0.9333746327956094,0.9338610827374872,0.9341597867262501
+ 3,14000,0.9343758829846527,0.93488253319714,0.9351112846933961
+ 3,16000,0.9319437286185271,0.9325842696629213,0.9328579411370967
+ 3,-1,0.9325021374847483,0.9330949948927477,0.9334258830555647
+ 4,2000,0.931510475622838,0.9318181818181818,0.9319933144454969
+ 4,4000,0.933075533127825,0.933605720122574,0.9339175403522527
+ 4,6000,0.9318318095001942,0.9323289070480082,0.9326460598298444
+ 4,8000,0.930807201375508,0.9313074565883555,0.9316409459113678
+ 4,10000,0.9288952367942497,0.9292645556690501,0.9295356054499537
+ 4,12000,0.9301385094861797,0.9305413687436159,0.9308744533589931
+ 4,14000,0.9314496565220546,0.9318181818181818,0.9321588646101233
+ 4,16000,0.9303977405404575,0.9307967313585291,0.931146034919378
+ 4,-1,0.9309454479593707,0.9313074565883555,0.9316510303699307
CESoftmaxAccuracyEvaluator_NLI-validation_results.csv ADDED
@@ -0,0 +1,46 @@
+ epoch,steps,Accuracy
+ 0,2000,0.8375893769152196
+ 0,4000,0.877170582226762
+ 0,6000,0.8845760980592441
+ 0,8000,0.9057711950970377
+ 0,10000,0.9116445352400409
+ 0,12000,0.9126659856996936
+ 0,14000,0.9195607763023493
+ 0,16000,0.9218590398365679
+ 0,-1,0.9113891726251276
+ 1,2000,0.9200715015321757
+ 1,4000,0.9195607763023493
+ 1,6000,0.9218590398365679
+ 1,8000,0.9244126659856997
+ 1,10000,0.9287538304392237
+ 1,12000,0.9300306435137896
+ 1,14000,0.9284984678243106
+ 1,16000,0.9287538304392237
+ 1,-1,0.9297752808988764
+ 2,2000,0.9323289070480082
+ 2,4000,0.932073544433095
+ 2,6000,0.9343718079673136
+ 2,8000,0.93488253319714
+ 2,10000,0.933605720122574
+ 2,12000,0.9290091930541369
+ 2,14000,0.9315628192032687
+ 2,16000,0.9359039836567926
+ 2,-1,0.9330949948927477
+ 3,2000,0.9356486210418795
+ 3,4000,0.9307967313585291
+ 3,6000,0.9284984678243106
+ 3,8000,0.9330949948927477
+ 3,10000,0.9333503575076609
+ 3,12000,0.9338610827374872
+ 3,14000,0.93488253319714
+ 3,16000,0.9325842696629213
+ 3,-1,0.9330949948927477
+ 4,2000,0.9318181818181818
+ 4,4000,0.933605720122574
+ 4,6000,0.9323289070480082
+ 4,8000,0.9313074565883555
+ 4,10000,0.9292645556690501
+ 4,12000,0.9305413687436159
+ 4,14000,0.9318181818181818
+ 4,16000,0.9307967313585291
+ 4,-1,0.9313074565883555
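
The two CSVs above are the validation logs written during training, in the format produced by sentence-transformers cross-encoder evaluators: one row every 2000 steps, with steps = -1 marking the end-of-epoch run. A minimal sketch, assuming pandas is installed and the files sit in the repo root, for locating the best checkpoint in each log:

# Minimal sketch (not part of this commit): inspect the two validation logs above.
import pandas as pd

f1 = pd.read_csv("CEF1Evaluator_NLI-validation_results.csv")
acc = pd.read_csv("CESoftmaxAccuracyEvaluator_NLI-validation_results.csv")

# steps == -1 marks the evaluation run at the end of an epoch.
best_f1 = f1.loc[f1["Macro F1 score"].idxmax()]
best_acc = acc.loc[acc["Accuracy"].idxmax()]
print("best macro F1:", best_f1.to_dict())   # epoch 2, step 16000 in this log
print("best accuracy:", best_acc.to_dict())  # epoch 2, step 16000 in this log

In both logs the best checkpoint is at epoch 2, step 16000 (macro F1 ≈ 0.9354, accuracy ≈ 0.9359).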
config.json ADDED
@@ -0,0 +1,35 @@
+ {
+ "_name_or_path": "tohoku-nlp/bert-base-japanese-v3",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "entailment",
+ "1": "natural",
+ "2": "contradiction"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "entailment": 0,
+ "natural": 1,
+ "contradiction": 2
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "torch_dtype": "float32",
+ "transformers_version": "4.39.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 32768
+ }
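
config.json declares a three-way BertForSequenceClassification head on top of tohoku-nlp/bert-base-japanese-v3, with the labels entailment / natural / contradiction. A hedged usage sketch, where the repo id is a placeholder and the MeCab-based tokenizer needs the fugashi and unidic-lite packages installed:

# Sketch (not part of this commit): load the checkpoint above for 3-way NLI inference.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_id = "your-namespace/this-model"  # placeholder for this repository's Hub id
tokenizer = AutoTokenizer.from_pretrained(model_id)  # requires fugashi + unidic-lite
model = AutoModelForSequenceClassification.from_pretrained(model_id)

premise = "犬が公園を走っている。"
hypothesis = "動物が屋外にいる。"
inputs = tokenizer(premise, hypothesis, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
pred = logits.argmax(dim=-1).item()
print(model.config.id2label[pred])  # one of: entailment / natural / contradiction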
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8189659e235adcd7bf0f079dab4443bb9afc50cf2c7592727d3d98977230d11c
+ size 444861444
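
model.safetensors is stored with Git LFS, so the diff only shows the pointer file: the spec version, the SHA-256 oid, and the byte size. A small sketch that checks a downloaded copy against that pointer:

# Sketch: verify model.safetensors against the LFS pointer above.
# Assumes the real file has been fetched (e.g. via git lfs pull or hf_hub_download).
import hashlib
import os

EXPECTED_OID = "8189659e235adcd7bf0f079dab4443bb9afc50cf2c7592727d3d98977230d11c"
EXPECTED_SIZE = 444861444

path = "model.safetensors"
assert os.path.getsize(path) == EXPECTED_SIZE, "size does not match pointer"

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_OID, "sha256 does not match pointer"
print("model.safetensors matches the LFS pointer")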
settings.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "output_dir": "./output/tohoku_bert_v3_v0.1",
+ "datasets": [
+ "shunk031/jsnli"
+ ],
+ "model": "tohoku-nlp/bert-base-japanese-v3",
+ "eval_steps": 2000,
+ "batch_size": 32,
+ "num_epochs": 5,
+ "warmup_ratio": 0.1,
+ "weight_decay": 0.01,
+ "show_progress_bar": true,
+ "scheduler": "warmupcosine"
+ }
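
settings.json records the run configuration: a cross-encoder trained from tohoku-nlp/bert-base-japanese-v3 on shunk031/jsnli for 5 epochs at batch size 32, warmup ratio 0.1, weight decay 0.01, a warmup-cosine schedule, and evaluation every 2000 steps. A rough sketch of how these settings map onto the sentence-transformers CrossEncoder API; the JSNLI preprocessing is stubbed out because the actual training script is not part of this commit, and the warmup-step computation below is one common convention, not necessarily the one used here:

# Rough sketch (assumptions flagged in comments), mapping settings.json onto CrossEncoder.fit.
import json
import math
from torch.utils.data import DataLoader
from sentence_transformers import InputExample
from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CESoftmaxAccuracyEvaluator

with open("settings.json") as f:
    cfg = json.load(f)

def load_jsnli_examples(split):
    """Stub: return a list of InputExample(texts=[premise, hypothesis], label=0/1/2)
    built from the shunk031/jsnli dataset named in settings.json."""
    raise NotImplementedError

train_examples = load_jsnli_examples("train")
dev_examples = load_jsnli_examples("validation")

model = CrossEncoder(cfg["model"], num_labels=3)  # tohoku-nlp/bert-base-japanese-v3
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=cfg["batch_size"])
# Assumption: warmup_ratio is applied to the total number of training steps.
warmup_steps = math.ceil(len(train_dataloader) * cfg["num_epochs"] * cfg["warmup_ratio"])

model.fit(
    train_dataloader=train_dataloader,
    # The evaluator name matches the CSV prefix of the logs committed above.
    evaluator=CESoftmaxAccuracyEvaluator.from_input_examples(dev_examples, name="NLI-validation"),
    epochs=cfg["num_epochs"],
    evaluation_steps=cfg["eval_steps"],
    warmup_steps=warmup_steps,
    weight_decay=cfg["weight_decay"],
    scheduler=cfg["scheduler"],  # "warmupcosine"
    show_progress_bar=cfg["show_progress_bar"],
    output_path=cfg["output_dir"],
)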
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,63 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "4": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_lower_case": false,
+ "do_subword_tokenize": true,
+ "do_word_tokenize": true,
+ "jumanpp_kwargs": null,
+ "mask_token": "[MASK]",
+ "mecab_kwargs": {
+ "mecab_dic": "unidic_lite"
+ },
+ "model_max_length": 512,
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "subword_tokenizer_type": "wordpiece",
+ "sudachi_kwargs": null,
+ "tokenizer_class": "BertJapaneseTokenizer",
+ "unk_token": "[UNK]",
+ "word_tokenizer_type": "mecab"
+ }
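
tokenizer_config.json configures BertJapaneseTokenizer: MeCab word segmentation with the unidic_lite dictionary, WordPiece subwords, a 512-token limit, and the standard BERT special tokens. A small sketch of loading and using it; the repo id is a placeholder, and the MeCab backend requires the fugashi and unidic-lite packages:

# Sketch (not part of this commit): tokenize a Japanese sentence pair with the tokenizer above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("your-namespace/this-model")  # placeholder repo id
encoded = tokenizer("猫がソファで寝ている。", "動物が休んでいる。")
print(tokenizer.convert_ids_to_tokens(encoded["input_ids"]))
# [CLS] ... [SEP] ... [SEP] — words segmented by MeCab, then split into WordPiece subwords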
vocab.txt ADDED
The diff for this file is too large to render. See raw diff