add weights
Browse files- CEF1Evaluator_NLI-validation_results.csv +46 -0
- CESoftmaxAccuracyEvaluator_NLI-validation_results.csv +46 -0
- config.json +35 -0
- model.safetensors +3 -0
- settings.json +14 -0
- special_tokens_map.json +7 -0
- tokenizer_config.json +63 -0
- vocab.txt +0 -0
CEF1Evaluator_NLI-validation_results.csv
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,Macro F1 score,Micro F1 score,Weighted F1 score
|
2 |
+
0,2000,0.8329982997836369,0.8375893769152196,0.8397220994618456
|
3 |
+
0,4000,0.8748631523421083,0.877170582226762,0.8781035228038937
|
4 |
+
0,6000,0.8820278411731026,0.8845760980592441,0.8863110758809742
|
5 |
+
0,8000,0.9044091342906085,0.9057711950970377,0.9062963985693472
|
6 |
+
0,10000,0.9107679086815503,0.9116445352400409,0.9119410291654654
|
7 |
+
0,12000,0.9115665127927763,0.9126659856996936,0.9129618799913063
|
8 |
+
0,14000,0.918864685145166,0.9195607763023493,0.919858180337296
|
9 |
+
0,16000,0.9216040314629798,0.9218590398365679,0.9219611176600517
|
10 |
+
0,-1,0.9100076248301333,0.9113891726251276,0.9120817353487438
|
11 |
+
1,2000,0.9194390956139591,0.9200715015321757,0.9205939217972634
|
12 |
+
1,4000,0.9185142962179245,0.9195607763023493,0.920105679030943
|
13 |
+
1,6000,0.9208509751453576,0.9218590398365679,0.9223203005095948
|
14 |
+
1,8000,0.9237640789148914,0.9244126659856997,0.9247183179540795
|
15 |
+
1,10000,0.9280237475657899,0.9287538304392237,0.9290009296396736
|
16 |
+
1,12000,0.9290262560148626,0.9300306435137896,0.9303821739012206
|
17 |
+
1,14000,0.9278119404085364,0.9284984678243106,0.9285618797839071
|
18 |
+
1,16000,0.928274634560662,0.9287538304392237,0.9288574304035064
|
19 |
+
1,-1,0.9292224965106985,0.9297752808988764,0.9298854364884377
|
20 |
+
2,2000,0.9317667502094175,0.9323289070480082,0.9326311979688516
|
21 |
+
2,4000,0.9316300064298523,0.932073544433095,0.9321788837608004
|
22 |
+
2,6000,0.9339594129221932,0.9343718079673136,0.9345856616322523
|
23 |
+
2,8000,0.9344685355631314,0.93488253319714,0.9351301368674186
|
24 |
+
2,10000,0.9328423146776924,0.933605720122574,0.9340127392617862
|
25 |
+
2,12000,0.9285830287395637,0.9290091930541369,0.9292837395999622
|
26 |
+
2,14000,0.931181352354283,0.9315628192032687,0.9317430287548752
|
27 |
+
2,16000,0.9353571804783342,0.9359039836567926,0.9361475549571838
|
28 |
+
2,-1,0.9323081476150016,0.9330949948927477,0.9334404916701219
|
29 |
+
3,2000,0.9348641756711965,0.9356486210418795,0.9360014132732422
|
30 |
+
3,4000,0.9300953744501271,0.9307967313585291,0.9310883640460593
|
31 |
+
3,6000,0.9281845389858999,0.9284984678243106,0.9286734388514554
|
32 |
+
3,8000,0.9324754511309039,0.9330949948927477,0.9333753646312469
|
33 |
+
3,10000,0.9329047150272097,0.9333503575076609,0.9336977325814476
|
34 |
+
3,12000,0.9333746327956094,0.9338610827374872,0.9341597867262501
|
35 |
+
3,14000,0.9343758829846527,0.93488253319714,0.9351112846933961
|
36 |
+
3,16000,0.9319437286185271,0.9325842696629213,0.9328579411370967
|
37 |
+
3,-1,0.9325021374847483,0.9330949948927477,0.9334258830555647
|
38 |
+
4,2000,0.931510475622838,0.9318181818181818,0.9319933144454969
|
39 |
+
4,4000,0.933075533127825,0.933605720122574,0.9339175403522527
|
40 |
+
4,6000,0.9318318095001942,0.9323289070480082,0.9326460598298444
|
41 |
+
4,8000,0.930807201375508,0.9313074565883555,0.9316409459113678
|
42 |
+
4,10000,0.9288952367942497,0.9292645556690501,0.9295356054499537
|
43 |
+
4,12000,0.9301385094861797,0.9305413687436159,0.9308744533589931
|
44 |
+
4,14000,0.9314496565220546,0.9318181818181818,0.9321588646101233
|
45 |
+
4,16000,0.9303977405404575,0.9307967313585291,0.931146034919378
|
46 |
+
4,-1,0.9309454479593707,0.9313074565883555,0.9316510303699307
|
CESoftmaxAccuracyEvaluator_NLI-validation_results.csv
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,Accuracy
|
2 |
+
0,2000,0.8375893769152196
|
3 |
+
0,4000,0.877170582226762
|
4 |
+
0,6000,0.8845760980592441
|
5 |
+
0,8000,0.9057711950970377
|
6 |
+
0,10000,0.9116445352400409
|
7 |
+
0,12000,0.9126659856996936
|
8 |
+
0,14000,0.9195607763023493
|
9 |
+
0,16000,0.9218590398365679
|
10 |
+
0,-1,0.9113891726251276
|
11 |
+
1,2000,0.9200715015321757
|
12 |
+
1,4000,0.9195607763023493
|
13 |
+
1,6000,0.9218590398365679
|
14 |
+
1,8000,0.9244126659856997
|
15 |
+
1,10000,0.9287538304392237
|
16 |
+
1,12000,0.9300306435137896
|
17 |
+
1,14000,0.9284984678243106
|
18 |
+
1,16000,0.9287538304392237
|
19 |
+
1,-1,0.9297752808988764
|
20 |
+
2,2000,0.9323289070480082
|
21 |
+
2,4000,0.932073544433095
|
22 |
+
2,6000,0.9343718079673136
|
23 |
+
2,8000,0.93488253319714
|
24 |
+
2,10000,0.933605720122574
|
25 |
+
2,12000,0.9290091930541369
|
26 |
+
2,14000,0.9315628192032687
|
27 |
+
2,16000,0.9359039836567926
|
28 |
+
2,-1,0.9330949948927477
|
29 |
+
3,2000,0.9356486210418795
|
30 |
+
3,4000,0.9307967313585291
|
31 |
+
3,6000,0.9284984678243106
|
32 |
+
3,8000,0.9330949948927477
|
33 |
+
3,10000,0.9333503575076609
|
34 |
+
3,12000,0.9338610827374872
|
35 |
+
3,14000,0.93488253319714
|
36 |
+
3,16000,0.9325842696629213
|
37 |
+
3,-1,0.9330949948927477
|
38 |
+
4,2000,0.9318181818181818
|
39 |
+
4,4000,0.933605720122574
|
40 |
+
4,6000,0.9323289070480082
|
41 |
+
4,8000,0.9313074565883555
|
42 |
+
4,10000,0.9292645556690501
|
43 |
+
4,12000,0.9305413687436159
|
44 |
+
4,14000,0.9318181818181818
|
45 |
+
4,16000,0.9307967313585291
|
46 |
+
4,-1,0.9313074565883555
|
config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "tohoku-nlp/bert-base-japanese-v3",
|
3 |
+
"architectures": [
|
4 |
+
"BertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 768,
|
11 |
+
"id2label": {
|
12 |
+
"0": "entailment",
|
13 |
+
"1": "neutral",
|
14 |
+
"2": "contradiction"
|
15 |
+
},
|
16 |
+
"initializer_range": 0.02,
|
17 |
+
"intermediate_size": 3072,
|
18 |
+
"label2id": {
|
19 |
+
"entailment": 0,
|
20 |
+
"neutral": 1,
|
21 |
+
"contradiction": 2
|
22 |
+
},
|
23 |
+
"layer_norm_eps": 1e-12,
|
24 |
+
"max_position_embeddings": 512,
|
25 |
+
"model_type": "bert",
|
26 |
+
"num_attention_heads": 12,
|
27 |
+
"num_hidden_layers": 12,
|
28 |
+
"pad_token_id": 0,
|
29 |
+
"position_embedding_type": "absolute",
|
30 |
+
"torch_dtype": "float32",
|
31 |
+
"transformers_version": "4.39.2",
|
32 |
+
"type_vocab_size": 2,
|
33 |
+
"use_cache": true,
|
34 |
+
"vocab_size": 32768
|
35 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8189659e235adcd7bf0f079dab4443bb9afc50cf2c7592727d3d98977230d11c
|
3 |
+
size 444861444
|
settings.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"output_dir": "./output/tohoku_bert_v3_v0.1",
|
3 |
+
"datasets": [
|
4 |
+
"shunk031/jsnli"
|
5 |
+
],
|
6 |
+
"model": "tohoku-nlp/bert-base-japanese-v3",
|
7 |
+
"eval_steps": 2000,
|
8 |
+
"batch_size": 32,
|
9 |
+
"num_epochs": 5,
|
10 |
+
"warmup_ratio": 0.1,
|
11 |
+
"weight_decay": 0.01,
|
12 |
+
"show_progress_bar": true,
|
13 |
+
"scheduler": "warmupcosine"
|
14 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
tokenizer_config.json
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"4": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": false,
|
47 |
+
"do_subword_tokenize": true,
|
48 |
+
"do_word_tokenize": true,
|
49 |
+
"jumanpp_kwargs": null,
|
50 |
+
"mask_token": "[MASK]",
|
51 |
+
"mecab_kwargs": {
|
52 |
+
"mecab_dic": "unidic_lite"
|
53 |
+
},
|
54 |
+
"model_max_length": 512,
|
55 |
+
"never_split": null,
|
56 |
+
"pad_token": "[PAD]",
|
57 |
+
"sep_token": "[SEP]",
|
58 |
+
"subword_tokenizer_type": "wordpiece",
|
59 |
+
"sudachi_kwargs": null,
|
60 |
+
"tokenizer_class": "BertJapaneseTokenizer",
|
61 |
+
"unk_token": "[UNK]",
|
62 |
+
"word_tokenizer_type": "mecab"
|
63 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|