Kajiura Teruno
committed on
Commit
•
5cdb012
1
Parent(s):
b6ed07c
tokenizer
Browse files
- removed.jsonl +0 -0
- vocamaru_log.txt +31 -0
removed.jsonl
ADDED
File without changes
|
vocamaru_log.txt
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[新しいモデルの保存先] sno_emp
|
2 |
+
[LOG] 特殊語彙 id=0 type=3 <pad> 0.0
|
3 |
+
[LOG] 特殊語彙 id=1 type=3 </s> 0.0
|
4 |
+
[LOG] 特殊語彙 id=2 type=2 <unk> 0.0
|
5 |
+
[全語彙数] 31741
|
6 |
+
[スコア統計] s
|
7 |
+
count 31741.000000
|
8 |
+
mean -11.467278
|
9 |
+
std 1.075664
|
10 |
+
min -20.360178
|
11 |
+
25% -12.145124
|
12 |
+
50% -11.626710
|
13 |
+
75% -10.941068
|
14 |
+
max -3.172917
|
15 |
+
[置き換える語彙数] 0
|
16 |
+
[重複語数] 5209 トリム数 0
|
17 |
+
全角ゴミ 125
|
18 |
+
半角ゴミ 508
|
19 |
+
記号ゴミ 2
|
20 |
+
[記号ゴミ] 635
|
21 |
+
数字ゴミ 465
|
22 |
+
[数字重複] 465
|
23 |
+
[消去可能な字句] 6309
|
24 |
+
[実際に置き換える語] 0
|
25 |
+
PreTrainedTokenizer(name_or_path='sno_emp', vocab_size=32100, model_max_len=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'additional_special_tokens': ['<extra_id_0>', '<extra_id_1>', '<extra_id_2>', '<extra_id_3>', '<extra_id_4>', '<extra_id_5>', '<extra_id_6>', '<extra_id_7>', '<extra_id_8>', '<extra_id_9>', '<extra_id_10>', '<extra_id_11>', '<extra_id_12>', '<extra_id_13>', '<extra_id_14>', '<extra_id_15>', '<extra_id_16>', '<extra_id_17>', '<extra_id_18>', '<extra_id_19>', '<extra_id_20>', '<extra_id_21>', '<extra_id_22>', '<extra_id_23>', '<extra_id_24>', '<extra_id_25>', '<extra_id_26>', '<extra_id_27>', '<extra_id_28>', '<extra_id_29>', '<extra_id_30>', '<extra_id_31>', '<extra_id_32>', '<extra_id_33>', '<extra_id_34>', '<extra_id_35>', '<extra_id_36>', '<extra_id_37>', '<extra_id_38>', '<extra_id_39>', '<extra_id_40>', '<extra_id_41>', '<extra_id_42>', '<extra_id_43>', '<extra_id_44>', '<extra_id_45>', '<extra_id_46>', '<extra_id_47>', '<extra_id_48>', '<extra_id_49>', '<extra_id_50>', '<extra_id_51>', '<extra_id_52>', '<extra_id_53>', '<extra_id_54>', '<extra_id_55>', '<extra_id_56>', '<extra_id_57>', '<extra_id_58>', '<extra_id_59>', '<extra_id_60>', '<extra_id_61>', '<extra_id_62>', '<extra_id_63>', '<extra_id_64>', '<extra_id_65>', '<extra_id_66>', '<extra_id_67>', '<extra_id_68>', '<extra_id_69>', '<extra_id_70>', '<extra_id_71>', '<extra_id_72>', '<extra_id_73>', '<extra_id_74>', '<extra_id_75>', '<extra_id_76>', '<extra_id_77>', '<extra_id_78>', '<extra_id_79>', '<extra_id_80>', '<extra_id_81>', '<extra_id_82>', '<extra_id_83>', '<extra_id_84>', '<extra_id_85>', '<extra_id_86>', '<extra_id_87>', '<extra_id_88>', '<extra_id_89>', '<extra_id_90>', '<extra_id_91>', '<extra_id_92>', '<extra_id_93>', '<extra_id_94>', '<extra_id_95>', '<extra_id_96>', '<extra_id_97>', '<extra_id_98>', '<extra_id_99>']})
|
26 |
+
<nl><nl> [261, 2121, 406, 561, 1699, 2121, 406, 561, 1699, 1]
|
27 |
+
<123> <100> <1> [261, 2121, 310, 281, 1699, 261, 2121, 701, 1699, 261, 2121, 279, 1699, 1]
|
28 |
+
<extra_id_0><extra_id_99> [32099, 32000, 1]
|
29 |
+
|
30 |
+
|
31 |
+
[1]
|