ecker committed on
Commit
f28d5f2
1 Parent(s): eec2046

Upload 2 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/models/ar+nar-llama-8.sft filter=lfs diff=lfs merge=lfs -text
data/models/ar+nar-llama-8.sft ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0920e7eef0884631f00513b700924538db0853d662530e4bcf7ac1d8666430b6
3
+ size 456274402
data/tokenizer.json ADDED
@@ -0,0 +1,569 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<unk>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<bos>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "</eos>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "<mask>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ }
42
+ ],
43
+ "normalizer": null,
44
+ "pre_tokenizer": null,
45
+ "post_processor": {
46
+ "type": "TemplateProcessing",
47
+ "single": [
48
+ {
49
+ "SpecialToken": {
50
+ "id": "<bos>",
51
+ "type_id": 0
52
+ }
53
+ },
54
+ {
55
+ "Sequence": {
56
+ "id": "A",
57
+ "type_id": 0
58
+ }
59
+ },
60
+ {
61
+ "SpecialToken": {
62
+ "id": "<eos>",
63
+ "type_id": 0
64
+ }
65
+ }
66
+ ],
67
+ "pair": [
68
+ {
69
+ "Sequence": {
70
+ "id": "A",
71
+ "type_id": 0
72
+ }
73
+ },
74
+ {
75
+ "Sequence": {
76
+ "id": "B",
77
+ "type_id": 1
78
+ }
79
+ }
80
+ ],
81
+ "special_tokens": {
82
+ "<bos>": {
83
+ "id": "<bos>",
84
+ "ids": [
85
+ 1
86
+ ],
87
+ "tokens": [
88
+ "<bos>"
89
+ ]
90
+ },
91
+ "<eos>": {
92
+ "id": "<eos>",
93
+ "ids": [
94
+ 2
95
+ ],
96
+ "tokens": [
97
+ "<eos>"
98
+ ]
99
+ }
100
+ }
101
+ },
102
+ "decoder": null,
103
+ "model": {
104
+ "type": "BPE",
105
+ "dropout": null,
106
+ "unk_token": "<unk>",
107
+ "continuing_subword_prefix": null,
108
+ "end_of_word_suffix": null,
109
+ "fuse_unk": false,
110
+ "byte_fallback": false,
111
+ "ignore_merges": false,
112
+ "vocab": {
113
+ "<unk>": 0,
114
+ "<bos>": 1,
115
+ "</eos>": 2,
116
+ "<mask>": 3,
117
+ " ": 4,
118
+
119
+ "ᵝ": 4,
120
+
121
+ "!": 5,
122
+ "\"": 6,
123
+ "(": 7,
124
+ "{": 7,
125
+ "[": 7,
126
+ ")": 8,
127
+ "}": 8,
128
+ "]": 8,
129
+ ",": 9,
130
+ "-": 10,
131
+ ".": 11,
132
+ "1": 12,
133
+ ":": 13,
134
+ ";": 14,
135
+ "?": 15,
136
+ "a": 16,
137
+ "ä": 16,
138
+ "ɒ": 16,
139
+ "b": 17,
140
+ "c": 18,
141
+ "d": 19,
142
+ "e": 20,
143
+ "f": 21,
144
+ "h": 22,
145
+ "i": 23,
146
+ "ĩ": 23,
147
+ "j": 24,
148
+ "k": 25,
149
+ "l": 26,
150
+ "m": 27,
151
+ "n": 28,
152
+ "ɴ": 28,
153
+ "ɲ": 28,
154
+ "o": 29,
155
+ "̞": 29,
156
+ "p": 30,
157
+ "ɸ": 30,
158
+ "q": 31,
159
+ "r": 32,
160
+ "ɽ": 32,
161
+ "ʁ": 32,
162
+ "s": 33,
163
+ "t": 34,
164
+ "u": 35,
165
+ "ø": 35,
166
+ "œ": 35,
167
+ "y": 35,
168
+ "ɣ": 35,
169
+ "ũ": 35,
170
+ "v": 36,
171
+ "w": 37,
172
+ "ʍ": 37,
173
+ "x": 38,
174
+ "z": 39,
175
+ "¡": 40,
176
+ "«": 41,
177
+ "»": 42,
178
+ "¿": 43,
179
+ "æ": 44,
180
+ "ç": 45,
181
+ "ð": 46,
182
+ "ŋ": 47,
183
+ "ɐ": 48,
184
+ "ɑ": 49,
185
+ "ɔ": 50,
186
+ "ɕ": 51,
187
+ "ə": 52,
188
+ "ɚ": 53,
189
+ "ɛ": 54,
190
+ "ɜ": 55,
191
+ "ɟ": 56,
192
+ "ɡ": 57,
193
+ "ɪ": 58,
194
+ "ɬ": 59,
195
+ "ɯ": 60,
196
+ "ɹ": 61,
197
+ "ɾ": 62,
198
+ "ʃ": 63,
199
+ "ʈ": 64,
200
+ "ʊ": 65,
201
+ "ʋ": 66,
202
+ "ʌ": 67,
203
+ "ʑ": 68,
204
+ "ʒ": 69,
205
+ "ʔ": 70,
206
+ "ʲ": 71,
207
+ "ˈ": 72,
208
+ "ˌ": 73,
209
+ "ˌ": 73,
210
+ "ː": 74,
211
+ "̃": 75,
212
+ "̩": 76,
213
+ "θ": 77,
214
+ "ᵻ": 78,
215
+ "—": 79,
216
+ "“": 80,
217
+ "”": 81,
218
+ "…": 82,
219
+ "ˈɛ": 83,
220
+ "iː": 84,
221
+ "aɪ": 85,
222
+ "nd": 86,
223
+ "ˈɪ": 87,
224
+ "eɪ": 88,
225
+ "ˈæ": 89,
226
+ "ðə": 90,
227
+ "oʊ": 91,
228
+ "ɑː": 92,
229
+ "ˈeɪ": 93,
230
+ "ən": 94,
231
+ "uː": 95,
232
+ "ˈʌ": 96,
233
+ "ˈaɪ": 97,
234
+ "st": 98,
235
+ "ˈɔ": 99,
236
+ "ˈoʊ": 100,
237
+ "ˈiː": 101,
238
+ "ˈɑː": 102,
239
+ "ænd": 103,
240
+ "ːɹ": 104,
241
+ "ɪŋ": 105,
242
+ "ɜː": 106,
243
+ "ɪn": 107,
244
+ "tə": 108,
245
+ "ʌv": 109,
246
+ "aʊ": 110,
247
+ "əl": 111,
248
+ "ˈuː": 112,
249
+ "tʃ": 113,
250
+ "ɪz": 114,
251
+ "ˈɜː": 115,
252
+ "ˌʌ": 116,
253
+ "æt": 117,
254
+ "dʒ": 118,
255
+ "ˈɔː": 119,
256
+ "ɪt": 120,
257
+ "ˈaʊ": 121,
258
+ "ɚɹ": 122,
259
+ "ˈɛn": 123,
260
+ "wʌ": 124,
261
+ "li": 125,
262
+ "hiː": 126,
263
+ "ˌɛ": 127,
264
+ "wɪ": 128,
265
+ "wʌz": 129,
266
+ "ðæt": 130,
267
+ "juː": 131,
268
+ "oːɹ": 132,
269
+ "ðɪ": 133,
270
+ "sˈɛ": 134,
271
+ "ˌɪ": 135,
272
+ "ˈɑːɹ": 136,
273
+ "nt": 137,
274
+ "ˈʊ": 138,
275
+ "ənt": 139,
276
+ "hɪz": 140,
277
+ "ˌɑː": 141,
278
+ "hæ": 142,
279
+ "ɔːɹ": 143,
280
+ "ˈɛɹ": 144,
281
+ "wɪð": 145,
282
+ "ᵻd": 146,
283
+ "ˈoːɹ": 147,
284
+ "pɹ": 148,
285
+ "ˈɔːl": 149,
286
+ "mˌ": 150,
287
+ "ʃən": 151,
288
+ "kt": 152,
289
+ "ˌoʊ": 153,
290
+ "ˈɔːɹ": 154,
291
+ "fɹ": 155,
292
+ "æz": 156,
293
+ "ˌʌt": 157,
294
+ "ʃiː": 158,
295
+ "ˈɛl": 159,
296
+ "ˌaʊ": 160,
297
+ "ˈʌn": 161,
298
+ "əs": 162,
299
+ "hɜː": 163,
300
+ "lˈaɪ": 164,
301
+ "ˈæn": 165,
302
+ "ˈɪɹ": 166,
303
+ "ʊd": 167,
304
+ "ɹᵻ": 168,
305
+ "ld": 169,
306
+ "bˌʌt": 170,
307
+ "ks": 171,
308
+ "nˈoʊ": 172,
309
+ "hæd": 173,
310
+ "ɾɚ": 174,
311
+ "ɛɹ": 175,
312
+ "ˈɪŋ": 176,
313
+ "ɡɹ": 177,
314
+ "nˌɑː": 178,
315
+ "ɔn": 179,
316
+ "vɚ": 180,
317
+ "maɪ": 181,
318
+ "fɔːɹ": 182,
319
+ "ðɚ": 183,
320
+ "tʊ": 184,
321
+ "ðɛɹ": 185,
322
+ "nˌɑːt": 186,
323
+ "ˈʌm": 187,
324
+ "tɹ": 188,
325
+ "sˈiː": 189,
326
+ "ʌvðə": 190,
327
+ "mˈɪ": 191,
328
+ "hˈæ": 192,
329
+ "ˌɪm": 193,
330
+ "lˈeɪ": 194,
331
+ "ɪk": 195,
332
+ "sp": 196,
333
+ "hˌɪm": 197,
334
+ "ɐn": 198,
335
+ "ðeɪ": 199,
336
+ "lˈɪ": 200,
337
+ "ɾi": 201,
338
+ "lˈɛ": 202,
339
+ "bɹ": 203,
340
+ "kɹ": 204,
341
+ "lˈæ": 205,
342
+ "ˈɪl": 206,
343
+ "jˈuː": 207,
344
+ "ʌm": 208,
345
+ "mˌiː": 209,
346
+ "bᵻ": 210,
347
+ "wˈʌn": 211,
348
+ "ˌɪn": 212,
349
+ "ˈɪn": 213,
350
+ "ˈoʊn": 214,
351
+ "sˈɛd": 215,
352
+ "biː": 216,
353
+ "ˈɛd": 217,
354
+ "ˈaɪt": 218,
355
+ "baɪ": 219,
356
+ "fɹʌm": 220,
357
+ "ɪs": 221,
358
+ "ɚz": 222,
359
+ "ðɪs": 223,
360
+ "əns": 224,
361
+ "bəl": 225,
362
+ "ɪf": 226,
363
+ "ɪnðə": 227,
364
+ "əm": 228,
365
+ "ᵻz": 229,
366
+ "ˌuː": 230,
367
+ "wˈeɪ": 231,
368
+ "ft": 232,
369
+ "wiː": 233,
370
+ "stɹ": 234,
371
+ "lˈiː": 235,
372
+ "iːz": 236,
373
+ "pt": 237,
374
+ "jʊ": 238,
375
+ "ɚd": 239,
376
+ "ˌaɪ": 240,
377
+ "kw": 241,
378
+ "ˌɔn": 242,
379
+ "ˈaɪd": 243,
380
+ "ɪm": 244,
381
+ "ˈʌst": 245,
382
+ "ˈoʊld": 246,
383
+ "ts": 247,
384
+ "ˌɪtʃ": 248,
385
+ "sˌoʊ": 249,
386
+ "dˈɪ": 250,
387
+ "ɑːɹ": 251,
388
+ "hɐ": 252,
389
+ "sˈeɪ": 253,
390
+ "ɾᵻd": 254,
391
+ "wˌɪtʃ": 255
392
+ },
393
+ "merges": [
394
+ "ˈ ɛ",
395
+ "i ː",
396
+ "a ɪ",
397
+ "n d",
398
+ "ˈ ɪ",
399
+ "e ɪ",
400
+ "ˈ æ",
401
+ "ð ə",
402
+ "o ʊ",
403
+ "ɑ ː",
404
+ "ˈ eɪ",
405
+ "ə n",
406
+ "u ː",
407
+ "ˈ ʌ",
408
+ "ˈ aɪ",
409
+ "s t",
410
+ "ˈ ɔ",
411
+ "ˈ oʊ",
412
+ "ˈ iː",
413
+ "ˈ ɑː",
414
+ "æ nd",
415
+ "ː ɹ",
416
+ "ɪ ŋ",
417
+ "ɜ ː",
418
+ "ɪ n",
419
+ "t ə",
420
+ "ʌ v",
421
+ "a ʊ",
422
+ "ə l",
423
+ "ˈ uː",
424
+ "t ʃ",
425
+ "ɪ z",
426
+ "ˈ ɜː",
427
+ "ˌ ʌ",
428
+ "æ t",
429
+ "d ʒ",
430
+ "ˈɔ ː",
431
+ "ɪ t",
432
+ "ˈ aʊ",
433
+ "ɚ ɹ",
434
+ "ˈɛ n",
435
+ "w ʌ",
436
+ "l i",
437
+ "h iː",
438
+ "ˌ ɛ",
439
+ "w ɪ",
440
+ "wʌ z",
441
+ "ð æt",
442
+ "j uː",
443
+ "o ːɹ",
444
+ "ð ɪ",
445
+ "s ˈɛ",
446
+ "ˌ ɪ",
447
+ "ˈɑː ɹ",
448
+ "n t",
449
+ "ˈ ʊ",
450
+ "ən t",
451
+ "h ɪz",
452
+ "ˌ ɑː",
453
+ "h æ",
454
+ "ɔ ːɹ",
455
+ "ˈɛ ɹ",
456
+ "wɪ ð",
457
+ "ᵻ d",
458
+ "ˈ oːɹ",
459
+ "p ɹ",
460
+ "ˈɔː l",
461
+ "m ˌ",
462
+ "ʃ ən",
463
+ "k t",
464
+ "ˌ oʊ",
465
+ "ˈɔ ːɹ",
466
+ "f ɹ",
467
+ "æ z",
468
+ "ˌʌ t",
469
+ "ʃ iː",
470
+ "ˈɛ l",
471
+ "ˌ aʊ",
472
+ "ˈʌ n",
473
+ "ə s",
474
+ "h ɜː",
475
+ "l ˈaɪ",
476
+ "ˈæ n",
477
+ "ˈɪ ɹ",
478
+ "ʊ d",
479
+ "ɹ ᵻ",
480
+ "l d",
481
+ "b ˌʌt",
482
+ "k s",
483
+ "n ˈoʊ",
484
+ "hæ d",
485
+ "ɾ ɚ",
486
+ "ɛ ɹ",
487
+ "ˈɪ ŋ",
488
+ "ɡ ɹ",
489
+ "n ˌɑː",
490
+ "ɔ n",
491
+ "v ɚ",
492
+ "m aɪ",
493
+ "f ɔːɹ",
494
+ "ð ɚ",
495
+ "t ʊ",
496
+ "ð ɛɹ",
497
+ "nˌɑː t",
498
+ "ˈʌ m",
499
+ "t ɹ",
500
+ "s ˈiː",
501
+ "ʌv ðə",
502
+ "m ˈɪ",
503
+ "h ˈæ",
504
+ "ˌɪ m",
505
+ "l ˈeɪ",
506
+ "ɪ k",
507
+ "s p",
508
+ "h ˌɪm",
509
+ "ɐ n",
510
+ "ð eɪ",
511
+ "l ˈɪ",
512
+ "ɾ i",
513
+ "l ˈɛ",
514
+ "b ɹ",
515
+ "k ɹ",
516
+ "l ˈæ",
517
+ "ˈɪ l",
518
+ "j ˈuː",
519
+ "ʌ m",
520
+ "mˌ iː",
521
+ "b ᵻ",
522
+ "w ˈʌn",
523
+ "ˌ ɪn",
524
+ "ˈɪ n",
525
+ "ˈoʊ n",
526
+ "sˈɛ d",
527
+ "b iː",
528
+ "ˈɛ d",
529
+ "ˈaɪ t",
530
+ "b aɪ",
531
+ "fɹ ʌm",
532
+ "ɪ s",
533
+ "ɚ z",
534
+ "ðɪ s",
535
+ "ən s",
536
+ "b əl",
537
+ "ɪ f",
538
+ "ɪn ðə",
539
+ "ə m",
540
+ "ᵻ z",
541
+ "ˌ uː",
542
+ "w ˈeɪ",
543
+ "f t",
544
+ "w iː",
545
+ "st ɹ",
546
+ "l ˈiː",
547
+ "iː z",
548
+ "p t",
549
+ "j ʊ",
550
+ "ɚ d",
551
+ "ˌ aɪ",
552
+ "k w",
553
+ "ˌ ɔn",
554
+ "ˈaɪ d",
555
+ "ɪ m",
556
+ "ˈʌ st",
557
+ "ˈoʊ ld",
558
+ "t s",
559
+ "ˌɪ tʃ",
560
+ "s ˌoʊ",
561
+ "d ˈɪ",
562
+ "ɑː ɹ",
563
+ "h ɐ",
564
+ "s ˈeɪ",
565
+ "ɾ ᵻd",
566
+ "w ˌɪtʃ"
567
+ ]
568
+ }
569
+ }