cisco-ai
/

mini-bart-g2p

Text2Text Generation

Grapheme-to-Phoneme

Inference Endpoints

Model card Files Files and versions Community

mini-bart-g2p / tokenizer.json

vrdn23's picture

Upload tokenizer.json (#2)

8e38e44 over 1 year ago

3.21 kB

	{
	"version": "1.0",
	"truncation": {
	"direction": "Right",
	"max_length": 128,
	"strategy": "LongestFirst",
	"stride": 0
	},
	"padding": null,
	"added_tokens": [
	{
	"id": 0,
	"content": "<s>",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 1,
	"content": "<pad>",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 2,
	"content": "</s>",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 3,
	"content": "<unk>",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 4,
	"content": "<mask>",
	"single_word": false,
	"lstrip": true,
	"rstrip": false,
	"normalized": false,
	"special": true
	}
	],
	"normalizer": {
	"type": "Lowercase"
	},
	"pre_tokenizer": {
	"type": "Split",
	"pattern": {
	"String": ""
	},
	"behavior": "Removed",
	"invert": false
	},
	"post_processor": {
	"type": "RobertaProcessing",
	"sep": [
	"</s>",
	2
	],
	"cls": [
	"<s>",
	0
	],
	"trim_offsets": true,
	"add_prefix_space": false
	},
	"decoder": null,
	"model": {
	"type": "WordLevel",
	"vocab": {
	"<s>": 0,
	"<pad>": 1,
	"</s>": 2,
	"<unk>": 3,
	"<mask>": 4,
	"e": 5,
	"a": 6,
	"s": 7,
	"i": 8,
	"r": 9,
	"n": 10,
	"AH0": 11,
	"o": 12,
	"N": 13,
	"t": 14,
	"l": 15,
	"S": 16,
	"L": 17,
	"T": 18,
	"R": 19,
	"K": 20,
	"c": 21,
	"d": 22,
	"D": 23,
	"u": 24,
	"IH0": 25,
	"m": 26,
	"M": 27,
	"Z": 28,
	"h": 29,
	"g": 30,
	"p": 31,
	"ER0": 32,
	"IY0": 33,
	"b": 34,
	"B": 35,
	"P": 36,
	"EH1": 37,
	"AE1": 38,
	"AA1": 39,
	"y": 40,
	"k": 41,
	"IH1": 42,
	"F": 43,
	"f": 44,
	"G": 45,
	"w": 46,
	"V": 47,
	"v": 48,
	"NG": 49,
	"'": 50,
	"IY1": 51,
	"EY1": 52,
	"HH": 53,
	"W": 54,
	"SH": 55,
	"OW1": 56,
	"AO1": 57,
	"OW0": 58,
	"AH1": 59,
	"UW1": 60,
	"AY1": 61,
	"JH": 62,
	"z": 63,
	"CH": 64,
	"Y": 65,
	"AA0": 66,
	"ER1": 67,
	"EH2": 68,
	"IH2": 69,
	"TH": 70,
	"AY2": 71,
	"AE2": 72,
	"EY2": 73,
	"AA2": 74,
	"EH0": 75,
	"j": 76,
	"AW1": 77,
	"OW2": 78,
	"x": 79,
	"IY2": 80,
	"UW0": 81,
	"AO2": 82,
	"UH1": 83,
	"AE0": 84,
	"q": 85,
	"AO0": 86,
	"AH2": 87,
	"UW2": 88,
	"AY0": 89,
	"OY1": 90,
	"-": 91,
	"EY0": 92,
	"DH": 93,
	"AW2": 94,
	"ER2": 95,
	"ZH": 96,
	"UH2": 97,
	"AW0": 98,
	"UH0": 99,
	"OY2": 100,
	"OY0": 101,
	".": 102
	},
	"unk_token": "<unk>"
	}
	}