ndeclarke
/

wav2vec2-mms-1b-CV17.0-training_set_variations

@@ -1,20 +1,20 @@
 ---
-library_name: transformers
-license: cc-by-nc-4.0
 base_model: facebook/mms-1b-all
-tags:
-- generated_from_trainer
 datasets:
 - common_voice_17_0
 metrics:
 - wer
 - bleu
 model-index:
 - name: wav2vec2-mms-1b-CV17.0-training_set_variations
   results:
   - task:
-      name: Automatic Speech Recognition
       type: automatic-speech-recognition
     dataset:
       name: common_voice_17_0
       type: common_voice_17_0
@@ -22,12 +22,12 @@ model-index:
       split: validation
       args: ta
     metrics:
-    - name: Wer
-      type: wer
       value: 0.9963132675846669
-    - name: Bleu
-      type: bleu
       value: 0.0
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You

 ---
 base_model: facebook/mms-1b-all
 datasets:
 - common_voice_17_0
+library_name: transformers
+license: cc-by-nc-4.0
 metrics:
 - wer
 - bleu
+tags:
+- generated_from_trainer
 model-index:
 - name: wav2vec2-mms-1b-CV17.0-training_set_variations
   results:
   - task:
       type: automatic-speech-recognition
+      name: Automatic Speech Recognition
     dataset:
       name: common_voice_17_0
       type: common_voice_17_0
       split: validation
       args: ta
     metrics:
+    - type: wer
       value: 0.9963132675846669
+      name: Wer
+    - type: bleu
       value: 0.0
+      name: Bleu
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You

tokenizer_config.json CHANGED Viewed

@@ -39,9 +39,8 @@
   "eos_token": "</s>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
-  "processor_class": "Wav2Vec2Processor",
   "replace_word_delimiter_char": " ",
-  "target_lang": "tam-512",
   "tokenizer_class": "Wav2Vec2CTCTokenizer",
   "unk_token": "[UNK]",
   "word_delimiter_token": "|"

   "eos_token": "</s>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
   "replace_word_delimiter_char": " ",
+  "target_lang": "tam-2048",
   "tokenizer_class": "Wav2Vec2CTCTokenizer",
   "unk_token": "[UNK]",
   "word_delimiter_token": "|"

vocab.json CHANGED Viewed

@@ -55,6 +55,62 @@
     "ௗ": 50,
     "ഥ": 51
   },
   "tam-32": {
     "&": 1,
     "[PAD]": 53,

     "ௗ": 50,
     "ഥ": 51
   },
+  "tam-2048": {
+    "&": 1,
+    "[PAD]": 53,
+    "[UNK]": 52,
+    "_": 2,
+    "|": 0,
+    "ஃ": 3,
+    "அ": 4,
+    "ஆ": 5,
+    "இ": 6,
+    "ஈ": 7,
+    "உ": 8,
+    "ஊ": 9,
+    "எ": 10,
+    "ஏ": 11,
+    "ஐ": 12,
+    "ஒ": 13,
+    "ஓ": 14,
+    "ஔ": 15,
+    "க": 16,
+    "ங": 17,
+    "ச": 18,
+    "ஜ": 19,
+    "ஞ": 20,
+    "ட": 21,
+    "ண": 22,
+    "த": 23,
+    "ந": 24,
+    "ன": 25,
+    "ப": 26,
+    "ம": 27,
+    "ய": 28,
+    "ர": 29,
+    "ற": 30,
+    "ல": 31,
+    "ள": 32,
+    "ழ": 33,
+    "வ": 34,
+    "ஷ": 35,
+    "ஸ": 36,
+    "ஹ": 37,
+    "ா": 38,
+    "ி": 39,
+    "ீ": 40,
+    "ு": 41,
+    "ூ": 42,
+    "ெ": 43,
+    "ே": 44,
+    "ை": 45,
+    "ொ": 46,
+    "ோ": 47,
+    "ௌ": 48,
+    "்": 49,
+    "ௗ": 50,
+    "ഥ": 51
+  },
   "tam-32": {
     "&": 1,
     "[PAD]": 53,