Pendrokar committed
Commit 7f54c68 • 1 Parent(s): 7decaa4

requirements: git h2parser
Files changed (29):
  1. requirements.txt +1 -0
  2. resources/app/python/xvapitch/text/h2p_parser/__init__.py +0 -22
  3. resources/app/python/xvapitch/text/h2p_parser/__main__.py +0 -185
  4. resources/app/python/xvapitch/text/h2p_parser/cmudictext.py +0 -253
  5. resources/app/python/xvapitch/text/h2p_parser/compat/__init__.py +0 -7
  6. resources/app/python/xvapitch/text/h2p_parser/compat/cmudict.py +0 -19
  7. resources/app/python/xvapitch/text/h2p_parser/data/__init__.py +0 -0
  8. resources/app/python/xvapitch/text/h2p_parser/data/cmudict-0.7b.txt +0 -0
  9. resources/app/python/xvapitch/text/h2p_parser/data/cmudict.dict +0 -0
  10. resources/app/python/xvapitch/text/h2p_parser/data/dict.json +0 -1500
  11. resources/app/python/xvapitch/text/h2p_parser/data/example.json +0 -16
  12. resources/app/python/xvapitch/text/h2p_parser/dict_reader.py +0 -109
  13. resources/app/python/xvapitch/text/h2p_parser/dictionary.py +0 -85
  14. resources/app/python/xvapitch/text/h2p_parser/filter.py +0 -34
  15. resources/app/python/xvapitch/text/h2p_parser/format_ph.py +0 -99
  16. resources/app/python/xvapitch/text/h2p_parser/h2p.py +0 -123
  17. resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/PKG-INFO +0 -14
  18. resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/SOURCES.txt +0 -19
  19. resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/dependency_links.txt +0 -1
  20. resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/requires.txt +0 -2
  21. resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/top_level.txt +0 -1
  22. resources/app/python/xvapitch/text/h2p_parser/pos_parser.py +0 -17
  23. resources/app/python/xvapitch/text/h2p_parser/processors.py +0 -392
  24. resources/app/python/xvapitch/text/h2p_parser/symbols.py +0 -82
  25. resources/app/python/xvapitch/text/h2p_parser/text/__init__.py +0 -0
  26. resources/app/python/xvapitch/text/h2p_parser/text/numbers.py +0 -166
  27. resources/app/python/xvapitch/text/h2p_parser/utils/__init__.py +0 -0
  28. resources/app/python/xvapitch/text/h2p_parser/utils/converter.py +0 -79
  29. resources/app/python/xvapitch/text/h2p_parser/utils/parser.py +0 -133
requirements.txt CHANGED
@@ -73,3 +73,4 @@ webrtcvad==2.0.10
 wheel==0.36.2
 wrapt==1.14.1
 zipp==3.4.0
+git+https://github.com/ionite34/h2p-parser
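
This commit swaps the vendored copy of h2p_parser (deleted below) for the git dependency added above. A minimal consumption sketch, assuming the git-hosted package keeps the module layout of the deleted files:

# Sketch: after installing requirements.txt, which now pulls
# git+https://github.com/ionite34/h2p-parser, imports resolve to the
# installed package instead of the deleted vendored tree.
from h2p_parser import __version__, DATA_PATH  # package root, per __init__.py below
from h2p_parser.cmudictext import CMUDictExt   # extended G2P entry point

print(__version__)  # "1.0.0" in the deleted copy; the git version may differ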
resources/app/python/xvapitch/text/h2p_parser/__init__.py DELETED
@@ -1,22 +0,0 @@
-"""
-h2p_parser
-
-Heteronym to Phoneme Parser
-
-"""
-
-import sys
-
-if sys.version_info < (3, 9):
-    # In Python versions below 3.9, this is needed
-    from importlib_resources import files
-else:
-    # Since python 3.9+, importlib.resources.files is built-in
-    from importlib.resources import files
-
-__version__ = "1.0.0"
-
-# Data module
-DATA_PATH = files(__name__ + '.data')
-# Iterable collection of all files in data.
-DATA_FILES = DATA_PATH.iterdir()
resources/app/python/xvapitch/text/h2p_parser/__main__.py DELETED
@@ -1,185 +0,0 @@
-from collections import Counter
-
-from InquirerPy import inquirer
-from InquirerPy.utils import patched_print, color_print
-from InquirerPy.base.control import Choice
-from InquirerPy.validator import PathValidator
-from h2p_parser.utils import converter
-from h2p_parser.utils import parser
-
-
-def convert_h2p(input_file, output_file, delimiter):
-    """
-    Converts a h2p dictionary file from one format to another.
-    """
-    converter.bin_delim_to_json(input_file, output_file, delimiter)
-    print('Converted h2p_dict to json.')
-
-
-def prompt_action() -> str:
-    action = inquirer.select(
-        message='Select action:',
-        choices=[
-            "Convert",
-            "Parse",
-            Choice(value=None, name='Exit')
-        ],
-        default=0,
-    ).execute()
-    if not action:
-        exit(0)
-    return action
-
-
-def prompt_f_input():
-    """
-    Prompts for input file.
-    """
-    return inquirer.filepath(
-        message='Select input file:',
-        validate=PathValidator(is_file=True, message='Input must be a file.')
-    ).execute()
-
-
-def prompt_f_output():
-    """
-    Prompts for output file.
-    """
-    return inquirer.filepath(
-        message='Select output file:',
-        validate=PathValidator(is_file=True, message='Output must be a file.')
-    ).execute()
-
-
-def action_convert():
-    """
-    Converts a h2p dictionary file from one format to another.
-    """
-    # Select input file
-    input_file = prompt_f_input()
-    if not input_file:
-        return
-
-    # Select output file
-    output_file = prompt_f_output()
-    if not output_file:
-        return
-
-    # Ask for delimiter
-    delimiter = inquirer.text(
-        message='Enter delimiter:',
-        default='|'
-    ).execute()
-    if not delimiter:
-        return
-
-    # Run Process
-    convert_h2p(input_file, output_file, delimiter)
-
-
-def action_parse_file():
-    """
-    Parses a metadata.csv file and checks for dictionary coverage
-    :return:
-    """
-    # Select input file
-    input_file = prompt_f_input()
-    if not input_file:
-        return
-
-    # Ask for delimiter
-    delimiter = inquirer.text(
-        message='Enter delimiter:',
-        default='|'
-    ).execute()
-    if not delimiter:
-        return
-
-    # Run Process
-    result = parser.check_lines(parser.read_file(input_file, delimiter))
-
-    # Print results
-    color_print([("#e5c07b", "Unresolved Words")])
-    color_print([("#d21205", "[All]: "),
-                 ("#ffffff", f"{len(result.unres_all_words)}/{len(result.all_words)}")])
-    color_print([("#7e3b41", "[Unique]: "),
-                 ("#ffffff", f"{len(result.unres_words)}/{len(result.words)}")])
-
-    color_print([("#4ce5c8", "-" * 10)])
-
-    color_print([("#e5c07b", "Unresolved Lines")])
-    color_print([("#d21205", "[All]: "),
-                 ("#ffffff", f"{len(result.unres_all_lines)}/{len(result.all_lines)}")])
-    color_print([("#7e3b41", "[Unique]: "),
-                 ("#ffffff", f"{len(result.unres_lines)}/{len(result.lines)}")])
-
-    color_print([("#4ce5c8", "-" * 10)])
-
-    color_print([("#e5c07b", "Expected Coverage")])
-    color_print([("#d21205", "[Lines]: "),
-                 ("#ffffff", f"{result.line_coverage()}%")])
-    color_print([("#7e3b41", "[Words]: "),
-                 ("#ffffff", f"{result.word_coverage()}%")])
-
-    color_print([("#4ce5c8", "-" * 10)])
-
-    color_print([("#e5c07b", "H2p parser")])
-    color_print([("#d21205", "[Lines with Heteronyms]: "),
-                 ("#ffffff", f"{len(result.all_lines_cont_het)}/{len(result.all_lines)}"
-                             f" | {result.percent_line_het()}%")])
-    color_print([("#7e3b41", "[Words Resolved by H2p]: "),
-                 ("#ffffff", f"{result.n_words_het}/{result.n_words_res}"
-                             f" | {result.percent_word_h2p()}%")])
-    # Calcs
-    feature_res = result.n_words_fet
-    feature_percent = round(feature_res / result.n_words_res * 100, 2)
-    cmu_res = result.n_words_cmu
-    cmu_percent = round(cmu_res / result.n_words_res * 100, 2)
-    color_print([("#c8bd20", "[Transformed Resolves]: "),
-                 ("#ffffff", f"{feature_res}/{result.n_words_res}"
-                             f" | {feature_percent}%")])
-    color_print([("#25a0c8", "[Words in CMUDict]: "),
-                 ("#ffffff", f"{cmu_res}/{result.n_words_res}"
-                             f" | {cmu_percent}%")])
-
-    color_print([("#4ce5c8", "-" * 10)])
-
-    color_print([("#e5c07b", "Feature Usage")])
-
-    # Loop through feature results
-    for ft in result.ft_stats:
-        color_print([("#d21205", f"{ft}: "),
-                     ("#ffffff", f"{result.ft_stats[ft]}/{result.n_words_res}"
-                                 f" | {round(result.ft_stats[ft]/result.n_words_res*100, 2)}%")])
-
-    color_print([("#4ce5c8", "-" * 10)])
-
-    # Print 100 sampled unresolved words by frequency
-    color_print([("#e5c07b", "Top 100 most frequent unresolved words")])
-    # Count frequency of words
-    word_freq = Counter(result.unres_all_words)
-    # Sort by frequency
-    word_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
-    # Print top 100
-    for word, freq in word_freq[:100]:
-        color_print([("#d21205", f"{word}: "),
-                     ("#ffffff", f"{freq}")])
-
-
-def entry():
-    """
-    Prints help information.
-    """
-    # Select action type
-    action = prompt_action()
-    if action == 'Convert':
-        action_convert()
-    elif action == 'Parse':
-        action_parse_file()
-
-
-if __name__ == "__main__":
-    entry()
-
-
-
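
The deleted CLI above wraps two operations; a non-interactive sketch of the same calls, assuming the git-hosted package keeps these module paths (file names are illustrative):

from h2p_parser.utils import converter, parser

# Convert a delimiter-separated h2p dictionary to JSON, as convert_h2p() does.
converter.bin_delim_to_json('heteronyms.txt', 'dict.json', '|')  # example paths

# Check dictionary coverage of a metadata file, as action_parse_file() does.
result = parser.check_lines(parser.read_file('metadata.csv', '|'))  # example path
print(f"lines: {result.line_coverage()}% | words: {result.word_coverage()}%")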
resources/app/python/xvapitch/text/h2p_parser/cmudictext.py DELETED
@@ -1,253 +0,0 @@
-# Extended Grapheme to Phoneme conversion using CMU Dictionary and Heteronym parsing.
-from __future__ import annotations
-
-import re
-from typing import Optional
-
-import pywordsegment
-import nltk
-from nltk.stem import WordNetLemmatizer
-from nltk.stem.snowball import SnowballStemmer
-from .h2p import H2p
-from .h2p import replace_first
-from . import format_ph as ph
-from .dict_reader import DictReader
-from .text.numbers import normalize_numbers
-from .filter import filter_text
-from .processors import Processor
-from copy import deepcopy
-
-re_digit = re.compile(r"\((\d+)\)")
-re_bracket_with_digit = re.compile(r"\(.*\)")
-
-# Check that the nltk data is downloaded, if not, download it
-try:
-    nltk.data.find('corpora/wordnet.zip')
-    nltk.data.find('corpora/omw-1.4.zip')
-except LookupError:
-    nltk.download('wordnet')
-    nltk.download('omw-1.4')
-
-
-class CMUDictExt:
-    def __init__(self, cmu_dict_path: str = None, h2p_dict_path: str = None, cmu_multi_mode: int = 0,
-                 process_numbers: bool = True, phoneme_brackets: bool = True, unresolved_mode: str = 'keep'):
-        # noinspection GrazieInspection
-        """
-        Initialize CMUDictExt - Extended Grapheme to Phoneme conversion using CMU Dictionary with Heteronym parsing.
-
-        CMU multi-entry resolution modes:
-            - -2 : Raw entry (i.e. 'A' resolves to 'AH0' and 'A(1)' to 'EY1')
-            - -1 : Skip resolving any entry with multiple pronunciations.
-            - 0 : Resolve using default un-numbered pronunciation.
-            - 1 : Resolve using (1) numbered pronunciation.
-            - n : Resolve using (n) numbered pronunciation.
-            - If a higher number is specified than available for the word, the highest available number is used.
-
-        Unresolved word resolution modes:
-            - keep : Keep the text-form word in the output.
-            - remove : Remove the text-form word from the output.
-            - drop : Return the line as None if any word is unresolved.
-
-        :param cmu_dict_path: Path to CMU dictionary file (.txt)
-        :type: str
-        :param h2p_dict_path: Path to Custom H2p dictionary (.json)
-        :type: str
-        :param cmu_multi_mode: CMU resolution mode for entries with multiple pronunciations.
-        :type: int
-        """
-
-        # Check valid unresolved_mode argument
-        if unresolved_mode not in ['keep', 'remove', 'drop']:
-            raise ValueError('Invalid value for unresolved_mode: {}'.format(unresolved_mode))
-        self.unresolved_mode = unresolved_mode
-
-        self.cmu_dict_path = cmu_dict_path  # Path to CMU dictionary file (.txt), if None, uses built-in
-        self.h2p_dict_path = h2p_dict_path  # Path to Custom H2p dictionary (.json), if None, uses built-in
-        self.cmu_multi_mode = cmu_multi_mode  # CMU multi-entry resolution mode
-        self.process_numbers = process_numbers  # Normalize numbers to text form, if enabled
-        self.phoneme_brackets = phoneme_brackets  # If True, phonemes are wrapped in curly brackets.
-        self.dict = DictReader(self.cmu_dict_path).dict  # CMU Dictionary
-        self.h2p = H2p(self.h2p_dict_path, preload=True)  # H2p parser
-        self.lemmatize = WordNetLemmatizer().lemmatize  # WordNet Lemmatizer - used to find singular form
-        self.stem = SnowballStemmer('english').stem  # Snowball Stemmer - used to find stem root of words
-        self.segment = pywordsegment.WordSegmenter().segment  # Word Segmenter
-        self.p = Processor(self)  # Processor for processing text
-
-        # Features
-        # Auto pluralization and de-pluralization
-        self.ft_auto_plural = True
-        # Auto splits and infers possessive forms of original words
-        self.ft_auto_pos = True
-        # Auto splits 'll
-        self.ft_auto_ll = True
-        # Auto splits and infers hyphenated words
-        self.ft_auto_hyphenated = True
-        # Auto splits possible compound words
-        self.ft_auto_compound = True
-        # Analyzes word root stem and infers pronunciation separately
-        # i.e. 'generously' -> 'generous' + 'ly'
-        self.ft_stem = True
-        # Forces compound words using manual lookup
-        self.ft_auto_compound_l2 = True
-
-    def lookup(self, text: str, pos: str = None, ph_format: str = 'sds') -> str | list | None:
-        # noinspection GrazieInspection
-        """
-        Gets the CMU Dictionary entry for a word.
-
-        Options for ph_format:
-
-        - 'sds' space delimited string
-        - 'sds_b' space delimited string with curly brackets
-        - 'list' list of phoneme strings
-
-        :param pos: Part of speech tag (Optional)
-        :param ph_format: Format of the phonemes to return:
-        :type: str
-        :param text: Word to lookup
-        :type: str
-        """
-
-        def format_as(in_phoneme):
-            if ph_format == 'sds':
-                output = ph.to_sds(in_phoneme)
-            elif ph_format == 'sds_b':
-                output = ph.with_cb(ph.to_sds(in_phoneme))
-            elif ph_format == 'list':
-                output = ph.to_list(in_phoneme)
-            else:
-                raise ValueError('Invalid value for ph_format: {}'.format(ph_format))
-            return output
-
-        # Get the CMU Dictionary entry for the word
-        word = text.lower()
-        entry = deepcopy(self.dict.get(word))  # Ensure safe copy of entry
-
-        # Has entry, return it directly
-        if entry is not None:
-            return format_as(entry)
-
-        # Auto Possessive Processor
-        if self.ft_auto_pos:
-            res = self.p.auto_possessives(word)
-            if res is not None:
-                return format_as(res)
-
-        # Auto Contractions for "ll" or "d"
-        if self.ft_auto_ll:
-            res = self.p.auto_contractions(word)
-            if res is not None:
-                return format_as(res)
-
-        # Check for hyphenated words
-        if self.ft_auto_hyphenated:
-            res = self.p.auto_hyphenated(word)
-            if res is not None:
-                return format_as(res)
-
-        # Check for compound words
-        if self.ft_auto_compound:
-            res = self.p.auto_compound(word)
-            if res is not None:
-                return format_as(res)
-
-        # No entry, detect if this is a multi-word entry
-        if '(' in word and ')' in word and any(char.isdigit() for char in word):
-            # Parse the integer from the word using regex
-            num = int(re.findall(re_digit, word)[0])
-            # If found
-            if num is not None:
-                # Remove the integer and bracket from the word
-                actual_word = re.sub(re_bracket_with_digit, "", word)
-                # See if this is a valid entry
-                result = deepcopy(self.dict.get(actual_word))  # Ensure safe copy of entry
-                # If found:
-                if result is not None:
-                    # Translate the integer to index
-                    index = min(num - 1, 0)
-                    # Check if index is less than the number of pronunciations
-                    if index < len(result):
-                        # Return the entry using the provided num index
-                        return format_as(result[index])
-                    # If entry is higher
-                    else:
-                        # Return the highest available entry
-                        return format_as(result[-1])
-
-        # Auto de-pluralization
-        # This is placed near the end because we need to do a pos-tag process
-        if self.ft_auto_plural:
-            res = self.p.auto_plural(word, pos)
-            if res is not None:
-                return format_as(res)
-
-        # Stem check
-        # noinspection SpellCheckingInspection
-        """
-        Supported modes for words ending in:
-        "ing", "ingly", "ly"
-        """
-        if self.ft_stem:
-            res = self.p.auto_stem(word)
-            if res is not None:
-                return format_as(res)
-
-        # Force compounding
-        if self.ft_auto_compound_l2:
-            res = self.p.auto_compound_l2(word)
-            if res is not None:
-                return format_as(res)
-
-        # If not found
-        return None
-
-    def convert(self, text: str) -> str | None:
-        # noinspection GrazieInspection
-        """
-        Replace a grapheme text line with phonemes.
-
-        :param text: Text line to be converted
-        :type: str
-        """
-
-        # Check valid unresolved_mode argument
-        if self.unresolved_mode not in ['keep', 'remove', 'drop']:
-            raise ValueError('Invalid value for unresolved_mode: {}'.format(self.unresolved_mode))
-        ur_mode = self.unresolved_mode
-
-        # Normalize numbers, if enabled
-        if self.process_numbers:
-            text = normalize_numbers(text)
-        # Filter and Tokenize
-        f_text = filter_text(text, preserve_case=True)
-        words = self.h2p.tokenize(f_text)
-        # Run POS tagging
-        tags = self.h2p.get_tags(words)
-
-        # Loop through words and pos tags
-        for word, pos in tags:
-            # Skip punctuation
-            if word == '.':
-                continue
-            # If word not in h2p dict, check CMU dict
-            if not self.h2p.dict.contains(word):
-                entry = self.lookup(word, pos)
-                if entry is None:
-                    if ur_mode == 'drop':
-                        return None
-                    if ur_mode == 'remove':
-                        text = replace_first(word, '', text)
-                    continue
-                # Do replace
-                f_ph = ph.with_cb(ph.to_sds(entry))
-                text = replace_first(word, f_ph, text)
-                continue
-            # For word in h2p dict, get phonemes
-            phonemes = self.h2p.dict.get_phoneme(word, pos)
-            # Format phonemes
-            f_ph = ph.with_cb(ph.to_sds(phonemes))
-            # Replace word with phonemes
-            text = replace_first(word, f_ph, text)
-        # Return text
-        return text
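
A short usage sketch matching the signatures above (None paths select the built-in dictionaries per the attribute comments; printed outputs are illustrative, not verified):

from h2p_parser.cmudictext import CMUDictExt

ext = CMUDictExt(unresolved_mode='keep')  # keep unresolved words as plain text
# Single-word lookup; 'sds_b' wraps the phonemes in curly brackets.
print(ext.lookup('cat', ph_format='sds_b'))  # e.g. '{K AE1 T}'
# Full-line conversion, resolving heteronyms via the H2p dictionary.
print(ext.convert('I read the book yesterday.'))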
resources/app/python/xvapitch/text/h2p_parser/compat/__init__.py DELETED
@@ -1,7 +0,0 @@
-"""
-Compatibility module.
-
-This module contains compatibility wrappers for existing
-implementations of CMUDict and other dictionaries.
-
-"""
resources/app/python/xvapitch/text/h2p_parser/compat/cmudict.py DELETED
@@ -1,19 +0,0 @@
-# Compatibility layer for using CMUDictExt with CMUDict-like API calls.
-# Designed to be compatible with the implementation of CMUDict in:
-# https://github.com/NVIDIA/DeepLearningExamples/
-#
-# Example usage:
-# from h2p_parser.compat.cmudict import CMUDict
-
-from h2p_parser.cmudictext import CMUDictExt
-
-
-class CMUDict(CMUDictExt):
-    def __init__(self, file_or_path=None, heteronyms_path=None, keep_ambiguous=True):
-        # Parameter Mapping:
-        # file_or_path => Mapped to cmu_dict_path
-        # heteronyms_path => Dropped as CMUDictExt uses H2p for heteronym parsing.
-        # keep_ambiguous => Mapped to cmu_multi_mode | True => -2, False => -1
-        super().__init__(file_or_path, heteronyms_path)
-        self._entries = {}
-        self.heteronyms = []
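
For reference, a hedged sketch of the NVIDIA-style call shape this shim was built for, per its parameter-mapping comments (illustrative only):

from h2p_parser.compat.cmudict import CMUDict

# Same constructor shape as CMUDict in NVIDIA/DeepLearningExamples;
# None selects the built-in CMU dictionary.
cmudict = CMUDict(file_or_path=None, keep_ambiguous=True)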
resources/app/python/xvapitch/text/h2p_parser/data/__init__.py DELETED
File without changes
resources/app/python/xvapitch/text/h2p_parser/data/cmudict-0.7b.txt DELETED
The diff for this file is too large to render. See raw diff
 
resources/app/python/xvapitch/text/h2p_parser/data/cmudict.dict DELETED
The diff for this file is too large to render. See raw diff
 
resources/app/python/xvapitch/text/h2p_parser/data/dict.json DELETED
@@ -1,1500 +0,0 @@
-{
-  "absent": {"DEFAULT": "AE1 B S AH0 N T", "VERB": "AH1 B S AE1 N T"},
-  "abstract": {"DEFAULT": "AE1 B S T R AE2 K T", "VERB": "AE0 B S T R AE1 K T"},
-  "abstracts": {"DEFAULT": "AE1 B S T R AE0 K T S", "VERB": "AE0 B S T R AE1 K T S"},
-  "abuse": {"DEFAULT": "AH0 B Y UW1 S", "VERB": "AH0 B Y UW1 Z"},
-  "abuses": {"DEFAULT": "AH0 B Y UW1 S IH0 Z", "VERB": "AH0 B Y UW1 Z IH0 Z"},
-  "accent": {"DEFAULT": "AE1 K S EH2 N T", "VERB": "AH0 K S EH1 N T"},
-  "accents": {"DEFAULT": "AE1 K S EH0 N T S", "VERB": "AE1 K S EH0 N T S"},
-  "addict": {"DEFAULT": "AE1 D IH2 K T", "VERB": "AH0 D IH1 K T"},
-  "addicts": {"DEFAULT": "AE1 D IH2 K T S", "VERB": "AH0 D IH1 K T S"},
-  "advocate": {"DEFAULT": "AE1 D V AH0 K AH0 T", "VERB": "AE1 D V AH0 K EY2 T"},
-  "advocates": {"DEFAULT": "AE1 D V AH0 K AH0 T S", "VERB": "AE1 D V AH0 K EY2 T S"},
-  "affect": {"DEFAULT": "AE1 F EH0 K T", "VERB": "AH0 F EH1 K T"},
-  "affects": {"DEFAULT": "AE1 F EH0 K T S", "VERB": "AH0 F EH1 K T S"},
-  "affix": {"DEFAULT": "AE1 F IH0 K S", "VERB": "AH0 F IH1 K S"},
-  "affixes": {"DEFAULT": "AE1 F IH0 K S IH0 Z", "VERB": "AH0 F IH1 K S IH0 Z"},
-  "agglomerate": {"DEFAULT": "AH0 G L AA1 M ER0 AH0 T", "VERB": "AH0 G L AA1 M ER0 EY2 T"},
-  "aggregate": {"DEFAULT": "AE1 G R AH0 G AH0 T", "VERB": "AE1 G R AH0 G EY0 T"},
-  "aggregates": {"DEFAULT": "AE1 G R AH0 G IH0 T S", "VERB": "AE1 G R AH0 G EY2 T S"},
-  "allies": {"DEFAULT": "AE1 L AY0 Z", "VERB": "AH0 L AY1 Z"},
-  "alloy": {"DEFAULT": "AE1 L OY2", "VERB": "AH0 L OY1"},
-  "alloys": {"DEFAULT": "AE1 L OY2 Z", "VERB": "AH0 L OY1 Z"},
-  "ally": {"DEFAULT": "AE1 L AY0", "VERB": "AH0 L AY1"},
-  "alternate": {"DEFAULT": "AO0 L T ER1 N AH0 T", "VERB": "AO1 L T ER0 N EY2 T"},
-  "analyses": {"DEFAULT": "AE1 N AH0 L AY0 Z IH2 Z", "VERB": "AH0 N AE1 L IH0 S IY2 Z"},
-  "animate": {"DEFAULT": "AE1 N AH0 M AH0 T", "VERB": "AE1 N AH0 M EY2 T"},
-  "annex": {"DEFAULT": "AE1 N EH2 K S", "VERB": "AH0 N EH1 K S"},
-  "annexes": {"DEFAULT": "AE1 N EH2 K S IH0 Z", "VERB": "AH0 N EH1 K S IH0 Z"},
-  "appropriate": {"DEFAULT": "AH0 P R OW1 P R IY0 AH0 T", "VERB": "AH0 P R OW1 P R IY0 EY2 T"},
-  "approximate": {"DEFAULT": "AH0 P R AA1 K S AH0 M AH0 T", "VERB": "AH0 P R AA1 K S AH0 M EY2 T"},
-  "articulate": {"DEFAULT": "AA0 R T IH1 K Y AH0 L EY2 T", "VERB": "AA0 R T IH1 K Y AH0 L AH0 T"},
-  "aspirate": {"DEFAULT": "AE1 S P ER0 AH0 T", "VERB": "AE1 S P ER0 EY2 T"},
-  "aspirates": {"DEFAULT": "AE1 S P ER0 AH0 T S", "VERB": "AE1 S P ER0 EY2 T S"},
-  "associate": {"DEFAULT": "AH0 S OW1 S IY0 AH0 T", "VERB": "AH0 S OW1 S IY0 EY2 T"},
-  "associates": {"DEFAULT": "AH0 S OW1 S IY0 AH0 T S", "VERB": "AH0 S OW1 S IY0 EY2 T S"},
-  "attribute": {"DEFAULT": "AE1 T R IH0 B Y UW0 T", "VERB": "AH0 T R IH1 B Y UW2 T"},
-  "attributes": {"DEFAULT": "AE1 T R IH0 B Y UW0 T S", "VERB": "AH0 T R IH1 B Y UW2 T S"},
-  "baths": {"DEFAULT": "B AE1 DH Z", "VERB": "B AE1 TH S"},
-  "blessed": {"DEFAULT": "B L EH1 S T", "VERB": "B L EH1 S IH0 D"},
-  "certificate": {"DEFAULT": "S ER0 T IH1 F IH0 K EY2 T", "VERB": "S ER0 T IH1 F IH0 K AH0 T"},
-  "certificates": {"DEFAULT": "S ER0 T IH1 F IH0 K AH0 T S", "VERB": "S ER0 T IH1 F IH0 K EY2 T S"},
-  "close": {"DEFAULT": "K L OW1 S", "VERB": "K L OW1 Z"},
-  "closer": {"DEFAULT": "K L OW1 S ER0", "NOUN": "K L OW1 Z ER0"},
-  "closes": {"DEFAULT": "K L OW1 S IH0 Z", "VERB": "K L OW1 Z IH0 Z"},
-  "collect": {"DEFAULT": "K AA1 L EH0 K T", "VERB": "K AH0 L EH1 K T"},
-  "collects": {"DEFAULT": "K AA1 L EH0 K T S", "VERB": "K AH0 L EH1 K T S"},
-  "combat": {"DEFAULT": "K AA1 M B AE0 T", "VERB": "K AH0 M B AE1 T"},
-  "combats": {"DEFAULT": "K AH1 M B AE0 T S", "VERB": "K AH0 M B AE1 T S"},
-  "combine": {"DEFAULT": "K AA1 M B AY0 N", "VERB": "K AH0 M B AY1 N"},
-  "commune": {"DEFAULT": "K AA1 M Y UW0 N", "VERB": "K AH0 M Y UW1 N"},
-  "communes": {"DEFAULT": "K AA1 M Y UW0 N Z", "VERB": "K AH0 M Y UW1 N Z"},
-  "compact": {"DEFAULT": "K AA1 M P AE0 K T", "VERB": "K AH0 M P AE1 K T"},
-  "compacts": {"DEFAULT": "K AA1 M P AE0 K T S", "VERB": "K AH0 M P AE1 K T S"},
-  "complex": {"ADJ": "K AH0 M P L EH1 K S", "DEFAULT": " K AA1 M P L EH0 K S"},
-  "compliment": {"DEFAULT": "K AA1 M P L AH0 M AH0 N T", "VERB": "K AA1 M P L AH0 M EH0 N T"},
-  "compliments": {"DEFAULT": "K AA1 M P L AH0 M AH0 N T S", "VERB": "K AA1 M P L AH0 M EH0 N T S"},
-  "compound": {"DEFAULT": "K AA1 M P AW0 N D", "VERB": "K AH0 M P AW1 N D"},
-  "compounds": {"DEFAULT": "K AA1 M P AW0 N D Z", "VERB": "K AH0 M P AW1 N D Z"},
-  "compress": {"DEFAULT": "K AA1 M P R EH0 S", "VERB": "K AH0 M P R EH1 S"},
-  "compresses": {"DEFAULT": "K AA1 M P R EH0 S AH0 Z", "VERB": "K AH0 M P R EH1 S IH0 Z"},
-  "concert": {"DEFAULT": "K AA1 N S ER0 T", "VERB": "K AH0 N S ER1 T"},
-  "concerts": {"DEFAULT": "K AA1 N S ER0 T S", "VERB": "K AH0 N S ER1 T S"},
-  "conduct": {"DEFAULT": "K AA1 N D AH0 K T", "VERB": "K AA0 N D AH1 K T"},
-  "confederate": {"DEFAULT": "K AH0 N F EH1 D ER0 AH0 T", "VERB": "K AH0 N F EH1 D ER0 EY2 T"},
-  "confederates": {"DEFAULT": "K AH0 N F EH1 D ER0 AH0 T S", "VERB": "K AH0 N F EH1 D ER0 EY2 T S"},
-  "confines": {"DEFAULT": "K AA1 N F AY2 N Z", "VERB": "K AH0 N F AY1 N Z"},
-  "conflict": {"DEFAULT": "K AA1 N F L IH0 K T", "VERB": "K AH0 N F L IH1 K T"},
-  "conflicts": {"DEFAULT": "K AA1 N F L IH0 K T S", "VERB": "K AH0 N F L IH1 K T S"},
-  "conglomerate": {"DEFAULT": "K AH0 N G L AA1 M ER0 AH0 T", "VERB": "K AH0 N G L AA1 M ER0 EY2 T"},
-  "conglomerates": {"DEFAULT": "K AH0 N G L AA1 M ER0 AH0 T S", "VERB": "K AH0 N G L AA1 M ER0 EY2 T S"},
-  "conscript": {"DEFAULT": "K AA1 N S K R IH0 P T", "VERB": "K AH0 N S K R IH1 P T"},
-  "conscripts": {"DEFAULT": "K AA1 N S K R IH0 P T S", "VERB": "K AH0 N S K R IH1 P T S"},
-  "console": {"DEFAULT": "K AA1 N S OW0 L", "VERB": "K AH0 N S OW1 L"},
-  "consoles": {"DEFAULT": "K AA1 N S OW0 L Z", "VERB": "K AH0 N S OW1 L Z"},
-  "consort": {"DEFAULT": "K AA1 N S AO0 R T", "VERB": "K AH0 N S AO1 R T"},
-  "construct": {"DEFAULT": "K AA1 N S T R AH0 K T", "VERB": "K AH0 N S T R AH1 K T"},
-  "constructs": {"DEFAULT": "K AA1 N S T R AH0 K T S", "VERB": "K AH0 N S T R AH1 K T S"},
-  "consummate": {"DEFAULT": "K AA0 N S AH1 M AH0 T", "VERB": "K AA1 N S AH0 M EY2 T"},
-  "content": {"DEFAULT": "K AH0 N T EH1 N T", "NOUN": "K AA1 N T EH0 N T"},
-  "contents": {"DEFAULT": "K AA1 N T EH0 N T S", "VERB": "K AH0 N T EH1 N T S"},
-  "contest": {"DEFAULT": "K AA1 N T EH0 S T", "VERB": "K AH0 N T EH1 S T"},
-  "contests": {"DEFAULT": "K AA1 N T EH0 S T S", "VERB": "K AH0 N T EH1 S T S"},
-  "contract": {"DEFAULT": "K AA1 N T R AE2 K T", "VERB": "K AH0 N T R AE1 K T"},
-  "contracts": {"DEFAULT": "K AA1 N T R AE2 K T S", "VERB": "K AH0 N T R AE1 K T S"},
-  "contrast": {"DEFAULT": "K AA1 N T R AE0 S T", "VERB": "K AH0 N T R AE1 S T"},
-  "contrasts": {"DEFAULT": "K AA1 N T R AE0 S T S", "VERB": "K AH0 N T R AE1 S T S"},
-  "converse": {"DEFAULT": "K AA1 N V ER0 S", "VERB": "K AH0 N V ER1 S"},
-  "convert": {"DEFAULT": "K AA1 N V ER0 T", "VERB": "K AH0 N V ER1 T"},
-  "converts": {"DEFAULT": "K AA1 N V ER0 T S", "VERB": "K AH0 N V ER1 T S"},
-  "convict": {"DEFAULT": "K AA1 N V IH0 K T", "VERB": "K AH0 N V IH1 K T"},
-  "convicts": {"DEFAULT": "K AA1 N V IH0 K T S", "VERB": "K AH0 N V IH1 K T S"},
-  "coordinate": {"DEFAULT": "K OW0 AO1 R D AH0 N AH0 T", "VERB": "K OW0 AO1 R D AH0 N EY2 T"},
-  "coordinates": {"DEFAULT": "K OW0 AO1 R D AH0 N AH0 T S", "VERB": "K OW0 AO1 R D AH0 N EY2 T S"},
-  "counterbalance": {"DEFAULT": "K AW2 N T ER0 B AE1 L AH0 N S", "VERB": "K AW1 N T ER0 B AE2 L AH0 N S"},
-  "counterbalances": {"DEFAULT": "K AW1 N T ER0 B AE2 L AH0 N S IH0 Z", "VERB": "K AW2 N T ER0 B AE1 L AH0 N S IH0 Z"},
-  "crabbed": {"DEFAULT": "K R AE1 B IH0 D", "VERB": "K R AE1 B D"},
-  "crooked": {"DEFAULT": "K R UH1 K AH0 D", "VERB": "K R UH1 K T"},
-  "curate": {"DEFAULT": "K Y UH1 R AH0 T", "VERB": "K Y UH0 R AH1 T"},
-  "cursed": {"DEFAULT": "K ER1 S IH0 D", "VERB": "K ER1 S T"},
-  "decoy": {"DEFAULT": "D IY1 K OY0", "VERB": "D IY0 K OY1"},
-  "decoys": {"DEFAULT": "D IY1 K OY0 Z", "VERB": "D IY0 K OY1 Z"},
-  "decrease": {"DEFAULT": "D IY1 K R IY2 S", "VERB": "D IH0 K R IY1 S"},
-  "decreases": {"DEFAULT": "D IY1 K R IY2 S IH0 Z", "VERB": "D IH0 K R IY1 S IH0 Z"},
-  "defect": {"DEFAULT": "D IY1 F EH0 K T", "VERB": "D IH0 F EH1 K T"},
-  "defects": {"DEFAULT": "D IY1 F EH0 K T S", "VERB": "D IH0 F EH1 K T S"},
-  "degenerate": {"DEFAULT": "D IH0 JH EH1 N ER0 AH0 T", "VERB": "D IH0 JH EH1 N ER0 EY2 T"},
-  "degenerates": {"DEFAULT": "D IH0 JH EH1 N ER0 AH0 T S", "VERB": "D IH0 JH EH1 N ER0 EY2 T S"},
-  "delegate": {"DEFAULT": "D EH1 L AH0 G AH0 T", "VERB": "D EH1 L AH0 G EY2 T"},
-  "delegates": {"DEFAULT": "D EH1 L AH0 G AH0 T S", "VERB": "D EH1 L AH0 G EY2 T S"},
-  "deliberate": {"DEFAULT": "D IH0 L IH1 B ER0 AH0 T", "VERB": "D IH0 L IH1 B ER0 EY2 T"},
-  "desert": {"DEFAULT": "D EH1 Z ER0 T", "VERB": "D IH0 Z ER1 T"},
-  "deserts": {"DEFAULT": "D EH1 Z ER0 T S", "VERB": "D IH0 Z ER1 T S"},
-  "desolate": {"DEFAULT": "D EH1 S AH0 L AH0 T", "VERB": "D EH1 S AH0 L EY2 T"},
-  "diagnoses": {"DEFAULT": "D AY2 AH0 G N OW1 S IY0 Z", "VERB": "D AY1 AH0 G N OW2 Z IY0 Z"},
-  "dictate": {"DEFAULT": "D IH1 K T EY2 T", "VERB": "D IH0 K T EY1 T"},
-  "dictates": {"DEFAULT": "D IH1 K T EY2 T S", "VERB": "D IH0 K T EY1 T S"},
-  "diffuse": {"DEFAULT": "D IH0 F Y UW1 S", "VERB": "D IH0 F Y UW1 Z"},
-  "digest": {"DEFAULT": "D AY1 JH EH0 S T", "VERB": "D AY0 JH EH1 S T"},
-  "digests": {"DEFAULT": "D AY1 JH EH0 S T S", "VERB": "D AY2 JH EH1 S T S"},
-  "discard": {"DEFAULT": "D IH1 S K AA0 R D", "VERB": "D IH0 S K AA1 R D"},
-  "discards": {"DEFAULT": "D IH1 S K AA0 R D Z", "VERB": "D IH0 S K AA1 R D Z"},
-  "discharge": {"DEFAULT": "D IH1 S CH AA2 R JH", "VERB": "D IH0 S CH AA1 R JH"},
-  "discharges": {"DEFAULT": "D IH1 S CH AA2 R JH AH0 Z", "VERB": "D IH0 S CH AA1 R JH AH0 Z"},
-  "discount": {"DEFAULT": "D IH1 S K AW0 N T", "VERB": "D IH0 S K AW1 N T"},
-  "discounts": {"DEFAULT": "D IH1 S K AW2 N T S", "VERB": "D IH0 S K AW1 N T S"},
-  "discourse": {"DEFAULT": "D IH1 S K AO0 R S", "VERB": "D IH0 S K AO1 R S"},
-  "discourses": {"DEFAULT": "D IH1 S K AO0 R S IH0 Z", "VERB": "D IH0 S K AO1 R S IH0 Z"},
-  "document": {"DEFAULT": "D AA1 K Y AH0 M AH0 N T", "VERB": "D AA1 K Y UW0 M EH0 N T"},
-  "documents": {"DEFAULT": "D AA1 K Y AH0 M AH0 N T S", "VERB": "D AA1 K Y UW0 M EH0 N T S"},
-  "dogged": {"DEFAULT": "D AO1 G D", "VERB": "D AO1 G IH0 D"},
-  "duplicate": {"DEFAULT": "D UW1 P L AH0 K AH0 T", "VERB": "D UW1 P L AH0 K EY2 T"},
-  "duplicates": {"DEFAULT": "D UW1 P L AH0 K AH0 T S", "VERB": "D UW1 P L AH0 K EY2 T S"},
-  "ejaculate": {"DEFAULT": "IH0 JH AE1 K Y UW0 L AH0 T", "VERB": "IH0 JH AE1 K Y UW0 L EY2 T"},
-  "ejaculates": {"DEFAULT": "IH0 JH AE1 K Y UW0 L AH0 T S", "VERB": "IH0 JH AE1 K Y UW0 L EY2 T S"},
-  "elaborate": {"DEFAULT": "IH0 L AE1 B R AH0 T", "VERB": "IH0 L AE1 B ER0 EY2 T"},
-  "entrance": {"DEFAULT": "EH1 N T R AH0 N S", "VERB": "IH0 N T R AH1 N S"},
-  "entrances": {"DEFAULT": "EH1 N T R AH0 N S AH0 Z", "VERB": "IH0 N T R AH1 N S AH0 Z"},
-  "envelope": {"DEFAULT": "EH1 N V AH0 L OW2 P", "VERB": "IH0 N V EH1 L AH0 P"},
-  "envelopes": {"DEFAULT": "EH1 N V AH0 L OW2 P S", "VERB": "IH0 N V EH1 L AH0 P S"},
-  "escort": {"DEFAULT": "EH1 S K AO0 R T", "VERB": "EH0 S K AO1 R T"},
-  "escorts": {"DEFAULT": "EH1 S K AO0 R T S", "VERB": "EH0 S K AO1 R T S"},
-  "essay": {"DEFAULT": "EH1 S EY2", "VERB": "EH0 S EY1"},
-  "essays": {"DEFAULT": "EH1 S EY2 Z", "VERB": "EH0 S EY1 Z"},
-  "estimate": {"DEFAULT": "EH1 S T AH0 M AH0 T", "VERB": "EH1 S T AH0 M EY2 T"},
-  "estimates": {"DEFAULT": "EH1 S T AH0 M AH0 T S", "VERB": "EH1 S T AH0 M EY2 T S"},
-  "excess": {"DEFAULT": "EH1 K S EH2 S", "VERB": "IH0 K S EH1 S"},
-  "excise": {"DEFAULT": "EH1 K S AY0 Z", "VERB": "EH0 K S AY1 S"},
-  "excuse": {"DEFAULT": "IH0 K S K Y UW1 S", "VERB": "IH0 K S K Y UW1 Z"},
-  "excuses": {"DEFAULT": "IH0 K S K Y UW1 S IH0 Z", "VERB": "IH0 K S K Y UW1 Z IH0 Z"},
-  "expatriate": {"DEFAULT": "EH0 K S P EY1 T R IY0 AH0 T", "VERB": "EH0 K S P EY1 T R IY0 EY2 T"},
-  "expatriates": {"DEFAULT": "EH0 K S P EY1 T R IY0 AH0 T S", "VERB": "EH0 K S P EY1 T R IY0 EY2 T S"},
-  "exploit": {"DEFAULT": "EH2 K S P L OY1 T", "VERB": "EH1 K S P L OY2 T"},
-  "exploits": {"DEFAULT": "EH2 K S P L OY1 T S", "VERB": "EH1 K S P L OY2 T S"},
-  "export": {"DEFAULT": "EH1 K S P AO0 R T", "VERB": "IH0 K S P AO1 R T"},
-  "exports": {"DEFAULT": "EH1 K S P AO0 R T S", "VERB": "IH0 K S P AO1 R T S"},
-  "extract": {"DEFAULT": "EH1 K S T R AE2 K T", "VERB": "IH0 K S T R AE1 K T"},
-  "extracts": {"DEFAULT": "EH1 K S T R AE2 K T S", "VERB": "IH0 K S T R AE1 K T S"},
-  "ferment": {"DEFAULT": "F ER1 M EH0 N T", "VERB": "F ER0 M EH1 N T"},
-  "ferments": {"DEFAULT": "F ER1 M EH0 N T S", "VERB": "F ER0 M EH1 N T S"},
-  "fragment": {"DEFAULT": "F R AE0 G M EH1 N T", "VERB": "F R AE1 G M AH0 N T"},
-  "fragments": {"DEFAULT": "F R AE1 G M AH0 N T S", "VERB": "F R AE0 G M EH1 N T S"},
-  "frequent": {"DEFAULT": "F R IY1 K W AH0 N T", "VERB": "F R IY1 K W EH2 N T"},
-  "graduate": {"DEFAULT": "G R AE1 JH AH0 W AH0 T", "VERB": "G R AE1 JH AH0 W EY2 T"},
-  "graduates": {"DEFAULT": "G R AE1 JH AH0 W AH0 T S", "VERB": "G R AE1 JH AH0 W EY2 T S"},
-  "house": {"DEFAULT": "HH AW1 S", "VERB": "HH AW1 Z"},
-  "impact": {"DEFAULT": "IH1 M P AE0 K T", "VERB": "IH2 M P AE1 K T"},
-  "impacts": {"DEFAULT": "IH1 M P AE0 K T S", "VERB": "IH2 M P AE1 K T S"},
-  "implant": {"DEFAULT": "IH1 M P L AE2 N T", "VERB": "IH2 M P L AE1 N T"},
-  "implants": {"DEFAULT": "IH1 M P L AE2 N T S", "VERB": "IH2 M P L AE1 N T S"},
-  "implement": {"DEFAULT": "IH1 M P L AH0 M AH0 N T", "VERB": "IH1 M P L AH0 M EH0 N T"},
-  "implements": {"DEFAULT": "IH1 M P L AH0 M AH0 N T S", "VERB": "IH1 M P L AH0 M EH0 N T S"},
-  "import": {"DEFAULT": "IH1 M P AO2 R T", "VERB": "IH2 M P AO1 R T"},
-  "imports": {"DEFAULT": "IH1 M P AO2 R T S", "VERB": "IH2 M P AO1 R T S"},
-  "impress": {"DEFAULT": "IH1 M P R EH0 S", "VERB": "IH0 M P R EH1 S"},
-  "imprint": {"DEFAULT": "IH2 M P R IH1 N T", "VERB": "IH1 M P R IH0 N T"},
-  "imprints": {"DEFAULT": "IH1 M P R IH0 N T S", "VERB": "IH2 M P R IH1 N T S"},
-  "incense": {"DEFAULT": "IH1 N S EH2 N S", "VERB": "IH2 N S EH1 N S"},
-  "incline": {"DEFAULT": "IH1 N K L AY0 N", "VERB": "IH2 N K L AY1 N"},
-  "inclines": {"DEFAULT": "IH1 N K L AY0 N Z", "VERB": "IH2 N K L AY1 N Z"},
-  "incorporate": {"DEFAULT": "IH2 N K AO1 R P ER0 AH0 T", "VERB": "IH2 N K AO1 R P ER0 EY2 T"},
-  "increase": {"DEFAULT": "IH1 N K R IY2 S", "VERB": "IH2 N K R IY1 S"},
-  "increases": {"DEFAULT": "IH1 N K R IY2 S IH0 Z", "VERB": "IH2 N K R IY1 S IH0 Z"},
-  "indent": {"DEFAULT": "IH1 N D EH0 N T", "VERB": "IH2 N D EH1 N T"},
-  "indents": {"DEFAULT": "IH1 N D EH0 N T S", "VERB": "IH2 N D EH1 N T S"},
-  "inebriate": {"DEFAULT": "IH2 N EH1 B R IY0 AH0 T", "VERB": "IH2 N EH1 B R IY0 EY2 T"},
-  "inebriates": {"DEFAULT": "IH2 N EH1 B R IY0 AH0 T S", "VERB": "IH2 N EH1 B R IY0 EY2 T S"},
-  "initiate": {"DEFAULT": "IH2 N IH1 SH IY0 AH0 T", "VERB": "IH2 N IH1 SH IY0 EY2 T"},
-  "initiates": {"DEFAULT": "IH2 N IH1 SH IY0 AH0 T S", "VERB": "IH2 N IH1 SH IY0 EY2 T S"},
-  "inlay": {"DEFAULT": "IH1 N L EY2", "VERB": "IH2 N L EY1"},
-  "inlays": {"DEFAULT": "IH1 N L EY2 Z", "VERB": "IH2 N L EY1 Z"},
-  "insert": {"DEFAULT": "IH1 N S ER2 T", "VERB": "IH2 N S ER1 T"},
-  "inserts": {"DEFAULT": "IH1 N S ER2 T S", "VERB": "IH2 N S ER1 T S"},
-  "inset": {"DEFAULT": "IH1 N S EH2 T", "VERB": "IH2 N S EH1 T"},
-  "insets": {"DEFAULT": "IH1 N S EH2 T S", "VERB": "IH2 N S EH1 T S"},
-  "instinct": {"DEFAULT": "IH1 N S T IH0 NG K T", "VERB": "IH2 N S T IH1 NG K T"},
-  "insult": {"DEFAULT": "IH1 N S AH2 L T", "VERB": "IH2 N S AH1 L T"},
-  "insults": {"DEFAULT": "IH1 N S AH2 L T S", "VERB": "IH2 N S AH1 L T S"},
-  "interchange": {"DEFAULT": "IH1 N T ER0 CH EY2 N JH", "VERB": "IH2 T ER0 CH EY1 N JH"},
-  "interchanges": {"DEFAULT": "IH1 N T ER0 CH EY2 N JH IH0 Z", "VERB": "IH2 T ER0 CH EY1 N JH IH0 Z"},
-  "interdict": {"DEFAULT": "IH1 N T ER0 D IH2 K T", "VERB": "IH2 N T ER0 D IH1 K T"},
-  "interdicts": {"DEFAULT": "IH1 N T ER0 D IH2 K T S", "VERB": "IH2 N T ER0 D IH1 K T S"},
-  "intern": {"DEFAULT": "IH1 N T ER0 N", "VERB": "IH0 N T ER1 N"},
-  "interns": {"DEFAULT": "IH1 N T ER0 N Z", "VERB": "IH0 N T ER1 N Z"},
-  "intimate": {"DEFAULT": "IH1 N T AH0 M AH0 T", "VERB": "IH1 N T IH0 M EY2 T"},
-  "intimates": {"DEFAULT": "IH1 N T AH0 M AH0 T S", "VERB": "IH1 N T IH0 M EY2 T S"},
-  "intrigue": {"DEFAULT": "IH1 N T R IY0 G", "VERB": "IH2 N T R IY1 G"},
-  "introvert": {"DEFAULT": "IH1 N T R AO0 V ER2 T", "VERB": "IH2 N T R AO0 V ER1 T"},
-  "introverts": {"DEFAULT": "IH1 N T R AO0 V ER2 T S", "VERB": "IH2 N T R AO0 V ER1 T S"},
-  "inverse": {"DEFAULT": "IH2 N V ER1 S", "VERB": "IH1 N V ER0 S"},
-  "invite": {"DEFAULT": "IH1 N V AY0 T", "VERB": "IH2 N V AY1 T"},
-  "invites": {"DEFAULT": "IH1 N V AY0 T S", "VERB": "IH2 N V AY1 T S"},
-  "jagged": {"DEFAULT": "JH AE1 G IH0 D", "VERB": "JH AE1 G D"},
-  "learned": {"DEFAULT": "L ER1 N D", "VERB": "L ER1 N IH0 D"},
-  "legitimate": {"DEFAULT": "L AH0 JH IH1 T AH0 M AH0 T", "VERB": "L AH0 JH IH1 T AH0 M EY2 T"},
-  "live": {"DEFAULT": "L AY1 V", "VERB": "L IH1 V"},
-  "lives": {"DEFAULT": "L AY1 V Z", "VERB": "L IH1 V Z"},
-  "mandate": {"DEFAULT": "M AE2 N D EY1 T", "VERB": "M AE1 N D EY2 T"},
-  "misconduct": {"DEFAULT": "M IH2 S K AA0 N D AH1 K T", "VERB": "M IH2 S K AA1 N D AH0 K T"},
-  "misprint": {"DEFAULT": "M IH1 S P R IH0 N T", "VERB": "M IH2 S P R IH1 N T"},
-  "misprints": {"DEFAULT": "M IH1 S P R IH0 N T S", "VERB": "M IH2 S P R IH1 N T S"},
-  "misuse": {"DEFAULT": "M IH0 S Y UW1 Z", "VERB": "M IH0 S Y UW1 S"},
-  "misuses": {"DEFAULT": "M IH0 S Y UW1 S IH0 Z", "VERB": "M IH0 S Y UW1 Z IH0 Z"},
-  "moderate": {"DEFAULT": "M AA1 D ER0 AH0 T", "VERB": "M AA1 D ER0 EY2 T"},
-  "moderates": {"DEFAULT": "M AA1 D ER0 AH0 T S", "VERB": "M AA1 D ER0 EY2 T S"},
-  "mouth": {"DEFAULT": "M AW1 DH", "VERB": "M AW1 TH"},
-  "mouths": {"DEFAULT": "M AW1 TH S", "VERB": "M AW1 DH Z"},
-  "object": {"DEFAULT": "AA1 B JH EH0 K T", "VERB": "AH0 B JH EH1 K T"},
-  "objects": {"DEFAULT": "AA1 B JH EH0 K T S", "VERB": "AH0 B JH EH1 K T S"},
-  "ornament": {"DEFAULT": "AO1 R N AH0 M AH0 N T", "VERB": "AO1 R N AH0 M EH0 N T"},
-  "ornaments": {"DEFAULT": "AO1 R N AH0 M AH0 N T S", "VERB": "AO1 R N AH0 M EH0 N T S"},
-  "overcharge": {"DEFAULT": "OW1 V ER0 CH AA2 R JH", "VERB": "OW2 V ER0 CH AA1 R JH"},
-  "overcharges": {"DEFAULT": "OW1 V ER0 CH AA2 R JH IH0 Z", "VERB": "OW2 V ER0 CH AA1 R JH IH0 Z"},
-  "overflow": {"DEFAULT": "OW1 V ER0 F L OW2", "VERB": "OW2 V ER0 F L OW1"},
-  "overflows": {"DEFAULT": "OW1 V ER0 F L OW2 Z", "VERB": "OW2 V ER0 F L OW1 Z"},
-  "overhang": {"DEFAULT": "OW1 V ER0 HH AE2 NG", "VERB": "OW2 V ER0 HH AE1 NG"},
-  "overhangs": {"DEFAULT": "OW1 V ER0 HH AE2 NG Z", "VERB": "OW2 V ER0 HH AE1 NG Z"},
-  "overhaul": {"DEFAULT": "OW1 V ER0 HH AO2 L", "VERB": "OW2 V ER0 HH AO1 L"},
-  "overhauls": {"DEFAULT": "OW1 V ER0 HH AO2 L Z", "VERB": "OW2 V ER0 HH AO1 L Z"},
-  "overlap": {"DEFAULT": "OW1 V ER0 L AE2 P", "VERB": "OW2 V ER0 L AE1 P"},
-  "overlaps": {"DEFAULT": "OW1 V ER0 L AE2 P S", "VERB": "OW2 V ER0 L AE1 P S"},
-  "overlay": {"DEFAULT": "OW1 V ER0 L EY2", "VERB": "OW2 V ER0 L EY1"},
-  "overlays": {"DEFAULT": "OW1 V ER0 L EY2 Z", "VERB": "OW2 V ER0 L EY1 Z"},
-  "overwork": {"DEFAULT": "OW1 V ER0 W ER2 K", "VERB": "OW2 V ER0 W ER1 K"},
-  "perfect": {"DEFAULT": "P ER1 F IH2 K T", "VERB": "P ER0 F EH1 K T"},
-  "perfume": {"DEFAULT": "P ER1 F Y UW0 M", "VERB": "P ER0 F Y UW1 M"},
-  "perfumes": {"DEFAULT": "P ER1 F Y UW0 M Z", "VERB": "P ER0 F Y UW1 M Z"},
-  "permit": {"DEFAULT": "P ER1 M IH2 T", "VERB": "P ER0 M IH1 T"},
-  "permits": {"DEFAULT": "P ER1 M IH2 T S", "VERB": "P ER0 M IH1 T S"},
-  "pervert": {"DEFAULT": "P ER1 V ER0 T", "VERB": "P ER0 V ER1 T"},
-  "perverts": {"DEFAULT": "P ER1 V ER0 T S", "VERB": "P ER0 V ER1 T S"},
-  "pontificate": {"DEFAULT": "P AA0 N T IH1 F AH0 K EY2 T", "VERB": "P AA0 N T IH1 F AH0 K AH0 T"},
-  "pontificates": {"DEFAULT": "P AA0 N T IH1 F AH0 K AH0 T S", "VERB": "P AA0 N T IH1 F AH0 K EY2 T S"},
-  "precipitate": {"DEFAULT": "P R IH0 S IH1 P IH0 T EY2 T", "VERB": "P R IH0 S IH1 P IH0 T AH0 T"},
-  "predicate": {"DEFAULT": "P R EH1 D AH0 K EY2 T", "VERB": "P R EH1 D IH0 K AH0 T"},
-  "predicates": {"DEFAULT": "P R EH1 D IH0 K AH0 T S", "VERB": "P R EH1 D AH0 K EY2 T S"},
-  "prefix": {"DEFAULT": "P R IY1 F IH0 K S", "VERB": "P R IY2 F IH1 K S"},
-  "prefixes": {"DEFAULT": "P R IY1 F IH0 K S IH0 JH", "VERB": "P R IY2 F IH1 K S IH0 JH"},
-  "presage": {"DEFAULT": "P R EH1 S IH0 JH", "VERB": "P R EH2 S IH1 JH"},
-  "presages": {"DEFAULT": "P R EH1 S IH0 JH IH0 JH", "VERB": "P R EH2 S IH1 JH IH0 JH"},
-  "present": {"DEFAULT": "P R EH1 Z AH0 N T", "VERB": "P R IY0 Z EH1 N T"},
-  "presents": {"DEFAULT": "P R EH1 Z AH0 N T S", "VERB": "P R IY0 Z EH1 N T S"},
-  "proceeds": {"DEFAULT": "P R OW1 S IY0 D Z", "VERB": "P R AH0 S IY1 D Z"},
-  "process": {"DEFAULT": "P R AA1 S EH2 S", "VERB": "P R AO2 S EH1 S"},
-  "processes": {"DEFAULT": "P R AO2 S EH1 S AH0 Z", "VERB": "P R AA1 S EH0 S AH0 Z"},
-  "processing": {"DEFAULT": "P R AA1 S EH0 S IH0 NG", "VERB": "P R AA0 S EH1 S IH0 NG"},
-  "produce": {"DEFAULT": "P R OW1 D UW0 S", "VERB": "P R AH0 D UW1 S"},
-  "progress": {"DEFAULT": "P R AA1 G R EH2 S", "VERB": "P R AH0 G R EH1 S"},
-  "progresses": {"DEFAULT": "P R AA1 G R EH2 S AH0 Z", "VERB": "P R OW0 G R EH1 S AH0 Z"},
-  "project": {"DEFAULT": "P R AA1 JH EH0 K T", "VERB": "P R AA0 JH EH1 K T"},
-  "projects": {"DEFAULT": "P R AA1 JH EH0 K T S", "VERB": "P R AA0 JH EH1 K T S"},
-  "prospect": {"DEFAULT": "P R AA1 S P EH0 K T", "VERB": "P R AH2 S P EH1 K T"},
-  "prospects": {"DEFAULT": "P R AA1 S P EH0 K T S", "VERB": "P R AH2 S P EH1 K T S"},
-  "prostrate": {"DEFAULT": "P R AA1 S T R EY0 T", "VERB": "P R AA0 S T R EY1 T"},
-  "protest": {"DEFAULT": "P R OW1 T EH2 S T", "VERB": "P R AH0 T EH1 S T"},
-  "protests": {"DEFAULT": "P R OW1 T EH2 S T S", "VERB": "P R AH0 T EH1 S T S"},
-  "purport": {"DEFAULT": "P ER1 P AO2 R T", "VERB": "P ER0 P AO1 R T"},
-  "quadruple": {"DEFAULT": "K W AA0 D R UW1 P AH0 L", "VERB": "K W AA1 D R UW0 P AH0 L"},
-  "quadruples": {"DEFAULT": "K W AA1 D R UW0 P AH0 L Z", "VERB": "K W AA0 D R UW1 P AH0 L Z"},
-  "ragged": {"DEFAULT": "R AE1 G AH0 D", "VERB": "R AE1 G D"},
-  "rampage": {"DEFAULT": "R AE1 M P EY2 JH", "VERB": "R AE2 M P EY1 JH"},
-  "rampages": {"DEFAULT": "R AE1 M P EY2 JH IH0 Z", "VERB": "R AE2 M P EY1 JH IH0 Z"},
-  "read": {"DEFAULT": "R IY1 D", "VBD": "R EH1 D", "VBN": "R EH1 D", "VBP": "R EH1 D"},
-  "rebel": {"DEFAULT": "R IH0 B EH1 L", "VERB": "R EH1 B AH0 L"},
-  "rebels": {"DEFAULT": "R EH1 B AH0 L Z", "VERB": "R IH0 B EH1 L Z"},
-  "rebound": {"DEFAULT": "R IY1 B AW0 N D", "VERB": "R IY0 B AW1 N D"},
-  "rebounds": {"DEFAULT": "R IY1 B AW0 N D Z", "VERB": "R IY0 B AW1 N D Z"},
-  "recall": {"DEFAULT": "R IY1 K AO2 L", "VERB": "R IH0 K AO1 L"},
-  "recalls": {"DEFAULT": "R IY1 K AO2 L Z", "VERB": "R IH0 K AO1 L Z"},
-  "recap": {"DEFAULT": "R IY1 K AE2 P", "VERB": "R IH0 K AE1 P"},
-  "recapped": {"DEFAULT": "R IY1 K AE2 P T", "VERB": "R IH0 K AE1 P T"},
-  "recapping": {"DEFAULT": "R IY1 K AE2 P IH0 NG", "VERB": "R IH0 K AE1 P IH0 NG"},
-  "recaps": {"DEFAULT": "R IY1 K AE2 P S", "VERB": "R IH0 K AE1 P S"},
-  "record": {"DEFAULT": "R EH1 K ER0 D", "VERB": "R IH0 K AO1 R D"},
-  "records": {"DEFAULT": "R EH1 K ER0 D Z", "VERB": "R IH0 K AO1 R D Z"},
-  "recount": {"DEFAULT": " R IH1 K AW0 N T", "VERB": "R IY2 K AW1 N T"},
-  "recounts": {"DEFAULT": " R IH1 K AW0 N T S", "VERB": "R IY2 K AW1 N T S"},
-  "refill": {"DEFAULT": "R IY1 F IH0 L", "VERB": "R IY0 F IH1 L"},
-  "refills": {"DEFAULT": "R IY1 F IH0 L Z", "VERB": "R IY0 F IH1 L Z"},
-  "refit": {"DEFAULT": "R IY1 F IH0 T", "VERB": "R IY0 F IH1 T"},
-  "refits": {"DEFAULT": "R IY1 F IH0 T S", "VERB": "R IY0 F IH1 T S"},
-  "refresh": {"DEFAULT": "R IH1 F R EH0 SH", "VERB": "R IH0 F R EH1 SH"},
-  "refund": {"DEFAULT": "R IY1 F AH2 N D", "VERB": "R IH0 F AH1 N D"},
-  "refunds": {"DEFAULT": "R IY1 F AH2 N D Z", "VERB": "R IH0 F AH1 N D Z"},
-  "refuse": {"DEFAULT": "R EH1 F Y UW2 Z", "VERB": "R IH0 F Y UW1 Z"},
-  "regenerate": {"DEFAULT": "R IY0 JH EH1 N ER0 AH0 T", "VERB": "R IY0 JH EH1 N ER0 EY2 T"},
-  "rehash": {"DEFAULT": "R IY1 HH AE0 SH", "VERB": "R IY0 HH AE1 SH"},
-  "rehashes": {"DEFAULT": "R IY1 HH AE0 SH IH0 Z", "VERB": "R IY0 HH AE1 SH IH0 Z"},
-  "reincarnate": {"DEFAULT": "R IY2 IH0 N K AA1 R N AH0 T", "VERB": "R IY2 IH0 N K AA1 R N EY2 T"},
-  "reject": {"DEFAULT": "R IY1 JH EH0 K T", "VERB": "R IH0 JH EH1 K T"},
-  "rejects": {"DEFAULT": "R IY1 JH EH0 K T S", "VERB": "R IH0 JH EH1 K T S"},
-  "relay": {"DEFAULT": "R IY1 L EY2", "VERB": "R IY2 L EY1"},
-  "relaying": {"DEFAULT": "R IY1 L EY2 IH0 NG", "VERB": "R IY2 L EY1 IH0 NG"},
-  "relays": {"DEFAULT": "R IY1 L EY2 Z", "VERB": "R IY2 L EY1 Z"},
-  "remake": {"DEFAULT": "R IY1 M EY0 K", "VERB": "R IY2 M EY1 K"},
-  "remakes": {"DEFAULT": "R IY1 M EY0 K S", "VERB": "R IY2 M EY1 K S"},
-  "replay": {"DEFAULT": "R IY1 P L EY0", "VERB": "R IY0 P L EY1"},
-  "replays": {"DEFAULT": "R IY1 P L EY0 Z", "VERB": "R IY0 P L EY1 Z"},
-  "reprint": {"DEFAULT": "R IY1 P R IH0 N T", "VERB": "R IY0 P R IH1 N T"},
-  "reprints": {"DEFAULT": "R IY1 P R IH0 N T S", "VERB": "R IY0 P R IH1 N T S"},
-  "rerun": {"DEFAULT": "R IY1 R AH0 N", "VERB": "R IY2 R AH1 N"},
-  "reruns": {"DEFAULT": "R IY1 R AH0 N Z", "VERB": "R IY2 R AH1 N Z"},
-  "resume": {"DEFAULT": "R EH1 Z AH0 M EY2", "VERB": "R IY0 Z UW1 M"},
-  "retake": {"DEFAULT": "R IY1 T EY0 K", "VERB": "R IY0 T EY1 K"},
-  "retakes": {"DEFAULT": "R IY1 T EY0 K S", "VERB": "R IY0 T EY1 K S"},
-  "rethink": {"DEFAULT": "R IY1 TH IH0 NG K", "VERB": "R IY2 TH IH1 NG K"},
-  "rethinks": {"DEFAULT": "R IY1 TH IH0 NG K S", "VERB": "R IY2 TH IH1 NG K S"},
-  "retread": {"DEFAULT": "R IY1 T R EH0 D", "VERB": "R IY2 T R EH1 D"},
-  "retreads": {"DEFAULT": "R IY1 T R EH0 D Z", "VERB": "R IY2 T R EH1 D Z"},
-  "rewrite": {"DEFAULT": "R IY1 R AY2 T", "VERB": "R IY0 R AY1 T"},
-  "rewrites": {"DEFAULT": "R IY1 R AY2 T S", "VERB": "R IY0 R AY1 T S"},
-  "segment": {"DEFAULT": "S EH2 G M EH1 N T", "VERB": "S EH1 G M AH0 N T"},
-  "segments": {"DEFAULT": "S EH1 G M AH0 N T S", "VERB": "S EH2 G M EH1 N T S"},
-  "separate": {"DEFAULT": "S EH1 P ER0 IH0 T", "VERB": "S EH1 P ER0 EY2 T"},
-  "separates": {"DEFAULT": "S EH1 P ER0 IH0 T S", "VERB": "S EH1 P ER0 EY2 T S"},
-  "subcontract": {"DEFAULT": "S AH2 B K AA0 N T R AE1 K T", "VERB": "S AH0 B K AA1 N T R AE2 K T"},
-  "subcontracts": {"DEFAULT": "S AH0 B K AA1 N T R AE2 K T S", "VERB": "S AH2 B K AA0 N T R AE1 K T S"},
-  "subject": {"DEFAULT": "S AH1 B JH IH0 K T", "VERB": "S AH0 B JH EH1 K T"},
-  "subjects": {"DEFAULT": "S AH1 B JH IH0 K T S", "VERB": "S AH0 B JH EH1 K T S"},
-  "subordinate": {"DEFAULT": "S AH0 B AO1 R D AH0 N AH0 T", "VERB": "S AH0 B AO1 R D AH0 N EY2 T"},
-  "subordinates": {"DEFAULT": "S AH0 B AO1 R D AH0 N AH0 T S", "VERB": "S AH0 B AO1 R D AH0 N EY2 T S"},
-  "supplement": {"DEFAULT": "S AH1 P L AH0 M AH0 N T", "VERB": "S AH1 P L AH0 M EH0 N T"},
-  "supplements": {"DEFAULT": "S AH1 P L AH0 M AH0 N T S", "VERB": "S AH1 P L AH0 M EH0 N T S"},
-  "surmise": {"DEFAULT": "S ER1 M AY0 Z", "VERB": "S ER0 M AY1 Z"},
-  "surmises": {"DEFAULT": "S ER1 M AY0 Z IH0 Z", "VERB": "S ER0 M AY1 Z IH0 Z"},
-  "survey": {"DEFAULT": "S ER1 V EY2", "VERB": "S ER0 V EY1"},
-  "surveys": {"DEFAULT": "S ER1 V EY2 Z", "VERB": "S ER0 V EY1 Z"},
-  "suspect": {"DEFAULT": "S AH1 S P EH2 K T", "VERB": "S AH0 S P EH1 K T"},
-  "suspects": {"DEFAULT": "S AH1 S P EH2 K T S", "VERB": "S AH0 S P EH1 K T S"},
-  "syndicate": {"DEFAULT": "S IH1 N D IH0 K AH0 T", "VERB": "S IH1 N D AH0 K EY2 T"},
-  "syndicates": {"DEFAULT": "S IH1 N D IH0 K AH0 T S", "VERB": "S IH1 N D IH0 K EY2 T S"},
-  "torment": {"DEFAULT": "T AO0 R M EH1 N T", "VERB": "T AO1 R M EH2 N T"},
-  "transfer": {"DEFAULT": "T R AE1 N S F ER0", "VERB": "T R AE0 N S F ER1"},
-  "transfers": {"DEFAULT": "T R AE1 N S F ER0 Z", "VERB": "T R AE0 N S F ER1 Z"},
-  "transplant": {"DEFAULT": "T R AE1 N S P L AE0 N T", "VERB": "T R AE0 N S P L AE1 N T"},
-  "transplants": {"DEFAULT": "T R AE1 N S P L AE0 N T S", "VERB": "T R AE0 N S P L AE1 N T S"},
-  "transport": {"DEFAULT": "T R AE1 N S P AO0 R T", "VERB": "T R AE0 N S P AO1 R T"},
-  "transports": {"DEFAULT": "T R AE1 N S P AO0 R T S", "VERB": "T R AE0 N S P AO1 R T S"},
-  "triplicate": {"DEFAULT": "T R IH1 P L IH0 K AH0 T", "VERB": "T R IH1 P L IH0 K EY2 T"},
-  "triplicates": {"DEFAULT": "T R IH1 P L IH0 K AH0 T S", "VERB": "T R IH1 P L IH0 K EY2 T S"},
-  "undercut": {"DEFAULT": "AH1 N D ER0 K AH2 T", "VERB": "AH2 N D ER0 K AH1 T"},
-  "underestimate": {"DEFAULT": "AH1 N D ER0 EH1 S T AH0 M AH0 T", "VERB": "AH1 N D ER0 EH1 S T AH0 M EY2 T"},
-  "underestimates": {"DEFAULT": "AH1 N D ER0 EH1 S T AH0 M AH0 T S", "VERB": "AH1 N D ER0 EH1 S T AH0 M EY2 T S"},
-  "underline": {"DEFAULT": "AH1 N D ER0 L AY2 N", "VERB": "AH2 N D ER0 L AY1 N"},
-  "underlines": {"DEFAULT": "AH1 N D ER0 L AY2 N Z", "VERB": "AH2 N D ER0 L AY1 N Z"},
-  "undertaking": {"DEFAULT": "AH1 N D ER0 T EY2 K IH0 NG", "VERB": "AH2 N D ER0 T EY1 K IH0 NG"},
-  "undertakings": {"DEFAULT": "AH1 N D ER0 T EY2 K IH0 NG Z", "VERB": "AH2 N D ER0 T EY1 K IH0 NG Z"},
-  "unused": {"DEFAULT": "AH0 N Y UW1 S T", "VERB": "AH0 N Y UW1 Z D"},
-  "upgrade": {"DEFAULT": "AH1 P G R EY0 D", "VERB": "AH0 P G R EY1 D"},
-  "upgrades": {"DEFAULT": "AH1 P G R EY0 D Z", "VERB": "AH0 P G R EY1 D Z"},
-  "uplift": {"DEFAULT": "AH1 P L IH0 F T", "VERB": "AH2 P L IH1 F T"},
-  "upset": {"DEFAULT": "AH1 P S EH2 T", "VERB": "AH0 P S EH1 T"},
-  "upsets": {"DEFAULT": "AH1 P S EH2 T S", "VERB": "AH0 P S EH1 T S"},
-  "use": {"DEFAULT": "Y UW1 S", "VERB": "Y UW1 Z"},
-  "used": {"DEFAULT": "Y UW1 S T", "VBN": "Y UW1 Z D"},
-  "uses": {"DEFAULT": "Y UW1 S IH0 Z", "VERB": "Y UW1 Z IH0 Z"}
-}
resources/app/python/xvapitch/text/h2p_parser/data/example.json DELETED
@@ -1,16 +0,0 @@
- {
-     "absent": {
-         "VERB": "AH1 B S AE1 N T",
-         "DEFAULT": "AE1 B S AH0 N T"
-     },
-     "reject": {
-         "VERB": "R IH0 JH EH1 K T",
-         "DEFAULT": "R IY1 JH EH0 K T"
-     },
-     "read": {
-         "VBD": "R EH1 D",
-         "VBN": "R EH1 D",
-         "VBP": "R EH1 D",
-         "DEFAULT": "R IY1 D"
-     }
- }
resources/app/python/xvapitch/text/h2p_parser/dict_reader.py DELETED
@@ -1,109 +0,0 @@
- # This reads a CMUDict formatted dictionary as a dictionary object
- import re
- from python.xvapitch.text.h2p_parser import format_ph as ph
- from . import DATA_PATH
- 
- 
- _dict_primary = 'cmudict.dict'
- 
- 
- def read_dict(filename: str) -> list:
-     # Read the file
-     with open(filename, encoding='utf-8', mode='r') as f:
-         # Read the file into lines
-         lines = f.readlines()
-     # Remove any line starting with ";;;"
-     lines = [line for line in lines if not line.startswith(';;;')]
-     return lines
- 
- 
- def parse_dict(lines: list) -> dict:
-     # Create a dictionary to store the parsed data
-     parsed_dict = {}
-     # Detect file format
- 
-     # We will read the first 10 lines to determine the format
-     # Default to SSD format unless we find otherwise
-     dict_form = 'SSD'
-     for line in lines[:10]:
-         # Strip new lines
-         line = line.strip()
-         if line == '':
-             continue
-         """
-         Format 1 (Double Space Delimited):
-         - Comment allowed to start with ";;;"
-         WORD  W ER1 D
- 
-         Format 2 (Single Space Delimited):
-         - Comment allowed at end of any line using "#"
-         WORD W ER1 D # Comment
-         """
-         if '  ' in line:
-             dict_form = 'DSD'
-             break
- 
-     # Iterate over the lines
-     for line in lines:
-         # Skip empty lines and lines with no space
-         line = line.strip()
-         if line == '' or ' ' not in line:
-             continue
- 
-         # Split depending on format
-         if dict_form == 'DSD':
-             pairs = line.split('  ')
-         else:
-             space_index = line.find(' ')
-             line_split = line[:space_index], line[space_index + 1:]
-             pairs = line_split[0], line_split[1].split('#')[0]
- 
-         word = str.lower(pairs[0])  # Get word and lowercase it
-         phonemes = ph.to_list(pairs[1])  # Convert to list of phonemes
-         phonemes = [phonemes]  # Wrap in nested list
-         word_num = 0
-         word_orig = None
- 
-         # Detect if this is an alternate-pronunciation entry, e.g. "word(2)"
-         if ('(' in word) and (')' in word) and any(char.isdigit() for char in word):
-             # Parse the integer from the word using regex
-             result = int(re.findall(r"\((\d+)\)", word)[0])
-             # If found
-             if result is not None:
-                 # Set the original word
-                 word_orig = word
-                 # Remove the integer and bracket from the word
-                 word = re.sub(r"\(.*\)", "", word)
-                 # Set the word number to the result
-                 word_num = result
- 
-         # Check existing key
-         if word in parsed_dict:
-             # If word number is 0, ignore
-             if word_num == 0:
-                 continue
-             # If word number is not 0, add phoneme to existing key at index
-             parsed_dict[word].extend(phonemes)
-             # Also add the original word if it exists
-             if word_orig is not None:
-                 parsed_dict[word_orig] = phonemes
-         else:
-             # Create a new key
-             parsed_dict[word] = phonemes
- 
-     # Return the dictionary
-     return parsed_dict
- 
- 
- class DictReader:
-     def __init__(self, filename=None):
-         self.filename = filename
-         self.dict = {}
-         # If filename is None, use the default dictionary
-         # default = 'data' uses the dictionary file in the data module
-         # default = 'nltk' uses the nltk cmudict
-         if filename is not None:
-             self.dict = parse_dict(read_dict(filename))
-         else:
-             with DATA_PATH.joinpath(_dict_primary) as f:
-                 self.dict = parse_dict(read_dict(f))
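Note: parse_dict handles both layouts described above. A minimal sketch of the single-space-delimited path, assuming the module is importable under the upstream h2p_parser package name and using the two HELLO variants found in cmudict.dict:

    from h2p_parser.dict_reader import parse_dict

    lines = ['hello HH AH0 L OW1\n', 'hello(2) HH EH0 L OW1\n']
    parse_dict(lines)
    # Alternate pronunciations merge under the base word and are also
    # kept under their original key:
    # {'hello':    [['HH', 'AH0', 'L', 'OW1'], ['HH', 'EH0', 'L', 'OW1']],
    #  'hello(2)': [['HH', 'EH0', 'L', 'OW1']]}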
resources/app/python/xvapitch/text/h2p_parser/dictionary.py DELETED
@@ -1,85 +0,0 @@
- # dictionary.py
- 
- # Defines a dictionary class that can be used to store and retrieve from the json file
- import sys
- if sys.version_info < (3, 9):
-     # In Python versions below 3.9, this is needed
-     import importlib_resources as pkg_resources
- else:
-     # Since python 3.9+, importlib.resources.files is built-in
-     import importlib.resources as pkg_resources
- from os.path import exists
- import json
- from python.xvapitch.text.h2p_parser import pos_parser
- 
- 
- # Method to get data path
- def get_data_path():
-     data_path = pkg_resources.files('h2p_parser.data')
-     if data_path is None:
-         raise FileNotFoundError("Data folder not found")
-     return data_path
- 
- 
- # Dictionary class
- class Dictionary:
-     def __init__(self, file_name=None):
-         # If a file name is not provided, use the default file name
-         if file_name is None:
-             self.file_name = 'dict.json'
-             self.use_default = True
-         else:
-             self.file_name = file_name
-             self.use_default = False
-         self.dictionary = self.load_dictionary(file_name)
- 
-     # Loads the dictionary from the json file
-     def load_dictionary(self, path=None):
-         if path is None:
-             data_path = get_data_path()
-             dict_path = data_path.joinpath(self.file_name)
-             with open(str(dict_path)) as def_file:
-                 read_dict = json.load(def_file)
-         else:
-             if not exists(path):
-                 raise FileNotFoundError(f'Dictionary {self.file_name} file not found')
-             with open(path) as file:
-                 try:
-                     read_dict = json.load(file)
-                 except json.decoder.JSONDecodeError:
-                     raise ValueError(f'Dictionary {self.file_name} file is not valid JSON')
-         # Check dictionary has at least one entry
-         if len(read_dict) == 0:
-             raise ValueError('Dictionary is empty or invalid')
-         return read_dict
- 
-     # Check if a word is in the dictionary
-     def contains(self, word):
-         word = word.lower()
-         return word in self.dictionary
- 
-     # Get the phonetic pronunciation of a word using Part of Speech tag
-     def get_phoneme(self, word, pos):
-         # Get the sub-dictionary at dictionary[word]
-         sub_dict = self.dictionary[word.lower()]
- 
-         # First, check if the exact pos is a key
-         if pos in sub_dict:
-             return sub_dict[pos]
- 
-         # If not, use the parent pos of the pos tag
-         parent_pos = pos_parser.get_parent_pos(pos)
- 
-         if parent_pos is not None:
-             # Check if the sub_dict contains the parent pos
-             if parent_pos in sub_dict:
-                 return sub_dict[parent_pos]
- 
-         # If not, check if the sub_dict contains a DEFAULT key
-         if 'DEFAULT' in sub_dict:
-             return sub_dict['DEFAULT']
- 
-         # If no matches, return None
-         return None
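Note: get_phoneme resolves in the order exact tag, parent tag, then DEFAULT. A minimal sketch against the entries shown in example.json above (import path assumes the upstream h2p_parser layout):

    from h2p_parser.dictionary import Dictionary

    d = Dictionary()                # loads the bundled dict.json
    d.contains('reject')            # True
    d.get_phoneme('reject', 'VBD')  # VBD -> parent VERB -> 'R IH0 JH EH1 K T'
    d.get_phoneme('reject', 'NN')   # no NOUN key -> DEFAULT -> 'R IY1 JH EH0 K T'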
resources/app/python/xvapitch/text/h2p_parser/filter.py DELETED
@@ -1,34 +0,0 @@
- from unicodedata import normalize
- import re
- 
- # Pre-compile regex
- re_filter = re.compile(r"[^ A-Za-z'.,?!()\-]")
- re_filter_with_num = re.compile(r"[^ A-Za-z\d'.,?!()\-]")
- re_multi_space = re.compile(r"\s\s+")
- 
- 
- # Filters text before parsing
- # @param text: text to be filtered
- # @return: filtered text
- def filter_text(text: str, allow_num: bool = False, preserve_case: bool = False) -> str:
-     """
-     Filters text before parsing
-     :param preserve_case: True to keep the original casing
-     :param allow_num: True if numbers are allowed
-     :param text: Input raw text
-     :return: Text after stripped accents, lower-cased, and invalid punctuation removed
-     """
-     # Strip accents
-     text = normalize('NFD', text)
-     # To lowercase
-     if not preserve_case:
-         text = text.lower()
-     # Remove all invalid punctuation
-     if allow_num:
-         text = re.sub(re_filter_with_num, '', text)
-     else:
-         text = re.sub(re_filter, '', text)
-     # Collapse runs of 2 or more whitespace characters into one space
-     text = re.sub(re_multi_space, " ", text)
-     # Return
-     return text
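Note: roughly, filter_text strips accents, lower-cases, drops disallowed characters, and collapses repeated spaces. Expected behaviour per the regexes above (a sketch):

    from h2p_parser.filter import filter_text

    filter_text('Séance  on  Friday 1999!')
    # -> 'seance on friday !'  (digits removed by default)
    filter_text('Séance 1999!', allow_num=True)
    # -> 'seance 1999!'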
resources/app/python/xvapitch/text/h2p_parser/format_ph.py DELETED
@@ -1,99 +0,0 @@
- from typing import overload
- 
- # Converts and outputs various formats of phonemes
- 
- 
- @overload
- def to_sds(ph: str) -> str: ...
- 
- 
- @overload
- def to_sds(ph: list) -> str: ...
- 
- 
- def to_sds(ph: list or str) -> str or None:
-     """
-     Converts phonemes to space delimited string format
- 
-     :param ph: Phoneme as str or list, supports nested lists
-     :return: Phoneme as space delimited string
-     """
-     # Return None if None
-     if ph is None:
-         return None
- 
-     # Return directly if str
-     if isinstance(ph, str):
-         return ph
-     # If is list, convert each element
-     if isinstance(ph, list):
-         # If list empty, return None
-         if len(ph) == 0:
-             return None
-         # Case for further lists
-         if isinstance(ph[0], list):
-             return to_sds(ph[0])  # Recursive call
-         # Case if str at index 0, and size 1, return directly
-         elif isinstance(ph[0], str) and len(ph) == 1:
-             return ph[0]
-         # Case if str at index 0, above size 1, return with join
-         elif isinstance(ph[0], str):
-             return ' '.join(ph)
-         # Case for none
-         elif ph[0] is None:
-             return None
-         else:
-             raise TypeError('to_sds() encountered an unexpected nested element type')
-     # Error if no matches
-     raise TypeError('to_sds() expects a list or string')
- 
- 
- @overload
- def to_list(ph: str) -> list: ...
- 
- 
- @overload
- def to_list(ph: list) -> list: ...
- 
- 
- def to_list(ph: str or list) -> list or None:
-     """
-     Converts phonemes to list format
- 
-     :param ph: Phoneme as str or list, supports nested lists
-     :return: Phoneme as list
-     """
-     # Return None if None
-     if ph is None:
-         return None
- 
-     # Return directly if list and index 0 is str
-     if isinstance(ph, list) and len(ph) > 0 and isinstance(ph[0], str):
-         return ph
- 
-     # If space delimited string, convert to list
-     if isinstance(ph, str):
-         return ph.split(' ')
- 
-     # If nested list, convert each element
-     if isinstance(ph, list):
-         # If list empty or has None, return None
-         if len(ph) == 0 or ph[0] is None:
-             return None
-         # Case for further lists
-         if isinstance(ph[0], list):
-             return to_list(ph[0])  # Recursive call
- 
-     # Error if no matches
-     raise TypeError('to_list() expects a list or string')
- 
- 
- # Surrounds text with curly brackets
- def with_cb(text: str) -> str:
-     """
-     Surrounds text with curly brackets
- 
-     :param text: Text to surround
-     :return: Surrounded text
-     """
-     return '{' + text + '}'
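Note: the converters are shape-tolerant, so callers can pass strings, lists, or nested lists interchangeably:

    from h2p_parser import format_ph as ph

    ph.to_sds(['R', 'EH1', 'D'])    # 'R EH1 D'
    ph.to_sds([['R', 'EH1', 'D']])  # 'R EH1 D' (nested lists unwrap)
    ph.to_list('R EH1 D')           # ['R', 'EH1', 'D']
    ph.with_cb('R EH1 D')           # '{R EH1 D}'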
resources/app/python/xvapitch/text/h2p_parser/h2p.py DELETED
@@ -1,123 +0,0 @@
- import nltk
- import re
- from nltk.tokenize import TweetTokenizer
- from nltk import pos_tag
- from nltk import pos_tag_sents
- from .dictionary import Dictionary
- from .filter import filter_text as ft
- from . import format_ph as ph
- 
- # Check that the nltk data is downloaded, if not, download it
- try:
-     nltk.data.find('taggers/averaged_perceptron_tagger.zip')
- except LookupError:
-     nltk.download('averaged_perceptron_tagger')
- 
- 
- # Method to use Regex to replace the first instance of a word with its phonemes
- def replace_first(target, replacement, text):
-     # Skip if target invalid
-     if target is None or target == '':
-         return text
-     # Replace the first instance of a word with its phonemes
-     return re.sub(r'(?i)\b' + target + r'\b', replacement, text, 1)
- 
- 
- class H2p:
-     def __init__(self, dict_path=None, preload=False, phoneme_format=''):
-         """
-         Creates a H2p parser
- 
-         Supported phoneme formats:
-             - Space delimited
-             - Space delimited surrounded by { }
- 
-         :param dict_path: Path to a heteronym dictionary json file. Built-in dictionary will be used if None
-         :type dict_path: str
-         :param preload: Preloads the tokenizer and tagger during initialization
-         :type preload: bool
-         """
- 
-         # Supported phoneme formats
-         self.phoneme_format = phoneme_format
-         self.dict = Dictionary(dict_path)
-         self.tokenize = TweetTokenizer().tokenize
-         self.get_tags = pos_tag
-         if preload:
-             self.preload()
- 
-     # Method to preload tokenizer and pos_tag
-     def preload(self):
-         tokens = self.tokenize('a')
-         assert tokens == ['a']
-         assert pos_tag(tokens)[0][0] == 'a'
- 
-     # Method to check if a text line contains a heteronym
-     def contains_het(self, text):
-         # Filter the text
-         text = ft(text)
-         # Tokenize
-         words = self.tokenize(text)
-         # Check match with dictionary
-         hets = []
-         for word in words:
-             if self.dict.contains(word):
-                 hets.append(word)
-         return len(hets) > 0, hets
- 
-     # Method to replace heteronyms in a text line to phonemes
-     def replace_het(self, text):
-         # Filter the text
-         working_text = ft(text, preserve_case=True)
-         # Tokenize
-         words = self.tokenize(working_text)
-         # Get pos tags
-         tags = pos_tag(words)
-         # Loop through words and pos tags
-         for word, pos in tags:
-             # Skip if word not in dictionary
-             if not self.dict.contains(word):
-                 continue
-             # Get phonemes
-             phonemes = self.dict.get_phoneme(word, pos)
-             # Format phonemes
-             f_ph = ph.with_cb(ph.to_sds(phonemes))
-             # Replace word with phonemes
-             text = replace_first(word, f_ph, text)
-         return text
- 
-     # Replaces heteronyms in a list of text lines
-     # Slightly faster than replace_het() called on each line
-     def replace_het_list(self, text_list):
-         # Filter the text
-         working_text_list = [ft(text, preserve_case=True) for text in text_list]
-         # Tokenize
-         list_sentence_words = [self.tokenize(text) for text in working_text_list]
-         # Get pos tags list
-         tags_list = pos_tag_sents(list_sentence_words)
-         # Loop through lines
-         for index in range(len(tags_list)):
-             # Loop through words and pos tags in tags_list index
-             for word, pos in tags_list[index]:
-                 # Skip if word not in dictionary
-                 if not self.dict.contains(word):
-                     continue
-                 # Get phonemes
-                 phonemes = self.dict.get_phoneme(word, pos)
-                 # Format phonemes
-                 f_ph = ph.with_cb(ph.to_sds(phonemes))
-                 # Replace word with phonemes
-                 text_list[index] = replace_first(word, f_ph, text_list[index])
-         return text_list
- 
-     # Method to tag a text line, returns a list of tags
-     def tag(self, text):
-         # Filter the text
-         working_text = ft(text, preserve_case=True)
-         # Tokenize
-         words = self.tokenize(working_text)
-         # Get pos tags
-         tags = pos_tag(words)
-         # Only return element 1 of each list
-         return [tag[1] for tag in tags]
- 
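Note: typical use of the removed parser, as a sketch; the exact output depends on the NLTK tagger assigning 'read' a past-tense (VBD) tag here:

    from h2p_parser.h2p import H2p

    h2p = H2p(preload=True)
    h2p.contains_het('I read the book')  # (True, ['read'])
    h2p.replace_het('I read the book')   # 'I {R EH1 D} the book'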
resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/PKG-INFO DELETED
@@ -1,14 +0,0 @@
- Metadata-Version: 2.1
- Name: h2p-parser
- Version: 1.0.0
- Summary: Heteronym to Phoneme Parser
- Home-page: https://github.com/ionite34/h2p-parser
- Author: ionite
- Author-email: dev@ionite.io
- License: Apache 2.0
- Platform: UNKNOWN
- Requires-Python: >=3.7
- License-File: LICENSE
- 
- UNKNOWN
- 
resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/SOURCES.txt DELETED
@@ -1,19 +0,0 @@
- LICENSE
- README.md
- setup.py
- h2p_parser/__init__.py
- h2p_parser/__main__.py
- h2p_parser/cmudictext.py
- h2p_parser/dict_reader.py
- h2p_parser/dictionary.py
- h2p_parser/filter.py
- h2p_parser/format_ph.py
- h2p_parser/h2p.py
- h2p_parser/pos_parser.py
- h2p_parser/processors.py
- h2p_parser/symbols.py
- h2p_parser/h2p_parser.egg-info/PKG-INFO
- h2p_parser/h2p_parser.egg-info/SOURCES.txt
- h2p_parser/h2p_parser.egg-info/dependency_links.txt
- h2p_parser/h2p_parser.egg-info/requires.txt
- h2p_parser/h2p_parser.egg-info/top_level.txt
resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/dependency_links.txt DELETED
@@ -1 +0,0 @@
- 
resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/requires.txt DELETED
@@ -1,2 +0,0 @@
- nltk
- inflect
resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/top_level.txt DELETED
@@ -1 +0,0 @@
- 
resources/app/python/xvapitch/text/h2p_parser/pos_parser.py DELETED
@@ -1,17 +0,0 @@
- # Part of Speech Tag Operations
- 
- # Method to get the parent part of speech (VERB) or (NOUN) from a pos tag
- # from __future__ import annotations
- 
- # def get_parent_pos(pos: str) -> str | None:
- def get_parent_pos(pos):
-     # Get the parent part of speech from a pos tag
-     if pos.startswith('VB'):
-         return 'VERB'
-     elif pos.startswith('NN'):
-         return 'NOUN'
-     elif pos.startswith('RB'):
-         return 'ADVERB'
-     else:
-         return None
- 
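Note: get_parent_pos collapses NLTK's fine-grained Penn tags to their parent class, e.g.:

    get_parent_pos('VBD')  # 'VERB'
    get_parent_pos('NNS')  # 'NOUN'
    get_parent_pos('JJ')   # None (no parent mapping)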
resources/app/python/xvapitch/text/h2p_parser/processors.py DELETED
@@ -1,392 +0,0 @@
- # Transformations of text sequences for matching
- from __future__ import annotations
- from typing import TYPE_CHECKING
- from .symbols import consonants
- 
- import re
- 
- if TYPE_CHECKING:
-     from .cmudictext import CMUDictExt
- 
- _re_digit = re.compile(r'\d+')
- 
- 
- class Processor:
-     def __init__(self, cde: CMUDictExt):
-         self._lookup = cde.lookup
-         self._cmu_get = cde.dict.get
-         self._segment = cde.segment
-         self._tag = cde.h2p.tag
-         self._stem = cde.stem
-         # Number of times respective methods were called
-         self.stat_hits = {
-             'plural': 0,
-             'possessives': 0,
-             'contractions': 0,
-             'hyphenated': 0,
-             'compound': 0,
-             'compound_l2': 0,
-             'stem': 0
-         }
-         # Number of times respective methods returned a value (not None)
-         self.stat_resolves = {
-             'plural': 0,
-             'possessives': 0,
-             'contractions': 0,
-             'hyphenated': 0,
-             'compound': 0,
-             'compound_l2': 0,
-             'stem': 0
-         }
-         # Holds events when features encountered unexpected language syntax
-         self.stat_unexpected = {
-             'plural': [],
-             'possessives': [],
-             'contractions': [],
-             'hyphenated': [],
-             'compound': [],
-             'compound_l2': [],
-             'stem': []
-         }
- 
-     def auto_possessives(self, word: str) -> str | None:
-         """
-         Auto-possessives
-         :param word: Input of possible possessive word
-         :return: Phoneme of word as SDS, or None if unresolvable
-         """
-         if not word.endswith("'s"):
-             return None
-         # If the word ends with "'s", register a hit
-         self.stat_hits['possessives'] += 1
-         """
-         There are 3 general cases:
-         1. Base words ending in one of 6 special consonants (sibilants)
-             - i.e. Tess's, Rose's, Butch's, Midge's, Rush's, Garage's
-             - With consonants ending of [s], [z], [ch], [j], [sh], [zh]
-             - In ARPAbet: {S}, {Z}, {CH}, {JH}, {SH}, {ZH}
-             - These require a suffix of {IH0 Z}
-         2. Base words ending in vowels and voiced consonants:
-             - i.e. Fay's, Hugh's, Bob's, Ted's, Meg's, Sam's, Dean's, Claire's, Paul's, Bing's
-             - In ARPAbet: {IY0}, {EY1}, {UW1}, {B}, {D}, {G}, {M}, {N}, {R}, {L}, {NG}
-             - Vowels need a wildcard match of any numbered variant
-             - These require a suffix of {Z}
-         3. Base words ending in voiceless consonants:
-             - i.e. Hope's, Pat's, Clark's, Ruth's
-             - In ARPAbet: {P}, {T}, {K}, {TH}
-             - These require a suffix of {S}
-         """
- 
-         # Method to return phoneme and increment stat
-         def _resolve(phoneme: list) -> list:
-             self.stat_resolves['possessives'] += 1
-             return phoneme
- 
-         core = word[:-2]  # Get core word without possessive
-         ph = self._lookup(core, ph_format='list')  # find core word using recursive search
-         if ph is None:
-             return None  # Core word not found
-         # [Case 1]
-         if ph[-1] in {'S', 'Z', 'CH', 'JH', 'SH', 'ZH'}:
-             ph += ['IH0', 'Z']
-             return _resolve(ph)
-         # [Case 2]
-         """
-         Valid for case 2:
-         'AA', 'AO', 'EY', 'OW', 'UW', 'AE', 'AW', 'EH', 'IH',
-         'OY', 'AH', 'AY', 'ER', 'IY', 'UH', 'UH',
-         'B', 'D', 'G', 'M', 'N', 'R', 'L', 'NG'
-         To simplify matching, we will check for the listed single-letter variants and 'NG'
-         and then check for any numbered variant
-         """
-         if ph[-1] in {'B', 'D', 'G', 'M', 'N', 'R', 'L', 'NG'} or ph[-1][-1].isdigit():
-             ph += ['Z']
-             return _resolve(ph)
-         # [Case 3]
-         if ph[-1] in {'P', 'T', 'K', 'TH'}:
-             ph += ['S']
-             return _resolve(ph)
- 
-         return None  # No match found
- 
-     def auto_contractions(self, word: str) -> str | None:
-         """
-         Auto contracts form and finds phonemes
-         :param word:
-         :return:
-         """
-         """
-         Supported contractions:
-         - 'll
-         - 'd
-         """
-         # First, check if the word is a contraction
-         parts = word.split("'")  # Split on [']
-         if len(parts) == 1 or parts[1] not in {'ll', 'd'}:
-             return None  # No contraction found
-         if len(parts) > 2:
-             self.stat_unexpected['contractions'].append(word)
-             return None  # More than 2 parts, can't be a contraction
-         # If initial check passes, register a hit
-         self.stat_hits['contractions'] += 1
- 
-         # Get the core word
-         core = parts[0]
-         # Get the phoneme for the core word recursively
-         ph = self._lookup(core, ph_format='list')
-         if ph is None:
-             return None  # Core word not found
-         # Add the phoneme with the appropriate suffix
-         if parts[1] == 'll':
-             ph += ['L']
-         elif parts[1] == 'd':
-             ph += ['D']
-         # Return the phoneme
-         self.stat_resolves['contractions'] += 1
-         return ph
- 
-     def auto_hyphenated(self, word: str) -> str | None:
-         """
-         Splits hyphenated words and attempts to resolve components
-         :param word:
-         :return:
-         """
-         # First, check if the word is a hyphenated word
-         if '-' not in word:
-             return None  # No hyphen found
-         # If initial check passes, register a hit
-         self.stat_hits['hyphenated'] += 1
-         # Split the word into parts
-         parts = word.split('-')
-         # Get the phonemes for each part
-         ph = []
-         for part in parts:
-             ph_part = self._lookup(part, ph_format='sds')
-             if ph_part is None:
-                 return None  # Part not found
-             ph.append(ph_part)
-         # Join the phonemes
-         ph = ' '.join(ph)
-         # Return the phoneme
-         self.stat_resolves['hyphenated'] += 1
-         return ph
- 
-     def auto_compound(self, word: str) -> str | None:
-         """
-         Splits compound words and attempts to resolve components
-         :param word:
-         :return:
-         """
-         # Split word into parts
-         parts = self._segment(word)
-         if len(parts) == 1:
-             return None  # No compound found
-         # If initial check passes, register a hit
-         self.stat_hits['compound'] += 1
-         # Get the phonemes for each part
-         ph = []
-         for part in parts:
-             ph_part = self._lookup(part, ph_format='sds')
-             if ph_part is None:
-                 return None  # Part not found
-             ph.append(ph_part)
-         # Join the phonemes
-         ph = ' '.join(ph)
-         # Return the phoneme
-         self.stat_resolves['compound'] += 1
-         return ph
- 
-     def auto_plural(self, word: str, pos: str = None) -> str | None:
-         """
-         Finds singular form of plurals and attempts to resolve separately
-         Optionally a pos tag can be provided.
-         If no tags are provided, there will be a single word pos inference,
-         which is not ideal.
-         :param pos:
-         :param word:
-         :return:
-         """
-         # First, check if the word is a replaceable plural
-         # Needs to end in 's' or 'es'
-         if word[-1] != 's':
-             return None  # No plural found
-         # Now check if the word is a plural using pos
-         if pos is None:
-             pos = self._tag(word)
-         if pos is None or len(pos) == 0 or (pos[0] != 'NNS' and pos[0] != 'NNPS'):
-             return None  # No tag found
-         # If initial check passes, register a hit
-         self.stat_hits['plural'] += 1
- 
-         """
-         Case 1:
-         > Word ends in 'oes'
-         > Remove the 'es' to get the singular
-         """
-         if len(word) > 3 and word[-3:] == 'oes':
-             singular = word[:-2]
-             # Look up the possessive form (since the pronunciation is the same)
-             ph = self.auto_possessives(singular + "'s")
-             if ph is not None:
-                 self.stat_resolves['plural'] += 1
-                 return ph  # Return the phoneme
- 
-         """
-         Case 2:
-         > Word ends in 's'
-         > Remove the 's' to get the singular
-         """
-         if len(word) > 1 and word[-1] == 's':
-             singular = word[:-1]
-             # Look up the possessive form (since the pronunciation is the same)
-             ph = self.auto_possessives(singular + "'s")
-             if ph is not None:
-                 self.stat_resolves['plural'] += 1
-                 return ph  # Return the phoneme
- 
-         # If no matches, return None
-         return None
- 
-     def auto_stem(self, word: str) -> str | None:
-         """
-         Attempts to resolve using the root stem of a word.
-         Supported modes:
-             - "ing"
-             - "ingly"
-             - "ly"
-         :param word:
-         :return:
-         """
- 
-         # noinspection SpellCheckingInspection
-         """
-         'ly' has no special rules, always add phoneme 'L IY0'
- 
-         'ing' relevant rules:
- 
-         > If the original verb ended in [e], remove it and add [ing]
-             - i.e. take -> taking, make -> making
-             - We will search once with the original verb, and once with [e] added
-             - 1st attempt: tak, mak
-             - 2nd attempt: take, make
- 
-         > If the input word has a repeated consonant before [ing], it's likely that
-         the original verb has only 1 of the consonants
-             - i.e. running -> run, stopping -> stop
-             - We will search for repeated consonants, and perform 2 attempts:
-             - 1st attempt: without the repeated consonant (run, stop)
-             - 2nd attempt: with the repeated consonant (runn, stopp)
-         """
-         # Discontinue if word is too short
-         if len(word) < 3 or (not word.endswith('ly') and not word.endswith('ing')):
-             return None
-         self.stat_hits['stem'] += 1  # Register hit
- 
-         # For ly case
-         if word.endswith('ly'):
-             # Get the root word
-             root = word[:-2]
-             # Recursively get the root
-             ph_root = self._lookup(root, ph_format='sds')
-             # If not exist, return None
-             if ph_root is None:
-                 return None
-             ph_ly = 'L IY0'
-             ph_joined = ' '.join([ph_root, ph_ly])
-             self.stat_resolves['stem'] += 1
-             return ph_joined
- 
-         # For ing case
-         if word.endswith('ing'):
-             # Get the root word
-             root = word[:-3]
-             # Recursively get the root
-             ph_root = self._lookup(root, ph_format='sds')
-             # If not exist, return None
-             if ph_root is None:
-                 return None
-             ph_ing = 'IH0 NG'
-             ph_joined = ' '.join([ph_root, ph_ing])
-             self.stat_resolves['stem'] += 1
-             return ph_joined
- 
-     def auto_component(self, word: str) -> str | None:
-         """
-         Searches for target word as component of a larger word
-         :param word:
-         :return:
-         """
- 
-         """
-         This processing step checks for words as a component of a larger word
-         - i.e. 'synth' is not in the cmu dictionary
-         - Stage 1: We will search for any word beginning with 'synth' (10 matches)
-             - This is because most unseen short words are likely shortened versions
-             - We will split
-         - Stage 2: Search for any word containing 'synth' (13 matches)
-         """
-         raise NotImplementedError
- 
-     def auto_compound_l2(self, word: str, recursive: bool = True) -> str | None:
-         """
-         Searches for target word as a compound word.
-         > Does not use n-gram splitting like auto_compound()
-         > Splits words manually into every possible combination
-         > Returns the match with the highest length of both words
-         :param recursive: True to enable recursive lookups, otherwise only use base CMU dictionary
-         :param word:
-         :return:
-         """
-         # Word must be fully alphabetic
-         if not word.isalpha() or len(word) < 3:
-             return None
-         self.stat_hits['compound_l2'] += 1  # Register hit
- 
-         # Define lookup mode
-         def _lu(search_word: str) -> str | None:
-             if recursive:
-                 return self._lookup(search_word, ph_format='sds')
-             else:
-                 return self._cmu_get(search_word)
- 
-         # Check if the last part is a single character
-         # And that it is repeated in the last char of the first part
-         # This is likely silent so remove it
-         # i.e. 'Derakk' -> 'Derak'
-         # If the word contains a repeated consonant at the end, remove it
-         # First check repeated last 2 letters
-         if word[-2:][0] == word[-2:][1]:
-             # Remove the last char from the word
-             word = word[:-1]
- 
-         # Holds all matches as tuples
-         # (len1, len2, p1, p2, ph1, ph2)
-         matches = []
- 
-         # Splits the word into every possible combination
-         for i in range(1, len(word)):
-             p1 = word[:i]
-             p2 = word[i:]
-             # Looks up both words
-             ph1 = _lu(p1)
-             if ph1 is None:
-                 continue  # Skip if not found
-             ph2 = _lu(p2)
-             if ph2 is None:
-                 continue  # Skip if not found
-             # If both words exist, add to list as tuple
-             matches.append((len(p1), len(p2), p1, p2, ph1, ph2))
- 
-         # Pick the match with the highest length of both words
-         if len(matches) == 0:
-             return None
-         else:
-             # Sort by the minimum of len1 and len2
-             matches.sort(key=lambda x: min(x[0], x[1]))
-             # Get the highest minimum length match
-             match = matches[-1]
-             # Return the full joined match
-             self.stat_resolves['compound_l2'] += 1  # Register resolve
-             return match[4] + ' ' + match[5]
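Note: the possessive logic above reduces to a suffix choice keyed on the final ARPAbet phoneme of the base word. A condensed restatement, not part of the removed file:

    def possessive_suffix(last_ph):
        # Sibilants take 'IH0 Z' (Rose's); vowels (any stress digit) and
        # voiced consonants take 'Z' (Bob's); voiceless stops and 'TH'
        # take 'S' (Pat's).
        if last_ph in {'S', 'Z', 'CH', 'JH', 'SH', 'ZH'}:
            return ['IH0', 'Z']
        if last_ph in {'B', 'D', 'G', 'M', 'N', 'R', 'L', 'NG'} or last_ph[-1].isdigit():
            return ['Z']
        if last_ph in {'P', 'T', 'K', 'TH'}:
            return ['S']
        return None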
resources/app/python/xvapitch/text/h2p_parser/symbols.py DELETED
@@ -1,82 +0,0 @@
- # Holds symbols for graphemes, phonemes, and pos-tags.
- # noinspection SpellCheckingInspection,GrazieInspection
- """
- POS tag list:
- 
- CC      coordinating conjunction
- CD      cardinal digit
- DT      determiner
- EX      existential there ("there is" -> "there exists")
- FW      foreign word
- IN      preposition/subordinating conjunction
- JJ      adjective ('big')
- JJR     adjective, comparative ('bigger')
- JJS     adjective, superlative ('biggest')
- LS      list marker ("1)", "2)", "3)")
- MD      modal ('could', 'will')
- NN      noun, singular
- NNS     noun, plural
- NNP     proper noun, singular ('Harrison')
- NNPS    proper noun, plural ('Americans')
- PDT     predeterminer ('all' in 'all the kids')
- POS     possessive ending (parent's)
- PRP     personal pronoun (I, he, she)
- PRP$    possessive pronoun (my, his, hers)
- RB      adverb ('very', 'silently')
- RBR     adverb, comparative ('better')
- RBS     adverb, superlative ('best')
- RP      particle ('give up')
- TO      to ("go 'to' the store.")
- UH      interjection ("errrrrrrrm")
- VB      verb, base form ('take')
- VBD     verb, past tense ('took')
- VBG     verb, gerund/present participle ('taking')
- VBN     verb, past participle ('taken')
- VBP     verb, sing. present, non-3d ('take')
- VBZ     verb, 3rd person sing. present ('takes')
- WDT     wh-determiner ('which')
- WP      wh-pronoun ('who', 'what')
- WP$     possessive wh-pronoun ('whose')
- WRB     wh-adverb ('where', 'when')
- """
- 
- from __future__ import annotations
- 
- # noinspection SpellCheckingInspection,GrazieInspection
- graphemes = list("abcdefghijklmnopqrstuvwxyz")
- phonemes = ['AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0',
-             'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D', 'DH',
-             'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2', 'F', 'G', 'HH',
-             'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG',
-             'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH',
-             'UH0', 'UH1', 'UH2', 'UW', 'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH']
- pos_tags = ['CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD', 'NN', 'NNS',
-             'NNP', 'NNPS', 'PDT', 'POS', 'PRP', 'PRP$', 'RB', 'RBR', 'RBS', 'RP', 'TO', 'UH',
-             'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB']
- pos_type_tags = ['VERB', 'NOUN', 'PRON', 'ADJ', 'ADV']
- pos_type_short_tags = ['V', 'N', 'P', 'A', 'R']
- pos_type_form_dict = {'V': 'VERB', 'N': 'NOUN', 'P': 'PRON', 'A': 'ADJ', 'R': 'ADV'}
- graphemes_set = set(graphemes)
- phonemes_set = set(phonemes)
- pos_tags_set = set(pos_tags)
- pos_type_tags_set = set(pos_type_tags)
- pos_type_short_tags_set = set(pos_type_short_tags)
- punctuation = {'.', ',', ':', ';', '?', '!', '-', '_', '\'', '\"', '`', '~', '@', '#', '$'}
- consonants = {'B', 'CH', 'D', 'DH', 'F', 'G', 'HH', 'JH', 'K', 'L', 'M', 'N', 'NG', 'P', 'R',
-               'S', 'SH', 'T', 'TH', 'V', 'W', 'Y', 'Z', 'ZH'}
- 
- 
- # Method to convert from short type tags to full type tags.
- def to_full_type_tag(short_type_tag: str) -> str | None:
-     # Equivalent to a lookup in pos_type_form_dict; returns None if unknown
-     return pos_type_form_dict.get(short_type_tag)
resources/app/python/xvapitch/text/h2p_parser/text/__init__.py DELETED
File without changes
resources/app/python/xvapitch/text/h2p_parser/text/numbers.py DELETED
@@ -1,166 +0,0 @@
- # Provides parsing of numbers to text
- """
- This module provides parsing of numeric types in English to text.
- Modified from https://github.com/keithito/tacotron
- """
- 
- import inflect
- import re
- 
- _magnitudes = ['trillion', 'billion', 'million', 'thousand', 'hundred', 'm', 'b', 't']
- _magnitudes_key = {'m': 'million', 'b': 'billion', 't': 'trillion'}
- _measurements = '(f|c|k|d|m|km|ft)'
- _measurements_key = {'f': 'fahrenheit',
-                      'c': 'celsius',
-                      'k': 'thousand',
-                      'm': 'meters',
-                      'km': 'kilometers',
-                      'ft': 'feet'}
- _currency_key = {'$': 'dollar', '£': 'pound', '€': 'euro', '₩': 'won'}
- _inflect = inflect.engine()
- _comma_number_re = re.compile(r'([0-9][0-9,]+[0-9])')
- _decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
- _currency_re = re.compile(r'([$€£₩])([0-9.,]*[0-9]+)(?:[ ]?({})(?=[^a-zA-Z]|$))?'.format("|".join(_magnitudes)),
-                           re.IGNORECASE)
- _measurement_re = re.compile(r'([0-9.,]*[0-9]+(\s)?{}\b)'.format(_measurements), re.IGNORECASE)
- _ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
- _range_re = re.compile(r'(?<=[0-9])+(-)(?=[0-9])+.*?')
- _roman_re = re.compile(r'\b(?=[MDCLXVI]+\b)M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{2,3})\b')  # avoid I
- _multiply_re = re.compile(r'(\b[0-9]+)(x)([0-9]+)')
- _number_re = re.compile(r"[0-9]+'s|[0-9]+s|[0-9]+")
- 
- 
- def _remove_commas(m):
-     return m.group(1).replace(',', '')
- 
- 
- def _expand_decimal_point(m):
-     return m.group(1).replace('.', ' point ')
- 
- 
- def _expand_currency(m):
-     currency = _currency_key[m.group(1)]
-     quantity = m.group(2)
-     magnitude = m.group(3)
- 
-     # remove commas from quantity to be able to convert to numerical
-     quantity = quantity.replace(',', '')
- 
-     # check for million, billion, etc...
-     if magnitude is not None and magnitude.lower() in _magnitudes:
-         if len(magnitude) == 1:
-             magnitude = _magnitudes_key[magnitude.lower()]
-         return "{} {} {}".format(_expand_hundreds(quantity), magnitude, currency + 's')
- 
-     parts = quantity.split('.')
-     if len(parts) > 2:
-         return quantity + " " + currency + "s"  # Unexpected format
- 
-     dollars = int(parts[0]) if parts[0] else 0
- 
-     cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
-     if dollars and cents:
-         dollar_unit = currency if dollars == 1 else currency + 's'
-         cent_unit = 'cent' if cents == 1 else 'cents'
-         return "{} {}, {} {}".format(
-             _expand_hundreds(dollars), dollar_unit,
-             _inflect.number_to_words(cents), cent_unit)
-     elif dollars:
-         dollar_unit = currency if dollars == 1 else currency + 's'
-         return "{} {}".format(_expand_hundreds(dollars), dollar_unit)
-     elif cents:
-         cent_unit = 'cent' if cents == 1 else 'cents'
-         return "{} {}".format(_inflect.number_to_words(cents), cent_unit)
-     else:
-         return 'zero' + ' ' + currency + 's'
- 
- 
- def _expand_hundreds(text):
-     number = float(text)
-     if 1000 < number < 10000 and (number % 100 == 0) and (number % 1000 != 0):
-         return _inflect.number_to_words(int(number / 100)) + " hundred"
-     else:
-         return _inflect.number_to_words(text)
- 
- 
- def _expand_ordinal(m):
-     return _inflect.number_to_words(m.group(0))
- 
- 
- def _expand_measurement(m):
-     _, number, measurement = re.split(r'(\d+(?:\.\d+)?)', m.group(0))
-     number = _inflect.number_to_words(number)
-     measurement = "".join(measurement.split())
-     measurement = _measurements_key[measurement.lower()]
-     # if measurement is plural, and number is singular, remove the 's'
-     if number == "one" and str.endswith(measurement, "s"):
-         # Remove the 's' from the end of the measurement
-         measurement = measurement[:-1]
-     return "{} {}".format(number, measurement)
- 
- 
- def _expand_range(m):
-     return ' to '
- 
- 
- def _expand_multiply(m):
-     left = m.group(1)
-     right = m.group(3)
-     return "{} by {}".format(left, right)
- 
- 
- def _expand_roman(m):
-     # from https://stackoverflow.com/questions/19308177/converting-roman-numerals-to-integers-in-python
-     roman_numerals = {'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000}
-     result = 0
-     num = m.group(0)
-     for i, c in enumerate(num):
-         if (i + 1) == len(num) or roman_numerals[c] >= roman_numerals[num[i + 1]]:
-             result += roman_numerals[c]
-         else:
-             result -= roman_numerals[c]
-     return str(result)
- 
- 
- def _expand_number(m):
-     _, number, suffix = re.split(r"(\d+(?:'?\d+)?)", m.group(0))
-     number = int(number)
-     if 1000 < number < 10000 and (number % 100 == 0) and (number % 1000 != 0):
-         text = _inflect.number_to_words(number // 100) + " hundred"
-     elif 1000 < number < 3000:
-         if number == 2000:
-             text = 'two thousand'
-         elif 2000 < number < 2010:
-             text = 'two thousand ' + _inflect.number_to_words(number % 100)
-         elif number % 100 == 0:
-             text = _inflect.number_to_words(number // 100) + ' hundred'
-         else:
-             number = _inflect.number_to_words(number, andword='', zero='oh', group=2).replace(', ', ' ')
-             number = re.sub(r'-', ' ', number)
-             text = number
-     else:
-         number = _inflect.number_to_words(number, andword='and')
-         number = re.sub(r'-', ' ', number)
-         number = re.sub(r',', '', number)
-         text = number
- 
-     if suffix in ("'s", "s"):
-         if text[-1] == 'y':
-             text = text[:-1] + 'ies'
-         else:
-             text = text + suffix
- 
-     return text
- 
- 
- def normalize_numbers(text):
-     text = re.sub(_comma_number_re, _remove_commas, text)
-     text = re.sub(_currency_re, _expand_currency, text)
-     text = re.sub(_decimal_number_re, _expand_decimal_point, text)
-     text = re.sub(_ordinal_re, _expand_ordinal, text)
-     # text = re.sub(_range_re, _expand_range, text)
-     text = re.sub(_measurement_re, _expand_measurement, text)
-     text = re.sub(_roman_re, _expand_roman, text)
-     text = re.sub(_multiply_re, _expand_multiply, text)
-     text = re.sub(_number_re, _expand_number, text)
-     return text
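Note: expected behaviour per the substitution order in normalize_numbers (a sketch; exact wording comes from the inflect engine):

    from h2p_parser.text.numbers import normalize_numbers

    normalize_numbers('$3.50')         # 'three dollars, fifty cents'
    normalize_numbers('10 km')         # 'ten kilometers'
    normalize_numbers('the 3rd time')  # 'the third time'
    normalize_numbers('Chapter XIV')   # 'Chapter fourteen' (roman -> digits -> words)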
resources/app/python/xvapitch/text/h2p_parser/utils/__init__.py DELETED
File without changes
resources/app/python/xvapitch/text/h2p_parser/utils/converter.py DELETED
@@ -1,79 +0,0 @@
- # Converts dictionary files
- import json
- 
- from .. import symbols
- from .. import format_ph as ph
- from tqdm import tqdm
- 
- 
- def from_binary_delim(path, delimiter) -> dict:
-     # Converts a delimited binary state heteronym look-up dictionary to a dict format
-     # Expected format: WORD|(Space Separated Phonemes Case)|(Space Separated Phonemes Default)|(Case)
-     # Example: "REJECT|R IH0 JH EH1 K T|R IY1 JH EH0 K T|V"
-     # Hashtag comments are allowed but only at the start of a file
- 
-     # Import file
-     result_dict = {}
-     with open(path, 'r') as f:
-         num_lines = sum(1 for _ in f)
-     with open(path, 'r') as f:
-         skipped_comments = False
-         for line in tqdm(f, total=num_lines):
-             # Skip comments
-             if not skipped_comments:
-                 if line.startswith('#') or line == '\n':
-                     continue
-                 else:
-                     skipped_comments = True
-             # Skip empty or newline lines
-             if line.strip() == '' or line.strip() == '\n':
-                 continue
-             # Parse line using passed delimiter
-             tokens = line.strip().split(delimiter)
-             # Check for correct number of tokens
-             if len(tokens) != 4:
-                 raise ValueError('Invalid number of tokens in line: ' + line)
-             # Get word (token 0) and check validity (no spaces)
-             word = tokens[0].lower()
-             if ' ' in word:
-                 raise ValueError('Invalid word in line: ' + line)
-             # Get phonemes and check validity (alphanumeric)
-             ph_case = tokens[1]
-             ph_default = tokens[2]
-             if not ph_case.replace(' ', '').isalnum() or not ph_default.replace(' ', '').isalnum():
-                 raise ValueError('Invalid phonemes in line: ' + line)
-             # Get case (token 3) and check validity (alphanumeric)
-             case = tokens[3]
-             if not case.isalnum():
-                 raise ValueError('Invalid case in line: ' + line)
-             # Check if case is a full case or full type case
-             if case in symbols.pos_tags_set or case in symbols.pos_type_tags_set:
-                 # Add to dictionary directly
-                 # Build sub-dictionary for each case
-                 sub_dict = result_dict.get(word, {})
-                 sub_dict[case] = ph.to_sds(ph_case)
-                 sub_dict['DEFAULT'] = ph.to_sds(ph_default)
-                 result_dict[word] = sub_dict
-             # Check if case is a short type case
-             elif case in symbols.pos_type_short_tags_set:
-                 # Need to convert to full type case
-                 sub_dict = result_dict.get(word, {})
-                 case_full = symbols.pos_type_form_dict[case]
-                 sub_dict[case_full] = ph.to_sds(ph_case)
-                 sub_dict['DEFAULT'] = ph.to_sds(ph_default)
-                 result_dict[word] = sub_dict
-             else:
-                 raise ValueError('Invalid case in line: ' + line)
-     return result_dict
- 
- 
- # Method to write a dict to a json file
- def to_json(path, dict_to_write):
-     # Writes a dictionary to a json file
-     with open(path, 'w') as f:
-         json.dump(dict_to_write, f, indent=4, sort_keys=True)
- 
- 
- # Combined method to convert binary delimited files to json
- def bin_delim_to_json(path, output_path, delimiter):
-     to_json(output_path, from_binary_delim(path, delimiter))
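Note: end to end, the converter turns the pipe-delimited format from the docstring into the JSON layout consumed by dictionary.py. File names below are hypothetical:

    from h2p_parser.utils.converter import bin_delim_to_json

    # heteronyms.txt contains: REJECT|R IH0 JH EH1 K T|R IY1 JH EH0 K T|V
    bin_delim_to_json('heteronyms.txt', 'dict.json', '|')
    # dict.json gains:
    # {"reject": {"DEFAULT": "R IY1 JH EH0 K T", "VERB": "R IH0 JH EH1 K T"}}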
resources/app/python/xvapitch/text/h2p_parser/utils/parser.py DELETED
@@ -1,133 +0,0 @@
- # Parses annotation files for conversion of sentences to phonemes
- from __future__ import annotations
- from h2p_parser import cmudictext
- from h2p_parser.filter import filter_text
- from h2p_parser.text.numbers import normalize_numbers
- from h2p_parser.symbols import punctuation
- 
- from tqdm import tqdm
- 
- 
- # Reads a file into a list of lines
- def read_file(file_name, delimiter) -> list:
-     with open(file_name, 'r', encoding="utf-8") as f:
-         result = []
-         for line in f:
-             line = line.split(delimiter)
-             # Take the second element
-             result.append(line[1].lower())
-         return result
- 
- 
- # Method that checks if a single line is resolvable
- 
- 
- # Checks a list of lines for unresolvable words
- # Returns a list of lines with unresolvable words, or None if no unresolvable words
- def check_lines(lines: list) -> ParseResult:
-     cde = cmudictext.CMUDictExt()
-     # Holds result
-     result = ParseResult()
-     # Loop with tqdm
-     for line in tqdm(lines, desc='Checking lines'):
-         # Add
-         result.all_lines.append(line)
-         result.lines.add(line)
-         # If line contains het, add to result
-         if cde.h2p.contains_het(line)[0]:
-             result.all_lines_cont_het.append(line)
-         # Filter the line
-         f_line = filter_text(line)
-         # Number converter
-         f_line = normalize_numbers(f_line)
-         # Tokenize
-         tokens = cde.h2p.tokenize(f_line)
-         for word in tokens:
-             # Skip word if punctuation
-             if word in punctuation:
-                 continue
-             # Add word to result
-             result.all_words.append(word)
-             result.words.add(word)
-             # Check if word is resolvable
-             h2p_res = cde.h2p.contains_het(word)[0]
-             cmu_res = cde.dict.get(word) is not None
-             fet_res = cde.lookup(word) is not None
-             if not h2p_res and not cmu_res and not fet_res:
-                 # If word ends in "'s", remove it and add the base word
-                 if word.endswith("'s"):
-                     word = word[:-2]
-                 result.unres_all_lines.append(line)
-                 result.unres_all_words.append(word)
-                 result.unres_lines.add(line)
-                 result.unres_words.add(word)
-             elif h2p_res:
-                 result.n_words_res += 1
-                 result.n_words_het += 1
-             elif cmu_res:
-                 result.n_words_res += 1
-                 result.n_words_cmu += 1
-             elif fet_res:
-                 result.n_words_res += 1
-                 result.n_words_fet += 1
- 
-     # Also pass stats
-     result.ft_stats = cde.p.stat_resolves
- 
-     return result
- 
- 
- # Class to hold the result of a parse
- class ParseResult:
-     def __init__(self):
-         self.all_lines = []
-         self.all_lines_cont_het = []
-         self.unres_all_lines = []
-         self.lines = set()
-         self.unres_lines = set()
-         # Words
-         self.all_words = []
-         self.unres_all_words = []
-         self.words = set()
-         self.unres_words = set()
-         # Numerical stats
-         self.n_words_res = 0  # Number of total resolved words
-         self.n_words_cmu = 0  # Resolved words from CMU
-         self.n_words_fet = 0  # Resolved words from Features
-         self.n_words_het = 0  # Resolved words from H2p
-         # Stats from cmudictext
-         self.ft_stats = None
- 
-     # Get percentage of lines covered
-     def line_unique_coverage(self) -> float:
-         dec = 1 - len(self.unres_lines) / len(self.lines)
-         return round(dec * 100, 2)
- 
-     # Get percentage of words covered
-     def word_unique_coverage(self) -> float:
-         dec = 1 - len(self.unres_words) / len(self.words)
-         return round(dec * 100, 2)
- 
-     # Get percentage of lines covered (All)
-     def line_coverage(self) -> float:
-         dec = 1 - len(self.unres_all_lines) / len(self.all_lines)
-         return round(dec * 100, 2)
- 
-     # Get percentage of words covered (All)
-     def word_coverage(self) -> float:
-         dec = 1 - len(self.unres_all_words) / len(self.all_words)
-         return round(dec * 100, 2)
- 
-     # Get percentage of heteronyms containing lines
-     def percent_line_het(self) -> float:
-         dec = len(self.all_lines_cont_het) / len(self.all_lines)
-         return round(dec * 100, 2)
- 
-     # Get percentage of words resolved by H2p
-     def percent_word_h2p(self) -> float:
-         dec = self.n_words_het / self.n_words_res
-         return round(dec * 100, 2)
- 
-     # Get percentage of words resolved by CMU
-     def percent_word_cmu(self) -> float:
-         dec = self.n_words_cmu / self.n_words_res
-         return round(dec * 100, 2)
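Note: a typical run over an LJSpeech-style metadata file (hypothetical path; read_file takes the second delimited column as the transcript):

    from h2p_parser.utils.parser import read_file, check_lines

    lines = read_file('metadata.csv', '|')
    result = check_lines(lines)
    print(result.word_coverage(), '% of all words resolvable')
    print(result.percent_word_h2p(), '% of resolved words were heteronyms')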