Spaces:
Runtime error
Runtime error
from typing import Dict, List, Set | |
from spacy.cli.download import get_compatibility | |
def metrics_options() -> List[str]:
    """Return the names of the selectable metric groups.

    A new list is built on every call, so callers may mutate the result
    without affecting later calls.
    """
    metric_names = (
        "descriptive_stats",
        "readability",
        "dependency_distance",
        "pos_proportions",
        "coherence",
        "quality",
        "information_theory",
    )
    return list(metric_names)
def language_options() -> Dict[str, str]:
    """Return a mapping from human-readable language name to language code.

    Entries are listed alphabetically by display name. The codes are short
    identifiers (presumably spaCy language codes -- confirm against the
    caller); ``"xx"`` stands for the multi-language option.
    """
    name_code_pairs = (
        ("Catalan", "ca"),
        ("Chinese", "zh"),
        ("Croatian", "hr"),
        ("Danish", "da"),
        ("Dutch", "nl"),
        ("English", "en"),
        ("Finnish", "fi"),
        ("French", "fr"),
        ("German", "de"),
        ("Greek", "el"),
        ("Italian", "it"),
        ("Japanese", "ja"),
        ("Korean", "ko"),
        ("Lithuanian", "lt"),
        ("Macedonian", "mk"),
        ("Multi-language", "xx"),
        ("Norwegian Bokmål", "nb"),
        ("Polish", "pl"),
        ("Portuguese", "pt"),
        ("Romanian", "ro"),
        ("Russian", "ru"),
        ("Spanish", "es"),
        ("Swedish", "sv"),
        ("Ukrainian", "uk"),
    )
    return dict(name_code_pairs)
#################
# Model options #
#################
def all_model_size_options_pretty_to_short() -> Dict[str, str]:
    """Return the supported model sizes as ``{display label: short suffix}``.

    The entries are ordered from smallest to largest.
    """
    # "Transformer": "trf" is intentionally left out -- disabled for now.
    label_suffix_pairs = [
        ("Small", "sm"),
        ("Medium", "md"),
        ("Large", "lg"),
    ]
    return dict(label_suffix_pairs)
def all_model_size_options_short_to_pretty() -> Dict[str, str]:
    """Return the inverse of ``all_model_size_options_pretty_to_short()``.

    Keys are the short size suffixes and values are the display labels.
    """
    label_by_suffix: Dict[str, str] = {}
    for label, suffix in all_model_size_options_pretty_to_short().items():
        label_by_suffix[suffix] = label
    return label_by_suffix
def available_model_size_options(lang) -> List[str]:
    """Return the sorted display labels of the model sizes offered for ``lang``.

    Args:
        lang: A language code, or the literal ``"all"`` to get every known
            size label without consulting model availability.

    Returns:
        Display labels sorted alphabetically.
    """
    label_by_suffix = all_model_size_options_short_to_pretty()
    if lang != "all":
        # Only sizes reported as available for this language are offered.
        suffixes = ModelAvailabilityChecker.available_model_sizes_for_language(lang)
        labels = [label_by_suffix[suffix] for suffix in suffixes]
    else:
        labels = list(label_by_suffix.values())
    labels.sort()
    return labels
class ModelAvailabilityChecker:
    """Stateless helpers answering which models exist per language and size.

    Every method is a ``@staticmethod``: none of them takes ``self``, so
    without the decorator they could only be called on the class and would
    raise ``TypeError`` when called on an instance.

    NOTE(review): each query calls ``get_compatibility()`` again; if that
    call is expensive (e.g. a network request), consider caching it.
    """

    @staticmethod
    def available_models() -> List[str]:
        """Return the keys of the mapping returned by ``get_compatibility()``.

        The keys are presumably package names such as ``en_core_web_sm`` --
        confirm against the spaCy version in use.
        """
        return list(get_compatibility())

    @staticmethod
    def extract_language_and_size() -> List[List[str]]:
        """Return ``[first, last]`` underscore-separated parts of each model name.

        Example result: ``[["ca", "sm"], ["en", "lg"], ...]``.
        """
        pairs = []
        for model_name in ModelAvailabilityChecker.available_models():
            parts = model_name.split("_")
            # The first part is the language, the last part is the size.
            pairs.append([parts[0], parts[-1]])
        return pairs

    @staticmethod
    def model_is_available(lang: str, size: str) -> bool:
        """Return whether some model name has language ``lang`` and size ``size``."""
        known = {
            "_".join(lang_size)
            for lang_size in ModelAvailabilityChecker.extract_language_and_size()
        }
        return f"{lang}_{size}" in known

    @staticmethod
    def available_model_sizes_for_language(lang: str) -> Set[str]:
        """Return the supported size suffixes that have a model for ``lang``.

        Sizes not listed by ``all_model_size_options_pretty_to_short()`` are
        filtered out.
        """
        # Hoisted: the set of supported suffixes is the same for every model.
        supported_sizes = set(all_model_size_options_pretty_to_short().values())
        return {
            size
            for lang_, size in ModelAvailabilityChecker.extract_language_and_size()
            if lang_ == lang and size in supported_sizes
        }