File size: 406 Bytes
6c0177c |
1 2 3 4 5 6 7 8 9 |
from huggingface_hub import snapshot_download
def load_tokenizer_from_hub(repo_name: str = "Kyudan/TeXBLEU-Tokenizer") -> Tokenizer:
    """Download a tokenizer repository from the Hugging Face Hub and load it.

    Args:
        repo_name: Repository id handed to ``snapshot_download`` as
            ``repo_id``.

    Returns:
        The object built by ``Tokenizer.from_file`` from the
        ``tokenizer.json`` file inside the downloaded snapshot directory.
    """
    # NOTE(review): ``Tokenizer`` is not imported in the visible part of this
    # file -- presumably ``tokenizers.Tokenizer``; confirm it is in scope,
    # otherwise the return annotation fails when the module is loaded.

    # Fetch the repository snapshot; the call returns its local directory.
    local_repo_path = snapshot_download(repo_id=repo_name)

    # Load the tokenizer definition shipped inside the snapshot.
    tokenizer_file = f"{local_repo_path}/tokenizer.json"
    return Tokenizer.from_file(tokenizer_file)