File size: 406 Bytes
6c0177c
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
from huggingface_hub import snapshot_download

def load_tokenizer_from_hub(repo_name: str = "Kyudan/TeXBLEU-Tokenizer") -> Tokenizer:
    """Download a tokenizer repository from the Hugging Face Hub and load it.

    Args:
        repo_name: Hub repository id, forwarded to ``snapshot_download`` as
            ``repo_id``.

    Returns:
        The object produced by ``Tokenizer.from_file`` for the
        ``tokenizer.json`` file located in the downloaded snapshot.
    """
    # NOTE(review): `Tokenizer` is not imported in the visible part of this
    # file; presumably it is `tokenizers.Tokenizer` -- confirm.

    # Download the repository; the returned value is used below as the
    # directory that contains `tokenizer.json`.
    local_repo = snapshot_download(repo_id=repo_name)

    # Load the tokenizer definition from the downloaded snapshot.
    tokenizer_path = f"{local_repo}/tokenizer.json"
    return Tokenizer.from_file(tokenizer_path)