NorHsangPha committed on
Commit
4e8b6c1
1 Parent(s): 703afb1

Update: update fasttext model

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. __pycache__/lid.cpython-310.pyc +0 -0
  3. fasttext/model.bin +0 -3
  4. lid.py +15 -8
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ fasttext/models
__pycache__/lid.cpython-310.pyc CHANGED
Binary files a/__pycache__/lid.cpython-310.pyc and b/__pycache__/lid.cpython-310.pyc differ
 
fasttext/model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ded5749a2ad79ae9ab7c9190c7c8b97ff20d54ad8b9527ffa50107238fc7f6a
3
- size 1176355829
 
 
 
 
lid.py CHANGED
@@ -1,16 +1,23 @@
 
1
  import fasttext
2
  from huggingface_hub import hf_hub_download
3
  import matplotlib.pyplot as plt
4
  import seaborn as sns
5
 
6
- # Download the model from Hugging Face Hub
7
- # model_path = hf_hub_download(
8
- # # repo_id="cis-lmu/glotlid", filename="model.bin", cache_dir="./glotlid"
9
- # repo_id="facebook/fasttext-language-identification",
10
- # filename="model.bin",
11
- # cache_dir="fasttext",
12
- # )
13
- model = fasttext.load_model("fasttext/model.bin")
 
 
 
 
 
 
14
 
15
 
16
  def identify_languages(title):
 
1
+ import os
2
  import fasttext
3
  from huggingface_hub import hf_hub_download
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
 
7
+
8
+ def load_model(repo_id: str) -> fasttext.FastText._FastText:
9
+ model_path = hf_hub_download(
10
+ repo_id,
11
+ filename="model.bin",
12
+ cache_dir="fasttext/models",
13
+ )
14
+
15
+ return fasttext.load_model(model_path)
16
+
17
+
18
+ # repo_id="cis-lmu/glotlid"
19
+ repo_id = "facebook/fasttext-language-identification"
20
+ model = load_model(repo_id)
21
 
22
 
23
  def identify_languages(title):