NorHsangPha
committed on
Commit
•
4e8b6c1
1
Parent(s):
703afb1
Update: update fasttext model
Browse files
- .gitignore +1 -0
- __pycache__/lid.cpython-310.pyc +0 -0
- fasttext/model.bin +0 -3
- lid.py +15 -8
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
fasttext/models
|
__pycache__/lid.cpython-310.pyc
CHANGED
Binary files a/__pycache__/lid.cpython-310.pyc and b/__pycache__/lid.cpython-310.pyc differ
|
|
fasttext/model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8ded5749a2ad79ae9ab7c9190c7c8b97ff20d54ad8b9527ffa50107238fc7f6a
|
3 |
-
size 1176355829
|
|
|
|
|
|
|
|
lid.py
CHANGED
@@ -1,16 +1,23 @@
|
|
|
|
1 |
import fasttext
|
2 |
from huggingface_hub import hf_hub_download
|
3 |
import matplotlib.pyplot as plt
|
4 |
import seaborn as sns
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
|
16 |
def identify_languages(title):
|
|
|
1 |
+
import os
|
2 |
import fasttext
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
import matplotlib.pyplot as plt
|
5 |
import seaborn as sns
|
6 |
|
7 |
+
|
8 |
+
def load_model(repo_id: str) -> fasttext.FastText._FastText:
|
9 |
+
model_path = hf_hub_download(
|
10 |
+
repo_id,
|
11 |
+
filename="model.bin",
|
12 |
+
cache_dir="fasttext/models",
|
13 |
+
)
|
14 |
+
|
15 |
+
return fasttext.load_model(model_path)
|
16 |
+
|
17 |
+
|
18 |
+
# repo_id="cis-lmu/glotlid"
|
19 |
+
repo_id = "facebook/fasttext-language-identification"
|
20 |
+
model = load_model(repo_id)
|
21 |
|
22 |
|
23 |
def identify_languages(title):
|