sseung0703
committed on
Commit
•
ea793a2
1
Parent(s):
4de2b91
update
Browse files- app.py +0 -10
- app/docs.py +0 -98
- multiapp.py +0 -10
app.py
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
from app import docs
|
3 |
-
from multiapp import MultiApp
|
4 |
-
def main():
    """Build the sidebar and launch the multi-page Streamlit application.

    Renders the project logo in the sidebar, registers the text
    classification page with a ``MultiApp`` container, and hands control
    to the container so it can render the selected page.
    """
    # The return value of ``st.sidebar.image`` is not needed, so it is not
    # bound to a local name.
    st.sidebar.image("images/huggingface-marathi-roberta.png", width=230)

    multi_app = MultiApp()
    multi_app.add_app("Text Classification", docs.app)
    multi_app.run()


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/docs.py
DELETED
@@ -1,98 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
|
3 |
-
import streamlit as st
|
4 |
-
from transformers import AutoTokenizer, RobertaForSequenceClassification, pipeline
|
5 |
-
|
6 |
-
# Load the application configuration once at import time. ``cfg["models"]``
# is looked up later with the classifier name chosen in the sidebar.
# An explicit encoding keeps decoding independent of the platform default.
with open("config.json", encoding="utf-8") as f:
    cfg = json.load(f)
|
8 |
-
|
9 |
-
|
10 |
-
@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_pipeline(model_name_or_path):
    """Build (and cache) a text-classification pipeline for one model.

    The cache key is only the model identifier, so the tokenizer and model
    weights are loaded once per model instead of once per input text.
    ``allow_output_mutation=True`` stops Streamlit from hashing the returned
    pipeline object on every access.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = RobertaForSequenceClassification.from_pretrained(model_name_or_path)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


def load_model(input_text, model_name_or_path):
    """Classify ``input_text`` with the model at ``model_name_or_path``.

    Args:
        input_text: The text to classify.
        model_name_or_path: Identifier or path passed to ``from_pretrained``
            for both the tokenizer and the classification model.

    Returns:
        The output of the text-classification pipeline for ``input_text``;
        the caller reads ``result[0]["label"]`` and ``result[0]["score"]``.
    """
    # Previously the whole function was cached on (input_text, model), which
    # reloaded the model for every new text and grew the cache without bound.
    nlp = _load_pipeline(model_name_or_path)
    return nlp(input_text)
|
18 |
-
|
19 |
-
|
20 |
-
def app():
    """Render the Marathi news classification page.

    Shows an introduction to the two available models, lets the user pick a
    model in the sidebar and a sample (or custom) text in the main area, and
    on "Predict" displays the predicted label with its confidence. A short
    help section with links to sample headlines is rendered at the bottom.
    """
    st.title("मराठी Marathi News Classifier")

    st.markdown(
        "This demo uses the below fine-tuned models for marathi news classification:\n"
        "- [IndicNLP Marathi News Classifier](https://huggingface.co/flax-community/mr-indicnlp-classifier) fine-tuned on "
        "[IndicNLP Marathi News Dataset](https://github.com/ai4bharat/indicnlp_corpus#indicnlp-news-article-classification-dataset)\n"
        "> `IndicNLP` model predicts one of these 3 classes - `['lifestyle', 'entertainment', 'sports']`\n"
        "- [iNLTK Marathi News Classifier](https://huggingface.co/flax-community/mr-inltk-classifier) fine-tuned on "
        "[Marathi News Dataset](https://www.kaggle.com/disisbig/marathi-news-dataset)\n"
        "> `iNLTK` model predicts one of these 3 classes - `['state', 'entertainment', 'sports']`"
    )

    classifier = st.sidebar.selectbox("Select a Model", index=0, options=["IndicNLP", "iNLTK"])

    st.sidebar.markdown(
        "**IndicNLP Classes**\n"
        "- lifestyle\n"
        "- entertainment\n"
        "- sports\n"
        "\n"
        "**iNLTK Classes**\n"
        "- state\n"
        "- entertainment\n"
        "- sports"
    )

    sample_texts = [
        "रोहित शर्माने सरावाला सुरुवात करण्यापूर्वी भारतीय खेळाडूला दिला कानमंत्र, म्हणाला...",
        "जॉनी लीवर यांनी नम्रता संभेरावला दिलं खास गिफ्ट, अभिनेत्रीने व्यक्त केल्या भावना",
        "Custom",
    ]
    model_name_or_path = cfg["models"][classifier]

    # "Custom" is the last entry and is selected by default.
    text_to_classify = st.selectbox("Select a Text", options=sample_texts, index=len(sample_texts) - 1)

    if text_to_classify == "Custom":
        text_to_classify = st.text_input("Enter custom text:")

    predict_button = st.button("Predict")

    if predict_button:
        if not text_to_classify.strip():
            # Classifying blank input would only yield a meaningless label.
            st.warning("Please enter some text to classify.")
        else:
            with st.spinner("Generating prediction..."):
                result = load_model(text_to_classify, model_name_or_path)

            st.markdown("## Predicted Label: `{}`".format(result[0]["label"]))
            # Scale first and format afterwards: rounding before multiplying
            # by 100 exposes float artifacts such as 99.30000000000001.
            st.markdown("## Confidence: `{:.1f}`%".format(result[0]["score"] * 100))

    st.markdown("- - -")
    st.markdown(
        "❓ Can't figure out where to get a sample text other than the predefined ones? ❓\n"
        "\n"
        "We have provided Marathi newspaper links (section wise) below. Head over to any section of your choice, "
        "copy any headline and paste below to see if the model is predicting the respective class correctly or not?\n"
        "- [entertainment](https://maharashtratimes.com/entertainment/articlelist/19359255.cms)\n"
        "- [sports](https://maharashtratimes.com/sports/articlelist/2429056.cms)\n"
        "- [lifestyle](https://maharashtratimes.com/lifestyle-news/articlelist/2429025.cms)\n"
        "- [state](https://maharashtratimes.com/maharashtra/articlelist/2429066.cms)\n"
        "> 📒 NOTE: Both models are not trained on above headlines! Feel free to use any headline from any newspaper"
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
multiapp.py
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
class MultiApp:
    """Container that lets the user switch between several Streamlit pages.

    Pages are registered with :meth:`add_app` and rendered by :meth:`run`,
    which shows a radio selector in the sidebar and calls the chosen page's
    function.
    """

    def __init__(self):
        # Each entry is a dict with the keys "title" and "function".
        self.apps = []

    def add_app(self, title, func):
        """Register a page.

        Args:
            title: Label shown for the page in the sidebar selector.
            func: Zero-argument callable that renders the page.
        """
        self.apps.append({"title": title, "function": func})

    def run(self):
        """Render the sidebar selector and the currently selected page."""
        st.sidebar.header("Tasks")
        if not self.apps:
            # With no options the radio widget has nothing to return, and
            # subscripting the result would raise a TypeError.
            return
        selected = st.sidebar.radio("", self.apps, format_func=lambda entry: entry["title"])
        selected["function"]()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|