|
import json |
|
|
|
import streamlit as st |
|
from transformers import AutoTokenizer, RobertaForSequenceClassification, pipeline |
|
|
|
# Load the application configuration once at import time. app() reads
# cfg["models"][<classifier name>] to resolve the model to load.
# The encoding is pinned to UTF-8 so that decoding does not depend on the
# platform's default locale encoding.
with open("config.json", encoding="utf-8") as config_file:
    cfg = json.load(config_file)
|
|
|
|
|
@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_pipeline(model_name_or_path):
    """Build the text-classification pipeline for ``model_name_or_path``.

    The cache key is the model identifier only, so the tokenizer and model
    weights are loaded once per model instead of once per distinct input
    text.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = RobertaForSequenceClassification.from_pretrained(model_name_or_path)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


def load_model(input_text, model_name_or_path):
    """Classify ``input_text`` with the model at ``model_name_or_path``.

    Args:
        input_text: Text passed to the text-classification pipeline.
        model_name_or_path: Identifier handed to ``from_pretrained`` for both
            the tokenizer and the model.

    Returns:
        Whatever the text-classification pipeline returns for ``input_text``.
    """
    # Loading is delegated to the cached helper; only inference runs on
    # every call, so a new input text no longer forces a model reload.
    classifier = _load_pipeline(model_name_or_path)
    return classifier(input_text)
|
|
|
|
|
def app():
    """Render the Marathi news classifier page.

    Shows the model descriptions, lets the user pick a model in the sidebar
    and a sample or custom text in the main area, and displays the predicted
    label and confidence when the "Predict" button is pressed.
    """
    st.title("मराठी Marathi News Classifier")

    st.markdown(
        "This demo uses the below fine-tuned models for marathi news classification:\n"
        "- [IndicNLP Marathi News Classifier](https://huggingface.co/flax-community/mr-indicnlp-classifier) fine-tuned on "
        "[IndicNLP Marathi News Dataset](https://github.com/ai4bharat/indicnlp_corpus#indicnlp-news-article-classification-dataset)\n"
        "> `IndicNLP` model predicts one of these 3 classes - `['lifestyle', 'entertainment', 'sports']`\n"
        "- [iNLTK Marathi News Classifier](https://huggingface.co/flax-community/mr-inltk-classifier) fine-tuned on "
        "[Marathi News Dataset](https://www.kaggle.com/disisbig/marathi-news-dataset)\n"
        "> `iNLTK` model predicts one of these 3 classes - `['state', 'entertainment', 'sports']`"
    )

    classifier = st.sidebar.selectbox("Select a Model", index=0, options=["IndicNLP", "iNLTK"])

    st.sidebar.markdown(
        "**IndicNLP Classes**\n"
        "- lifestyle\n"
        "- entertainment\n"
        "- sports\n"
        "\n"
        "**iNLTK Classes**\n"
        "- state\n"
        "- entertainment\n"
        "- sports"
    )

    sample_texts = [
        "रोहित शर्माने सरावाला सुरुवात करण्यापूर्वी भारतीय खेळाडूला दिला कानमंत्र, म्हणाला...",
        "जॉनी लीवर यांनी नम्रता संभेरावला दिलं खास गिफ्ट, अभिनेत्रीने व्यक्त केल्या भावना",
        "Custom",
    ]
    # The selected sidebar entry is the key into the "models" mapping of cfg.
    model_name_or_path = cfg["models"][classifier]

    # "Custom" is the last entry and the default selection.
    text_to_classify = st.selectbox("Select a Text", options=sample_texts, index=len(sample_texts) - 1)

    if text_to_classify == "Custom":
        text_to_classify = st.text_input("Enter custom text:")

    predict_button = st.button("Predict")

    if predict_button:
        if not text_to_classify.strip():
            # Nothing to classify: the custom text box was left blank.
            st.warning("Please enter some text to classify.")
        else:
            with st.spinner("Generating prediction..."):
                result = load_model(text_to_classify, model_name_or_path)

            top_prediction = result[0]
            st.markdown("## Predicted Label: `{}`".format(top_prediction["label"]))
            # Scale to a percentage before rounding; rounding first and then
            # multiplying by 100 re-introduces floating-point noise
            # (e.g. 56.699999999999996 instead of 56.7).
            st.markdown("## Confidence: `{}`%".format(round(top_prediction["score"] * 100, 1)))

    st.markdown("- - -")
    st.markdown(
        "❓ Can't figure out where to get a sample text other than the predefined ones? ❓\n"
        "\n"
        "We have provided Marathi newspaper links (section wise) below. Head over to any section of your choice, "
        "copy any headline and paste below to see if the model is predicting the respective class correctly or not?\n"
        "- [entertainment](https://maharashtratimes.com/entertainment/articlelist/19359255.cms)\n"
        "- [sports](https://maharashtratimes.com/sports/articlelist/2429056.cms)\n"
        "- [lifestyle](https://maharashtratimes.com/lifestyle-news/articlelist/2429025.cms)\n"
        "- [state](https://maharashtratimes.com/maharashtra/articlelist/2429066.cms)\n"
        "> 📒 NOTE: Both models are not trained on above headlines! Feel free to use any headline from any newspaper"
    )
|
|