File size: 3,768 Bytes
22df56c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import json

import streamlit as st
from transformers import AutoTokenizer, RobertaForSequenceClassification, pipeline

# Load the app configuration once at import time; app() looks up
# cfg["models"][<selected model name>] to get the checkpoint to load.
# JSON text is UTF-8, so decode it explicitly rather than relying on the
# platform's locale-dependent default encoding.
with open("config.json", encoding="utf-8") as f:
    cfg = json.load(f)


@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_pipeline(model_name_or_path):
    """Build the text-classification pipeline for one checkpoint.

    The cache key is only the checkpoint name, so the tokenizer and model
    weights are loaded once per model instead of once per input text.
    ``allow_output_mutation=True`` stops Streamlit from hashing the returned
    pipeline object on every cache hit.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = RobertaForSequenceClassification.from_pretrained(model_name_or_path)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


def load_model(input_text, model_name_or_path):
    """Classify ``input_text`` with the model at ``model_name_or_path``.

    Args:
        input_text: Text to classify.
        model_name_or_path: Checkpoint identifier or local path passed to
            ``from_pretrained``.

    Returns:
        The pipeline's output for ``input_text``. The caller in this file
        reads ``result[0]["label"]`` and ``result[0]["score"]`` from it.
    """
    # Only the model loading is cached; inference is re-run per call.
    nlp = _load_pipeline(model_name_or_path)
    return nlp(input_text)


def app():
    """Render the Marathi news classifier page.

    Shows a description of the two fine-tuned models, lets the user pick a
    model (sidebar) and a sample or custom text, and on "Predict" displays
    the top predicted label and its confidence as returned by ``load_model``.
    """
    st.title("मराठी Marathi News Classifier")

    # Each markdown line must end with an explicit "\n"; without it the
    # adjacent literals concatenate into one run-on paragraph and the bullet
    # lists / block quotes do not render.
    st.markdown(
        "This demo uses the below fine-tuned models for marathi news classification:\n"
        "- [IndicNLP Marathi News Classifier](https://huggingface.co/flax-community/mr-indicnlp-classifier) fine-tuned on "
        "[IndicNLP Marathi News Dataset](https://github.com/ai4bharat/indicnlp_corpus#indicnlp-news-article-classification-dataset)\n"
        "> `IndicNLP` model predicts one of these 3 classes - `['lifestyle', 'entertainment', 'sports']`\n"
        "- [iNLTK Marathi News Classifier](https://huggingface.co/flax-community/mr-inltk-classifier) fine-tuned on "
        "[Marathi News Dataset](https://www.kaggle.com/disisbig/marathi-news-dataset)\n"
        "> `iNLTK` model predicts one of these 3 classes - `['state', 'entertainment', 'sports']`"
    )

    classifier = st.sidebar.selectbox("Select a Model", index=0, options=["IndicNLP", "iNLTK"])

    st.sidebar.markdown(
        "**IndicNLP Classes**\n"
        "- lifestyle\n"
        "- entertainment\n"
        "- sports\n"
        "\n"
        "**iNLTK Classes**\n"
        "- state\n"
        "- entertainment\n"
        "- sports"
    )

    sample_texts = [
        "रोहित शर्माने सरावाला सुरुवात करण्यापूर्वी भारतीय खेळाडूला दिला कानमंत्र, म्हणाला...",
        "जॉनी लीवर यांनी नम्रता संभेरावला दिलं खास गिफ्ट, अभिनेत्रीने व्यक्त केल्या भावना",
        "Custom",
    ]
    # Map the display name chosen in the sidebar to its checkpoint.
    model_name_or_path = cfg["models"][classifier]

    # "Custom" is the last option and the default selection.
    text_to_classify = st.selectbox("Select a Text", options=sample_texts, index=len(sample_texts) - 1)

    if text_to_classify == "Custom":
        text_to_classify = st.text_input("Enter custom text:")

    predict_button = st.button("Predict")

    if predict_button:
        if not text_to_classify.strip():
            # Nothing to classify: do not send an empty string to the model.
            st.warning("Please enter some text to classify.")
        else:
            with st.spinner("Generating prediction..."):
                result = load_model(text_to_classify, model_name_or_path)

            top = result[0]
            st.markdown("## Predicted Label: `{}`".format(top["label"]))
            # Scale first, then format to one decimal: round(x, 3) * 100 can
            # show float artefacts such as 56.699999999999996.
            st.markdown("## Confidence: `{:.1f}`%".format(top["score"] * 100))

    st.markdown("- - -")
    st.markdown(
        "❓ Can't figure out where to get a sample text other than the predefined ones? ❓\n"
        "\n"
        "We have provided Marathi newspaper links (section wise) below. Head over to any section of your choice, "
        "copy any headline and paste below to see if the model is predicting the respective class correctly or not?\n"
        "- [entertainment](https://maharashtratimes.com/entertainment/articlelist/19359255.cms)\n"
        "- [sports](https://maharashtratimes.com/sports/articlelist/2429056.cms)\n"
        "- [lifestyle](https://maharashtratimes.com/lifestyle-news/articlelist/2429025.cms)\n"
        "- [state](https://maharashtratimes.com/maharashtra/articlelist/2429066.cms)\n"
        "> 📒 NOTE: Both models are not trained on above headlines! Feel free to use any headline from any newspaper"
    )