"""Gradio demo that compares sentiment predictions across several models.

Three pretrained transformer classifiers (FinBERT, a multilingual BERT review
model and a Twitter RoBERTa model) are compared with three TF-IDF based
scikit-learn classifiers that are trained at start-up on ``data.csv``.
"""
from functools import lru_cache

import gradio as gr
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from transformers import pipeline

# Load the provided dataset
file_path = 'data.csv'
df = pd.read_csv(file_path)

# Split data into training and test sets.
# The test split is created but not used further in the visible code.
X_train, X_test, y_train, y_test = train_test_split(
    df['Sentence'], df['Sentiment'], test_size=0.2, random_state=42
)

# Define models: each one is a TF-IDF vectorizer feeding a classifier.
nb_model = make_pipeline(TfidfVectorizer(), MultinomialNB())
# probability=True is required so that SVC exposes predict_proba.
svm_model = make_pipeline(TfidfVectorizer(), SVC(probability=True))
rf_model = make_pipeline(TfidfVectorizer(), RandomForestClassifier())

# Train models
nb_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
rf_model.fit(X_train, y_train)

# Define sentences to choose from
sentences = [
    "The announced restructuring will significantly decrease the company's indebtedness.",
    "UPM-Kymmene upgraded to `in-line' from `underperform' by Goldman Sachs.",
    "$AAPL shares are breaking out of the recent resistance level.",
    "Profitability (in EBIT %) was 13.6%, compared to 14.3% in Q2 2009.",
    "The Finnish bank has issued a profit warning.",
    "TeliaSonera's underlying results however included 457 mln SKr in positive one-offs, hence the adjusted underlying EBITDA actually amounts to 7.309 bln SKr, clearly below expectations, analysts said.",
]

# Hugging Face model identifiers, keyed by the display name used in the results.
MODEL_PATHS = {
    "FinBert": "ProsusAI/finbert",
    "BERT": "nlptown/bert-base-multilingual-uncased-sentiment",
    "RoBERTa": "cardiffnlp/twitter-roberta-base-sentiment",
}

# Upper bound applied to the input, both as a character cap (kept from the
# original behaviour) and as the tokenizer's max_length.
MAX_INPUT_LENGTH = 512

_BERT_LABELS = {
    "1 star": "negative",
    "2 stars": "negative",
    "3 stars": "neutral",
    "4 stars": "positive",
    "5 stars": "positive",
}

_ROBERTA_LABELS = {
    "LABEL_0": "negative",
    "LABEL_1": "neutral",
    "LABEL_2": "positive",
}


# Function to map BERT labels
def map_bert_label(label):
    """Map a star-rating label ("1 star" .. "5 stars") to a sentiment name.

    Args:
        label: Label string produced by the BERT review model.

    Returns:
        "negative", "neutral" or "positive"; an unrecognised label is
        returned unchanged instead of silently becoming ``None``.
    """
    return _BERT_LABELS.get(label, label)


# Function to map RoBERTa labels
def map_roberta_label(label):
    """Map a generic "LABEL_n" label to a sentiment name.

    Args:
        label: Label string produced by the RoBERTa model.

    Returns:
        "negative", "neutral" or "positive"; an unrecognised label is
        returned unchanged instead of raising ``KeyError``.
    """
    return _ROBERTA_LABELS.get(label, label)


# Per-model label normalisers; models not listed here are used as-is.
_LABEL_MAPPERS = {
    "BERT": map_bert_label,
    "RoBERTa": map_roberta_label,
}


@lru_cache(maxsize=None)
def _load_analyzer(model_path):
    """Build the sentiment pipeline for ``model_path`` once and reuse it."""
    return pipeline("sentiment-analysis", model=model_path)


def _classical_result(model, sentence):
    """Return the label predicted by a fitted sklearn pipeline and its probability.

    The score is the probability assigned to the *returned* label rather than
    the maximum probability, because for ``SVC(probability=True)`` the class
    chosen by ``predict`` is not guaranteed to be the argmax of
    ``predict_proba``.
    """
    label = model.predict([sentence])[0]
    probabilities = model.predict_proba([sentence])[0]
    score = probabilities[list(model.classes_).index(label)]
    # Convert numpy scalars to native Python values for JSON output.
    if hasattr(label, "item"):
        label = label.item()
    return {"label": label, "score": float(score)}


# Function to analyze sentiment
def analyze_sentiment(sentence):
    """Run every model on ``sentence`` and collect label/score pairs.

    Args:
        sentence: Text selected in the UI. ``None`` (nothing selected) yields
            an empty result instead of an exception.

    Returns:
        A tuple ``(sentence, results)`` where ``results`` maps each model's
        display name to ``{"label": ..., "score": ...}``.
    """
    if sentence is None:
        return "", {}

    results = {}

    # Analyze sentiment using transformers models
    for model_name, model_path in MODEL_PATHS.items():
        sentiment_analyzer = _load_analyzer(model_path)
        # Keep the original 512-character cap and additionally truncate at
        # the token level so the model's sequence limit cannot be exceeded.
        prediction = sentiment_analyzer(
            sentence[:MAX_INPUT_LENGTH],
            truncation=True,
            max_length=MAX_INPUT_LENGTH,
        )[0]
        label = prediction['label']
        mapper = _LABEL_MAPPERS.get(model_name)
        if mapper is not None:
            label = mapper(label)
        results[model_name] = {"label": label, "score": prediction['score']}

    # Analyze sentiment using sklearn models
    results["Naive Bayes"] = _classical_result(nb_model, sentence)
    results["SVM"] = _classical_result(svm_model, sentence)
    results["Random Forest"] = _classical_result(rf_model, sentence)

    return sentence, results


# Create Gradio interface
dropdown = gr.Dropdown(choices=sentences, label="Select Sentence")
text_output = gr.Textbox(label="Selected Sentence", lines=2)
sentiment_output = gr.JSON(label="Sentiment Scores")

demo = gr.Interface(
    fn=analyze_sentiment,
    inputs=[dropdown],
    outputs=[text_output, sentiment_output],
    title="Compare Sentiment Analysis Across Models",
    description="Select a sentence to see sentiment analysis results from multiple models.",
)
demo.launch(share=True)