"""Streamlit demo app: sentiment detection with pretrained Spark NLP models.

The page lets the user pick a pretrained SentimentDL model, choose one of the
bundled example texts (or type their own), runs the text through a Spark NLP
pipeline and renders a one-line verdict.
"""

import streamlit as st
import sparknlp
import os
import pandas as pd

# The wildcard imports supply DocumentAssembler, LightPipeline,
# UniversalSentenceEncoder and SentimentDLModel used below.
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from sparknlp.pretrained import PretrainedPipeline

# Page configuration
st.set_page_config(
    layout="wide",
    page_title="Spark NLP Demos App",
    initial_sidebar_state="auto"
)

# CSS for styling
st.markdown(""" """, unsafe_allow_html=True)


@st.cache_resource
def init_spark():
    """Start a Spark NLP session; cached by Streamlit as a shared resource."""
    return sparknlp.start()


@st.cache_resource
def create_pipeline(model):
    """Build the (unfitted) sentiment pipeline for the given model name.

    Args:
        model: Name of the pretrained SentimentDL model to load (English).

    Returns:
        A ``pyspark.ml.Pipeline`` with three stages: document assembler,
        Universal Sentence Encoder embeddings, and the SentimentDL classifier.
    """
    document_assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    use = (
        UniversalSentenceEncoder.pretrained("tfhub_use", "en")
        .setInputCols(["document"])
        .setOutputCol("sentence_embeddings")
    )

    sentimentdl = (
        SentimentDLModel.pretrained(model, "en")
        .setInputCols(["sentence_embeddings"])
        .setOutputCol("sentiment")
    )

    return Pipeline(stages=[document_assembler, use, sentimentdl])


def fit_data(pipeline, data):
    """Run ``data`` through ``pipeline`` and return the predicted label.

    Args:
        pipeline: Unfitted pipeline as returned by ``create_pipeline``.
        data: The text to classify.

    Returns:
        The ``result`` of the first ``sentiment`` annotation, or ``None`` when
        the pipeline produced no sentiment annotation for the input.
    """
    # Obtain the session from the cached factory instead of depending on a
    # module-level ``spark`` name that is only assigned further down.
    session = init_spark()
    # The pipeline is fitted on a single-row frame holding an empty string.
    empty_df = session.createDataFrame([['']]).toDF('text')
    pipeline_model = pipeline.fit(empty_df)
    light_model = LightPipeline(pipeline_model)
    results = light_model.fullAnnotate(data)[0]
    sentiments = results['sentiment']
    return sentiments[0].result if sentiments else None


def _load_examples(folder_path):
    """Return the stripped second line of every ``.txt`` file in a folder.

    Files with fewer than two lines are skipped. Each file is closed as soon
    as it has been read.
    """
    examples = []
    for filename in os.listdir(folder_path):
        if not filename.endswith('.txt'):
            continue
        file_path = os.path.join(folder_path, filename)
        with open(file_path, 'r', encoding='utf-8') as handle:
            lines = handle.readlines()
        if len(lines) >= 2:
            examples.append(lines[1].strip())
    return examples


# Set up the page layout
st.markdown('\nState-of-the-Art Sentiment Detection with Spark NLP\n', unsafe_allow_html=True)

# Sidebar content
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    ["sentimentdl_use_imdb", "sentimentdl_use_twitter"],
    help="For more info about the models visit: https://sparknlp.org/models"
)

# Reference notebook link in sidebar
link = """ Open In Colab """
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(link, unsafe_allow_html=True)

# Load examples
folder_path = f"inputs/{model}"
examples = _load_examples(folder_path)

selected_text = None
result_type = 'tweet'
if 'imdb' in model.lower() or 't5' in model.lower():
    selected_text = st.selectbox("Select a sample IMDB review", examples)
    result_type = 'review'
else:
    selected_text = st.selectbox("Select a sample Tweet", examples)

# Free-form input takes precedence over the selected example.
custom_input = st.text_input("Try it for yourself!")
if custom_input:
    selected_text = custom_input

# With no examples available and nothing typed there is nothing to classify;
# stop the script run here instead of sending None into the pipeline.
if not selected_text:
    st.warning("Please select or enter some text to analyze.")
    st.stop()

st.write('Selected Text')
st.write(selected_text)

# Initialize Spark and create pipeline
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, selected_text)

# Display output sentence. Any label outside the two lists below (including
# None) renders nothing.
if output in ['pos', 'positive', 'POSITIVE']:
    st.markdown("""

This seems like a {} {}. 😃

""".format('positive', result_type), unsafe_allow_html=True)
elif output in ['neg', 'negative', 'NEGATIVE']:
    st.markdown("""

This seems like a {} {}. 😠""".format('negative', result_type), unsafe_allow_html=True)