import os

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from transformers import pipeline

from utils import read_poems_from_directory, emotion_labels_with_colors
# Download nltk data for tokenization (nltk.sent_tokenize is used below).
# NOTE(review): newer NLTK releases may look for the 'punkt_tab' resource
# instead of 'punkt' -- confirm against the installed NLTK version.
nltk.download('punkt')

# Initialize emotion classifier pipelines, keyed by the display name shown
# in the UI. return_all_scores=True asks for a score per label rather than
# only the top label.
models = {
    "Model 1": pipeline('sentiment-analysis', model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True),
    "Model 2": pipeline('sentiment-analysis', model="cardiffnlp/twitter-roberta-base-emotion", return_all_scores=True),
}

# Directory holding the uploaded poems; created on import if missing.
poems_directory = "./poems"
os.makedirs(poems_directory, exist_ok=True)
def _analyze_emotions(poem, model):
    """Run *model* on every sentence of *poem* and return the raw results.

    The poem is split with ``nltk.sent_tokenize``; the classifier is called
    once per sentence and its output is collected unchanged, in order.
    """
    return [model(sentence) for sentence in nltk.sent_tokenize(poem)]


def _process_emotions(emotions):
    """Turn raw classifier results into per-sentence ``{label: score}`` dicts.

    Each raw result is indexed with ``[0]`` to reach its list of
    ``{'label': ..., 'score': ...}`` entries.
    NOTE(review): this assumes the pipeline wraps the per-label list in a
    one-element outer list -- confirm for the installed transformers version.

    Returns:
        A tuple ``(scores, labels)`` where ``scores`` is the list of
        per-sentence dicts and ``labels`` is the set of every label seen.
    """
    emotion_scores = [
        {emo['label']: emo['score'] for emo in line_emotions[0]}
        for line_emotions in emotions
    ]
    all_labels = set().union(*emotion_scores)
    return emotion_scores, all_labels


def _plot_emotional_arc(processed_emotions, labels, model_name):
    """Draw one score-per-sentence curve for each (poem, label) pair.

    Args:
        processed_emotions: One list of ``{label: score}`` dicts per poem.
        labels: Labels to plot; a label missing from a sentence counts as 0.
        model_name: Used for the subheader and the plot title.
    """
    st.subheader(model_name)
    # Use an explicit Figure so it can be closed after rendering; figures
    # created through the pyplot state machine stay alive across reruns.
    fig, ax = plt.subplots(figsize=(15, 10))
    for poem_number, emotions in enumerate(processed_emotions, start=1):
        for emotion in labels:
            emotion_arc = [line_emotions.get(emotion, 0) for line_emotions in emotions]
            # Default to black if the label has no configured color.
            color = emotion_labels_with_colors.get(emotion, 'black')
            ax.plot(emotion_arc, label=f'Poem {poem_number} - {emotion}', color=color)
    ax.set_title(f'Emotional Arc of Each Poem ({model_name})')
    ax.set_xlabel('Line Number')
    ax.set_ylabel('Emotion Score')
    if ax.lines:
        # Only build a legend when something was actually plotted.
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    st.pyplot(fig)
    plt.close(fig)


def _extract_features(emotion_data, labels):
    """Build one feature row per poem: ``[mean, std]`` of each label's scores.

    A poem with no sentences contributes ``0.0`` for every feature, because
    the mean/std of an empty list would be NaN and break the clustering step.
    """
    features = []
    for emotions in emotion_data:
        poem_features = []
        for label in labels:
            scores = [line_emotions.get(label, 0) for line_emotions in emotions]
            if scores:
                poem_features.extend([np.mean(scores), np.std(scores)])
            else:
                poem_features.extend([0.0, 0.0])
        features.append(poem_features)
    return features


def analyze_poems_page():
    """Render the "Analyze Poems" Streamlit page.

    The sidebar lets the user upload a ``.txt`` poem into ``poems_directory``,
    lists the available poems and accepts a comma-separated list of emotion
    labels. When the "Analyze Poems" button is pressed, every poem is scored
    sentence by sentence with each classifier in ``models``; for each model
    the page shows the emotional-arc plot, a table of per-label mean/std
    features, and a 2-cluster KMeans grouping of the poems.
    """
    st.header("Analyze Poems")

    # Sidebar for file upload and listing files
    st.sidebar.title("Upload New Poem")
    uploaded_file = st.sidebar.file_uploader("Choose a text file", type="txt")
    if uploaded_file is not None:
        # Keep only the final path component so the upload cannot be written
        # outside poems_directory.
        file_name = os.path.basename(uploaded_file.name)
        with open(os.path.join(poems_directory, file_name), "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.sidebar.success(f"Uploaded {file_name}")

    st.sidebar.title("Available Poems")
    poem_files = [f for f in os.listdir(poems_directory) if f.endswith(".txt")]
    st.sidebar.write("\n".join(poem_files))

    # Sidebar input for user-specified labels
    user_labels_input = st.sidebar.text_input(
        "Enter emotion labels (comma-separated)",
        "happiness,sadness,anger,fear,disgust,surprise,love,joy,anxiety,contentment,frustration,loneliness,excitement,guilt,shame,envy,jealousy,pride,gratitude,empathy,compassion,boredom,relief,curiosity,awe,confusion,nostalgia,hope,despair,embarrassment",
    )
    # Strip whitespace, drop empty entries and de-duplicate while keeping the
    # user's order; duplicates would otherwise yield duplicate table columns.
    stripped_labels = (label.strip() for label in user_labels_input.split(","))
    user_labels = list(dict.fromkeys(label for label in stripped_labels if label))

    if not st.button("Analyze Poems"):
        return
    if not os.path.isdir(poems_directory):
        st.error("The specified path is not a valid directory.")
        return

    # Read poems from the specified directory
    poems = read_poems_from_directory(poems_directory)
    if not poems:
        st.warning("No text files found in the specified directory.")
        return

    n_clusters = 2

    # Analyze and plot for each model
    for model_name, model in models.items():
        processed_emotions = []
        all_labels = set()
        for poem in poems:
            processed, labels = _process_emotions(_analyze_emotions(poem, model))
            processed_emotions.append(processed)
            all_labels.update(labels)

        # Only labels the model actually emits can be plotted or clustered.
        selected_labels = [label for label in user_labels if label in all_labels]
        if not selected_labels:
            st.warning(f"None of the requested labels are produced by {model_name}; skipping it.")
            continue

        _plot_emotional_arc(processed_emotions, selected_labels, model_name)

        # Extract features for clustering and store them in a DataFrame
        features = _extract_features(processed_emotions, selected_labels)
        columns = [
            f'{label}_{stat}'
            for label in selected_labels
            for stat in ('mean', 'std')
        ]
        df = pd.DataFrame(features, columns=columns)

        if len(df) < n_clusters:
            # KMeans cannot form more clusters than there are samples.
            st.write(f"Poem Sentiment Features and Clusters ({model_name}):")
            st.dataframe(df)
            st.info(f"At least {n_clusters} poems are needed for clustering.")
            continue

        # Standardize the features
        scaled_features = StandardScaler().fit_transform(df)

        # Apply KMeans clustering; the model must be fitted before labels_
        # exists. n_init is pinned so results do not depend on the
        # scikit-learn version's default.
        kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
        kmeans.fit(scaled_features)
        df['Cluster'] = kmeans.labels_

        # Display the DataFrame
        st.write(f"Poem Sentiment Features and Clusters ({model_name}):")
        st.dataframe(df)

        # Visualize the clusters using the first label's mean and std columns
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.scatter(df.iloc[:, 0], df.iloc[:, 1], c=df['Cluster'], cmap='viridis', marker='o')
        ax.set_title(f'Clusters of Poem Emotional Arcs ({model_name})')
        ax.set_xlabel(f'{columns[0]}')
        ax.set_ylabel(f'{columns[1]}')
        st.pyplot(fig)
        plt.close(fig)