"""Streamlit page that scores poems with emotion classifiers and clusters them."""

import os

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from transformers import pipeline

from utils import read_poems_from_directory, emotion_labels_with_colors

# Download nltk data for tokenization
nltk.download('punkt')
# Newer NLTK releases load the sentence tokenizer from 'punkt_tab' instead of
# 'punkt'; requesting both keeps nltk.sent_tokenize working on either version.
nltk.download('punkt_tab')

# Initialize emotion classifier pipelines
models = {
    "Model 1": pipeline(
        'sentiment-analysis',
        model="j-hartmann/emotion-english-distilroberta-base",
        return_all_scores=True,
    ),
    "Model 2": pipeline(
        'sentiment-analysis',
        model="cardiffnlp/twitter-roberta-base-emotion",
        return_all_scores=True,
    ),
}

poems_directory = "./poems"
os.makedirs(poems_directory, exist_ok=True)

# Number of clusters requested from KMeans; clustering is skipped when there
# are fewer poems than this, because KMeans rejects n_samples < n_clusters.
_N_CLUSTERS = 2

_DEFAULT_LABELS = "happiness,sadness,anger,fear,disgust,surprise,love,joy,anxiety,contentment,frustration,loneliness,excitement,guilt,shame,envy,jealousy,pride,gratitude,empathy,compassion,boredom,relief,curiosity,awe,confusion,nostalgia,hope,despair,embarrassment"


def _analyze_emotions(poem, model):
    """Run *model* on every sentence of *poem* and return the raw results.

    The poem is split with ``nltk.sent_tokenize``; the returned list holds one
    classifier result per sentence, in order.
    """
    return [model(sentence) for sentence in nltk.sent_tokenize(poem)]


def _process_emotions(emotions):
    """Flatten raw classifier results into per-sentence ``{label: score}`` dicts.

    Args:
        emotions: list of raw results as returned by ``_analyze_emotions``;
            the first element of each result is read as a list of dicts with
            ``'label'`` and ``'score'`` keys.

    Returns:
        A ``(scores, labels)`` tuple: the list of per-sentence score dicts and
        the set of every label that appeared.
    """
    emotion_scores = []
    all_labels = set()
    for line_emotions in emotions:
        line_score = {emo['label']: emo['score'] for emo in line_emotions[0]}
        all_labels.update(line_score)
        emotion_scores.append(line_score)
    return emotion_scores, all_labels


def _plot_emotional_arc(processed_emotions, labels, model_name):
    """Draw one line per (poem, label) pair showing the score across sentences."""
    st.subheader(model_name)
    # Use an explicit figure instead of pyplot's implicit global one so it can
    # be handed to Streamlit and then released.
    fig, ax = plt.subplots(figsize=(15, 10))
    for i, emotions in enumerate(processed_emotions):
        for emotion in labels:
            emotion_arc = [line_emotions.get(emotion, 0) for line_emotions in emotions]
            # default to black if not found
            color = emotion_labels_with_colors.get(emotion, 'black')
            ax.plot(emotion_arc, label=f'Poem {i+1} - {emotion}', color=color)
    ax.set_title(f'Emotional Arc of Each Poem ({model_name})')
    ax.set_xlabel('Line Number')
    ax.set_ylabel('Emotion Score')
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; close it so
    # repeated reruns do not accumulate open figures.
    plt.close(fig)


def _extract_features(emotion_data, labels):
    """Build one feature row per poem: mean and std of each label's scores.

    A poem with no sentences contributes ``0.0`` for every feature rather than
    the NaN that ``np.mean``/``np.std`` return for an empty sequence, so the
    rows stay usable for scaling and clustering.
    """
    features = []
    for emotions in emotion_data:
        poem_features = []
        for label in labels:
            scores = [line_emotions.get(label, 0) for line_emotions in emotions]
            if scores:
                poem_features.extend([np.mean(scores), np.std(scores)])
            else:
                poem_features.extend([0.0, 0.0])
        features.append(poem_features)
    return features


def _cluster_and_display(features, selected_labels, model_name):
    """Show the feature table and, when possible, a KMeans cluster scatter plot.

    *selected_labels* must be non-empty: the scatter plot uses the first two
    feature columns (mean and std of the first label).
    """
    # Create a DataFrame to store the features
    columns = []
    for label in selected_labels:
        columns.extend([f'{label}_mean', f'{label}_std'])
    df = pd.DataFrame(features, columns=columns)

    can_cluster = len(df) >= _N_CLUSTERS
    if can_cluster:
        # Standardize the features
        scaled_features = StandardScaler().fit_transform(df)

        # Apply KMeans clustering
        kmeans = KMeans(n_clusters=_N_CLUSTERS, random_state=42)
        kmeans.fit(scaled_features)
        df['Cluster'] = kmeans.labels_

    # Display the DataFrame
    st.write(f"Poem Sentiment Features and Clusters ({model_name}):")
    st.dataframe(df)

    if not can_cluster:
        st.info(f"At least {_N_CLUSTERS} poems are needed for clustering.")
        return

    # Visualize the clusters
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(df.iloc[:, 0], df.iloc[:, 1], c=df['Cluster'], cmap='viridis', marker='o')
    ax.set_title(f'Clusters of Poem Emotional Arcs ({model_name})')
    ax.set_xlabel(f'{columns[0]}')
    ax.set_ylabel(f'{columns[1]}')
    st.pyplot(fig)
    plt.close(fig)


def analyze_poems_page():
    """Render the "Analyze Poems" page.

    The sidebar lets the user upload a ``.txt`` poem into ``poems_directory``,
    lists the poems already there, and takes a comma-separated list of emotion
    labels. Pressing the "Analyze Poems" button runs every pipeline in
    ``models`` over each poem, plots the per-sentence scores for the requested
    labels that the model actually produced, and clusters the poems on the
    mean/std of those scores.
    """
    st.header("Analyze Poems")

    # Sidebar for file upload and listing files
    st.sidebar.title("Upload New Poem")
    uploaded_file = st.sidebar.file_uploader("Choose a text file", type="txt")

    if uploaded_file is not None:
        # The name comes from the client; keep only its final component so the
        # file cannot be written outside poems_directory.
        safe_name = os.path.basename(uploaded_file.name)
        if safe_name:
            with open(os.path.join(poems_directory, safe_name), "wb") as f:
                f.write(uploaded_file.getbuffer())
            st.sidebar.success(f"Uploaded {safe_name}")

    st.sidebar.title("Available Poems")
    poem_files = [f for f in os.listdir(poems_directory) if f.endswith(".txt")]
    st.sidebar.write("\n".join(poem_files))

    # Sidebar input for user-specified labels
    user_labels_input = st.sidebar.text_input(
        "Enter emotion labels (comma-separated)",
        _DEFAULT_LABELS,
    )
    # Drop blanks and duplicates (keeping first-seen order); a repeated label
    # would otherwise produce duplicate DataFrame columns.
    stripped_labels = (label.strip() for label in user_labels_input.split(","))
    user_labels = list(dict.fromkeys(label for label in stripped_labels if label))

    if not st.button("Analyze Poems"):
        return

    if not os.path.isdir(poems_directory):
        st.error("The specified path is not a valid directory.")
        return

    # Read poems from the specified directory
    poems = read_poems_from_directory(poems_directory)
    if not poems:
        st.warning("No text files found in the specified directory.")
        return

    # Analyze and plot for each model
    for model_name, model in models.items():
        processed_emotions = []
        all_labels = set()
        for poem in poems:
            processed, labels = _process_emotions(_analyze_emotions(poem, model))
            processed_emotions.append(processed)
            all_labels.update(labels)

        selected_labels = [label for label in user_labels if label in all_labels]
        if not selected_labels:
            # Each model has its own label set; with no overlap there is
            # nothing to plot and no feature columns to cluster on.
            st.subheader(model_name)
            st.warning(f"None of the requested labels are produced by {model_name}.")
            continue

        _plot_emotional_arc(processed_emotions, selected_labels, model_name)

        # Extract features for clustering
        features = _extract_features(processed_emotions, selected_labels)
        _cluster_and_display(features, selected_labels, model_name)