import datetime
from tqdm import tqdm

# Pandas
import pandas as pd

# Expresiones regulares
import re

# Matplotlib and Seaborn
import matplotlib.pyplot as plt
import seaborn as sns

# NLTK
import nltk
from nltk.corpus import stopwords

# spaCy
import spacy

# PySentimiento y Transformers
from pysentimiento import create_analyzer
from sentence_transformers import SentenceTransformer

# Word cloud
from PIL import Image

import uuid
import gradio as gr

# Fetch the NLTK resources requested by this script, in the original order.
for _nltk_resource in ('stopwords', 'punkt'):
    nltk.download(_nltk_resource)


### Refactored: the texts used to be processed twice to obtain the output and the probabilities; both are now taken from a single pass.
def get_sentiment(df, column):
    """Annotate ``df`` with the sentiment predicted for each value of ``column``.

    A Spanish sentiment analyzer is created on every call and applied to the
    values of ``df[column]`` one at a time while a progress bar is shown.

    Args:
        df: DataFrame holding the texts; the new columns are assigned on this
            same object.
        column: Name of the column whose values are passed to the analyzer.

    Returns:
        ``df`` with four added columns: ``Polaridad`` (the ``output`` of each
        prediction) and ``sent_NEU``, ``sent_NEG``, ``sent_POS`` (the value
        stored under that key in each prediction's ``probas``, or ``None``
        when the key is missing).
    """
    sentiment_model = create_analyzer(task="sentiment", lang="es")

    predictions = []
    with tqdm(total=len(df), desc="Analyzing Comments") as progress:
        for text in df[column]:
            predictions.append(sentiment_model.predict(text))
            progress.update(1)

    # Predicted label first, then one probability column per label, keeping
    # the column order Polaridad, sent_NEU, sent_NEG, sent_POS.
    df['Polaridad'] = [prediction.output for prediction in predictions]
    for label in ('NEU', 'NEG', 'POS'):
        df[f'sent_{label}'] = [
            prediction.probas.get(label) for prediction in predictions
        ]
    return df

### Refactored: the texts used to be processed twice to obtain the output and the probabilities; both are now taken from a single pass.
def get_emotions(df, column):
    """Annotate ``df`` with the emotion predicted for each value of ``column``.

    A Spanish emotion analyzer is created on every call and applied to the
    values of ``df[column]`` one at a time while a progress bar is shown.

    Args:
        df: DataFrame holding the texts; the new columns are assigned on this
            same object.
        column: Name of the column whose values are passed to the analyzer.

    Returns:
        ``df`` with eight added columns: ``Emocion`` (the ``output`` of each
        prediction) and ``emo_anger``, ``emo_sadness``, ``emo_surprise``,
        ``emo_disgust``, ``emo_joy``, ``emo_fear``, ``emo_others`` (the value
        stored under that key in each prediction's ``probas``, or ``None``
        when the key is missing).
    """
    emotion_model = create_analyzer(task="emotion", lang="es")

    predictions = []
    with tqdm(total=len(df), desc="Analyzing Comments") as progress:
        for text in df[column]:
            predictions.append(emotion_model.predict(text))
            progress.update(1)

    # Predicted label first, then one probability column per emotion, in the
    # same order the columns have always been created.
    df['Emocion'] = [prediction.output for prediction in predictions]
    emotion_labels = (
        'anger', 'sadness', 'surprise', 'disgust', 'joy', 'fear', 'others',
    )
    for label in emotion_labels:
        df[f'emo_{label}'] = [
            prediction.probas.get(label) for prediction in predictions
        ]
    return df

class ProcesamientoLenguaje:
    """Text-cleaning and POS-filtering helpers built on a Spanish spaCy pipeline."""

    # Ordered (compiled pattern, replacement) pairs applied by ``cleaner``.
    # The order matters: later rules run on the output of earlier ones.
    _CLEANING_RULES = (
        # Remove URLs.
        (re.compile(r'https?\S+'), ''),
        # Remove simple emoticons such as ":)", ";-(", "=D", ":P".
        (re.compile(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)'), ''),
        # Normalize "EE.UU" written with any single separator character
        # (case-insensitive) to "eeuu".
        (re.compile(r'ee.uu', flags=re.IGNORECASE), 'eeuu'),
        # Remove the literal sequence "#.".
        (re.compile(r'\#\.'), ''),
        # Replace line breaks with spaces.
        (re.compile(r'\n'), ' '),
        # Remove commas.
        (re.compile(r','), ''),
        # Replace hyphens with spaces.
        (re.compile(r'\-'), ' '),
        # Replace an ellipsis (three dots) with a space.
        (re.compile(r'\.{3}'), ' '),
        # Collapse repeated "a" (e.g. "holaaaaa" -> "hola").
        (re.compile(r'a{2,}'), 'a'),
        # Collapse repeated "é" (e.g. "volvééééé" -> "volvé").
        (re.compile(r'é{2,}'), 'é'),
        # Collapse repeated "i".
        (re.compile(r'i{2,}'), 'i'),
        # Collapse "jaaaa" laughter to "ja".
        # NOTE(review): the "a{2,}" rule above already removes every run of
        # "a", so this rule can no longer match; kept to preserve the
        # original sequence. Confirm whether "(ja){2,}" was intended.
        (re.compile(r'ja{2,}'), 'ja'),
        # Remove every character that is not a word character, whitespace
        # or "@" ("ñ" is listed explicitly as in the original pattern).
        (re.compile(r'[^\w\s@ñ]', flags=re.UNICODE), ''),
        # Remove "@user" mentions. The previous pattern r'\b@\w+\b' required
        # a word character right before "@", so mentions at the start of the
        # text or after a space were never removed.
        (re.compile(r'@\w+'), ''),
        # Remove words of one or two characters.
        (re.compile(r'\b\w{1,2}\b'), ''),
    )

    def __init__(self):
        # Load the Spanish model without the parser and NER components.
        self.nlp = spacy.load('es_core_news_md', disable=["parser", "ner"])

    def postags_and_stopwords(self, texts, allowed_postags=('NOUN', 'ADJ', 'PROPN', 'VB', 'X')):
        """Keep only the tokens of ``texts`` that pass the POS and stop-word filters.

        The text is run through the spaCy pipeline and a token is kept when
        its ``pos_`` is in ``allowed_postags``, its text is not in the
        module-level ``stop_words`` collection and it is longer than two
        characters. ``stop_words`` is looked up when the method is called, so
        it must be defined by then.

        NOTE(review): the default tags include 'VB', which is compared
        against ``token.pos_``. spaCy's coarse-grained tag for verbs is
        presumably 'VERB', in which case verbs are never kept by default.
        Confirm, and switch to 'VERB' if verbs are wanted.

        Args:
            texts: A single text string to process.
            allowed_postags: Collection of POS tags whose tokens are kept.

        Returns:
            The kept token texts joined by single spaces.
        """
        kept_tokens = [
            token.text
            for token in self.nlp(texts)
            if token.pos_ in allowed_postags
            and token.text not in stop_words
            and len(token.text) > 2
        ]
        return ' '.join(kept_tokens)

    def cleaner(self, word):
        """Return ``word`` with URLs, emoticons, symbols, mentions and short words removed.

        Applies every rule in ``_CLEANING_RULES`` in order. Removed spans are
        not re-collapsed, so the result may contain consecutive spaces.

        Args:
            word: The text to clean.

        Returns:
            The cleaned string.
        """
        for pattern, replacement in self._CLEANING_RULES:
            word = pattern.sub(replacement, word)
        return word

def grafico_pie(df, column_name='Polaridad'):
    """Save a pie chart of the value counts of ``df[column_name]`` as a JPG.

    Args:
        df: DataFrame containing the column to plot.
        column_name: Column whose distinct values become the pie slices.

    Returns:
        The path of the written image, a file named
        ``<uuid4>_sentimiento.jpg`` relative to the working directory.
    """
    output_path = f"{uuid.uuid4()}_sentimiento.jpg"
    figure, axes = plt.subplots(figsize=(8, 6))

    counts = df[column_name].value_counts()
    axes.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=140)
    axes.set_title("Distribución de Polaridad")

    figure.savefig(output_path, bbox_inches="tight")
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(figure)
    return output_path

def grafico_barras(df, column_name='Emocion'):
    """Save a count plot of ``df[column_name]`` as a JPG, labelling each bar.

    Args:
        df: DataFrame containing the column to plot.
        column_name: Column whose distinct values are counted on the x axis.

    Returns:
        The path of the written image, a file named
        ``<uuid4>_sentimiento.jpg`` relative to the working directory.
    """
    output_path = f"{uuid.uuid4()}_sentimiento.jpg"
    figure, axes = plt.subplots(figsize=(8, 6))
    sns.countplot(x=column_name, data=df, ax=axes)

    # Write each bar's height just above its top, centred horizontally.
    for bar in axes.patches:
        bar_height = bar.get_height()
        bar_center = bar.get_x() + bar.get_width() / 2.
        axes.annotate(
            format(bar_height),
            (bar_center, bar_height),
            ha='center',
            va='center',
            xytext=(0, 10),
            textcoords='offset points',
        )

    axes.set_xlabel("Emocion")
    axes.set_ylabel("Cantidad")
    axes.set_title("Histograma de Emocion")

    figure.savefig(output_path, bbox_inches="tight")
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(figure)
    return output_path

# Module-level resources shared by the text-processing code in this file.
# Creating the instance loads the spaCy model once, at import time.
pln = ProcesamientoLenguaje()
# Spanish stop words from NLTK. They are only used for "not in stop_words"
# membership tests, so they are stored as a frozenset to make each per-token
# lookup O(1) instead of a scan over the list NLTK returns.
stop_words = frozenset(stopwords.words('spanish'))

# Reads the uploaded CSV, runs sentiment and emotion analysis, cleans the comments and writes the processed CSV and the charts.
def procesar_csv(file):
    """Process an uploaded CSV of comments and build the outputs shown in the UI.

    The file is read with ``;`` as delimiter, ``Fecha`` is parsed with the
    ``%d/%m/%y`` format, sentiment and emotion columns are added from
    ``Comentario``, and a cleaned ``Comentario_clean`` column is derived. The
    resulting table is written to ``<uuid4>_processed_output.csv`` and two
    charts are rendered.

    Args:
        file: Uploaded file object exposing the path in ``file.name``, or
            ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple: the first 10 rows of the processed DataFrame, the path of
        the processed CSV, the path of the pie chart and the path of the bar
        chart.

    Raises:
        gr.Error: If ``file`` is ``None``.
    """
    if file is None:
        # Returning a lone string here does not match the four values this
        # function otherwise returns (one per output component), so the
        # message would never reach the user. gr.Error shows it in the UI.
        raise gr.Error("No se ha cargado ningún archivo.")

    df = pd.read_csv(file.name, delimiter=';')
    df['Fecha'] = pd.to_datetime(df['Fecha'], format='%d/%m/%y')
    df = get_sentiment(df, "Comentario")
    df = get_emotions(df, "Comentario")

    def _drop_stop_words(text):
        # Case-insensitive stop-word removal on whitespace-separated words.
        return ' '.join(
            word for word in text.split() if word.lower() not in stop_words
        )

    # Clean symbols first, then drop stop words, then keep allowed POS tags.
    df['Comentario_clean'] = df['Comentario'].apply(pln.cleaner)
    df['Comentario_clean'] = df['Comentario_clean'].apply(_drop_stop_words)
    df['Comentario_clean'] = df['Comentario_clean'].apply(pln.postags_and_stopwords)

    output_file = f"{uuid.uuid4()}_processed_output.csv"
    df.to_csv(output_file, index=False)

    grafico_pie_path = grafico_pie(df)
    grafico_barras_path = grafico_barras(df)
    # Only the first rows are shown as a preview; the full data is in the CSV.
    return df.head(10), output_file, grafico_pie_path, grafico_barras_path


# Crear la interfaz en Gradio
# Input: a single uploaded CSV file.
csv_input = gr.File(label="Archivo CSV")

# Outputs, in the order procesar_csv returns them: preview table,
# downloadable CSV, pie chart image and bar chart image.
result_outputs = [
    gr.Dataframe(label="Vista previa del archivo procesado"),
    gr.File(label="Descargar CSV procesado"),
    gr.Image(type="filepath", label="Gráfico de torta"),
    gr.Image(type="filepath", label="Gráfico de barras"),
]

interface = gr.Interface(
    fn=procesar_csv,
    inputs=csv_input,
    outputs=result_outputs,
    title="Cargar y visualizar CSV",
    description="Sube un archivo CSV para ver los primeros registros. El archivo CSV debe tener los campos Fecha y Comentario.",
)

# Start the Gradio app.
interface.launch()