import pandas as pd
from typing import Dict, List, Union

from src.conversion import csv_to_pandas
from src.ecg_processing import process_batch
from src.pydantic_models import ECGConfig, ECGSample


class PreTrainedPipeline:
    """Inference pipeline that loads an ECG CSV file and processes it as a batch.

    The CSV is loaded once at construction time (when a path is given) and
    kept in ``self.df``; ``process_data`` and ``__call__`` operate on that
    cached DataFrame.
    """

    def __init__(self, path=""):
        """Store ``path`` and eagerly load the CSV when ``path`` is non-empty.

        Args:
            path: Location handed to ``csv_to_pandas``. An empty string (the
                default) skips loading and leaves ``self.df`` as ``None``.
        """
        self.path = path
        self.df = None  # Populated by load_data(); None means "nothing loaded".

        if path:
            self.load_data()

    def load_data(self):
        """Load the CSV at ``self.path`` into ``self.df`` via ``csv_to_pandas``."""
        self.df = csv_to_pandas(self.path)

    def process_data(self):
        """Group the loaded rows into samples and run them through ``process_batch``.

        Rows are grouped by every column except ``timestamp_idx``, ``ecg`` and
        ``label``; within each group those three columns are collected into
        lists. The ``configs.*`` columns of the first group are used to build
        the ``ECGConfig``, and each grouped row becomes one ``ECGSample``.

        Returns:
            Whatever ``process_batch(samples, configs)`` returns.

        Raises:
            ValueError: If no data has been loaded.
        """
        if self.df is None:
            raise ValueError("No data loaded. Please provide a valid CSV path.")

        df = self.df

        # Implode: one row per sample, with the per-timestep columns as lists.
        cols_to_implode = ['timestamp_idx', 'ecg', 'label']
        # Keep the DataFrame's own column order for the group keys; building
        # the key list from a set would make the ordering vary between runs.
        group_cols = [col for col in df.columns if col not in cols_to_implode]
        df_imploded = (
            df.groupby(group_cols)
            .agg({'timestamp_idx': list,
                  'ecg': list,
                  'label': list})
            .reset_index()
        )

        # Build the config from the first sample's ``configs.*`` columns.
        # NOTE(review): presumably the config is identical for every sample
        # in the file - confirm against the CSV producer.
        config_cols = [col for col in df.columns if col.startswith('configs.')]
        raw_configs = df_imploded[config_cols].iloc[0].to_dict()
        configs = ECGConfig(**{
            key.removeprefix('configs.'): value
            for key, value in raw_configs.items()
        })

        # One ECGSample per imploded row.
        samples = [
            ECGSample(**record)
            for record in df_imploded.to_dict(orient='records')
        ]

        return process_batch(samples, configs)

    def __call__(
            self, inputs: Dict[str, Dict[str, List[Union[str, float]]]]
    ) -> List[Union[str, float]]:
        """Process the loaded data and return the DataFrame's values as a flat list.

        Args:
            inputs (:obj:`dict`):
                a dictionary containing a key 'data' mapping to a dict in which
                the values represent each column. Currently unused: the
                pipeline works on the CSV loaded at construction time.
        Return:
            A :obj:`list` of floats or strings: every cell of the loaded
            DataFrame, flattened in row-major order.
        Raises:
            ValueError: If no data has been loaded.
        """
        # Compare against None explicitly: the truth value of a DataFrame is
        # ambiguous and pandas raises ValueError when it is evaluated.
        if self.df is None:
            raise ValueError("No data loaded. Please provide a valid CSV path.")

        # Runs the batch processing (and its validation); its result is not
        # part of the returned value.
        self.process_data()

        return self.df.values.flatten().tolist()