hubii-world
/

ECG2HRV

Feature Extraction

Joblib

English

biology

electrocardiogram

Model card Files Files and versions Community

nina-m-m committed on Feb 5

Commit

ec3f61b

•

1 Parent(s): 875bdf8

Implement inference pipeline

Browse files

Files changed (1) hide show

pipeline.py +51 -11

pipeline.py CHANGED Viewed

@@ -1,18 +1,52 @@
 from typing import Dict, List, Union
-import os
 class PreTrainedPipeline():
     def __init__(self, path=""):
-        # IMPLEMENT_THIS
         # Preload all the elements you are going to need at inference.
         # For instance your model, processors, tokenizer that might be needed.
-        # This function is only called once, so do all the heavy processing I/O here"""
-        raise NotImplementedError(
-            "Please implement PreTrainedPipeline __init__ function"
-        )
     def __call__(
-        self, inputs: Dict[str, Dict[str, List[Union[str, float]]]]
     ) -> List[Union[str, float]]:
         """
         Args:
@@ -22,7 +56,13 @@ class PreTrainedPipeline():
         Return:
             A :obj:`list` of floats or strings: The classification output for each row.
         """
-        # IMPLEMENT_THIS
-        raise NotImplementedError(
-            "Please implement PreTrainedPipeline __call__ function"
-        )

+import pandas as pd
 from typing import Dict, List, Union
+from src.conversion import csv_to_pandas
+from src.ecg_processing import process_batch
+from src.pydantic_models import ECGConfig, ECGSample
 class PreTrainedPipeline():
     def __init__(self, path=""):
         # Preload all the elements you are going to need at inference.
         # For instance your model, processors, tokenizer that might be needed.
+        # This function is only called once, so do all the heavy processing I/O here
+        self.path = path
+        self.df = None  # Placeholder for the DataFrame
+        if path:
+            self.load_data()
+    def load_data(self):
+        # Load CSV file into DataFrame
+        self.df = csv_to_pandas(self.path)
+    def process_data(self):
+        # Read csv file
+        df = self.df
+        # Implode
+        cols_to_implode = ['timestamp_idx', 'ecg', 'label']
+        df_imploded = df.groupby(list(set(df.columns) - set(cols_to_implode))) \
+            .agg({'timestamp_idx': list,
+                  'ecg': list,
+                  'label': list}) \
+            .reset_index()
+        # Get metadata
+        config_cols = [col for col in df.columns if col.startswith('configs.')]
+        configs = df_imploded[config_cols].iloc[0].to_dict()
+        configs = {key.removeprefix('configs.'): value for key, value in configs.items()}
+        configs = ECGConfig(**configs)
+        batch_cols = [col for col in df.columns if col.startswith('batch.')]
+        batch = df_imploded[batch_cols].iloc[0].to_dict()
+        batch = {key.removeprefix('batch.'): value for key, value in batch.items()}
+        # Get samples
+        samples = df_imploded.to_dict(orient='records')
+        samples = [ECGSample(**sample) for sample in samples]
+        features_df = process_batch(samples, configs)
     def __call__(
+            self, inputs: Dict[str, Dict[str, List[Union[str, float]]]]
     ) -> List[Union[str, float]]:
         """
         Args:
         Return:
             A :obj:`list` of floats or strings: The classification output for each row.
         """
+        if not self.df:
+            raise ValueError("No data loaded. Please provide a valid CSV path.")
+        # Implement your processing logic here, if needed
+        self.process_data()
+        # Assuming you want to return a list of strings or floats from the DataFrame
+        result = self.df.values.flatten().tolist()
+        return result