"""This file contains a collection of utility functions that can be used for common tasks in the ecg processing.""" from datetime import datetime, timedelta from typing import Union import pandas as pd def cut_out_window(df: pd.DataFrame, time_column: str, start: Union[datetime, pd.Timestamp], end: Union[datetime, pd.Timestamp, None] = None, duration: Union[timedelta, int, None] = None) -> pd.DataFrame: """ Cuts out a window from a DataFrame based on the given start and end timestamps or duration. The dataframe must have a time column containing timestamps. :param df: The dataframe to cut out the window from. :type df: pandas.DataFrame :param time_column: The name of the column containing the timestamps. :type time_column: str :param start: The start timestamp of the window. :type start: datetime.datetime or pandas.Timestamp :param end: The end timestamp of the window. :type end: datetime.datetime or pandas.Timestamp or None :param duration: The duration of the window in seconds. :type duration: datetime.timedelta or int or None :return: The window as a dataframe. :rtype: pandas.DataFrame """ # Convert the timestamp column to datetime if it's not already if not pd.api.types.is_datetime64_ns_dtype(df[time_column]): df[time_column] = pd.to_datetime(df[time_column]) # Cut out the window if end is None and duration is None: raise ValueError('Either end or duration must be given!') if end is None and duration is not None: end = start + pd.Timedelta(seconds=duration) window = df[(df[time_column] >= start) & (df[time_column] <= end)] return window def create_windows(df, time_column, label_column=None, window_size=5.0, window_slicing_method='time_related'): """ Slices a dataframe into windows of a given size. The windows can be sliced in different ways. The windows are returned as a generator of dataframes. The dataframe must have a column containing timestamps and be indexed by it. :param df: The dataframe to slice. :type df: pandas.DataFrame :param time_column: The name of the column containing the timestamps. :type time_column: str :param label_column: The name of the column containing the labels. :type label_column: str :param window_size: The size of the windows in seconds. :type window_size: int :param window_slicing_method: The method used to slice the windows. :type window_slicing_method: str :return: A generator of dataframes containing the windows. :rtype: generator """ # Convert the timestamp column to datetime if it's not already if not pd.api.types.is_datetime64_ns_dtype(df[time_column]): df[time_column] = pd.to_datetime(df[time_column]) # Slice the dataframe into windows if window_slicing_method == 'time_related': # Resample the dataframe every x seconds result_dfs = [group for _, group in df.groupby(pd.Grouper(key=time_column, freq=f'{window_size}S'))] return result_dfs elif window_slicing_method == 'label_related_before': pass elif window_slicing_method == 'label_related_after': pass elif window_slicing_method == 'label_related_centered': pass else: raise ValueError(f'window_slicing_method {window_slicing_method} not supported')