|
"""This file contains a collection of utility functions that can be used for common tasks in the ecg processing.""" |
|
from datetime import datetime, timedelta |
|
from typing import Union |
|
|
|
import pandas as pd |
|
|
|
|
|
def cut_out_window(df: pd.DataFrame,
                   time_column: str,
                   start: Union[datetime, pd.Timestamp],
                   end: Union[datetime, pd.Timestamp, None] = None,
                   duration: Union[timedelta, int, None] = None) -> pd.DataFrame:
    """
    Cuts out a window from a DataFrame based on the given start and end timestamps or duration. The dataframe must have
    a time column containing timestamps (or values that ``pandas.to_datetime`` can parse).

    Both window borders are inclusive. If ``end`` is given it takes precedence and ``duration`` is ignored.
    The passed dataframe is never modified; if the time column has to be converted to datetimes, the conversion
    is done on a copy and the returned window contains the converted column.

    :param df: The dataframe to cut out the window from.
    :type df: pandas.DataFrame
    :param time_column: The name of the column containing the timestamps.
    :type time_column: str
    :param start: The start timestamp of the window (inclusive).
    :type start: datetime.datetime or pandas.Timestamp
    :param end: The end timestamp of the window (inclusive).
    :type end: datetime.datetime or pandas.Timestamp or None
    :param duration: The duration of the window, either as a timedelta or as a number of seconds.
    :type duration: datetime.timedelta or int or None

    :return: The window as a dataframe.
    :rtype: pandas.DataFrame

    :raises ValueError: If neither ``end`` nor ``duration`` is given.
    """
    if end is None and duration is None:
        raise ValueError('Either end or duration must be given!')

    if end is None:
        # pd.Timedelta(seconds=...) only accepts plain numbers, so timedelta
        # objects (incl. pd.Timedelta) have to be passed positionally instead.
        if isinstance(duration, timedelta):
            span = pd.Timedelta(duration)
        else:
            span = pd.Timedelta(seconds=duration)
        end = start + span

    if not pd.api.types.is_datetime64_ns_dtype(df[time_column]):
        # assign() returns a new frame, so the caller's dataframe stays untouched.
        df = df.assign(**{time_column: pd.to_datetime(df[time_column])})

    timestamps = df[time_column]
    return df[(timestamps >= start) & (timestamps <= end)]
|
|
|
|
|
def create_windows(df: pd.DataFrame,
                   time_column: str,
                   label_column: Union[str, None] = None,
                   window_size: Union[int, float] = 5.0,
                   window_slicing_method: str = 'time_related') -> list:
    """
    Slices a dataframe into consecutive windows of a given size. The dataframe must have a column containing
    timestamps (or values that ``pandas.to_datetime`` can parse); the windows are built by grouping on that column.

    The passed dataframe is never modified; if the time column has to be converted to datetimes, the conversion
    is done on a copy and the returned windows contain the converted column.

    Only the ``'time_related'`` slicing method is implemented so far. The ``'label_related_before'``,
    ``'label_related_after'`` and ``'label_related_centered'`` methods are reserved names and raise
    ``NotImplementedError``.

    :param df: The dataframe to slice.
    :type df: pandas.DataFrame
    :param time_column: The name of the column containing the timestamps.
    :type time_column: str
    :param label_column: The name of the column containing the labels. Currently unused.
    :type label_column: str or None
    :param window_size: The size of the windows in seconds.
    :type window_size: int or float
    :param window_slicing_method: The method used to slice the windows.
    :type window_slicing_method: str

    :return: A list of dataframes, one per time bin, in chronological order.
    :rtype: list

    :raises NotImplementedError: If a label related slicing method is requested.
    :raises ValueError: If the slicing method is unknown.
    """
    pending_methods = ('label_related_before', 'label_related_after', 'label_related_centered')
    if window_slicing_method in pending_methods:
        # Fail loudly instead of silently returning None for the stubbed methods.
        raise NotImplementedError(f'window_slicing_method {window_slicing_method} is not implemented yet')
    if window_slicing_method != 'time_related':
        raise ValueError(f'window_slicing_method {window_slicing_method} not supported')

    if not pd.api.types.is_datetime64_ns_dtype(df[time_column]):
        # assign() returns a new frame, so the caller's dataframe stays untouched.
        df = df.assign(**{time_column: pd.to_datetime(df[time_column])})

    # A Timedelta is used as bin width instead of an f'{window_size}S' string:
    # it gives the same binning without relying on the 'S' frequency alias.
    bin_width = pd.Timedelta(seconds=window_size)
    grouped = df.groupby(pd.Grouper(key=time_column, freq=bin_width))
    return [window for _, window in grouped]