ECG2HRV / src /utils.py
nina-m-m's picture
Rename utils.py to src/utils.py
f501147 verified
raw
history blame
3.46 kB
"""This file contains a collection of utility functions that can be used for common tasks in the ecg processing."""
from datetime import datetime, timedelta
from typing import Union
import pandas as pd
def cut_out_window(df: pd.DataFrame,
                   time_column: str,
                   start: Union[datetime, pd.Timestamp],
                   end: Union[datetime, pd.Timestamp, None] = None,
                   duration: Union[timedelta, int, None] = None) -> pd.DataFrame:
    """
    Cut out a window from a dataframe based on a start timestamp and either an end timestamp or a duration.

    All rows whose timestamp lies in the closed interval ``[start, end]`` are returned. If ``end`` is given it takes
    precedence and ``duration`` is ignored; otherwise ``end`` is computed as ``start + duration``. The dataframe passed
    in is not modified: if the time column has to be converted to datetime, the conversion is applied to a copy.

    :param df: The dataframe to cut out the window from.
    :type df: pandas.DataFrame
    :param time_column: The name of the column containing the timestamps.
    :type time_column: str
    :param start: The start timestamp of the window (inclusive).
    :type start: datetime.datetime or pandas.Timestamp
    :param end: The end timestamp of the window (inclusive).
    :type end: datetime.datetime or pandas.Timestamp or None
    :param duration: The duration of the window, either as a number of seconds or as a timedelta.
    :type duration: datetime.timedelta or int or None
    :return: The window as a dataframe, with the time column converted to datetime.
    :rtype: pandas.DataFrame
    :raises ValueError: If neither ``end`` nor ``duration`` is given.
    """
    if end is None and duration is None:
        raise ValueError('Either end or duration must be given!')

    if end is None:
        # pd.Timedelta(seconds=...) only accepts plain numbers, so timedelta objects (pd.Timedelta is a subclass of
        # datetime.timedelta) have to be passed positionally instead.
        if isinstance(duration, timedelta):
            window_length = pd.Timedelta(duration)
        else:
            window_length = pd.Timedelta(seconds=duration)
        end = start + window_length

    # Convert the timestamp column to datetime if it's not already. A shallow copy is enough to keep the caller's
    # dataframe untouched, because the column is replaced as a whole rather than written into.
    if not pd.api.types.is_datetime64_ns_dtype(df[time_column]):
        df = df.copy(deep=False)
        df[time_column] = pd.to_datetime(df[time_column])

    timestamps = df[time_column]
    return df[(timestamps >= start) & (timestamps <= end)]
def create_windows(df: pd.DataFrame,
                   time_column: str,
                   label_column: Union[str, None] = None,
                   window_size: Union[int, float] = 5.0,
                   window_slicing_method: str = 'time_related') -> list:
    """
    Slice a dataframe into consecutive windows of a given size.

    Only the ``'time_related'`` slicing method is implemented: the rows are grouped into fixed-width time bins of
    ``window_size`` seconds based on ``time_column``. The dataframe passed in is not modified: if the time column has
    to be converted to datetime, the conversion is applied to a copy.

    :param df: The dataframe to slice. It must have a column containing timestamps.
    :type df: pandas.DataFrame
    :param time_column: The name of the column containing the timestamps.
    :type time_column: str
    :param label_column: The name of the column containing the labels. Currently unused.
    :type label_column: str or None
    :param window_size: The size of the windows in seconds.
    :type window_size: int or float
    :param window_slicing_method: The method used to slice the windows. One of ``'time_related'``,
        ``'label_related_before'``, ``'label_related_after'`` or ``'label_related_centered'``.
    :type window_slicing_method: str
    :return: A list of dataframes, one per time bin, in chronological order.
    :rtype: list
    :raises NotImplementedError: If one of the ``'label_related_*'`` methods is requested.
    :raises ValueError: If ``window_slicing_method`` is not a known method.
    """
    # The label related methods are declared but have no implementation yet. Fail loudly instead of silently
    # returning None, which callers would only notice later when they try to iterate over the result.
    pending_methods = ('label_related_before', 'label_related_after', 'label_related_centered')
    if window_slicing_method in pending_methods:
        raise NotImplementedError(f'window_slicing_method {window_slicing_method} is not implemented yet')
    if window_slicing_method != 'time_related':
        raise ValueError(f'window_slicing_method {window_slicing_method} not supported')

    # Convert the timestamp column to datetime if it's not already. A shallow copy is enough to keep the caller's
    # dataframe untouched, because the column is replaced as a whole rather than written into.
    if not pd.api.types.is_datetime64_ns_dtype(df[time_column]):
        df = df.copy(deep=False)
        df[time_column] = pd.to_datetime(df[time_column])

    # Passing the bin width as a Timedelta avoids building a frequency string (the 'S' alias is deprecated in recent
    # pandas versions) and handles fractional window sizes as well.
    bin_width = pd.Timedelta(seconds=window_size)
    time_bins = pd.Grouper(key=time_column, freq=bin_width)
    # NOTE(review): bins that contain no rows presumably show up as empty dataframes - confirm against pandas.
    return [window for _, window in df.groupby(time_bins)]