File size: 3,456 Bytes
0ffeb19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
"""This file contains a collection of utility functions that can be used for common tasks in the ecg processing."""
from datetime import datetime, timedelta
from typing import Union
import pandas as pd
def cut_out_window(df: pd.DataFrame,
                   time_column: str,
                   start: Union[datetime, pd.Timestamp],
                   end: Union[datetime, pd.Timestamp, None] = None,
                   duration: Union[timedelta, int, None] = None) -> pd.DataFrame:
    """
    Cuts out a window from a DataFrame based on the given start and end timestamps or duration. The dataframe must have
    a time column containing timestamps.

    Both window bounds are inclusive. If ``end`` is given it takes precedence and ``duration`` is ignored.

    Note: if the time column is not already of a datetime64[ns] dtype, it is converted with ``pandas.to_datetime``
    and written back into ``df``, i.e. the passed dataframe is modified in place.

    :param df: The dataframe to cut out the window from.
    :type df: pandas.DataFrame
    :param time_column: The name of the column containing the timestamps.
    :type time_column: str
    :param start: The start timestamp of the window.
    :type start: datetime.datetime or pandas.Timestamp
    :param end: The end timestamp of the window.
    :type end: datetime.datetime or pandas.Timestamp or None
    :param duration: The duration of the window, either as a timedelta or as a number of seconds.
    :type duration: datetime.timedelta or int or None
    :return: The window as a dataframe.
    :rtype: pandas.DataFrame
    :raises ValueError: If neither ``end`` nor ``duration`` is given.
    """
    # Convert the timestamp column to datetime if it's not already
    if not pd.api.types.is_datetime64_ns_dtype(df[time_column]):
        df[time_column] = pd.to_datetime(df[time_column])

    if end is None and duration is None:
        raise ValueError('Either end or duration must be given!')

    if end is None:
        # A timedelta (pandas.Timedelta is a subclass) is used as-is; plain numbers are interpreted as seconds.
        # Passing a timedelta to pd.Timedelta(seconds=...) would raise a TypeError.
        if isinstance(duration, timedelta):
            span = duration
        else:
            span = pd.Timedelta(seconds=duration)
        end = start + span

    # Cut out the window (both bounds inclusive)
    timestamps = df[time_column]
    window = df[(timestamps >= start) & (timestamps <= end)]
    return window
def create_windows(df: pd.DataFrame,
                   time_column: str,
                   label_column: Union[str, None] = None,
                   window_size: Union[int, float] = 5.0,
                   window_slicing_method: str = 'time_related') -> list:
    """
    Slices a dataframe into windows of a given size. The windows can be sliced in different ways. The windows are
    returned as a list of dataframes. The dataframe must have a column containing timestamps.

    Currently only the ``'time_related'`` method is implemented; it groups the rows into consecutive, fixed-width
    time bins of ``window_size`` seconds based on ``time_column``.

    Note: if the time column is not already of a datetime64[ns] dtype, it is converted with ``pandas.to_datetime``
    and written back into ``df``, i.e. the passed dataframe is modified in place.

    :param df: The dataframe to slice.
    :type df: pandas.DataFrame
    :param time_column: The name of the column containing the timestamps.
    :type time_column: str
    :param label_column: The name of the column containing the labels. Not used by the ``'time_related'`` method.
    :type label_column: str or None
    :param window_size: The size of the windows in seconds.
    :type window_size: int or float
    :param window_slicing_method: The method used to slice the windows. One of ``'time_related'``,
        ``'label_related_before'``, ``'label_related_after'`` or ``'label_related_centered'``.
    :type window_slicing_method: str
    :return: A list of dataframes containing the windows.
    :rtype: list
    :raises NotImplementedError: If one of the not yet implemented ``label_related_*`` methods is requested.
    :raises ValueError: If ``window_slicing_method`` is unknown.
    """
    # Convert the timestamp column to datetime if it's not already
    if not pd.api.types.is_datetime64_ns_dtype(df[time_column]):
        df[time_column] = pd.to_datetime(df[time_column])

    # Slice the dataframe into windows
    if window_slicing_method == 'time_related':
        # Pass the bin width as a Timedelta instead of building an '<n>S' frequency string: the upper-case 'S'
        # alias is deprecated in newer pandas releases, while a Timedelta yields the same fixed-width bins.
        bin_width = pd.Timedelta(seconds=window_size)
        grouper = pd.Grouper(key=time_column, freq=bin_width)
        return [group for _, group in df.groupby(grouper)]

    if window_slicing_method in ('label_related_before',
                                 'label_related_after',
                                 'label_related_centered'):
        # These methods are recognised but have no implementation yet; fail loudly instead of returning None.
        raise NotImplementedError(f'window_slicing_method {window_slicing_method} is not implemented yet')

    raise ValueError(f'window_slicing_method {window_slicing_method} not supported')