File size: 3,456 Bytes
0ffeb19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""This file contains a collection of utility functions for common tasks in ECG processing."""
from datetime import datetime, timedelta
from typing import Union

import pandas as pd


def cut_out_window(df: pd.DataFrame,
                   time_column: str,
                   start: Union[datetime, pd.Timestamp],
                   end: Union[datetime, pd.Timestamp, None] = None,
                   duration: Union[timedelta, int, float, None] = None) -> pd.DataFrame:
    """
    Cuts out a window from a DataFrame based on the given start and end timestamps or duration. The dataframe must have
    a time column containing timestamps (or values that ``pandas.to_datetime`` can parse). Both window bounds are
    inclusive. The dataframe passed in is not modified.

    :param df: The dataframe to cut out the window from.
    :type df: pandas.DataFrame
    :param time_column: The name of the column containing the timestamps.
    :type time_column: str
    :param start: The start timestamp of the window.
    :type start: datetime.datetime or pandas.Timestamp
    :param end: The end timestamp of the window. Takes precedence over ``duration`` if both are given.
    :type end: datetime.datetime or pandas.Timestamp or None
    :param duration: The duration of the window, either as a timedelta or as a number of seconds. Only used if
        ``end`` is None.
    :type duration: datetime.timedelta or int or float or None

    :return: The window as a dataframe. If the time column had to be converted, the returned window contains the
        converted datetime values.
    :rtype: pandas.DataFrame
    :raises ValueError: If neither ``end`` nor ``duration`` is given.
    """
    # Resolve the upper bound of the window first so invalid calls fail before any work is done
    if end is None:
        if duration is None:
            raise ValueError('Either end or duration must be given!')
        if isinstance(duration, timedelta):
            # pd.Timedelta(seconds=...) only accepts plain numbers, so timedeltas are added directly
            # (pd.Timedelta is a subclass of datetime.timedelta and is covered by this branch too)
            end = start + duration
        else:
            end = start + pd.Timedelta(seconds=duration)

    # Convert the timestamp column to datetime if it's not already
    timestamps = df[time_column]
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        timestamps = pd.to_datetime(timestamps)
        # assign() returns a new dataframe, so the caller's dataframe keeps its original column
        df = df.assign(**{time_column: timestamps})

    # Cut out the window
    in_window = (timestamps >= start) & (timestamps <= end)
    return df[in_window]


def create_windows(df: pd.DataFrame,
                   time_column: str,
                   label_column: Union[str, None] = None,
                   window_size: Union[int, float] = 5.0,
                   window_slicing_method: str = 'time_related') -> list:
    """
    Slices a dataframe into windows of a given size. The windows can be sliced in different ways; currently only
    ``'time_related'`` is implemented, which groups the rows into consecutive time bins of ``window_size`` seconds.
    The dataframe must have a column containing timestamps (or values that ``pandas.to_datetime`` can parse). The
    dataframe passed in is not modified.

    :param df: The dataframe to slice.
    :type df: pandas.DataFrame
    :param time_column: The name of the column containing the timestamps.
    :type time_column: str
    :param label_column: The name of the column containing the labels. Currently unused, because the label related
        slicing methods are not implemented yet.
    :type label_column: str or None
    :param window_size: The size of the windows in seconds.
    :type window_size: int or float
    :param window_slicing_method: The method used to slice the windows. One of ``'time_related'``,
        ``'label_related_before'``, ``'label_related_after'`` or ``'label_related_centered'``.
    :type window_slicing_method: str

    :return: A list of dataframes containing the windows.
    :rtype: list
    :raises NotImplementedError: If one of the not yet implemented label related methods is requested.
    :raises ValueError: If the slicing method is unknown.
    """
    label_related_methods = ('label_related_before', 'label_related_after', 'label_related_centered')
    if window_slicing_method in label_related_methods:
        # These methods used to fall through and silently return None; fail loudly instead
        raise NotImplementedError(f'window_slicing_method {window_slicing_method} is not implemented yet')
    if window_slicing_method != 'time_related':
        raise ValueError(f'window_slicing_method {window_slicing_method} not supported')

    # Convert the timestamp column to datetime if it's not already
    if not pd.api.types.is_datetime64_any_dtype(df[time_column]):
        # assign() returns a new dataframe, so the caller's dataframe keeps its original column
        df = df.assign(**{time_column: pd.to_datetime(df[time_column])})

    # Group the rows into bins of window_size seconds. The bin width is passed as a Timedelta instead of a
    # frequency string such as '5.0S', whose 'S' alias is deprecated in newer pandas versions.
    bin_width = pd.Timedelta(seconds=float(window_size))
    time_bins = pd.Grouper(key=time_column, freq=bin_width)
    return [window for _, window in df.groupby(time_bins)]