File size: 3,101 Bytes
0ffeb19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import h5py
import pandas as pd

def h5_to_pandas(h5_file, ecg_channel_name: str = 'channel_1') -> pd.DataFrame:
    """
    Converts a h5 file to a pandas DataFrame.

    The first top-level group of the file is used. That group must carry the attributes ``sampling rate``,
    ``date``, ``time``, ``nsamples``, ``device``, ``device name`` and ``duration``, and must contain a ``raw``
    sub-group holding the ECG samples under ``ecg_channel_name``. The samples are converted to ``float`` and
    flattened to one dimension.

    The returned DataFrame has one row per sample and the columns ``record_date``, ``frequency``,
    ``device_name`` (``"<device>_<device name>"``), ``timestamp_idx`` and ``ecg``.

    h5 formats supported are of the company Bioplux (https://www.pluxbiosignals.com/) with its Recording Software
    OpenSignals Revolution (https://support.pluxbiosignals.com/knowledge-base/introducing-opensignals-revolution/).

    :param h5_file: Path to the h5 file (passed unchanged to ``h5py.File``).
    :type h5_file: str
    :param ecg_channel_name: The name of the ecg channel in the h5 file.
    :type ecg_channel_name: str

    :return: The pandas DataFrame.
    :rtype: pd.DataFrame

    :raises AssertionError: If the time span of the generated timestamps deviates by one second or more from
        the ``duration`` attribute stored in the file.
    """
    with h5py.File(h5_file, 'r') as file:
        # Get the first key as the group key
        group_key = next(iter(file.keys()))
        h5_group = file[group_key]

        # Convert ECG data to a flattened numpy array (read fully into memory before the file is closed)
        ecg = h5_group['raw'][ecg_channel_name][:].astype(float).flatten()

        # Extract metadata
        attrs = h5_group.attrs
        sampling_rate = attrs['sampling rate']
        date = attrs['date']
        time = attrs['time']
        num_samples = attrs['nsamples']
        device = attrs['device']
        device_name = attrs['device name']
        duration = attrs['duration']

    # Create the timestamp column. The sampling period is passed as a Timedelta instead of a formatted
    # string such as '0.001S': string formatting of the float can yield scientific notation (e.g. '1e-05S')
    # and the upper-case 'S' alias is deprecated in recent pandas versions.
    start = pd.to_datetime(date + ' ' + time)
    period = pd.Timedelta(seconds=1.0 / float(sampling_rate))
    timestamps = pd.date_range(start=start, periods=int(num_samples), freq=period)

    # Check if the overall time range of the calculated timestamps fit the given duration attribute of the h5 file.
    # An explicit raise is used (rather than the assert statement) so the check is not stripped under `python -O`;
    # AssertionError is kept so existing callers see the same exception type.
    end = start + pd.Timedelta(duration)
    # A recording without samples has no last timestamp; compare against the start instead of indexing [-1].
    last_timestamp = timestamps[-1] if len(timestamps) > 0 else start
    deviation = abs((end - last_timestamp).total_seconds())
    if not deviation < 1:
        raise AssertionError(
            f'Calculated timestamps deviate by {deviation} seconds from the recorded duration {duration!r}'
        )

    # Create the DataFrame (scalar metadata values are broadcast to every row)
    df = pd.DataFrame({
        'record_date': date,
        'frequency': sampling_rate,
        'device_name': f'{device}_{device_name}',
        'timestamp_idx': timestamps,
        'ecg': ecg,
    })

    return df

def csv_to_pandas(path: str) -> pd.DataFrame:
    """ Converts a CSV file in a pandas dataframe intended for the ECG-HRV pipeline.

    The first line of the file must be a ``#``-prefixed comment containing a Python dict expression with the
    metadata. Every metadata entry whose key starts with ``batch`` or ``config`` is added to the samples as a
    constant column (``batch*`` columns first, then ``config*`` columns); all other entries are ignored.
    The remaining lines are parsed with ``pd.read_csv``, treating ``#`` as the comment character.

    :param path: Path to the csv file.
    :type path: str

    :return: The pandas DataFrame.
    :rtype: pd.DataFrame
    """
    # Get metadata of csv file
    with open(path, 'r') as file:
        # Remove only the leading comment marker. A blanket replace('# ', '') would also delete every
        # '# ' occurring inside the metadata values and silently corrupt them.
        metadata = file.readline().strip().lstrip('#').lstrip()
        # SECURITY NOTE(review): eval() executes arbitrary code contained in the file. Only use this
        # function on trusted files; consider ast.literal_eval if the metadata is always a plain literal.
        metadata = eval(metadata)

    configs = {key: value for key, value in metadata.items() if key.startswith('config')}
    batch = {key: value for key, value in metadata.items() if key.startswith('batch')}

    # Get samples from csv file (the metadata line is skipped because it is a full comment line)
    df = pd.read_csv(path, comment='#')

    # Add metadata to samples
    df = df.assign(**batch)
    df = df.assign(**configs)

    return df