|
import h5py |
|
import pandas as pd |
|
|
|
def h5_to_pandas(h5_file, ecg_channel_name='channel_1') -> pd.DataFrame:
    """
    Convert a Bioplux / OpenSignals h5 recording into a pandas DataFrame.

    Only the first top-level group of the file is read. That group must carry the attributes ``sampling rate``,
    ``date``, ``time``, ``nsamples``, ``device``, ``device name`` and ``duration``, and it must contain a ``raw``
    entry that holds the ECG samples under ``ecg_channel_name``.

    The returned DataFrame has one row per sample and the columns record_date, frequency, device_name,
    timestamp_idx and ecg. record_date, frequency and device_name are constant over all rows; device_name is
    the ``device`` and ``device name`` attributes joined with an underscore.

    h5 formats supported are of the company Bioplux (https://www.pluxbiosignals.com/) with its Recording Software
    OpenSignals Revolution (https://support.pluxbiosignals.com/knowledge-base/introducing-opensignals-revolution/).

    :param h5_file: The h5 file to read; passed unchanged to ``h5py.File``.
    :param ecg_channel_name: The name of the ecg channel inside the ``raw`` entry of the h5 file.
    :type ecg_channel_name: str

    :return: The pandas DataFrame.
    :rtype: pd.DataFrame

    :raises AssertionError: If the recording end derived from ``duration`` differs from the last generated
        timestamp by one second or more.
    """
    with h5py.File(h5_file, 'r') as file:
        # Only the first top-level group is used.
        group_key = next(iter(file.keys()))
        h5_group = file[group_key]

        # Load the samples of the requested channel as a flat float array.
        ecg = h5_group['raw'][ecg_channel_name][:].astype(float).flatten()

        # Recording metadata stored as attributes on the group.
        attrs = h5_group.attrs
        sampling_rate = attrs['sampling rate']
        date = attrs['date']
        time = attrs['time']
        num_samples = attrs['nsamples']
        device = attrs['device']
        device_name = attrs['device name']
        duration = attrs['duration']

    # One timestamp per sample, starting at the recorded date and time.
    start = pd.to_datetime(date + ' ' + time)
    # Pass the sample period as a Timedelta instead of a formatted string such as '0.001S': the string form
    # cannot be parsed once the float is rendered in scientific notation (e.g. '1e-05S'), and the upper-case
    # 'S' alias is deprecated in recent pandas releases.
    sample_period = pd.to_timedelta(1 / sampling_rate, unit='s')
    timestamps = pd.date_range(start=start, periods=num_samples, freq=sample_period)

    # Sanity check: the stored duration has to agree with the generated time axis to within one second.
    end = start + pd.Timedelta(duration)
    assert abs((end - timestamps[-1]).total_seconds()) < 1, (
        'duration attribute does not match nsamples / sampling rate'
    )

    # Scalar entries are broadcast to every row.
    df = pd.DataFrame({
        'record_date': date,
        'frequency': sampling_rate,
        'device_name': f'{device}_{device_name}',
        'timestamp_idx': timestamps,
        'ecg': ecg,
    })

    return df
|
|
|
def csv_to_pandas(path: str) -> pd.DataFrame:
    """
    Convert a CSV file into a pandas DataFrame fitted to the ECG-HRV pipeline pydantic models.

    The first line of the file must be a ``#``-prefixed Python dict expression holding the metadata. Entries
    whose key starts with ``batch`` or ``config`` are added to the DataFrame as constant columns (batch entries
    first, then config entries); all other metadata entries are ignored. The rest of the file is parsed with
    ``pd.read_csv`` using ``#`` as the comment character, so the metadata line itself is skipped there.

    :param path: Path to the csv file.
    :type path: str

    :return: The pandas DataFrame.
    :rtype: pd.DataFrame
    """
    with open(path, 'r') as file:
        header = file.readline()

    # Remove only the leading comment marker. Replacing every '# ' in the line would also alter metadata
    # values that happen to contain that sequence.
    header = header.strip()
    if header.startswith('#'):
        header = header[1:].lstrip()

    # SECURITY: eval() executes whatever expression the header contains. Only use this on trusted files;
    # consider ast.literal_eval if the headers are guaranteed to be plain literals.
    metadata = eval(header)

    configs = {key: value for key, value in metadata.items() if key.startswith('config')}
    batch = {key: value for key, value in metadata.items() if key.startswith('batch')}

    # Lines starting with '#' (the metadata header) are skipped by the parser.
    df = pd.read_csv(path, comment='#')

    # Broadcast the metadata to every row as additional columns.
    df = df.assign(**batch)
    df = df.assign(**configs)

    return df
|
|