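# ECG2HRV / src/conversion.py
# (file-viewer page header, kept for provenance)
# Author avatar: nina-m-m
# Last commit: "Rename conversion.py to src/conversion.py" (270b11f, verified)
# Viewer links: raw | history | blame
# File size: 3.1 kB
#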
import ast

import h5py
import pandas as pd
def h5_to_pandas(h5_file, ecg_channel_name='channel_1') -> pd.DataFrame:
    """
    Converts a h5 file to a pandas DataFrame.

    Only the first group of the file is read. Its attributes must include: sampling rate, date, time, nsamples,
    device, device name and duration. Its ``raw`` entry must contain the ECG samples under ``ecg_channel_name``;
    the samples are cast to float and flattened to one dimension.

    The DataFrame will contain the following columns: record_date, frequency, device_name, timestamp_idx, ecg.
    The timestamps start at ``date + ' ' + time`` and advance by ``1 / sampling rate`` seconds per sample.

    h5 formats supported are of the company Bioplux (https://www.pluxbiosignals.com/) with its Recording Software
    OpenSignals Revolution (https://support.pluxbiosignals.com/knowledge-base/introducing-opensignals-revolution/).

    :param h5_file: The h5 file to read; passed unchanged to ``h5py.File``.
    :param ecg_channel_name: The name of the ecg channel in the h5 file.
    :type ecg_channel_name: str
    :return: The pandas DataFrame.
    :rtype: pd.DataFrame
    :raises AssertionError: If the last calculated timestamp is not within one second of start + duration.
    """
    with h5py.File(h5_file, 'r') as file:
        # Get the first key as the group key
        group_key = next(iter(file.keys()))
        h5_group = file[group_key]

        # Convert ECG data to a flattened numpy array (must be read while the file is open)
        ecg = h5_group['raw'][ecg_channel_name][:].astype(float).flatten()

        # Extract metadata
        attrs = h5_group.attrs
        sampling_rate = attrs['sampling rate']
        date = attrs['date']
        time = attrs['time']
        num_samples = attrs['nsamples']
        device = attrs['device']
        device_name = attrs['device name']
        duration = attrs['duration']

    # Create the timestamp column. The step is passed as a Timedelta rather than a formatted string such as
    # '0.001S': the 'S' alias is deprecated in recent pandas versions and a very small period may be formatted
    # in scientific notation, which may not parse as a frequency string.
    start = pd.to_datetime(date + ' ' + time)
    step = pd.Timedelta(seconds=float(1 / sampling_rate))
    timestamps = pd.date_range(start=start, periods=num_samples, freq=step)

    # Check if the overall time range of the calculated timestamps fit the given duration attribute of the h5 file.
    # Raised explicitly (instead of using an ``assert`` statement) so the check also runs under ``python -O``;
    # the exception type is kept for callers that already handle AssertionError.
    end = start + pd.Timedelta(duration)
    deviation = abs((end - timestamps[-1]).total_seconds())
    if not deviation < 1:  # written as "not <" so that a NaN deviation fails the check as well
        raise AssertionError(
            f'Calculated timestamps deviate by {deviation} seconds from the duration attribute of the h5 file.'
        )

    # Create the DataFrame (scalar metadata is broadcast to every sample row)
    df = pd.DataFrame({
        'record_date': date,
        'frequency': sampling_rate,
        'device_name': f'{device}_{device_name}',
        'timestamp_idx': timestamps,
        'ecg': ecg,
    })
    return df
def csv_to_pandas(path: str) -> pd.DataFrame:
    """ Converts a CSV file in a pandas dataframe fitted to the ECG-HRV pipeline pydantic models.

    The first line of the file must be a comment holding a Python dict literal with the metadata, e.g.
    ``# {'config.frequency': 1000, 'batch.record_date': '2023-01-01'}``. Every metadata entry whose key starts
    with ``batch`` or ``config`` is added as a constant column to the samples; other entries are ignored.
    The samples are read with ``#`` as comment character.

    :param path: Path to the csv file.
    :type path: str
    :return: The pandas DataFrame.
    :rtype: pd.DataFrame
    :raises ValueError: If the metadata line is not a plain literal or does not describe a dict.
    :raises SyntaxError: If the metadata line is not valid Python literal syntax (e.g. the file is empty).
    """
    # Get metadata of csv file (UTF-8, the same default encoding pd.read_csv uses below)
    with open(path, 'r', encoding='utf-8') as file:
        header = file.readline()

    # Remove only the leading comment marker; a '# ' inside a metadata value must stay untouched.
    literal = header.strip().lstrip('#').strip()

    # SECURITY: the header comes from the file and is therefore untrusted input. It is parsed with
    # ast.literal_eval, which only accepts literals, instead of eval(), which would execute arbitrary code.
    metadata = ast.literal_eval(literal)
    if not isinstance(metadata, dict):
        raise ValueError(f'Metadata line of {path!r} must be a dict literal, got {type(metadata).__name__}.')

    configs = {key: value for key, value in metadata.items() if key.startswith('config')}
    batch = {key: value for key, value in metadata.items() if key.startswith('batch')}

    # Get samples from csv file
    df = pd.read_csv(path, comment='#')

    # Add metadata to samples (batch columns first, then config columns)
    df = df.assign(**batch)
    df = df.assign(**configs)
    return df