hubii-world
/

ECG2HRV

Feature Extraction

electrocardiogram

Model card Files Files and versions Community

ECG2HRV / src /conversion.py

nina-m-m's picture

Rename conversion.py to src/conversion.py

270b11f verified 10 months ago

3.1 kB

	import h5py
	import pandas as pd

	def h5_to_pandas(h5_file, ecg_channel_name='channel_1') -> pd.DataFrame:
	    """
	    Converts a h5 file to a pandas DataFrame.

	    The first top-level group of the file is used as the recording. That group must provide the attributes
	    ``sampling rate``, ``date``, ``time``, ``nsamples``, ``device``, ``device name`` and ``duration`` as well as a
	    ``raw`` entry that holds the ECG samples under ``ecg_channel_name``.
	    The DataFrame will contain the following columns: record_date, frequency, device_name, timestamp_idx, ecg.

	    h5 formats supported are of the company Bioplux (https://www.pluxbiosignals.com/) with its Recording Software
	    OpenSignals Revolution (https://support.pluxbiosignals.com/knowledge-base/introducing-opensignals-revolution/).

	    :param h5_file: The h5 file to read; passed unchanged to ``h5py.File``.
	    :param ecg_channel_name: The name of the ecg channel in the h5 file.
	    :type ecg_channel_name: str

	    :return: The pandas DataFrame with one row per ECG sample.
	    :rtype: pd.DataFrame

	    :raises ValueError: If the sampling rate is not positive.
	    :raises AssertionError: If the generated timestamps deviate by one second or more from the ``duration``
	        attribute of the file.
	    """
	    with h5py.File(h5_file, 'r') as file:
	        # Get the first key as the group key
	        group_key = next(iter(file.keys()))
	        h5_group = file[group_key]

	        # Copy the ECG data out of the file as a flattened float numpy array
	        ecg = h5_group['raw'][ecg_channel_name][:].astype(float).flatten()

	        # Extract metadata while the file is still open
	        attrs = h5_group.attrs
	        sampling_rate = attrs['sampling rate']
	        date = attrs['date']
	        time = attrs['time']
	        num_samples = attrs['nsamples']
	        device = attrs['device']
	        device_name = attrs['device name']
	        duration = attrs['duration']

	    if not sampling_rate > 0:
	        raise ValueError(f'sampling rate must be positive, got {sampling_rate!r}')

	    # Create the timestamp column. The step is passed as a Timedelta instead of a string such as '0.001S':
	    # the 'S' alias is deprecated in recent pandas, and a float rendered in scientific notation (e.g. '1e-05S')
	    # is not a valid frequency string.
	    start = pd.to_datetime(date + ' ' + time)
	    step = pd.Timedelta(seconds=1 / float(sampling_rate))
	    timestamps = pd.date_range(start=start, periods=num_samples, freq=step)

	    # Check if the overall time range of the calculated timestamps fit the given duration attribute of the h5 file.
	    # Raised explicitly (not via `assert`) so the check is not stripped under `python -O`; the exception type is
	    # kept as AssertionError for backward compatibility.
	    end = start + pd.Timedelta(duration)
	    deviation = abs((end - timestamps[-1]).total_seconds())
	    if not deviation < 1:
	        raise AssertionError(
	            f'Timestamps deviate {deviation} seconds from the duration attribute {duration!r} of the h5 file'
	        )

	    # Create the DataFrame; the scalar metadata values are broadcast to every row
	    df = pd.DataFrame({
	        'record_date': date,
	        'frequency': sampling_rate,
	        'device_name': f'{device}_{device_name}',
	        'timestamp_idx': timestamps,
	        'ecg': ecg,
	    })

	    return df

	def csv_to_pandas(path: str) -> pd.DataFrame:
	    """ Converts a CSV file in a pandas dataframe fitted to the ECG-HRV pipeline pydantic models.

	    The first line of the file must contain the metadata as a Python dict literal, usually written as a comment
	    (``# {...}``). Every metadata entry whose key starts with ``batch`` or ``config`` is added to the samples as a
	    constant column; all other metadata entries are ignored. The remaining lines are read with
	    ``pd.read_csv(path, comment='#')``.

	    :param path: Path to the csv file.
	    :type path: str

	    :return: The pandas DataFrame holding the samples followed by the ``batch*`` and ``config*`` columns.
	    :rtype: pd.DataFrame

	    :raises ValueError: If the metadata line is not a plain Python literal.
	    :raises SyntaxError: If the metadata line is empty or not valid Python syntax.
	    """
	    import ast  # function-scope import so the block does not depend on the file's import section

	    # Get metadata of csv file
	    with open(path, 'r') as file:
	        header = file.readline().strip()

	    # Remove only the leading comment marker; replacing every '# ' would also alter metadata values
	    # that happen to contain that sequence.
	    if header.startswith('#'):
	        header = header[1:].lstrip()

	    # SECURITY: this line comes from an external file. It used to be passed to eval(), which executes
	    # arbitrary code; ast.literal_eval only accepts literals (dict, list, str, numbers, bool, None).
	    metadata = ast.literal_eval(header)

	    configs = {key: value for key, value in metadata.items() if key.startswith('config')}
	    batch = {key: value for key, value in metadata.items() if key.startswith('batch')}

	    # Get samples from csv file
	    df = pd.read_csv(path, comment='#')

	    # Add metadata to samples (batch columns first, then config columns)
	    df = df.assign(**batch)
	    df = df.assign(**configs)

	    return df