import ast

import pandas as pd

try:
    import h5py
except ImportError:
    # h5py is only needed by h5_to_pandas. Keeping the module importable
    # without it lets csv_to_pandas be used on its own; h5_to_pandas raises
    # a clear ImportError at call time instead.
    h5py = None


def h5_to_pandas(h5_file, ecg_channel_name='channel_1') -> pd.DataFrame:
    """
    Converts a h5 file to a pandas DataFrame.

    The first group of the file is used. Its attributes must contain:
    sampling rate, date, time, nsamples, device, device name and duration.
    The ECG samples are read from the dataset ``raw/<ecg_channel_name>`` of
    that group and flattened to one dimension.

    The DataFrame will contain the following columns: record_date, frequency,
    device_name (``<device>_<device name>``), timestamp_idx, ecg.

    h5 formats supported are of the company Bioplux
    (https://www.pluxbiosignals.com/) with its Recording Software OpenSignals
    Revolution
    (https://support.pluxbiosignals.com/knowledge-base/introducing-opensignals-revolution/).

    :param h5_file: Path to the h5 file (anything accepted by ``h5py.File``).
    :param ecg_channel_name: The name of the ecg channel in the h5 file.
    :type ecg_channel_name: str
    :return: The pandas DataFrame.
    :rtype: pd.DataFrame
    :raises ImportError: If h5py is not installed.
    :raises ValueError: If the h5 file contains no group.
    :raises AssertionError: If the computed time range deviates from the
        ``duration`` attribute by one second or more.
    """
    if h5py is None:
        raise ImportError('h5py is required to read h5 files.')

    with h5py.File(h5_file, 'r') as file:
        # Get the first key as the group key. A default is passed to next()
        # so an empty file does not leak a bare StopIteration to the caller.
        group_key = next(iter(file.keys()), None)
        if group_key is None:
            raise ValueError(f'The h5 file {h5_file!r} does not contain any group.')
        h5_group = file[group_key]

        # Convert ECG data to a flattened numpy array
        ecg = h5_group['raw'][ecg_channel_name][:].astype(float).flatten()

        # Extract metadata
        attrs = h5_group.attrs
        sampling_rate = attrs['sampling rate']
        date = attrs['date']
        time = attrs['time']
        num_samples = attrs['nsamples']
        device = attrs['device']
        device_name = attrs['device name']
        duration = attrs['duration']

    # Create the timestamp column. The sample period is passed as a Timedelta
    # rather than a formatted string such as '0.001S': the 'S' alias is
    # deprecated in recent pandas versions, and very small periods render in
    # scientific notation ('1e-05S'), which is not a valid frequency string.
    start = pd.to_datetime(date + ' ' + time)
    sample_period = pd.Timedelta(seconds=1.0 / float(sampling_rate))
    timestamps = pd.date_range(start=start, periods=int(num_samples), freq=sample_period)

    # Check if the overall time range of the calculated timestamps fits the
    # given duration attribute of the h5 file. Raised explicitly (instead of
    # using an assert statement) so the check also runs under ``python -O``;
    # the exception type is kept for backward compatibility.
    end = start + pd.Timedelta(duration)
    deviation = abs((end - timestamps[-1]).total_seconds())
    if not deviation < 1:
        raise AssertionError(
            f'Computed timestamps deviate from the recorded duration by {deviation} seconds.'
        )

    # Create the DataFrame; scalar values are broadcast to every row.
    df = pd.DataFrame({
        'record_date': date,
        'frequency': sampling_rate,
        'device_name': f'{device}_{device_name}',
        'timestamp_idx': timestamps,
        'ecg': ecg,
    })

    return df


def csv_to_pandas(path: str) -> pd.DataFrame:
    """
    Converts a CSV file in a pandas dataframe fitted to the ECG-HRV pipeline
    pydantic models.

    The first line of the file must be a comment of the form ``# {...}``
    holding a Python dict literal with the metadata. Every metadata entry
    whose key starts with ``batch`` or ``config`` is added to the samples as
    a constant column; other entries are ignored. All remaining lines
    starting with ``#`` are skipped when reading the samples.

    :param path: Path to the csv file.
    :type path: str
    :return: The pandas DataFrame.
    :rtype: pd.DataFrame
    :raises ValueError: If the metadata line is not a plain dict literal.
    """
    # Get metadata of csv file (UTF-8, matching the default of pd.read_csv)
    with open(path, 'r', encoding='utf-8') as file:
        header = file.readline()

    # Only strip the leading comment marker; a '# ' inside a metadata value
    # must stay untouched.
    header = header.strip()
    if header.startswith('#'):
        header = header[1:].strip()

    # SECURITY: the metadata used to be parsed with eval(), which executes
    # arbitrary code found in the file. literal_eval only accepts Python
    # literals (dicts, lists, strings, numbers, ...).
    metadata = ast.literal_eval(header)
    if not isinstance(metadata, dict):
        raise ValueError('The metadata line of the csv file must be a dict literal.')

    configs = {key: value for key, value in metadata.items() if key.startswith('config')}
    batch = {key: value for key, value in metadata.items() if key.startswith('batch')}

    # Get samples from csv file
    df = pd.read_csv(path, comment='#')

    # Add metadata to samples
    df = df.assign(**batch)
    df = df.assign(**configs)

    return df