File size: 5,694 Bytes
6f7f115 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
from collections import OrderedDict
from typing import Optional
# Maps each input source name to its integer token type id.
# The ids are contiguous and start at 0; they are spelled out explicitly so
# that adding an entry never silently renumbers the existing ones.
ed_cxr_token_type_ids = dict(
    # Keys that match table names configured elsewhere in this file.
    medrecon=0,
    edstays=1,
    triage=2,
    vitalsign=3,
    pyxis=4,
    mimic_cxr_2_0_0_metadata=5,
    # Keys of the form '<table>_<text column>'.
    medrecon_name=6,
    triage_chiefcomplaint=7,
    triage_pain=8,
    vitalsign_pain=9,
    # Report section names and the image.
    indication=10,
    history=11,
    findings=12,
    impression=13,
    image=14,
    comparison=15,
    # 'previous_*' entries -- presumably the prior study's counterparts
    # (TODO confirm against the consumer of this mapping).
    previous_findings=16,
    previous_impression=17,
    previous_image=18,
)

# Number of distinct token type ids: one more than the largest id in use.
NUM_ED_CXR_TOKEN_TYPE_IDS = 1 + max(ed_cxr_token_type_ids.values())
class TableConfig:
    """Per-table settings: identifier filters, grouping key and column roles.

    Every ``*_columns`` / ``target_sections`` argument accepts a single
    column name (``str``), an iterable of names, or ``None``/omitted for "no
    columns". Each is normalised to a *new* ``list`` owned by the instance,
    so mutating one config's list never leaks into another config or into
    the caller's own list.

    Args:
        name: Descriptive name of the table.
        hadm_id_filter: Stored as given. Presumably selects filtering by
            ``hadm_id`` -- confirm against the code consuming this config.
        stay_id_filter: Stored as given (presumably filter by ``stay_id``).
        study_id_filter: Stored as given (presumably filter by ``study_id``).
        subject_id_filter: Stored as given (presumably filter by
            ``subject_id``). Defaults to ``True``.
        load: Stored as given; ``None`` when not specified.
        groupby: Column name stored as given; ``None`` when not specified.
        index_columns: Source column names. Kept in
            ``index_columns_source``; ``index_columns`` holds the same names
            with an ``'_index'`` suffix appended.
        text_columns: Column name(s) stored in ``text_columns``.
        value_columns: Column name(s) stored in ``value_columns``.
        time_columns: Column name(s) stored in ``time_columns``.
        target_sections: Section name(s) stored in ``target_sections``.
        use_start_time: Stored as given.
        mimic_cxr_sectioned: Stored as given.

    Raises:
        TypeError: If a column argument is neither ``None``, a ``str`` nor
            an iterable.
    """

    def __init__(
        self,
        name: str,
        hadm_id_filter: bool = False,
        stay_id_filter: bool = False,
        study_id_filter: bool = False,
        subject_id_filter: bool = True,
        load: Optional[bool] = None,
        groupby: Optional[str] = None,
        index_columns: Optional[list] = None,
        text_columns: Optional[list] = None,
        value_columns: Optional[list] = None,
        time_columns: Optional[list] = None,
        target_sections: Optional[list] = None,
        use_start_time: bool = False,
        mimic_cxr_sectioned: bool = False,
    ):
        self.name = name
        self.hadm_id_filter = hadm_id_filter
        self.stay_id_filter = stay_id_filter
        self.study_id_filter = study_id_filter
        self.subject_id_filter = subject_id_filter
        self.load = load
        self.groupby = groupby

        # Keep the raw source names alongside the derived '<name>_index' names.
        self.index_columns_source = self._as_list(index_columns)
        self.index_columns = [f'{i}_index' for i in self.index_columns_source]

        # _as_list always returns a list, so no separate type check is needed
        # afterwards (and nothing here relies on `assert`, which `python -O`
        # strips).
        self.text_columns = self._as_list(text_columns)
        self.value_columns = self._as_list(value_columns)
        self.time_columns = self._as_list(time_columns)
        self.target_sections = self._as_list(target_sections)

        self.use_start_time = use_start_time
        self.mimic_cxr_sectioned = mimic_cxr_sectioned

        # Start empty / unset; nothing in this class fills them in.
        self.value_column_to_idx = {}
        self.total_indices = None

    @staticmethod
    def _as_list(columns) -> list:
        """Return ``columns`` as a fresh list (``None`` -> ``[]``, ``str`` -> ``[str]``)."""
        if columns is None:
            return []
        if isinstance(columns, str):
            # A bare string is one column name, not an iterable of characters.
            return [columns]
        return list(columns)
# ed module:
"""
Order the tables for position_ids based on their order of occurrence (for cases where their time deltas are matching).
The way that they are ordered here is the way that they will be ordered as input.
1. medrecon - the medications which the patient was taking prior to their ED stay.
2. edstays - patient stays are tracked in the edstays table.
3. triage - information collected from the patient at the time of triage.
4. vitalsign - aperiodic vital signs documented for patients during their stay.
5. pyxis - dispensation information for medications provided by the BD Pyxis MedStation (position is interchangeable with 4).
"""
# Table configurations for the ED module, keyed by table name.
# Insertion order is significant: it is the order the tables keep as input.
ed_module_tables = OrderedDict()

# Medications the patient was taking prior to their ED stay.
ed_module_tables['medrecon'] = TableConfig(
    'Medicine reconciliation',
    load=True,
    stay_id_filter=True,
    groupby='stay_id',
    index_columns=['gsn', 'ndc', 'etc_rn', 'etccode'],
    text_columns='name',
    use_start_time=True,
)

# Patient stays.
ed_module_tables['edstays'] = TableConfig(
    'ED admissions',
    load=True,
    stay_id_filter=True,
    groupby='stay_id',
    index_columns=['gender', 'race', 'arrival_transport'],
    time_columns='intime',
)

# Information collected from the patient at the time of triage.
ed_module_tables['triage'] = TableConfig(
    'Triage',
    load=True,
    stay_id_filter=True,
    groupby='stay_id',
    text_columns=['chiefcomplaint', 'pain'],
    value_columns=['temperature', 'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp', 'acuity'],
    use_start_time=True,
)

# Aperiodic vital signs documented during the stay; grouped per chart time.
ed_module_tables['vitalsign'] = TableConfig(
    'Aperiodic vital signs',
    load=True,
    stay_id_filter=True,
    groupby='charttime',
    index_columns=['rhythm'],
    text_columns=['pain'],
    value_columns=['temperature', 'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp'],
    time_columns='charttime',
)

# Medication dispensations from the BD Pyxis MedStation; grouped per chart time.
ed_module_tables['pyxis'] = TableConfig(
    'Dispensation information for medications provided by the BD Pyxis MedStation',
    load=True,
    stay_id_filter=True,
    groupby='charttime',
    index_columns=['med_rn', 'name', 'gsn_rn', 'gsn'],
    time_columns='charttime',
)

# Diagnosis is registered without `load`, so it keeps the default (None),
# and without any column roles.
ed_module_tables['diagnosis'] = TableConfig(
    'Diagnosis',
    stay_id_filter=True,
    hadm_id_filter=False,
)
# MIMIC-CXR module:
# Table configurations for the MIMIC-CXR module, keyed by table name,
# in insertion order.
mimic_cxr_tables = OrderedDict()

# Study metadata: only index (categorical) columns, grouped per study.
mimic_cxr_tables['mimic_cxr_2_0_0_metadata'] = TableConfig(
    'Metadata',
    load=True,
    study_id_filter=True,
    groupby='study_id',
    index_columns=[
        'PerformedProcedureStepDescription',
        'ViewPosition',
        'ProcedureCodeSequence_CodeMeaning',
        'ViewCodeSequence_CodeMeaning',
        'PatientOrientationCodeSequence_CodeMeaning',
    ],
)

# Report sections: the only entry that disables subject_id filtering and sets
# the mimic_cxr_sectioned flag. Note the grouping key here is 'study', not
# 'study_id'.
mimic_cxr_tables['mimic_cxr_sectioned'] = TableConfig(
    'Report sections',
    load=True,
    mimic_cxr_sectioned=True,
    subject_id_filter=False,
    groupby='study',
    text_columns=['indication', 'history', 'comparison'],
    target_sections=['findings', 'impression'],
)

# Registered without `load` (left at its default of None) and without columns.
mimic_cxr_tables['mimic_cxr_2_0_0_chexpert'] = TableConfig('CheXpert', study_id_filter=True)
mimic_cxr_tables['mimic_cxr_2_0_0_split'] = TableConfig('Split', study_id_filter=True)
|