aehrc
/

cxrmate-ed

anicolson committed 1 day ago

Commit

cea5efc

•

1 Parent(s): 95a9c50

Upload model

Files changed (4) hide show

config.json CHANGED Viewed

@@ -151,7 +151,7 @@
     "max_length": 20,
     "min_length": 0,
     "mlp_ratio": 4,
-    "model_type": "uniformer",
     "no_repeat_ngram_size": 0,
     "num_beam_groups": 1,
     "num_beams": 1,

     "max_length": 20,
     "min_length": 0,
     "mlp_ratio": 4,
+    "model_type": "vit",
     "no_repeat_ngram_size": 0,
     "num_beam_groups": 1,
     "num_beams": 1,

configuration_uniformer.py CHANGED Viewed

@@ -6,7 +6,7 @@ logger = logging.get_logger(__name__)
 class UniFormerWithProjectionHeadConfig(PretrainedConfig):
-    model_type = 'uniformer'
     def __init__(
             self,

 class UniFormerWithProjectionHeadConfig(PretrainedConfig):
+    model_type = 'vit'
     def __init__(
             self,

modelling_cxrmate_ed.py CHANGED Viewed

@@ -948,7 +948,10 @@ class MIMICIVEDCXRMultimodalModel(VisionEncoderDecoderModel):
         return position_ids
-    def get_dataset(self, dataset_path, train_transforms, test_transforms, max_train_images_per_study, study_id_split='mimic_iv_ed_mimic_cxr_jpg', test_set_only=False):
         def train_set_transform(batch):
@@ -1051,7 +1054,10 @@ class MIMICIVEDCXRMultimodalModel(VisionEncoderDecoderModel):
         test_set.set_transform(test_set_transform)
         test_set = Subset(test_set, indices)
-        return train_set, val_set, test_set
     def get_stage_1_dataset(self, dataset_path, train_transforms, test_transforms, max_train_images_per_study):

         return position_ids
+    def get_dataset(self, dataset_path, train_transforms=None, test_transforms=None, max_train_images_per_study=None, study_id_split='mimic_iv_ed_mimic_cxr_jpg', test_set_only=False):
+        assert max_train_images_per_study is not None, 'max_train_images_per_study must be defined.'
+        assert test_transforms is not None, 'test_transforms must be defined.'
         def train_set_transform(batch):
         test_set.set_transform(test_set_transform)
         test_set = Subset(test_set, indices)
+        if not test_set_only:
+            return train_set, val_set, test_set
+        else:
+            return test_set
     def get_stage_1_dataset(self, dataset_path, train_transforms, test_transforms, max_train_images_per_study):

prepare_dataset.py CHANGED Viewed

@@ -529,26 +529,26 @@ def prepare_dataset(physionet_dir, database_dir, num_workers=None):
                 lines=False,
             )
-    #     dataset_dict[split] = datasets.Dataset.from_pandas(df)
-    #     cache_dir = os.path.join(database_dir, '.cache')
-    #     Path(cache_dir).mkdir(parents=True, exist_ok=True)
-    #     dataset_dict[split] = dataset_dict[split].map(
-    #         load_image,
-    #         num_proc=num_workers,
-    #         writer_batch_size=8,
-    #         batched=True,
-    #         batch_size=8,
-    #         keep_in_memory=False,
-    #         cache_file_name=os.path.join(cache_dir, f'.{split}'),
-    #         load_from_cache_file=False,
-    #     )
-    #     dataset_dict[split].cleanup_cache_files()
-    #     shutil.rmtree(cache_dir)
-    # dataset = datasets.DatasetDict(dataset_dict)
-    # dataset.save_to_disk(os.path.join(database_dir, 'mimic_iv_ed_mimic_cxr_jpg_dataset'))
-    # con.close()
 if __name__ == "__main__":

                 lines=False,
             )
+        dataset_dict[split] = datasets.Dataset.from_pandas(df)
+        cache_dir = os.path.join(database_dir, '.cache')
+        Path(cache_dir).mkdir(parents=True, exist_ok=True)
+        dataset_dict[split] = dataset_dict[split].map(
+            load_image,
+            num_proc=num_workers,
+            writer_batch_size=8,
+            batched=True,
+            batch_size=8,
+            keep_in_memory=False,
+            cache_file_name=os.path.join(cache_dir, f'.{split}'),
+            load_from_cache_file=False,
+        )
+        dataset_dict[split].cleanup_cache_files()
+        shutil.rmtree(cache_dir)
+    dataset = datasets.DatasetDict(dataset_dict)
+    dataset.save_to_disk(os.path.join(database_dir, 'mimic_iv_ed_mimic_cxr_jpg_dataset'))
+    con.close()
 if __name__ == "__main__":