Spaces:
Running
Running
import os | |
import logging | |
import warnings | |
from minigpt4.common.registry import registry | |
from minigpt4.datasets.builders.pdb_base_dataset_builder import PDB_BaseDatasetBuilder | |
from minigpt4.datasets.datasets.pdb_dataset import ESMDataset | |
class PDBBuilder(PDB_BaseDatasetBuilder): | |
train_dataset_cls = ESMDataset | |
DATASET_CONFIG_DICT = { | |
"default": "configs/datasets/pdb/pdb.yaml", | |
} | |
def build_datasets(self): | |
# at this point, all the annotations and image/videos should be all downloaded to the specified locations. | |
logging.info("Building datasets...") | |
self.build_processors() | |
build_info = self.config.build_info | |
storage_path = build_info.storage | |
datasets = dict() | |
if not os.path.exists(storage_path): | |
warnings.warn("storage path {} does not exist.".format(storage_path)) | |
# create datasets | |
dataset_cls = self.train_dataset_cls | |
datasets['train'] = dataset_cls( | |
text_processor=self.text_processors["train"], | |
ann_paths=[os.path.join(storage_path, 'filter_cap.json')], | |
pdb_root=os.path.join(storage_path, 'pdb'), | |
) | |
return datasets | |