# Page-scrape residue (not part of the script): "Spaces: Runtime error"
import json
import os

import numpy as np
import webdataset as wds
from tqdm import tqdm

from utils import MAXCOUNT, NAMING, check_sample
# Root of the raw PISC dataset; the script reads the annotation JSON files and
# the "image" directory from underneath it.
PISC_ROOT = "/gpfs/u/home/LMCG/LMCGljnn/scratch/datasets/raw/PISC"

# Directory that receives the webdataset shards written by this script.
OUT_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/instruct/eval/pisc"

# Relationship names, looked up with ``label - 1`` (labels in relationship.json
# are treated as 1-based by the script).
rel_id_to_type = [
    "friends",
    "family",
    "couple",
    "professional",
    "commercial",
    "no relation",
]
def _load_json(path):
    """Parse the JSON file at *path*, closing the file handle afterwards."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def main():
    """Write one webdataset sample per annotated person pair in the PISC test split.

    For every image listed in ``annotation_image_info.json`` that has an entry
    in ``relationship.json`` and appears in ``relation_testidx.json``, each
    annotated pair becomes a sample whose ``data.pyd`` payload is
    ``[A_box, B_box, relationship_name]``. Boxes are divided by the image
    width/height so their coordinates are relative to the image size.

    Raises:
        FileNotFoundError: if the image file for an image with at least one
            relationship is missing under ``PISC_ROOT/image``.
    """
    os.makedirs(OUT_DIR, exist_ok=True)

    annotation_image_info = _load_json(os.path.join(PISC_ROOT, "annotation_image_info.json"))
    relationships = _load_json(os.path.join(PISC_ROOT, "relationship.json"))
    # Build the membership set once instead of probing the raw JSON container
    # for every image. NOTE(review): assumes the entries (or keys) are hashable
    # image-id strings -- confirm against the split file.
    test_ids = set(
        _load_json(os.path.join(PISC_ROOT, "relationship_split", "relation_testidx.json"))
    )

    dataset = "pisc_relation_split"
    sample_idx = 0  # running counter that keeps keys unique across all images

    with wds.ShardWriter(os.path.join(OUT_DIR, NAMING), maxcount=MAXCOUNT**3) as sink:
        for annotation in tqdm(annotation_image_info):
            image_id = annotation["id"]
            image_key = str(image_id)

            if image_key not in relationships:
                tqdm.write(f"skip {image_id} due to not in relationships")
                continue
            if image_key not in test_ids:
                tqdm.write(f"skip {image_id} due to not in test set")
                continue

            pair_relations = relationships[image_key]
            if not pair_relations:
                # Nothing to emit; as before, the image file is not checked.
                continue

            # Image files are named by the zero-padded (5 digit) image id.
            image_path = os.path.join(PISC_ROOT, "image", image_key.zfill(5) + ".jpg")
            # Explicit raise instead of ``assert`` so the check survives ``python -O``.
            if not os.path.exists(image_path):
                raise FileNotFoundError(image_path)

            bbox = annotation["bbox"]  # xyxy
            img_w = annotation["imgW"]
            img_h = annotation["imgH"]
            # Per-coordinate divisor for an (x, y, x, y) box; constant per image.
            scale = np.array([img_w, img_h, img_w, img_h]).astype(float)

            for pair, rel_label in pair_relations.items():
                rel_type = rel_id_to_type[rel_label - 1]
                # The key holds the two person indices separated by a space;
                # they are 1-based positions into ``bbox``.
                a_id, b_id = map(int, pair.split(" "))
                a_box = np.array(bbox[a_id - 1]).astype(float) / scale
                b_box = np.array(bbox[b_id - 1]).astype(float) / scale

                sample = {
                    "__key__": f"{dataset}_{image_id}_{sample_idx}",
                    "image_path.txt": image_path,
                    "dataset.txt": dataset,
                    "data.pyd": [a_box, b_box, rel_type],
                }
                sample_idx += 1
                check_sample(sample)
                sink.write(sample)


if __name__ == "__main__":
    main()