Spaces:
Runtime error
Runtime error
File size: 2,492 Bytes
0b7b08a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import json
import os
from tqdm import tqdm
import webdataset as wds
from utils import MAXCOUNT, NAMING, check_sample
import numpy as np
# Root of the raw PISC download; the annotation JSON files and the "image"
# folder are resolved relative to this directory.
PISC_ROOT = "/gpfs/u/home/LMCG/LMCGljnn/scratch/datasets/raw/PISC"

# Directory that receives the generated WebDataset shards.
OUT_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/instruct/eval/pisc"

# Relationship label names. The script indexes this list with
# (relationship id - 1), i.e. the ids stored in relationship.json are 1-based.
rel_id_to_type = [
    "friends",
    "family",
    "couple",
    "professional",
    "commercial",
    "no relation",
]
def _load_json(path):
    """Parse the JSON file at *path*, closing the file handle afterwards."""
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def main():
    """Write one WebDataset sample per annotated person pair of the PISC test split.

    For every image listed in ``annotation_image_info.json`` that has an entry
    in ``relationship.json`` and whose id appears in ``relation_testidx.json``,
    each annotated pair produces a sample holding the image path, the dataset
    name and ``[A_box, B_box, relationship_name]``. Boxes are divided by
    ``[imgW, imgH, imgW, imgH]`` so the stored coordinates are relative to the
    image size.

    Raises:
        FileNotFoundError: if the JPEG for an image that has pairs to export
            does not exist under ``PISC_ROOT/image``.
    """
    os.makedirs(OUT_DIR, exist_ok=True)

    annotation_image_info = _load_json(os.path.join(PISC_ROOT, "annotation_image_info.json"))
    relationships = _load_json(os.path.join(PISC_ROOT, "relationship.json"))
    # Build the membership set once; it is probed for every annotated image.
    # Membership is tested with the string form of the image id, as before.
    test_ids = frozenset(
        _load_json(os.path.join(PISC_ROOT, "relationship_split", "relation_testidx.json"))
    )

    dataset = "pisc_relation_split"
    sample_index = 0  # running counter that keeps every sample key unique

    with wds.ShardWriter(os.path.join(OUT_DIR, NAMING), maxcount=MAXCOUNT**3) as sink:
        for annotation in tqdm(annotation_image_info):
            image_id = annotation["id"]
            image_key = str(image_id)  # the JSON lookups are keyed by string ids

            if image_key not in relationships:
                tqdm.write(f"skip {image_id} due to not in relationships")
                continue
            if image_key not in test_ids:
                tqdm.write(f"skip {image_id} due to not in test set")
                continue

            pair_labels = relationships[image_key]
            if not pair_labels:
                # No pairs means no samples; skip before touching the image file.
                continue

            image_path = os.path.join(PISC_ROOT, "image", image_key.zfill(5) + ".jpg")
            # Explicit raise instead of `assert`, which is stripped under `python -O`.
            if not os.path.exists(image_path):
                raise FileNotFoundError(image_path)

            bbox = annotation["bbox"]  # xyxy
            # Per-image divisor that maps pixel xyxy boxes to relative coordinates.
            scale = np.array([annotation["imgW"], annotation["imgH"]] * 2, dtype=float)

            for pair, rel_id in pair_labels.items():
                # `pair` is "<A> <B>": 1-based indices into this image's bbox list.
                a_id, b_id = map(int, pair.split(" "))
                a_box = np.asarray(bbox[a_id - 1], dtype=float) / scale
                b_box = np.asarray(bbox[b_id - 1], dtype=float) / scale
                rel_name = rel_id_to_type[rel_id - 1]  # relationship ids are 1-based

                sample = {
                    "__key__": f"{dataset}_{image_id}_{sample_index}",
                    "image_path.txt": image_path,
                    "dataset.txt": dataset,
                    "data.pyd": [a_box, b_box, rel_name],
                }
                sample_index += 1
                check_sample(sample)
                sink.write(sample)


if __name__ == "__main__":
    main()
|