File size: 2,492 Bytes
0b7b08a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import json
import os
from tqdm import tqdm
import webdataset as wds
from utils import MAXCOUNT, NAMING, check_sample
import numpy as np
# Root directory of the raw PISC data; the annotation JSON files, the
# "relationship_split" folder and the "image" folder are read from here.
PISC_ROOT = "/gpfs/u/home/LMCG/LMCGljnn/scratch/datasets/raw/PISC"
# Directory the webdataset shards are written to (created on startup if missing).
OUT_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/instruct/eval/pisc"

# Relationship names indexed by (label - 1): the integer labels read from
# relationship.json are looked up in this list with a 1-based offset.
rel_id_to_type = ["friends", "family", "couple", "professional", "commercial", "no relation"]

def load_json(path):
    """Read and parse the JSON file at *path*, closing the file afterwards."""
    with open(path) as f:
        return json.load(f)


def normalize_bbox(box, img_w, img_h):
    """Scale an xyxy pixel box to fractions of the image size.

    Args:
        box: Four numbers ``[x1, y1, x2, y2]`` in pixels.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        A float ``numpy.ndarray`` of length 4 where the x values are divided
        by ``img_w`` and the y values by ``img_h``.
    """
    scale = np.array([img_w, img_h, img_w, img_h], dtype=float)
    return np.array(box).astype(float) / scale


def main():
    """Write the PISC test-split relationship samples as webdataset shards.

    For every annotated image that has an entry in ``relationship.json`` and
    is listed in ``relation_testidx.json``, one sample is written per
    annotated person pair. Each sample carries the image path, the dataset
    name, and ``[A_box, B_box, relationship_name]`` where the boxes are
    normalized by the image size.

    Raises:
        FileNotFoundError: If the image file of a sample to be written does
            not exist under ``PISC_ROOT/image``.
    """
    os.makedirs(OUT_DIR, exist_ok=True)
    annotation_image_info = load_json(os.path.join(PISC_ROOT, "annotation_image_info.json"))
    relationships = load_json(os.path.join(PISC_ROOT, "relationship.json"))
    relationship_testidx = load_json(
        os.path.join(PISC_ROOT, "relationship_split", "relation_testidx.json")
    )
    # Membership is tested once per image, so build the lookup set only once.
    test_ids = set(relationship_testidx)

    dataset = "pisc_relation_split"
    sample_count = 0  # running counter that keeps sample keys unique
    with wds.ShardWriter(os.path.join(OUT_DIR, NAMING), maxcount=MAXCOUNT**3) as sink:
        for annotation in tqdm(annotation_image_info):
            image_id = annotation["id"]
            image_key = str(image_id)  # the JSON lookups use string ids
            if image_key not in relationships:
                tqdm.write(f"skip {image_id} due to not in relationships")
                continue
            if image_key not in test_ids:
                tqdm.write(f"skip {image_id} due to not in test set")
                continue
            relationship = relationships[image_key]
            if not relationship:
                # Nothing to write for this image, so its file is not checked.
                continue

            img_w = annotation["imgW"]
            img_h = annotation["imgH"]
            bbox = annotation["bbox"]  # xyxy
            # The path is the same for every pair of this image: check it once.
            image_path = os.path.join(PISC_ROOT, "image", image_key.zfill(5) + ".jpg")
            if not os.path.exists(image_path):
                raise FileNotFoundError(f"image not found: {image_path}")

            for rel, label in relationship.items():
                # rel is "<A> <B>": two 1-based indices into this image's bbox list.
                a_id, b_id = map(int, rel.split(" "))
                data = [
                    normalize_bbox(bbox[a_id - 1], img_w, img_h),
                    normalize_bbox(bbox[b_id - 1], img_w, img_h),
                    rel_id_to_type[label - 1],
                ]
                sample = {
                    "__key__": f"{dataset}_{image_id}_{sample_count}",
                    "image_path.txt": image_path,
                    "dataset.txt": dataset,
                    "data.pyd": data,
                }
                sample_count += 1
                check_sample(sample)
                sink.write(sample)


if __name__ == "__main__":
    main()