Spaces:

chendl
/

compositional_test

Runtime error

App Files Community

compositional_test/multimodal/tools/prepare_vg_regional_box.py

chendl

Add application file

0b7b08a about 1 year ago

raw

history blame

4.76 kB

	import webdataset as wds
	import glob
	import os
	from tqdm import tqdm
	import orjson as json
	import itertools
	from PIL import Image
	import numpy as np
	from typing import List
	import cv2
	import random
	from tqdm.contrib.concurrent import process_map
	from copy import deepcopy

	class Generator:
	    """Minimal base class shared by dataset generators.

	    Attributes:
	        dataset_name: Identifier of the dataset, stored exactly as given.
	        is_end: Completion flag, initialised to ``False``.
	    """

	    def __init__(self, dataset_name: str) -> None:
	        """Store the dataset name and reset the completion flag.

	        Args:
	            dataset_name: Identifier of the dataset this generator serves.
	        """
	        self.dataset_name = dataset_name
	        self.is_end = False


	class VisualGenomeGenerator(Generator):
	    """Yield (image, phrase, box) samples from a local Visual Genome copy.

	    On construction the class reads ``region_descriptions.json`` and
	    ``image_data.json`` from ``root``, keeps the regions that pass the
	    filters below, attaches a normalized ``norm_xywh`` tuple to each kept
	    region dict (in place) and shuffles the kept regions into
	    ``self.regions``.

	    A region is dropped when:
	      * its image has a non-positive width or height, or
	      * its normalized area is below ``MIN_AREA_RATIO``, or
	      * its phrase contains ``" is"`` or ``" are"``.

	    Attributes:
	        image_id_to_filename: image id -> ``root/<subfolder>/<filename>``,
	            where the last two components come from the image URL.
	        image_id_to_wh: image id -> ``(width, height)`` from the metadata.
	        regions: Shuffled list of the kept region dicts.
	    """

	    # Side length, in pixels, of the square every image is resized to.
	    IMAGE_SIZE = 224
	    # Minimum region area as a fraction of the image area.
	    # NOTE(review): 16164 may be a typo for 16384 (128 ** 2); the original
	    # value is kept unchanged -- confirm before altering it.
	    MIN_AREA_RATIO = 1 / 16164
	    # Substrings that cause a phrase to be rejected.
	    REJECTED_SUBSTRINGS = (" is", " are")

	    def __init__(self, root: str):
	        """Load the annotations under ``root`` and build ``self.regions``.

	        Args:
	            root: Directory holding ``region_descriptions.json``,
	                ``image_data.json`` and the image sub-folders.
	        """
	        super().__init__(dataset_name="vg")
	        # Binary mode: the JSON parser decodes the bytes itself, so the
	        # platform default text encoding never gets involved, and the
	        # context managers close the files deterministically.
	        with open(os.path.join(root, "region_descriptions.json"), "rb") as f:
	            data = json.loads(f.read())
	        with open(os.path.join(root, "image_data.json"), "rb") as f:
	            image_data = json.loads(f.read())

	        self.image_id_to_filename = {}
	        self.image_id_to_wh = {}
	        for image in image_data:
	            image_id = image["image_id"]
	            subfolder, filename = image["url"].split("/")[-2:]
	            self.image_id_to_filename[image_id] = os.path.join(root, subfolder, filename)
	            self.image_id_to_wh[image_id] = (image["width"], image["height"])

	        self.regions = []
	        total = 0
	        total_image = 0
	        used_image = 0
	        for entry in tqdm(data):
	            total_image += 1
	            kept_any = False
	            for region in entry["regions"]:
	                total += 1
	                image_w, image_h = self.image_id_to_wh[region["image_id"]]
	                if image_w <= 0 or image_h <= 0:
	                    # Broken metadata would otherwise raise ZeroDivisionError.
	                    continue
	                region_w = int(region["width"]) / image_w
	                region_h = int(region["height"]) / image_h
	                x = int(region["x"]) / image_w
	                y = int(region["y"]) / image_h
	                if region_w * region_h < self.MIN_AREA_RATIO:
	                    continue
	                phrase = region["phrase"]
	                if any(token in phrase for token in self.REJECTED_SUBSTRINGS):
	                    continue
	                region["norm_xywh"] = (x, y, region_w, region_h)
	                self.regions.append(region)
	                kept_any = True
	            if kept_any:
	                used_image += 1

	        random.shuffle(self.regions)
	        # Guard the ratios so an empty annotation file does not crash here.
	        region_ratio = len(self.regions) / total if total else 0.0
	        image_ratio = used_image / total_image if total_image else 0.0
	        print("valid region", len(self.regions), total, region_ratio)
	        print("valid image", used_image, total_image, image_ratio)

	    def __len__(self):
	        """Return the number of kept regions."""
	        return len(self.regions)

	    def __iter__(self):
	        """Iterate over the kept regions, skipping unreadable images.

	        Yields:
	            ``[dataset_name, image, phrase, box, image_id]`` where ``image``
	            is the PIL image resized to ``IMAGE_SIZE`` x ``IMAGE_SIZE`` and
	            ``box`` is ``np.array([x1, y1, x2, y2])`` in resized-image
	            pixels.

	        ``self.is_end`` is set to ``True`` once every region was visited.
	        """
	        size = self.IMAGE_SIZE
	        for region in self.regions:
	            image_id = region["image_id"]
	            phrase = region["phrase"]
	            try:
	                # PIL decodes lazily, so resizing inside the ``with`` both
	                # surfaces decode errors here and lets the file be closed.
	                with Image.open(self.image_id_to_filename[image_id]) as raw:
	                    image = raw.resize((size, size))
	            except (OSError, KeyError):
	                # Missing, unidentifiable or truncated image: best-effort
	                # skip, without swallowing KeyboardInterrupt/SystemExit.
	                continue
	            x, y, region_w, region_h = region["norm_xywh"]
	            x1 = int(x * size)
	            y1 = int(y * size)
	            x2 = int(x1 + region_w * size)
	            y2 = int(y1 + region_h * size)
	            yield [self.dataset_name, image, phrase, np.array([x1, y1, x2, y2]), image_id]
	        self.is_end = True


	def handle(args):
	    """Write one worker's share of regions as WebDataset shards.

	    Shards are written to ``OUT_DIR/<iii>/%06d.tar`` with at most 8500
	    samples each. ``OUT_DIR`` is read from module scope. Only the worker
	    with index 0 shows a progress bar and prints sample captions.

	    Each sample holds the image resized to 224x224 (``jpg``), the region
	    phrase (``txt``) and the pixel box ``[x1, y1, x2, y2]`` stored under
	    both ``boxes.pyd`` and ``logits.pyd``.

	    Args:
	        args: ``(iii, regions, image_id_to_filename)`` -- the worker index,
	            the region dicts to write (each carrying ``image_id``,
	            ``phrase`` and ``norm_xywh``) and a mapping from image id to
	            image file path.
	    """
	    dataset_name = "vg"
	    size = 224
	    iii, regions, image_id_to_filename = args
	    is_lead = iii == 0
	    if is_lead:
	        print(regions[:10])
	    shard_dir = os.path.join(OUT_DIR, str(iii))
	    os.makedirs(shard_dir, exist_ok=True)
	    with wds.ShardWriter(os.path.join(shard_dir, "%06d.tar"), maxcount=8500) as sink:
	        sink.verbose = False
	        for i, region in enumerate(tqdm(regions, disable=not is_lead)):
	            image_id = region["image_id"]
	            caption = region["phrase"]
	            try:
	                # PIL decodes lazily; resizing inside the ``with`` forces
	                # the decode and lets the source file be closed right away.
	                with Image.open(image_id_to_filename[image_id]) as raw:
	                    image = raw.resize((size, size))
	            except OSError as err:
	                # One unreadable image should not abort the whole worker;
	                # report it and move on, as the generator's iterator does.
	                tqdm.write(f"skipping unreadable image {image_id}: {err}")
	                continue
	            x, y, region_w, region_h = region["norm_xywh"]
	            x1 = int(x * size)
	            y1 = int(y * size)
	            x2 = int(x1 + region_w * size)
	            y2 = int(y1 + region_h * size)
	            xyxy = np.array([x1, y1, x2, y2])
	            # NOTE(review): ``i`` is local to this worker, so keys repeat
	            # across the per-worker directories -- confirm that downstream
	            # readers do not require globally unique keys.
	            sink.write({"__key__": f"{dataset_name}_{i}_containBox", "jpg": image, "txt": caption, "boxes.pyd": xyxy, "logits.pyd": xyxy})
	            if i % 200 == 0 and is_lead:
	                tqdm.write(f"{caption} {xyxy}")


	# Defined at module level (not under the ``__main__`` guard) because
	# ``handle`` reads it inside worker processes; with the "spawn" or
	# "forkserver" start methods workers re-import this module without running
	# the guarded block and would otherwise hit a NameError.
	OUT_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/vg_0826"


	if __name__ == "__main__":
	    os.makedirs(OUT_DIR, exist_ok=True)
	    visual_genome_generator = VisualGenomeGenerator("/gpfs/u/home/LMCG/LMCGljnn/scratch/datasets/raw/vg")
	    N_PROC = 150
	    all_filenames = visual_genome_generator.image_id_to_filename
	    data_list = []
	    for rank in range(N_PROC):
	        # Round-robin split: worker ``rank`` gets regions rank, rank + N_PROC, ...
	        shard_regions = visual_genome_generator.regions[rank::N_PROC]
	        # Ship only the paths this worker needs instead of a deep copy of
	        # the whole mapping; arguments are pickled per task anyway.
	        shard_filenames = {
	            region["image_id"]: all_filenames[region["image_id"]]
	            for region in shard_regions
	        }
	        data_list.append([rank, shard_regions, shard_filenames])
	    process_map(handle, data_list, max_workers=N_PROC, disable=True)