# Spaces: Runtime error (page status text captured along with this file; not part of the script)
import os
import random
import sys
import time

import webdataset as wds
from groundingdino.demo.caption_grounder import caption_grounder
from tqdm import tqdm
# Alternative source/destination directory pairs, left commented out.
# Exactly one SOURCE_DIR/DEST_DIR pair should be active at a time.
# SOURCE_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/laion_synthetic_filtered_large/all"
# DEST_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/laion_synthetic_filtered_large/all_ground"
# SOURCE_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/ccs_synthetic_filtered_large"
# DEST_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/ccs_synthetic_filtered_large_ground"
# SOURCE_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/karpathy_coco_wds_full"
# DEST_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/karpathy_coco_wds_full_ground"
# SOURCE_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/vg_wds_full"
# DEST_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/vg_wds_full_ground"

# Directory holding the input "<shard>.tar" files read by the script.
SOURCE_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/all_data_0620"
# Directory receiving the augmented "<shard>.tar" files written by the script.
DEST_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/all_data_ground_0701"
def augment_wds(url, output, generator, total=10000):
    """Copy a WebDataset shard, adding grounding results to every sample.

    Every sample read from ``url`` is passed through
    ``generator.ground_caption_raw`` and written to ``output`` together with
    the logits and boxes that call returns.

    Args:
        url: Shard location handed to ``wds.WebDataset``.
        output: Destination handed to ``wds.TarWriter``.
        generator: Object providing
            ``ground_caption_raw(image_pil=..., caption=...)`` that returns a
            ``(logits, boxes)`` pair.
        total: Sample count shown by the progress bar. It only affects the
            display, not how many samples are processed. Defaults to 10000,
            the value that used to be hard-coded.
    """
    # Decode images to PIL RGB and keep only the key, the image (whichever of
    # the jpg/png/jpeg entries the sample carries) and the caption text.
    src = (
        wds.WebDataset(url)
        .decode("pilrgb")
        .to_tuple("__key__", "jpg;png;jpeg", "txt")
    )
    with wds.TarWriter(output) as dst:
        for key, image, caption in tqdm(src, total=total):
            # The image is passed on as decoded; no resizing is applied.
            logits, boxes = generator.ground_caption_raw(
                image_pil=image,
                caption=caption,
            )
            # NOTE(review): the image is always stored under the "jpg" key,
            # even when the source entry was png/jpeg -- confirm this is the
            # intended output layout.
            sample = {
                "__key__": key,
                "jpg": image,
                "txt": caption,
                "logits.pyd": logits,
                "boxes.pyd": boxes,
            }
            dst.write(sample)
if __name__ == "__main__":
    # The shard name comes from the second command-line argument.
    # NOTE(review): sys.argv[1] is never read here -- presumably consumed by
    # the job launcher; confirm against how this script is invoked.
    shard_file = sys.argv[2] + ".tar"
    source_path = os.path.join(SOURCE_DIR, shard_file)
    dest_path = os.path.join(DEST_DIR, shard_file)
    print("FROM", source_path)
    print("TO", dest_path)
    # if os.path.exists(dest_path):
    #     print("already done. exiting...")
    #     exit()

    # Build the grounding model, retrying indefinitely with a random pause of
    # up to five seconds between attempts. Only ``Exception`` is caught so
    # that Ctrl-C (KeyboardInterrupt) and SystemExit still stop the job, and
    # each failure is reported instead of being silently discarded.
    while True:
        try:
            generator = caption_grounder(
                config_file="/gpfs/u/home/LMCG/LMCGljnn/scratch/code/multimodal/GroundingDINO/groundingdino/config/GroundingDINO_SwinB.cfg.py",
                checkpoint_path="/gpfs/u/home/LMCG/LMCGljnn/scratch/code/multimodal/GroundingDINO/checkpoints/groundingdino_swinb_cogcoor.pth",
                cpu_only=False,
                box_threshold=0.05,
            )
        except Exception as exc:
            print("caption_grounder init failed, retrying:", repr(exc), file=sys.stderr)
            time.sleep(random.random() * 5)
        else:
            break

    augment_wds(
        source_path,
        dest_path,
        generator=generator,
    )
    print("DONE")