Spaces:
Runtime error
Runtime error
File size: 2,925 Bytes
0b7b08a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import webdataset as wds
from groundingdino.demo.caption_grounder import caption_grounder
from tqdm import tqdm
import sys
import os
# Input/output directory configuration.
# Several alternative SOURCE_DIR/DEST_DIR pairs are kept below, commented out;
# only the final, uncommented pair is active.
# SOURCE_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/laion_synthetic_filtered_large/all"
# DEST_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/laion_synthetic_filtered_large/all_ground"
# SOURCE_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/ccs_synthetic_filtered_large"
# DEST_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/ccs_synthetic_filtered_large_ground"
# SOURCE_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/karpathy_coco_wds_full"
# DEST_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/karpathy_coco_wds_full_ground"
# SOURCE_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/vg_wds_full"
# DEST_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/vg_wds_full_ground"
# Directory holding the input shards; the script reads "<SOURCE_DIR>/<argv[2]>.tar".
SOURCE_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/all_data_0620"
# Directory receiving the augmented shards; the script writes "<DEST_DIR>/<argv[2]>.tar".
DEST_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/all_data_ground_0701"
def augment_wds(url, output, generator):
    """Copy a WebDataset shard to ``output``, adding grounding results per sample.

    Each sample read from ``url`` is passed, together with its caption, to
    ``generator.ground_caption_raw``; the returned logits and boxes are written
    next to the image and caption in a new tar file.

    Args:
        url: Location of the input shard, handed to ``wds.WebDataset``.
        output: Path of the tar file created through ``wds.TarWriter``.
        generator: Object exposing
            ``ground_caption_raw(image_pil=..., caption=...)`` that returns a
            ``(logits, boxes)`` pair.
    """
    # Build the reading pipeline one stage at a time.
    dataset = wds.WebDataset(url)
    # "pilrgb" is webdataset's image decoding spec (presumably yielding RGB PIL
    # images, as the ``image_pil`` keyword below suggests).
    dataset = dataset.decode("pilrgb")
    # The image may be stored under any of the jpg/png/jpeg extensions.
    dataset = dataset.to_tuple("__key__", "jpg;png;jpeg", "txt")

    with wds.TarWriter(output) as writer:
        # total=10000 only sizes the progress bar; it does not limit iteration.
        progress = tqdm(dataset, total=10000)
        for sample_key, pil_image, text in progress:
            grounding = generator.ground_caption_raw(image_pil=pil_image, caption=text)
            logits, boxes = grounding
            # The image is always written under the "jpg" key, whatever
            # extension it had in the source shard.
            writer.write(
                {
                    "__key__": sample_key,
                    "jpg": pil_image,
                    "txt": text,
                    "logits.pyd": logits,
                    "boxes.pyd": boxes,
                }
            )
if __name__ == "__main__":
    # Script entry point: ground the captions of one shard.
    #
    # The shard name (without the ".tar" suffix) is read from sys.argv[2];
    # sys.argv[1] is not read anywhere in this block.
    import random
    import time

    shard_file = sys.argv[2] + ".tar"
    source_path = os.path.join(SOURCE_DIR, shard_file)
    dest_path = os.path.join(DEST_DIR, shard_file)
    print("FROM", source_path)
    print("TO", dest_path)
    # Disabled skip-if-already-processed check, kept for reference:
    # if os.path.exists(dest_path):
    #     print("already done. exiting...")
    #     exit()

    # Model construction is retried indefinitely with a random back-off of up
    # to 5 seconds (presumably to ride out transient failures when many jobs
    # start at once -- confirm with the job launcher).
    # Only ``Exception`` is caught so that Ctrl-C / SystemExit still stop the
    # job, and each failure is reported instead of being silently swallowed.
    while True:
        try:
            generator = caption_grounder(
                config_file="/gpfs/u/home/LMCG/LMCGljnn/scratch/code/multimodal/GroundingDINO/groundingdino/config/GroundingDINO_SwinB.cfg.py",
                checkpoint_path="/gpfs/u/home/LMCG/LMCGljnn/scratch/code/multimodal/GroundingDINO/checkpoints/groundingdino_swinb_cogcoor.pth",
                cpu_only=False,
                box_threshold=0.05,
            )
        except Exception as err:
            print("caption_grounder init failed, retrying:", repr(err), file=sys.stderr)
            time.sleep(random.random() * 5)
        else:
            break

    augment_wds(
        source_path,
        dest_path,
        generator=generator,
    )
    print("DONE")
|