# NOTE(review): the three lines below look like web-page status residue, not script content.
# Spaces:
# Runtime error
# Runtime error
"""Gather the per-part ``*.tar`` files into one directory of numbered symlinks.

The script globs ``part0`` .. ``part9`` under
``DIR/laion_synthetic_filtered_large`` and creates ``000000.tar``,
``000001.tar``, ... symlinks in ``OUT_DIR`` that point at the tars found.
"""
import os
import shutil
import glob
import random
from pprint import pprint

DIR_COCO_VG = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw"
DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/"
OUT_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/laion_synthetic_filtered_large/all"

# Number of ``part{i}`` sub-directories scanned by default (part0 .. part9).
NUM_PARTS = 10


def collect_tars(root=DIR, num_parts=NUM_PARTS):
    """Return the tar paths found under ``root``, part by part.

    Args:
        root: Directory containing ``laion_synthetic_filtered_large``.
        num_parts: How many ``part{i}`` directories (starting at 0) to scan.

    Returns:
        A list of paths, ordered by part index and then by path within each
        part. Parts that are missing or empty contribute nothing.
    """
    tars = []
    for part in range(num_parts):
        pattern = os.path.join(
            root, "laion_synthetic_filtered_large", f"part{part}", "*.tar"
        )
        # glob order depends on the file system; sort so that the index
        # assigned to each tar is the same on every run.
        tars.extend(sorted(glob.glob(pattern)))
    return tars


def link_tars(tars, out_dir=OUT_DIR):
    """Create ``{index:06d}.tar`` symlinks in ``out_dir`` for each tar.

    Safe to rerun: a link that already points at the right tar is left
    alone, and a symlink pointing elsewhere is replaced.

    Args:
        tars: Paths to link, in the order that defines their index.
        out_dir: Directory to hold the symlinks; created if missing.

    Returns:
        The number of symlinks created or replaced by this call.

    Raises:
        FileExistsError: If a destination exists and is not a symlink
            (a real file is never overwritten).
    """
    os.makedirs(out_dir, exist_ok=True)
    created = 0
    for i, tar in enumerate(tars):
        dst = os.path.join(out_dir, f"{i:06d}.tar")
        if os.path.islink(dst):
            if os.readlink(dst) == tar:
                continue  # already linked by a previous run
            os.remove(dst)  # stale link; removes only the link itself
        os.symlink(tar, dst)
        created += 1
    return created


def main():
    """Collect the tars, print a short summary, and create the symlinks."""
    tars = collect_tars()
    print(len(tars))
    pprint(tars[:20])
    link_tars(tars)


if __name__ == "__main__":
    main()