Spaces:
Sleeping
Sleeping
'''
    This script prepares the Bridge V2 dataset: it copies usable trajectories into train/test folders.
'''
import os, sys, shutil | |
def _iter_bridge_trajectories(dataset_path):
    '''
        Yield (img_folder_path, policy_out_file_path, lang_txt_file_path) for every usable trajectory.

        The tree is walked in sorted order as
        scene / task / order / time / "raw" / "traj_group0" / traj. A trajectory is
        yielded only when it contains "policy_out.pkl", "lang.txt" and an
        "images0" directory (only that one camera angle is considered).
    '''
    for scene_name in sorted(os.listdir(dataset_path)):
        print("We are reading scene ", scene_name)
        scene_dir = os.path.join(dataset_path, scene_name)

        for task_name in sorted(os.listdir(scene_dir)):
            task_dir = os.path.join(scene_dir, task_name)

            for order_name in sorted(os.listdir(task_dir)):
                order_dir = os.path.join(task_dir, order_name)

                for time_clock in sorted(os.listdir(order_dir)):
                    if time_clock == "lmdb":
                        continue    # Skip lmdb folder

                    time_dir = os.path.join(order_dir, time_clock, "raw", "traj_group0")
                    if not os.path.exists(time_dir):
                        print("time_dir does not exist for ", time_dir)
                        continue

                    for traj_name in sorted(os.listdir(time_dir)):
                        traj_path = os.path.join(time_dir, traj_name)
                        if not os.path.isdir(traj_path):
                            print("traj_path does not exist for ", traj_path)
                            continue

                        # policy_out.pkl is copied along with the frames, so it must exist
                        policy_out_file_path = os.path.join(traj_path, "policy_out.pkl")
                        if not os.path.exists(policy_out_file_path):
                            continue

                        # The language annotation is required as well
                        lang_txt_file_path = os.path.join(traj_path, "lang.txt")
                        if not os.path.exists(lang_txt_file_path):
                            continue

                        # Only consider one camera angle
                        if "images0" not in os.listdir(traj_path):
                            continue
                        img_folder_path = os.path.join(traj_path, "images0")
                        if not os.path.isdir(img_folder_path):
                            print("img_folder_path does not exist for ", img_folder_path)
                            continue

                        yield img_folder_path, policy_out_file_path, lang_txt_file_path


def _has_sequential_frames(folder):
    '''
        Return True when a folder with n entries contains im_0.jpg ... im_{n-1}.jpg.
    '''
    entry_count = len(os.listdir(folder))
    return all(
        os.path.exists(os.path.join(folder, "im_" + str(check_idx) + ".jpg"))
        for check_idx in range(entry_count)
    )


def read_bridge_v2(dataset_path, train_store_path, test_store_path, test_dataset_lists, copyfile=True):
    '''
        Collect every "images0" folder of the Bridge V2 tree and optionally copy it to a train/test store.

        Args:
            dataset_path:       Root of the raw dataset (scene/task/order/time/raw/traj_group0/traj/images0).
            train_store_path:   Destination root for trajectories that are not listed as test data.
            test_store_path:    Destination root for trajectories listed in test_dataset_lists.
            test_dataset_lists: Paths of "images0" folders, relative to dataset_path, that belong to the test set.
                                Only read when copyfile is True.
            copyfile:           When True (the usual case), copy each folder to <store>/<index>, verify that its
                                frames are named im_0.jpg, im_1.jpg, ... without gaps, and add policy_out.pkl
                                and lang.txt next to the frames. When False, nothing is copied or checked.

        Returns:
            List of all discovered "images0" source folders, in traversal order. A folder is listed as soon
            as it is discovered, so folders that later fail the sanity check are still included.
    '''
    start_idx = 0
    target_lists = []

    # Build the lookup once; with copyfile=False the test list is never consulted, so leave it untouched
    test_keys = set(test_dataset_lists) if copyfile else set()

    for img_folder_path, policy_out_file_path, lang_txt_file_path in _iter_bridge_trajectories(dataset_path):
        target_lists.append(img_folder_path)

        if copyfile:
            # relpath (instead of slicing off len(dataset_path) + 1 characters) keeps the key correct
            # even when dataset_path ends with a path separator. The log label is kept unchanged.
            relative_key = os.path.relpath(img_folder_path, dataset_path)
            print("img_folder_path[prefix_len:] is ", relative_key)

            # Test trajectories and training trajectories share one running index
            store_path = test_store_path if relative_key in test_keys else train_store_path
            target_dir = os.path.join(store_path, str(start_idx))

            # Report the real destination (train or test)
            print("Copy " + str(img_folder_path) + " to " + str(target_dir))
            shutil.copytree(img_folder_path, target_dir)

            # Sanity check: done before the auxiliary files are added, so only frames are counted
            if not _has_sequential_frames(target_dir):
                # Remove the copy and reuse this index for the next trajectory
                print("This file cannot pass the sanity check. We will remove it!")
                shutil.rmtree(target_dir)
                continue

            # Move other auxiliary files
            shutil.copy(policy_out_file_path, os.path.join(target_dir, "policy_out.pkl"))
            shutil.copy(lang_txt_file_path, os.path.join(target_dir, "lang.txt"))

        # Update the idx
        start_idx += 1

    print("We have ", start_idx)

    # Return a list of file path
    return target_lists
if __name__ == "__main__":

    # Hard-coded locations of the raw dataset, the two output stores and the test-split list
    dataset_path = "/nfs/turbo/jjparkcv-turbo-large/boyangwa/raw/bridge_data_v2"
    train_store_path = "../sanity_check/bridge_v2_raw"
    test_store_path = "../sanity_check/bridge_v2_test_raw"
    test_dataset_predefined_path = "test_path_v2.txt"

    # Read Test dataset path (one relative path per line).
    # This is done before the output folders are wiped, so a missing list file
    # aborts the run without destroying results from an earlier run.
    # rstrip("\n") keeps the last entry intact when the file has no trailing newline.
    with open(test_dataset_predefined_path, "r") as read_file:
        test_dataset_lists = [line.rstrip("\n") for line in read_file if line.strip()]
    print("test_dataset_lists is ", test_dataset_lists)

    # Start from empty output folders
    for store_path in (train_store_path, test_store_path):
        if os.path.exists(store_path):
            shutil.rmtree(store_path)
        os.makedirs(store_path)

    read_bridge_v2(dataset_path, train_store_path, test_store_path, test_dataset_lists)