Spaces:

HikariDawn
/

This-and-That

Sleeping

App Files Files Community

This-and-That / curation_pipeline /prepare_bridge_v2.py

HikariDawn777

feat: initial push

59b2a81 about 2 months ago

raw

history blame contribute delete

6.58 kB

	'''
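	This script prepares the Bridge V2 dataset: it copies each valid trajectory's "images0" frames, together with its policy_out.pkl and lang.txt, into sequentially numbered folders of a train or a test directory.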
	'''
	import os, sys, shutil



	def _iter_bridge_trajectories(dataset_path):
	    """Yield every trajectory directory found under ``dataset_path``, in sorted order.

	    The layout walked is
	    ``<scene>/<task>/<order>/<time_clock>/raw/traj_group0/<traj>``.
	    Any ``lmdb`` entry at the ``time_clock`` level is skipped, and entries that
	    lack ``raw/traj_group0`` or that are not directories are reported and skipped.
	    """
	    for scene_name in sorted(os.listdir(dataset_path)):
	        print("We are reading scene ", scene_name)
	        scene_dir = os.path.join(dataset_path, scene_name)

	        for task_name in sorted(os.listdir(scene_dir)):
	            task_dir = os.path.join(scene_dir, task_name)

	            for order_name in sorted(os.listdir(task_dir)):
	                order_dir = os.path.join(task_dir, order_name)

	                for time_clock in sorted(os.listdir(order_dir)):
	                    if time_clock == "lmdb":
	                        continue    # Skip lmdb folder

	                    time_dir = os.path.join(order_dir, time_clock, "raw", "traj_group0")
	                    if not os.path.exists(time_dir):
	                        print("time_dir does not exist for ", time_dir)
	                        continue

	                    for traj_name in sorted(os.listdir(time_dir)):
	                        traj_path = os.path.join(time_dir, traj_name)
	                        if not os.path.isdir(traj_path):
	                            print("traj_path does not exist for ", traj_path)
	                            continue
	                        yield traj_path


	def _has_sequential_frames(frame_dir):
	    """Return True if ``frame_dir`` holds im_0.jpg ... im_{N-1}.jpg, N being its entry count."""
	    frame_count = len(os.listdir(frame_dir))
	    return all(
	        os.path.exists(os.path.join(frame_dir, "im_" + str(frame_idx) + ".jpg"))
	        for frame_idx in range(frame_count)
	    )


	def read_bridge_v2(dataset_path, train_store_path, test_store_path, test_dataset_lists, copyfile=True):
	    """Collect the "images0" folder of every usable trajectory and optionally copy it.

	    A trajectory is usable when it contains ``policy_out.pkl``, ``lang.txt`` and an
	    ``images0`` directory (only this one camera angle is considered).

	    Args:
	        dataset_path: Root directory of the raw dataset.
	        train_store_path: Existing directory receiving the training samples.
	        test_store_path: Existing directory receiving the test samples.
	        test_dataset_lists: Paths of ``images0`` folders, relative to
	            ``dataset_path``, that belong to the test split.
	        copyfile: When True (the usual case), copy each folder to
	            ``<store>/<idx>`` together with ``policy_out.pkl`` and ``lang.txt``.
	            ``idx`` is a single counter shared by both splits and only advances
	            for samples that pass the frame sanity check.

	    Returns:
	        The list of every ``images0`` folder path that was found. Folders that
	        later fail the sanity check are still included.
	    """
	    # Build the lookup once; membership is tested for every trajectory.
	    test_rel_paths = frozenset(test_dataset_lists)

	    start_idx = 0
	    target_lists = []

	    for traj_path in _iter_bridge_trajectories(dataset_path):

	        # policy_out.pkl is copied along; just in case there is also valuable information there
	        policy_out_file_path = os.path.join(traj_path, "policy_out.pkl")
	        if not os.path.exists(policy_out_file_path):
	            continue

	        # Check the lang txt file
	        lang_txt_file_path = os.path.join(traj_path, "lang.txt")
	        if not os.path.exists(lang_txt_file_path):
	            continue

	        # Only consider one camera angle. lexists() mirrors "name is listed in the
	        # directory": a missing entry is skipped silently, a non-directory is reported.
	        img_folder_path = os.path.join(traj_path, "images0")
	        if not os.path.lexists(img_folder_path):
	            continue
	        if not os.path.isdir(img_folder_path):
	            print("img_folder_path does not exist for ", img_folder_path)
	            continue

	        target_lists.append(img_folder_path)
	        if not copyfile:
	            continue

	        # relpath (instead of slicing off len(dataset_path) + 1 characters) stays
	        # correct when dataset_path is given with a trailing separator.
	        rel_path = os.path.relpath(img_folder_path, dataset_path)
	        print("img_folder_path[prefix_len:] is ", rel_path)

	        store_root = test_store_path if rel_path in test_rel_paths else train_store_path
	        target_dir = os.path.join(store_root, str(start_idx))

	        # Now we can copy the folder to our destination (log the real destination)
	        print("Copy " + str(img_folder_path) + " to " + str(target_dir))
	        shutil.copytree(img_folder_path, target_dir)

	        # Sanity check: frames should exist sequentially
	        if not _has_sequential_frames(target_dir):
	            # Remove the copy and move on without updating start_idx
	            print("This file cannot pass the sanity check. We will remove it!")
	            shutil.rmtree(target_dir)
	            continue

	        # Copy the other auxiliary files
	        shutil.copy(policy_out_file_path, os.path.join(target_dir, "policy_out.pkl"))
	        shutil.copy(lang_txt_file_path, os.path.join(target_dir, "lang.txt"))

	        # Update the idx
	        start_idx += 1

	    print("We have ", start_idx)

	    # Return a list of file path
	    return target_lists



	if __name__ == "__main__":
	    # Script entry point: rebuild the train/test output folders from scratch,
	    # load the predefined test split, then run the copy over the raw dataset.
	    dataset_path = "/nfs/turbo/jjparkcv-turbo-large/boyangwa/raw/bridge_data_v2"
	    train_store_path = "../sanity_check/bridge_v2_raw"
	    test_store_path = "../sanity_check/bridge_v2_test_raw"
	    test_dataset_predefined_path = "test_path_v2.txt"

	    # Start from empty output directories (any previous content is deleted)
	    for store_path in (train_store_path, test_store_path):
	        if os.path.exists(store_path):
	            shutil.rmtree(store_path)
	        os.makedirs(store_path)

	    # Read Test dataset path: one relative path per line. Only the newline is
	    # stripped, so a last line without a trailing newline keeps its final character.
	    with open(test_dataset_predefined_path, "r") as read_file:
	        test_dataset_lists = [line.rstrip("\n") for line in read_file]
	    print("test_dataset_lists is ", test_dataset_lists)

	    read_bridge_v2(dataset_path, train_store_path, test_store_path, test_dataset_lists)