|
|
|
from tqdm import tqdm |
|
import argparse |
|
import json |
|
|
|
def parse_args(argv=None):
    """Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what a plain ``parse_args()`` call does.

    Returns:
        An ``argparse.Namespace`` whose ``input`` attribute holds the path
        passed through the required ``--input`` option.

    Raises:
        SystemExit: Raised by argparse when ``--input`` is missing or the
            arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input',
        type=str,
        required=True,
        help="Input path of text-to-image Jsonl annotation file."
    )
    return parser.parse_args(argv)
|
|
|
def _invert_annotations(lines):
    """Build an image_id -> [text_id, ...] mapping from JSONL lines.

    Args:
        lines: Iterable of strings, each holding one JSON object with a
            ``text_id`` key and an ``image_ids`` key (an iterable of ids).

    Returns:
        A dict mapping every image id to the list of text ids that listed
        it, in the order the records were read.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``text_id`` or ``image_ids``.
    """
    image_to_texts = {}
    for line in lines:
        line = line.strip()
        if not line:
            # Blank lines (for example a trailing newline at the end of the
            # file) carry no record; json.loads("") would raise on them.
            continue
        obj = json.loads(line)
        text_id = obj['text_id']
        for image_id in obj['image_ids']:
            image_to_texts.setdefault(image_id, []).append(text_id)
    return image_to_texts


def _tr_output_path(input_path):
    """Return the output path: *input_path* with ``.jsonl`` -> ``.tr.jsonl``.

    Only a trailing ``.jsonl`` suffix is removed before ``.tr.jsonl`` is
    appended, so a ``.jsonl`` appearing elsewhere in the path (such as in a
    directory name) is left untouched. Paths without the suffix simply get
    ``.tr.jsonl`` appended.
    """
    suffix = ".jsonl"
    if input_path.endswith(suffix):
        input_path = input_path[:-len(suffix)]
    return input_path + ".tr.jsonl"


if __name__ == "__main__":
    args = parse_args()

    # Read the text-to-image annotations and regroup them per image.
    with open(args.input, "r", encoding="utf-8") as fin:
        t2i_record = _invert_annotations(tqdm(fin))

    # Emit one JSON object per image next to the input file.
    with open(_tr_output_path(args.input), "w", encoding="utf-8") as fout:
        for image_id, text_ids in t2i_record.items():
            out_obj = {"image_id": image_id, "text_ids": text_ids}
            fout.write("{}\n".format(json.dumps(out_obj)))

    print("Done!")