annotation / prepare_for_gpt.py
MudeHui's picture
Add application file
1fb65ae
import json
from argparse import ArgumentParser
from generate_txt_dataset import DELIMITER_0, DELIMITER_1, STOP
def main(input_path: str, output_path: str) -> None:
    """Rewrite edit records as prompt/completion pairs, one JSON object per line.

    Each non-blank line of ``input_path`` is parsed as a JSON object that
    must provide the keys ``input``, ``edit`` and ``output``. For every such
    record one line is written to ``output_path`` containing:

    * ``prompt``: the ``input`` text followed by ``DELIMITER_0``;
    * ``completion``: the ``edit`` text, ``DELIMITER_1``, the ``output``
      text, then ``STOP``.

    Args:
        input_path: Path of the JSON Lines file to read.
        output_path: Path of the JSON Lines file to write. An existing file
            is overwritten.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
        KeyError: If a record lacks ``input``, ``edit`` or ``output``.
    """
    # Decode explicitly as UTF-8 instead of the platform locale, and skip
    # blank lines (e.g. a trailing empty line) that json.loads would reject.
    with open(input_path, encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]

    # Format everything before touching the output file, so a malformed
    # record cannot leave a truncated, half-written output behind.
    lines = []
    for record in records:
        prompt_for_gpt = {
            "prompt": f"{record['input']}{DELIMITER_0}",
            "completion": f"{record['edit']}{DELIMITER_1}{record['output']}{STOP}",
        }
        lines.append(f"{json.dumps(prompt_for_gpt)}\n")

    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(lines)
def main_classify(input_path: str, output_path: str) -> None:
    """Rewrite edit records as edit -> class prompt/completion pairs.

    Each non-blank line of ``input_path`` is parsed as a JSON object that
    must provide the keys ``edit`` and ``class``. For every such record one
    line is written to ``output_path`` containing:

    * ``prompt``: the ``edit`` text followed by ``DELIMITER_0``;
    * ``completion``: the ``class`` value followed by ``STOP``.

    Args:
        input_path: Path of the JSON Lines file to read.
        output_path: Path of the JSON Lines file to write. An existing file
            is overwritten.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
        KeyError: If a record lacks ``edit`` or ``class``.
    """
    # Decode explicitly as UTF-8 instead of the platform locale, and skip
    # blank lines (e.g. a trailing empty line) that json.loads would reject.
    with open(input_path, encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]

    # Format everything before touching the output file, so a malformed
    # record cannot leave a truncated, half-written output behind.
    lines = []
    for record in records:
        prompt_for_gpt = {
            "prompt": f"{record['edit']}{DELIMITER_0}",
            "completion": f"{record['class']}{STOP}",
        }
        lines.append(f"{json.dumps(prompt_for_gpt)}\n")

    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(lines)
if __name__ == "__main__":
    # Command-line entry point: pick one of the two conversions and run it.
    parser = ArgumentParser(
        description="Convert a JSON Lines file of edit records into prompt/completion pairs."
    )
    parser.add_argument(
        "--input-path",
        type=str,
        default="/mlx/users/peng.wang/playground/data/chat_edit/assets/test200/edit_instructions_v0.jsonl",
        help="JSON Lines file to read.",
    )
    parser.add_argument(
        "--output-path",
        type=str,
        default="/mlx/users/peng.wang/playground/data/chat_edit/assets/test200/edit_class_for_gpt.jsonl",
        help="JSON Lines file to write (overwritten if it exists).",
    )
    # Selecting the conversion used to require commenting code in and out.
    # The default keeps the previous behavior of running main_classify.
    parser.add_argument(
        "--mode",
        choices=("classify", "generate"),
        default="classify",
        help="'classify' runs main_classify (edit -> class); "
        "'generate' runs main (input -> edit + output).",
    )
    args = parser.parse_args()

    if args.mode == "generate":
        main(args.input_path, args.output_path)
    else:
        main_classify(args.input_path, args.output_path)