File size: 1,497 Bytes
1fb65ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import json
from argparse import ArgumentParser

from generate_txt_dataset import DELIMITER_0, DELIMITER_1, STOP


def main(input_path: str, output_path: str) -> None:
    """Convert edit records into prompt/completion pairs, one JSON per line.

    ``input_path`` is read as JSON Lines. Every record must provide the keys
    ``input``, ``edit`` and ``output``. For each record one JSON object is
    written to ``output_path`` with:

    * ``prompt``: the record's ``input`` followed by ``DELIMITER_0``
    * ``completion``: ``edit``, then ``DELIMITER_1``, then ``output``,
      terminated by ``STOP``

    Args:
        input_path: Path of the JSONL file to read.
        output_path: Path of the JSONL file to write; an existing file is
            overwritten.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
        KeyError: If a record lacks one of the required keys.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(input_path, encoding="utf-8") as f:
        # Blank lines (e.g. stray trailing newlines) carry no record and
        # would otherwise make json.loads() raise.
        prompts = [json.loads(line) for line in f if line.strip()]

    # Serialize everything before opening the output, so that a malformed
    # record raises without truncating a previously written file.
    serialized = []
    for prompt in prompts:
        prompt_for_gpt = {
            "prompt": f"{prompt['input']}{DELIMITER_0}",
            "completion": f"{prompt['edit']}{DELIMITER_1}{prompt['output']}{STOP}",
        }
        serialized.append(f"{json.dumps(prompt_for_gpt)}\n")

    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(serialized)


def main_classify(input_path: str, output_path: str) -> None:
    """Convert edit records into classification prompt/completion pairs.

    ``input_path`` is read as JSON Lines. Every record must provide the keys
    ``edit`` and ``class``. For each record one JSON object is written to
    ``output_path`` with:

    * ``prompt``: the record's ``edit`` followed by ``DELIMITER_0``
    * ``completion``: the record's ``class`` followed by ``STOP``

    Args:
        input_path: Path of the JSONL file to read.
        output_path: Path of the JSONL file to write; an existing file is
            overwritten.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
        KeyError: If a record lacks one of the required keys.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(input_path, encoding="utf-8") as f:
        # Blank lines (e.g. stray trailing newlines) carry no record and
        # would otherwise make json.loads() raise.
        prompts = [json.loads(line) for line in f if line.strip()]

    # Serialize everything before opening the output, so that a malformed
    # record raises without truncating a previously written file.
    serialized = []
    for prompt in prompts:
        prompt_for_gpt = {
            "prompt": f"{prompt['edit']}{DELIMITER_0}",
            "completion": f"{prompt['class']}{STOP}",
        }
        serialized.append(f"{json.dumps(prompt_for_gpt)}\n")

    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(serialized)


if __name__ == "__main__":
    # Map each CLI task name to the converter implementing it, so the
    # converter is selected by a flag instead of by editing this file.
    tasks = {"edit": main, "classify": main_classify}

    parser = ArgumentParser(
        description="Convert a JSONL file of edit records into prompt/completion JSONL."
    )
    parser.add_argument("--input-path", required=False, type=str, default="/mlx/users/peng.wang/playground/data/chat_edit/assets/test200/edit_instructions_v0.jsonl")
    parser.add_argument("--output-path", required=False, type=str, default="/mlx/users/peng.wang/playground/data/chat_edit/assets/test200/edit_class_for_gpt.jsonl")
    parser.add_argument(
        "--task",
        choices=sorted(tasks),
        # "classify" keeps the behaviour the script had before this flag existed.
        default="classify",
        help="'edit' runs main(); 'classify' runs main_classify() (default).",
    )
    args = parser.parse_args()
    tasks[args.task](args.input_path, args.output_path)