datasets.py 848 Bytes
Newer Older
yuguo960516's avatar
yuguo960516 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import os


def convert_txt2json(file_path):
    """Store the dataset in loose JSON format file, you can refer:
    https://libai.readthedocs.io/en/latest/tutorials/basics/Preprocessing_Dataset.html
    """
    filename, ext = os.path.splitext(file_path)
    filename = filename.split("/")[-1]

    with open(file_path) as f:
        lines = f.readlines()
        print(len(lines))

    target_file = "/home/xiezipeng/libai/projects/MagicPrompt/" + filename + "_magicprompy.txt"
    with open(target_file, "w", encoding="utf-8") as f:
        for line in lines:
            line = "{" + '"' + "text" + '"' + ": " + '"' + line.strip() + '"' + "}" + "\n"
            f.write(line)
    os.rename(target_file, target_file[:-4] + ".json")


if __name__ == "__main__":
    convert_txt2json("/path/to/test.txt")
    convert_txt2json("/path/to/train.txt")