data_processor.py 655 Bytes
Newer Older
wanglch's avatar
wanglch committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import json

jsonl_file_path = '.../data/dataset_new.jsonl'
json_file_path = '../data/dataset_new.json'
data = []
with open(jsonl_file_path, 'r', encoding='utf-8') as file:
    for line in file:
        jsonl_data = json.loads(line)
        json_data = {
            "instruction": jsonl_data.get("context").split('\n')[0].replace('Instruction: ', ''),
            "input": jsonl_data.get("context").split('\n')[1].replace('Input: ', ''),
            "output": jsonl_data.get("target")
        }
        data.append(json_data)

with open(json_file_path, 'w', encoding='utf-8') as file:
    json.dump(data, file, ensure_ascii=False, indent=4)

print(data)