"""Convert line-delimited JSON files into chat-style ``messages`` JSONL files.

Each input line is parsed as one JSON object; its ``content`` and ``summary``
values become a user message and an assistant message respectively.
"""
import os
import json
import argparse


def save_to_jsonl(train_infos, save_path):
    """Write records to ``save_path`` as one JSON object per line.

    Args:
        train_infos: Iterable of JSON-serialisable records.
        save_path: Destination file path; an existing file is overwritten.
    """
    with open(save_path, 'w', encoding='utf-8') as file:
        for info in train_infos:
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            file.write(json.dumps(info, ensure_ascii=False) + '\n')


def load_json_infos(file_path, save_dir=None):
    """Convert one line-delimited JSON file and save the result as JSONL.

    Every non-blank line of ``file_path`` is parsed as a JSON object and
    turned into ``{"messages": [user, assistant]}``, where the user content
    is the object's ``content`` value and the assistant content is its
    ``summary`` value (``None`` when the key is absent).

    Args:
        file_path: Path of the input file.
        save_dir: Directory for the converted file. Defaults to a ``saves``
            directory next to ``file_path``. Created if it does not exist.

    Returns:
        The list of converted records that was written to disk.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    all_data = []
    with open(file_path, 'r', encoding='utf-8') as ofile:
        for line in ofile:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            json_info = json.loads(line)
            all_data.append({
                "messages": [
                    {"role": "user", "content": json_info.get("content")},
                    {"role": "assistant", "content": json_info.get("summary")},
                ]
            })

    if save_dir is None:
        save_dir = os.path.join(os.path.dirname(file_path), 'saves')
    # Create the output directory only after the input was read successfully,
    # so a mistyped input path does not leave empty directories behind.
    os.makedirs(save_dir, exist_ok=True)

    # Appending 'l' turns a '*.json' file name into '*.jsonl'.
    save_file_path = os.path.join(save_dir, os.path.basename(file_path) + 'l')
    save_to_jsonl(all_data, save_file_path)
    return all_data


def main(argv=None):
    """Convert ``train.json`` and ``dev.json`` found under ``--data_path``.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    # Configuration
    parse = argparse.ArgumentParser()
    parse.add_argument('--data_path', default='./data/AdvertiseGen')
    args = parse.parse_args(argv)

    # Default output directory
    save_root_path = os.path.join(args.data_path, 'saves')

    files = ['train.json', 'dev.json']
    for file in files:
        file_path = os.path.join(args.data_path, file)
        load_json_infos(file_path, save_dir=save_root_path)


if __name__ == "__main__":
    main()