# Convert the AdvertiseGen data files to ChatML format.
#
# Each input file is read line by line, one JSON object per non-blank line,
# using the "content" and "summary" keys. Each output file is a single JSON
# array of {"messages": [user, assistant]} objects.
import os
import shutil
import json

input_dir = "data/AdvertiseGen"
output_dir = "data/AdvertiseGenChatML"


def to_chatml_record(data):
    """Build one ChatML sample from a parsed input record.

    Args:
        data: Mapping with a "content" entry (used as the user turn) and a
            "summary" entry (used as the assistant turn).

    Returns:
        A dict of the form ``{"messages": [user_message, assistant_message]}``.

    Raises:
        KeyError: If "content" or "summary" is missing from ``data``.
    """
    return {
        "messages": [
            {
                "role": "user",
                "content": data["content"],
            },
            {
                "role": "assistant",
                "content": data["summary"],
            },
        ]
    }


def convert_file(input_path, output_path):
    """Convert one line-delimited JSON file into a ChatML JSON array file.

    Blank (whitespace-only) lines in the input are skipped. The output is
    written with ``ensure_ascii=False`` and ``indent=4``.

    Args:
        input_path: Path of the file holding one JSON object per line.
        output_path: Path of the JSON file to create or overwrite.

    Returns:
        The number of records written.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks "content" or "summary".
    """
    records = []
    # Explicit UTF-8: the text is non-ASCII, so relying on the platform's
    # default encoding breaks on systems whose locale is not UTF-8.
    with open(input_path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            records.append(to_chatml_record(json.loads(line)))

    # The output is opened only after the whole input has parsed, so a bad
    # input line does not leave a truncated output file behind.
    with open(output_path, "w", encoding="utf-8") as fo:
        json.dump(records, fo, ensure_ascii=False, indent=4)
    return len(records)


def main():
    """Recreate ``output_dir`` from scratch and convert the train/dev files."""
    # Any previous output directory is removed first so stale files never
    # survive a re-run.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    for fn in ["train.json", "dev.json"]:
        convert_file(os.path.join(input_dir, fn), os.path.join(output_dir, fn))


if __name__ == "__main__":
    main()