import json
import re

def is_function_call(text):
    """
    Tell whether ``text`` has the shape of a bracketed function-call list:
    [Func1(arg="val"), Func2(...)]

    The stripped text must start with "[" and end with "]", contain at least
    one ``name(args)`` group whose args hold no parentheses, and leave nothing
    but commas and spaces once every such group has been removed.

    Returns:
        True when all of the above hold, False otherwise.
    """
    candidate = text.strip()
    if not candidate.startswith('[') or not candidate.endswith(']'):
        return False

    body = candidate[1:-1].strip()
    call_re = re.compile(r'([^\(\)]+)\(([^()]*)\)')
    if call_re.search(body) is None:
        # Not a single "name(args)" group inside the brackets.
        return False

    # Whatever survives the removal of all calls may only be separators.
    leftover = call_re.sub('', body)
    return all(ch in (',', ' ') for ch in leftover)

def parse_function_call_list(text):
    """
    Parse a bracketed function-call string into a list of call dictionaries.

    Example:
    [SEC Filings(identifier="AAPL"), United States Away from Home Mobility API(string="2025-05-17")]
    becomes:
    [
      {"name": "SEC Filings", "arguments": {"identifier": "AAPL"}},
      {"name": "United States Away from Home Mobility API", "arguments": {"string": "2025-05-17"}}
    ]

    Args:
        text: The call list as a string. After stripping whitespace, the
            first and last characters (normally "[" and "]") are dropped
            without being checked.

    Returns:
        A list of {"name": str, "arguments": dict} entries in the order the
        calls appear. Argument values are kept as strings; one pair of
        matching surrounding single or double quotes is removed. Argument
        fragments that contain no "=" are ignored.
    """
    inner = text.strip()[1:-1].strip()
    # DOTALL lets an argument list span several lines; without it such a call
    # would not match at all and would silently vanish from the result.
    call_pattern = re.compile(r'([^\(\)]+)\((.*?)\)', re.DOTALL)
    leading_separators = re.compile(r'^[\s,]+')

    functions = []
    for raw_name, args_str in call_pattern.findall(inner):
        # The name group also captures the ", " separator that follows the
        # previous call, so commas must be removed along with whitespace.
        func_name = leading_separators.sub('', raw_name).rstrip()

        args = {}
        if args_str.strip():
            # Supports several arguments, e.g. key="value", key2="value2".
            # Split only on commas that are followed by another `key=`, so
            # commas inside a value are left alone.
            parts = re.split(r',\s*(?=\w+=)', args_str)
            for part in parts:
                key_val = part.split('=', 1)
                if len(key_val) == 2:
                    key = key_val[0].strip()
                    val = key_val[1].strip()
                    if (val.startswith('"') and val.endswith('"')) or (val.startswith("'") and val.endswith("'")):
                        val = val[1:-1]
                    args[key] = val

        functions.append({
            "name": func_name,
            "arguments": args
        })

    return functions

def convert_conversation(conversations):
    """
    Re-label the messages of one conversation.

    Role mapping applied to each message's "from" field:
      - "user"      -> "human"
      - "assistant" -> "function_call" when the value looks like a call list
                       (value becomes the JSON-encoded parsed calls),
                       otherwise "gpt" with the value unchanged
      - "tool"      -> "observation"
    Messages with any other role are left out of the result.

    Args:
        conversations: Iterable of dict-like messages with "from" and "value".

    Returns:
        A new list of {"from": ..., "value": ...} dictionaries.
    """
    output = []

    for turn in conversations:
        speaker = turn.get("from", "")
        content = turn.get("value", "")

        if speaker == "assistant":
            # Decide whether this assistant turn is a function call.
            if is_function_call(content):
                calls = parse_function_call_list(content)
                target_role = "function_call"
                content = json.dumps(calls, ensure_ascii=False)
            else:
                target_role = "gpt"
        elif speaker == "user":
            target_role = "human"
        elif speaker == "tool":
            target_role = "observation"
        else:
            # Unrecognized roles are not carried over.
            continue

        output.append({"from": target_role, "value": content})

    return output

def transform_data(data):
    """
    Convert a whole dataset: keep each item's "system" text and convert its
    "conversations" list with ``convert_conversation``.

    Args:
        data: Iterable of dict-like items. A missing "system" defaults to ""
            and a missing "conversations" defaults to an empty list.

    Returns:
        A list of {"system": ..., "conversations": ...} dictionaries, one per
        input item, in the same order.
    """
    return [
        {
            "system": entry.get("system", ""),
            "conversations": convert_conversation(entry.get("conversations", [])),
        }
        for entry in data
    ]

def main(input_file, output_file):
    """
    Read a JSON dataset from ``input_file``, convert it with
    ``transform_data`` and write the result to ``output_file``.

    Both files are handled as UTF-8. The output is written with a two-space
    indent and non-ASCII characters are left unescaped.
    """
    with open(input_file, "r", encoding="utf-8") as source:
        records = json.loads(source.read())

    converted = transform_data(records)

    # The output file is opened only after the conversion has succeeded.
    with open(output_file, "w", encoding="utf-8") as sink:
        json.dump(converted, sink, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    from argparse import ArgumentParser

    # Command-line entry point. Both paths are mandatory: without
    # required=True a missing option would reach open() as None and fail
    # with an opaque TypeError instead of a usage message.
    parser = ArgumentParser(
        description="Convert a conversation dataset to human/gpt/function_call/observation format."
    )
    parser.add_argument(
        "--input_file",
        type=str,
        required=True,
        help="Path of the JSON file to read.",
    )
    parser.add_argument(
        "--output_file",
        type=str,
        required=True,
        help="Path of the JSON file to write.",
    )

    args = parser.parse_args()

    main(args.input_file, args.output_file)


