# -*- coding: utf-8 -*-
import base64
import os
from magic_pdf.tools.ofd import OFD
from loguru import logger

import configparser
from magic_pdf.parse.pdf_client import ocrPdfClient
import html
import requests




def decode_html_entities(text):
    """Return *text* with HTML entities replaced by the characters they encode."""
    decoded = html.unescape(text)
    return decoded

def json_to_txt(json_data):
    """Flatten parsed JSON data into an indented, human-readable text outline.

    Dict entries are rendered as ``key: value`` (or ``key:`` followed by the
    nested content), list entries as ``- value``, ``- Item N:`` for nested
    dicts and ``- List N:`` for nested lists (N is 1-based). Each nesting
    level adds two spaces of indentation. Anything that is neither a dict
    nor a list at the top level is rendered with ``str()``.

    Returns the outline as a single newline-joined string.
    """
    lines = []

    def descend(node, depth):
        # Only ever called with a dict or a list.
        if isinstance(node, dict):
            walk_mapping(node, depth)
        else:
            walk_sequence(node, depth)

    def walk_mapping(mapping, depth=0):
        pad = " " * depth
        for name, child in mapping.items():
            if isinstance(child, (dict, list)):
                lines.append(f"{pad}{name}:")
                descend(child, depth + 2)
            else:
                lines.append(f"{pad}{name}: {child}")

    def walk_sequence(sequence, depth=0):
        pad = " " * depth
        for position, element in enumerate(sequence, start=1):
            if isinstance(element, dict):
                lines.append(f"{pad}- Item {position}:")
                walk_mapping(element, depth + 2)
            elif isinstance(element, list):
                lines.append(f"{pad}- List {position}:")
                walk_sequence(element, depth + 2)
            else:
                lines.append(f"{pad}- {element}")

    # Dispatch on the top-level container type.
    if isinstance(json_data, (dict, list)):
        descend(json_data, 0)
    else:
        lines.append(str(json_data))

    return "\n".join(lines)


def ofd2pdf(file_path,output_dir,pdfbytes):
    """Write already-converted PDF bytes next to the other outputs of an OFD file.

    The OFD file itself is not read here: ``file_path`` is used only to
    derive the output name, and the conversion result is supplied by the
    caller through ``pdfbytes``.

    Args:
        file_path: Path of the source OFD file; its base name (without
            extension) becomes the name of the PDF.
        output_dir: Directory in which the PDF is written.
        pdfbytes: Raw bytes of the PDF to write.

    Returns:
        The path of the written file, ``<output_dir>/<base name>.pdf``.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    file_prefix = os.path.splitext(os.path.basename(file_path))[0]
    pdf_path = f"{os.path.join(output_dir, file_prefix)}.pdf"

    with open(pdf_path, "wb") as f:
        f.write(pdfbytes)
    return pdf_path

def ofd2img(file_path,output_dir):
    """Convert an OFD file to per-page JPEG files in *output_dir*.

    The file is read, base64-encoded and handed to ``OFD.read``;
    ``OFD.to_jpg`` then yields a sequence of image objects together with
    PDF bytes. Each image is saved as ``<base name>_<index>.jpg`` with a
    0-based index.

    Args:
        file_path: Path of the OFD file to convert.
        output_dir: Directory that receives the JPEG files.

    Returns:
        A tuple ``(paths, pdfbytes)``: the list of written JPEG paths and
        the PDF bytes returned by ``OFD.to_jpg``.
    """
    stem = os.path.splitext(os.path.split(file_path)[1])[0]
    target_base = os.path.join(output_dir, stem)

    with open(file_path, "rb") as source:
        encoded = base64.b64encode(source.read()).decode("utf-8")

    converter = OFD()  # OFD helper instance
    converter.read(encoded, save_xml=False, xml_name=f"{target_base}_xml")  # load the base64 payload
    pages, pdfbytes = converter.to_jpg()  # render to images
    converter.del_data()

    saved_paths = []
    for page_index, page in enumerate(pages):
        # Each page object is expected to expose ``save(path)``.
        jpg_path = f"{target_base}_{page_index}.jpg"
        page.save(jpg_path)
        saved_paths.append(jpg_path)

    return saved_paths, pdfbytes


class ocrOfdClient:
    """Minimal HTTP client for an OFD OCR service.

    The service is addressed through ``api_url`` and is expected to expose
    ``GET /health`` and ``POST /ofd_ocr``.
    """

    # Seconds to wait for the health endpoint before treating the server as
    # unavailable; without a timeout ``requests`` may block indefinitely.
    HEALTH_CHECK_TIMEOUT = 10

    def __init__(self, api_url):
        """Store the base URL of the service (used as ``{api_url}/<endpoint>``)."""
        self.api_url = api_url

    def check_health(self):
        """Return True if ``GET {api_url}/health`` answers with HTTP 200.

        Any other status code, a connection failure or a timeout is logged
        and reported as False.
        """
        health_check_url = f'{self.api_url}/health'
        try:
            response = requests.get(health_check_url, timeout=self.HEALTH_CHECK_TIMEOUT)
        except requests.exceptions.RequestException as e:
            # Timeouts are RequestException subclasses and land here as well.
            logger.error(f'ofd Health check request failed:{e}')
            return False

        if response.status_code == 200:
            logger.info("ofd Server is healthy and ready to process requests.")
            return True

        logger.error(f'ofd Server health check failed with status code:{response.status_code}')
        return False

    def parse_ofd(self,config_path,file_path,output_dir):
        """Ask the service to parse an OFD file.

        Args:
            config_path: Sent to the service as ``config_path``.
            file_path: Sent to the service as ``path``.
            output_dir: Sent to the service as ``output_dir``.

        Returns:
            The ``output_path`` field of the JSON response on HTTP 200,
            otherwise None (the failure is logged).

        Raises:
            requests.exceptions.RequestException: If the request itself
                fails (connection error etc.); this is not caught here.
        """
        # Build the request payload.
        data = {
            "path": str(file_path),
            "output_dir": str(output_dir),
            "config_path": str(config_path),
        }
        # Send the POST request. No timeout is set here; how long a parse may
        # legitimately take is not visible from this code.
        response = requests.post(f"{self.api_url}/ofd_ocr", json=data)

        # Handle the response.
        if response.status_code == 200:
            result = response.json()
            return result['output_path']

        # Error bodies are not guaranteed to be JSON (e.g. a proxy error
        # page); fall back to the raw text so logging cannot itself raise.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        logger.error(f"文件解析失败，错误信息：{detail}")
        return None
