import os
from pathlib import Path

import click
from loguru import logger
from typing import List
from fastapi import FastAPI, HTTPException, Request
import magic_pdf.model as model_config
from magic_pdf.libs.version import __version__
from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
from magic_pdf.tools.common import do_parse, parse_pdf_methods
from argparse import ArgumentParser
from pydantic import BaseModel
import uvicorn
import time
import configparser
#from magic_pdf.tools.config import update_config

# ASGI application object: the route decorators in this file register their
# handlers on it, and ocr_pdf_serve() hands it to uvicorn.
app = FastAPI()
# Parsing method that the /pdf_ocr handler passes to do_parse(); 'auto' is the
# module-level default.
method = 'auto'

# Add a file sink so log records are also written to parse.log (UTF-8). The
# rotation argument asks loguru to roll the file at 10 MB; enqueue=True routes
# records through a queue before they reach the sink.
logger.add("parse.log", rotation="10 MB", level="INFO",
           format="{time} {level} {message}", encoding='utf-8', enqueue=True)
# Path of the config file forwarded to do_parse(). Stays None until
# ocr_pdf_serve() stores the value taken from the command line.
config_path = None
# Request body accepted by POST /pdf_ocr.
class ocrRequest(BaseModel):
    # Location of the PDF to parse; the handler reads it from the server's
    # local disk (directory and file name are split with os.path).
    path: str
    # Directory that receives the parse results; created if it does not exist.
    output_dir: str

# Shape of a successful /pdf_ocr reply.
# NOTE(review): nothing visible in this file references this model; the
# handler returns a plain dict with the same two keys.
class ocrResponse(BaseModel):
    # Status value placed in the response body (the handler sends 200).
    status_code: int
    # Value returned by do_parse() for the parsed document.
    output_path: str


def parse_args():
    """Parse the command-line options of the OCR service.

    Options:
        --dcu_id: device id string exported as CUDA_VISIBLE_DEVICES (default '0').
        --method: PDF parsing method, validated by ``parse_pdf_methods``
            (default 'auto').
        --debug: boolean flag value such as ``True``/``False`` (default False).
        --config_path: path of the INI file holding the server settings.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    # Imported locally because the module only imports ArgumentParser.
    from argparse import ArgumentTypeError

    def str_to_bool(value):
        # ``type=bool`` would turn every non-empty string -- including
        # "False" -- into True, so the text is interpreted explicitly.
        text = str(value).strip().lower()
        if text in ('1', 'true', 't', 'yes', 'y', 'on'):
            return True
        if text in ('0', 'false', 'f', 'no', 'n', 'off', ''):
            return False
        raise ArgumentTypeError(f'expected a boolean value, got {value!r}')

    parser = ArgumentParser()
    parser.add_argument(
        '--dcu_id',
        default='0',
        help='设置DCU')
    parser.add_argument(
        '--method',
        type=parse_pdf_methods,
        help = """the method for parsing pdf.
        ocr: using ocr technique to extract information from pdf.
        txt: suitable for the text-based pdf only and outperform ocr.
        auto: automatically choose the best method for parsing pdf from ocr and txt.
        without method specified, auto will be used by default.""",
        default = 'auto',
        )
    parser.add_argument(
        '--debug',
        type=str_to_bool,
        help='Enables detailed debugging information during the execution of the CLI commands.',
        default=False,
    )
    parser.add_argument(
        '--config_path',
        default='/home/practice/magic_pdf-main/magic_pdf/config.ini')

    return parser.parse_args()

def ocr_pdf_serve(args):
    """Configure the process from the parsed CLI options and run the server.

    Args:
        args: namespace returned by ``parse_args()``; ``dcu_id`` and
            ``config_path`` are required, ``method`` is applied when present.

    Raises:
        FileNotFoundError: the config file could not be read.
        configparser.Error: the ``[server]`` section or one of its
            ``pdf_host`` / ``pdf_port`` options is missing.
        ValueError: ``pdf_port`` is not an integer.

    The call blocks inside ``uvicorn.run`` until the server stops.
    """
    global config_path, method

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.dcu_id)

    config = configparser.ConfigParser()
    # ConfigParser.read() skips unreadable files silently and returns the list
    # of files it did load; fail here with a clear message instead of a
    # NoSectionError further down.
    if not config.read(args.config_path):
        raise FileNotFoundError(
            f'config file not found or unreadable: {args.config_path}')
    host = config.get('server', 'pdf_host')
    port = config.getint('server', 'pdf_port')

    # Publish the settings that the /pdf_ocr handler reads from module globals.
    config_path = args.config_path
    # Without this the --method option would be parsed but never used.
    chosen_method = getattr(args, 'method', None)
    if chosen_method:
        method = chosen_method

    uvicorn.run(app, host=host, port=port)

@app.get("/health")
async def health_check():
    # Liveness probe: replies with a fixed payload and does no other work.
    payload = {"status": "healthy"}
    return payload

@app.post("/pdf_ocr")
async def pdf_ocr(request: ocrRequest):
    """Parse one PDF from local disk and report where the result was written.

    Args:
        request: body carrying ``path`` (the PDF to read) and ``output_dir``
            (created if missing and passed to ``do_parse``).

    Returns:
        ``{"status_code": 200, "output_path": <value returned by do_parse>}``.

    Raises:
        HTTPException: status 500 when the output directory cannot be created,
            the file cannot be read, parsing raises, or ``do_parse`` returns a
            falsy value. The underlying exception is written to the log.
    """
    # NOTE(review): do_parse() is called synchronously inside this coroutine,
    # so the event loop (and /health) is blocked while it runs. Moving it to a
    # worker thread needs confirmation that the models tolerate concurrent use.
    model_config.__use_inside_model__ = True
    model_config.__model_mode__ = 'full'
    output_dir = request.output_dir
    path = request.path
    debug_able = False
    start_page_id = 0
    end_page_id = None
    logger.info(f"method: {method}, path: {path}, output_dir: {output_dir}, config_path: {config_path}")

    output_path = None
    try:
        os.makedirs(output_dir, exist_ok=True)
        # Read the raw PDF bytes through the project's disk reader.
        disk_rw = DiskReaderWriter(os.path.dirname(path))
        pdf_data = disk_rw.read(os.path.basename(path), AbsReaderWriter.MODE_BIN)
        output_path = do_parse(
            config_path,
            output_dir,
            Path(path).stem,
            pdf_data,
            [],
            method,
            debug_able,
            start_page_id=start_page_id,
            end_page_id=end_page_id,
        )
    except Exception as e:
        # Boundary handler: keep the traceback in the log and answer with a
        # 500 below rather than letting the exception escape.
        logger.exception(e)

    if not output_path:
        logger.error(f'文件解析失败，文件为：{path}')
        raise HTTPException(status_code=500, detail=f'failed to parse pdf: {path}')

    logger.info(f'文件解析成功：{output_path}')
    return {"status_code": 200, "output_path": output_path}

def main():
    """Script entry point: read the CLI options, then start the OCR service."""
    ocr_pdf_serve(parse_args())



# Start the service only when the file is executed as a script, so importing
# the module (for example to reuse `app`) does not launch the server.
if __name__ == '__main__':
    main()





