# -*- coding: utf-8 -*-
import time

import requests
from loguru import logger
import argparse
import os



class ocrPdfClient:
    """Small HTTP client for the ``/pdf_ocr`` endpoint of an OCR service.

    The client POSTs a JSON payload containing the PDF path and the output
    directory and treats any status code other than 200 as a failure.
    """

    # Class-level default so instances created without running this
    # ``__init__`` (e.g. subclasses overriding it) still have a timeout value.
    timeout = None

    def __init__(self, api_url, timeout=None):
        """Store the connection settings.

        Args:
            api_url: Base URL of the service, with or without a trailing
                slash (for example ``http://host:6030``).
            timeout: Value forwarded to ``requests.post(timeout=...)``.
                ``None`` (the default) means no timeout, which is the
                behaviour this client had before the parameter existed.
        """
        self.api_url = api_url
        self.timeout = timeout

    def _endpoint(self):
        """Return the full URL of the ``pdf_ocr`` endpoint.

        Trailing slashes on the base URL are dropped so the result never
        contains ``//pdf_ocr``.
        """
        base = str(self.api_url).rstrip('/')
        return f"{base}/pdf_ocr"

    def ocr_pdf_client(self, path, output_dir):
        """Ask the service to OCR the PDF at ``path`` into ``output_dir``.

        Args:
            path: Location of the PDF; sent to the service as a string.
            output_dir: Directory for the results; sent to the service as a
                string.

        Returns:
            ``output_dir`` unchanged, when the service answers with HTTP 200.

        Raises:
            requests.exceptions.RequestException: If the HTTP request itself
                fails (propagated from ``requests.post``).
            Exception: If the service answers with a status code other
                than 200.
        """
        payload = {
            "path": str(path),
            "output_dir": str(output_dir),
        }
        logger.info(f'pdf路径:{path}，输出路径{output_dir}')
        response = requests.post(
            self._endpoint(),
            json=payload,
            timeout=self.timeout,
        )
        logger.info(f'response:{response}')

        if response.status_code != 200:
            raise Exception(f"ocrPdf API request failed with status code {response.status_code}")
        return output_dir


def parse_args(argv=None):
    """Parse the command-line options of the OCR client.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, as before.

    Returns:
        An ``argparse.Namespace`` with the attributes ``url``, ``path`` and
        ``output_dir``.
    """
    parser = argparse.ArgumentParser(
        description='Send a PDF to the OCR service.',
    )
    # NOTE(review): 0.0.0.0 is normally a bind address; whether a client can
    # connect to it is platform-dependent -- consider 127.0.0.1. Left
    # unchanged here to keep the existing default.
    parser.add_argument(
        '--url',
        default='http://0.0.0.0:6030',
        help='Base URL of the OCR service (default: %(default)s).',
    )
    parser.add_argument(
        '--path',
        '-p',
        required=True,
        help='Path of the PDF to process.',
    )
    parser.add_argument(
        '--output_dir',
        '-o',
        required=True,
        help='Directory in which the results are written.',
    )
    return parser.parse_args(argv)


def main():
    """Run one OCR request from the command line and log how long it took.

    A relative ``--output_dir`` is resolved against the current working
    directory before it is sent to the service. Request failures are logged
    rather than propagated.

    Returns:
        ``0`` when the request succeeded, ``1`` when it raised an error.
    """
    args = parse_args()

    client = ocrPdfClient(args.url)
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    started = time.perf_counter()

    output_dir = args.output_dir
    if not os.path.isabs(output_dir):
        output_dir = os.path.join(os.getcwd(), output_dir)
    logger.info(f'output_dir:{output_dir}')

    # NOTE(review): args.path is forwarded as given; unlike output_dir it is
    # not made absolute -- confirm how the service resolves relative paths.
    exit_code = 0
    try:
        res = client.ocr_pdf_client(path=args.path, output_dir=output_dir)
    except requests.exceptions.RequestException as e:
        logger.error(f"Error while making request to OCR PDF service: {e}")
        exit_code = 1
    except Exception as e:
        logger.error(f"Unexpected error occurred: {e}")
        exit_code = 1
    else:
        if res:
            logger.info(f"output_dir: '{res}'")
        else:
            logger.warning("None")

    doc_analyze_cost = time.perf_counter() - started
    logger.info(f'解析当前pdf{args.path}耗时为:{doc_analyze_cost}')
    return exit_code


if __name__ == "__main__":
    # Propagate main()'s result as the process exit status so that callers
    # (shell scripts, CI) can detect a failed request.
    raise SystemExit(main())

