# -*- coding: utf-8 -*-
import time

import requests
from loguru import logger
import argparse
import os
from pdf_client import ocrPdfClient
from excel_parse import ExcelParser


def parse_args():
    """Parse the command-line options of this script.

    Options:
        --url: string option, defaults to ``http://0.0.0.0:6030``.
        --path / -p: required string option.
        --output_dir / -o: required string option.

    Returns:
        The ``argparse.Namespace`` with ``url``, ``path`` and ``output_dir``.
    """
    # Each entry is (flag names, keyword options) for one add_argument call.
    option_specs = (
        (('--url',), {'default': 'http://0.0.0.0:6030'}),
        (('--path', '-p'), {'required': True}),
        (('--output_dir', '-o'), {'required': True}),
    )

    cli = argparse.ArgumentParser()
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)
    return cli.parse_args()


# Lower-cased filename suffixes routed to each parser.
_PDF_SUFFIXES = ('.pdf',)
_EXCEL_SUFFIXES = ('.xls', '.xlsx')


def _process_file(doc_path, output_dir, pdf_ocr, excel_ocr):
    """Parse one document with the parser matching its suffix and log the outcome.

    Args:
        doc_path: Path of the file to parse.
        output_dir: Directory passed through to the parser.
        pdf_ocr: Object exposing ``ocr_pdf_client(path=..., output_dir=...)``.
        excel_ocr: Object exposing ``parse(path, output_dir)``.

    Errors raised by the parsers are logged rather than propagated, so one
    failing file does not stop a directory run.
    """
    # Compare case-insensitively so that e.g. "REPORT.PDF" is not skipped.
    lowered = doc_path.lower()
    is_pdf = lowered.endswith(_PDF_SUFFIXES)
    if not is_pdf and not lowered.endswith(_EXCEL_SUFFIXES):
        logger.warning(f"Skipping unsupported file type: {doc_path}")
        return

    logger.info(f'正在解析：{doc_path}')
    try:
        if is_pdf:
            res = pdf_ocr.ocr_pdf_client(path=doc_path, output_dir=output_dir)
        else:
            res = excel_ocr.parse(doc_path, output_dir)
    except requests.exceptions.RequestException as e:
        logger.error(f"Error while making request to OCR service: {e}")
        return
    except Exception as e:
        # logger.exception keeps the traceback, which a bare error line would lose.
        logger.exception(f"Unexpected error occurred: {e}")
        return

    # The parsers' return value is logged as the output file path
    # (presumably the generated file; confirm against the parser classes).
    if res:
        logger.info(f"输出文件的的路径为: '{res}'")
    else:
        logger.warning(f"No output produced for: {doc_path}")


def main():
    """Entry point: parse every supported file under ``--path`` into ``--output_dir``.

    ``--path`` may name a single file or a directory; a directory is walked
    recursively. A relative ``--output_dir`` is resolved against the current
    working directory.
    """
    args = parse_args()
    pdf_ocr = ocrPdfClient(args.url)
    excel_ocr = ExcelParser()

    if os.path.isabs(args.output_dir):
        output_dir = args.output_dir
    else:
        output_dir = os.path.join(os.getcwd(), args.output_dir)

    # Normalise Windows-style separators so the path handling below is uniform.
    input_path = args.path.replace('\\', '/')
    logger.info(f'输入目录或文件的路径为:{input_path}')
    logger.info(f'output_dir:{output_dir}')

    if not os.path.exists(input_path):
        logger.error(f"Input path does not exist: {input_path}")
        return

    if os.path.isdir(input_path):
        for root, _dirs, files in os.walk(input_path):
            for file in files:
                _process_file(os.path.join(root, file), output_dir, pdf_ocr, excel_ocr)
    else:
        _process_file(input_path, output_dir, pdf_ocr, excel_ocr)


# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


