Commit 0e8f989e authored by zhougaofeng
Browse files

Update common_parse.py

parent 922eeb6e
...@@ -37,14 +37,15 @@ def process_file(file_path, pdf_ocr, excel_ocr, output_dir): ...@@ -37,14 +37,15 @@ def process_file(file_path, pdf_ocr, excel_ocr, output_dir):
"""Process a single file for OCR based on its extension.""" """Process a single file for OCR based on its extension."""
try: try:
res = '' res = ''
start = time.time()
if file_path.endswith('.pdf'): if file_path.endswith('.pdf'):
res = pdf_ocr.ocr_pdf_client(path=file_path, output_dir=output_dir) res = pdf_ocr.ocr_pdf_client(path=file_path, output_dir=output_dir)
elif file_path.endswith('.xls') or file_path.endswith('.xlsx'): elif file_path.endswith('.xls') or file_path.endswith('.xlsx'):
res = excel_ocr.parse(file_path, output_dir) res = excel_ocr.parse(file_path, output_dir)
end = time.time()
if res: if res:
logger.info(f"文件处理成功,输出文件路径为: '{res}'") logger.info(f"文件处理成功,输出文件路径为: '{res}', 耗时为:{end-start}")
else: else:
logger.warning(f"文件处理结果为空: '{file_path}'") logger.warning(f"文件处理结果为空: '{file_path}'")
except requests.exceptions.RequestException as req_err: except requests.exceptions.RequestException as req_err:
...@@ -83,6 +84,13 @@ def main(): ...@@ -83,6 +84,13 @@ def main():
config.read(args.config_path) config.read(args.config_path)
pdf_server = config.get('server', 'pdf_server') pdf_server = config.get('server', 'pdf_server')
pdf_ocr = ocrPdfClient(pdf_server) pdf_ocr = ocrPdfClient(pdf_server)
status = pdf_ocr.check_health()
if not status:
pdf_ocr = None
logger.warning(f'Health check failed. The server at "{pdf_server}" is not responding as expected.')
logger.info(f'文件解析服务无法正常运行')
return None
else:
excel_ocr = ExcelParser() excel_ocr = ExcelParser()
logger.info(f'输入目录或文件的路径为: {input_path}') logger.info(f'输入目录或文件的路径为: {input_path}')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment