Update pdf_server.py

ab8e538f · zhougaofeng · b2bb218c · ab8e538f
Commit ab8e538f authored Nov 13, 2024 by zhougaofeng
Show whitespace changes
Inline Side-by-side

Showing with 72 additions and 3 deletions

magic_pdf/tools/pdf_server.py magic_pdf/tools/pdf_server.py +72 -3

No files found.
--- a/magic_pdf/tools/pdf_server.py
+++ b/magic_pdf/tools/pdf_server.py
@@ -6,8 +6,6 @@ from loguru import logger
 from typing import List
 from fastapi import FastAPI, HTTPException, Request
 import magic_pdf.model as model_config
-# from magic_pdf.dict2md.ocr_client import PredictClient
-from magic_pdf.dict2md.ocr_vllm_client import PredictClient
 from magic_pdf.libs.version import __version__
 from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
 from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
@@ -18,7 +16,12 @@ from pydantic import BaseModel
 import uvicorn
 import time
 import configparser
-#from magic_pdf.tools.config import update_config
+from magic_pdf.dict2md.ocr_vllm_client import PredictClient,compress_image
+# from magic_pdf.dict2md.ocr_client import PredictClient,compress_image
+from magic_pdf.parse.pdf_client import ocrPdfClient
+from magic_pdf.parse.ofd_parse import *
+
+

 app = FastAPI()
 method = 'auto'
@@ -33,6 +36,7 @@ custom_model = None
 class ocrRequest(BaseModel):
    path: str
    output_dir: str
+    config_path: str  # INI file whose [server] section supplies ocr_server / pdf_server to /ofd_ocr; no default, so it is required wherever ocrRequest is the request body

 class ocrResponse(BaseModel):
    status_code: int
@@ -68,6 +72,10 @@ def parse_args():
    args = parser.parse_args()
    return args

+
+
+
+
 def ocr_pdf_serve(args: str):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.dcu_id
    config = configparser.ConfigParser()
@@ -148,6 +156,67 @@ async def pdf_ocr(request: ocrRequest):
        logger.error(f'文件解析失败，文件为：{path}')
        raise HTTPException(status_code=500)

+
+@app.post("/ofd_ocr")
+async def ofd_ocr(request: ocrRequest):
+    """Extract the text of an OFD file and return the path of the result.
+
+    Each page image is sent to the OCR model with an invoice prompt. Invoice
+    pages are collected into ``<output_dir>/<file stem>.txt``. As soon as the
+    model reports a non-invoice page, the file is converted with ``ofd2pdf``
+    and handed to the PDF OCR client instead, whose result is returned.
+
+    Raises HTTPException(500) when no output is produced or any step fails.
+    """
+    # NOTE(review): nothing below is awaited, so this coroutine occupies the
+    # event loop for the whole request - confirm that is acceptable.
+    try:
+        # Both service endpoints come from the INI file named in the request.
+        config = configparser.ConfigParser()
+        config.read(request.config_path)
+        url = config.get('server', 'ocr_server')
+        pdf_server = config.get('server', 'pdf_server')
+        client = PredictClient(url)
+        # Not called pdf_ocr: that name would shadow the /pdf_ocr handler.
+        pdf_client = ocrPdfClient(pdf_server)
+
+        os.makedirs(request.output_dir, exist_ok=True)
+        ofd_imgs, pdfbytes = ofd2img(request.path, request.output_dir)
+        prompt = '识别图片的内容，如果是发票就执行以下操作识别图中的文字信息，并以json格式返回，如果不是发票返回False'
+
+        page_txts = []  # invoice text, one entry per recognized page
+        ofd_txt = ''  # path of the final output; stays empty on failure
+        for ofd_img in ofd_imgs:
+            compress_image(ofd_img)
+            res = client.predict(ofd_img, prompt)
+            # The prompt tells the model to answer False for non-invoices.
+            if 'False' in res or 'false' in res:
+                ofd_pdf = ofd2pdf(request.path, request.output_dir, pdfbytes)
+                ofd_txt = pdf_client.ocr_pdf_client(path=ofd_pdf, output_dir=request.output_dir)
+                break
+            page_txts.append(json_to_txt(decode_html_entities(res)))
+
+        # Keep a successful PDF fallback as the result instead of replacing it
+        # with the text of only the pages recognized before the fallback.
+        if page_txts and not ofd_txt:
+            file_name = Path(request.path).stem
+            ofd_txt = os.path.join(request.output_dir, f"{file_name}.txt")
+            with open(ofd_txt, 'w', encoding='utf-8') as f:
+                f.writelines(page + '\n' for page in page_txts)
+
+        if not ofd_txt:
+            logger.error(f'文件解析失败，文件为：{request.path}')
+            raise HTTPException(status_code=500, detail="文件解析失败")
+        logger.info(f'文件解析成功：{ofd_txt}')
+        return {"status_code": 200, "output_path": ofd_txt}
+    except HTTPException:
+        # Already logged above; re-raise so its detail is not replaced below.
+        raise
+    except Exception as e:
+        logger.exception(f"处理文件 {request.path} 时发生错误: {e}")
+        raise HTTPException(status_code=500, detail="处理文件时发生错误") from e
+
+
 def main():
    args = parse_args()
    ocr_pdf_serve(args)