Commit 4781cd49 authored by zhougaofeng
Browse files

Update pdf_server.py

parent 68851ae0
...@@ -6,6 +6,8 @@ from loguru import logger ...@@ -6,6 +6,8 @@ from loguru import logger
from typing import List from typing import List
from fastapi import FastAPI, HTTPException, Request from fastapi import FastAPI, HTTPException, Request
import magic_pdf.model as model_config import magic_pdf.model as model_config
from magic_pdf.dict2md.ocr_client import PredictClient
# from magic_pdf.dict2md.ocr_vllm_client import PredictClient
from magic_pdf.libs.version import __version__ from magic_pdf.libs.version import __version__
from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
...@@ -23,6 +25,9 @@ method = 'auto' ...@@ -23,6 +25,9 @@ method = 'auto'
logger.add("parse.log", rotation="10 MB", level="INFO", logger.add("parse.log", rotation="10 MB", level="INFO",
format="{time} {level} {message}", encoding='utf-8', enqueue=True) format="{time} {level} {message}", encoding='utf-8', enqueue=True)
config_path = None config_path = None
ocr_status = None
class ocrRequest(BaseModel): class ocrRequest(BaseModel):
path: str path: str
output_dir: str output_dir: str
...@@ -71,6 +76,10 @@ def ocr_pdf_serve(args: str): ...@@ -71,6 +76,10 @@ def ocr_pdf_serve(args: str):
host, port = pdf_server.split(':')[0], int(pdf_server.split(':')[1]) host, port = pdf_server.split(':')[0], int(pdf_server.split(':')[1])
global config_path global config_path
config_path = args.config_path config_path = args.config_path
ocr_server = config.get('server', 'ocr_server')
ocr_client = PredictClient(ocr_server)
global ocr_status
ocr_status = ocr_client.check_health()
uvicorn.run(app, host=host, port=port) uvicorn.run(app, host=host, port=port)
@app.get("/health") @app.get("/health")
...@@ -100,6 +109,7 @@ async def pdf_ocr(request: ocrRequest): ...@@ -100,6 +109,7 @@ async def pdf_ocr(request: ocrRequest):
file_name = str(Path(doc_path).stem) file_name = str(Path(doc_path).stem)
pdf_data = read_fn(doc_path) pdf_data = read_fn(doc_path)
output_path = do_parse( output_path = do_parse(
ocr_status,
config_path, config_path,
output_dir, output_dir,
file_name, file_name,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment