Commit b327e826 authored by zhougaofeng
Browse files

Update pdf_server.py

parent 2cdfddd2
...@@ -19,6 +19,10 @@ import configparser ...@@ -19,6 +19,10 @@ import configparser
app = FastAPI() app = FastAPI()
method = 'auto' method = 'auto'
logger.add("../../parse.log", rotation="10 MB", level="INFO",
format="{time} {level} {message}", encoding='utf-8', enqueue=True)
class ocrRequest(BaseModel): class ocrRequest(BaseModel):
path: str path: str
output_dir: str output_dir: str
...@@ -57,10 +61,8 @@ def ocr_pdf_serve(args: str): ...@@ -57,10 +61,8 @@ def ocr_pdf_serve(args: str):
os.environ["CUDA_VISIBLE_DEVICES"] = args.dcu_id os.environ["CUDA_VISIBLE_DEVICES"] = args.dcu_id
config = configparser.ConfigParser() config = configparser.ConfigParser()
config.read(args.config_path) config.read(args.config_path)
pdf_server = config.get('server', 'pdf_server') host = config.get('server', 'pdf_host')
if 'http' in pdf_server: port = int(config.get('server', 'pdf_port'))
pdf_server = pdf_server.split('://')[1]
host,port = pdf_server.split(':')[0],int(pdf_server.split(':')[1])
uvicorn.run(app, host=host, port=port) uvicorn.run(app, host=host, port=port)
@app.post("/pdf_ocr") @app.post("/pdf_ocr")
...@@ -75,7 +77,8 @@ async def pdf_ocr(request: ocrRequest): ...@@ -75,7 +77,8 @@ async def pdf_ocr(request: ocrRequest):
debug_able = False debug_able = False
start_page_id = 0 start_page_id = 0
end_page_id = None end_page_id = None
logger.info(f'method:{method},path:{path},output_dir{output_dir}') logger.info(f"method: {method}, path: {path}, output_dir: {output_dir}, config_path: {request.config_path}")
def read_fn(path): def read_fn(path):
disk_rw = DiskReaderWriter(os.path.dirname(path)) disk_rw = DiskReaderWriter(os.path.dirname(path))
return disk_rw.read(os.path.basename(path), AbsReaderWriter.MODE_BIN) return disk_rw.read(os.path.basename(path), AbsReaderWriter.MODE_BIN)
...@@ -100,7 +103,13 @@ async def pdf_ocr(request: ocrRequest): ...@@ -100,7 +103,13 @@ async def pdf_ocr(request: ocrRequest):
logger.exception(e) logger.exception(e)
logger.info(f'config_path:{request.config_path}') logger.info(f'config_path:{request.config_path}')
parse_doc(path,request.config_path) try:
parse_doc(path,request.config_path)
return {"status_code": "200", "message": "PDF parsed successfully"}
except Exception as e:
logger.exception(e)
# Return error response with error details
raise HTTPException(status_code=500, detail=str(e))
def main(): def main():
args = parse_args() args = parse_args()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment