Unverified commit 8aac6107, authored by Xiaomeng Zhao, committed by GitHub
Browse files

Merge pull request #2879 from myhloli/dev

refactor: add GZip middleware and refactor get_infer_result function …
parents 275e662e a55c47f1
import uuid import uuid
import os import os
from base64 import b64encode
import uvicorn import uvicorn
import argparse import argparse
from pathlib import Path from pathlib import Path
from glob import glob from glob import glob
from fastapi import FastAPI, UploadFile, File, Form from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from typing import List, Optional from typing import List, Optional
from loguru import logger from loguru import logger
from base64 import b64encode
from mineru.cli.common import aio_do_parse, read_fn from mineru.cli.common import aio_do_parse, read_fn
from mineru.version import __version__ from mineru.version import __version__
app = FastAPI() app = FastAPI()
app.add_middleware(GZipMiddleware, minimum_size=1000)
def encode_image(image_path: str) -> str: def encode_image(image_path: str) -> str:
"""Encode image using base64""" """Encode image using base64"""
...@@ -24,6 +23,15 @@ def encode_image(image_path: str) -> str: ...@@ -24,6 +23,15 @@ def encode_image(image_path: str) -> str:
return b64encode(f.read()).decode() return b64encode(f.read()).decode()
def get_infer_result(file_suffix_identifier: str, pdf_name: str, parse_dir: str) -> Optional[str]:
    """Read one inference result file and return its text.

    The file is looked up at ``<parse_dir>/<pdf_name><file_suffix_identifier>``.

    Args:
        file_suffix_identifier: Suffix appended directly to ``pdf_name``,
            e.g. ``".md"`` or ``"_middle.json"``.
        pdf_name: Base name of the result file (without the suffix).
        parse_dir: Directory that holds the result files.

    Returns:
        The file content decoded as UTF-8, or ``None`` when the file
        does not exist.
    """
    result_file_path = os.path.join(parse_dir, f"{pdf_name}{file_suffix_identifier}")
    # Open directly rather than testing os.path.exists() first: a separate
    # existence check can go stale if the file is removed before open() runs.
    try:
        with open(result_file_path, "r", encoding="utf-8") as fp:
            return fp.read()
    except (FileNotFoundError, NotADirectoryError):
        # Missing file (or a missing/non-directory path component) means
        # "no result available", which callers receive as None.
        return None
@app.post(path="/file_parse",) @app.post(path="/file_parse",)
async def parse_pdf( async def parse_pdf(
files: List[UploadFile] = File(...), files: List[UploadFile] = File(...),
...@@ -118,27 +126,18 @@ async def parse_pdf( ...@@ -118,27 +126,18 @@ async def parse_pdf(
else: else:
parse_dir = os.path.join(unique_dir, pdf_name, "vlm") parse_dir = os.path.join(unique_dir, pdf_name, "vlm")
def get_infer_result(file_suffix_identifier: str):
"""从结果文件中读取推理结果"""
result_file_path = os.path.join(parse_dir, f"{pdf_name}{file_suffix_identifier}")
if os.path.exists(result_file_path):
with open(result_file_path, "r", encoding="utf-8") as fp:
return fp.read()
return None
if os.path.exists(parse_dir): if os.path.exists(parse_dir):
if return_md: if return_md:
data["md_content"] = get_infer_result(".md") data["md_content"] = get_infer_result(".md", pdf_name, parse_dir)
if return_middle_json: if return_middle_json:
data["middle_json"] = get_infer_result("_middle.json") data["middle_json"] = get_infer_result("_middle.json", pdf_name, parse_dir)
if return_model_output: if return_model_output:
if backend.startswith("pipeline"): if backend.startswith("pipeline"):
data["model_output"] = get_infer_result("_model.json") data["model_output"] = get_infer_result("_model.json", pdf_name, parse_dir)
else: else:
data["model_output"] = get_infer_result("_model_output.txt") data["model_output"] = get_infer_result("_model_output.txt", pdf_name, parse_dir)
if return_content_list: if return_content_list:
data["content_list"] = get_infer_result("_content_list.json") data["content_list"] = get_infer_result("_content_list.json", pdf_name, parse_dir)
if return_images: if return_images:
image_paths = glob(f"{parse_dir}/images/*.jpg") image_paths = glob(f"{parse_dir}/images/*.jpg")
data["images"] = { data["images"] = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment