Unverified Commit 3b98efbb authored by shniubobo
Browse files

feat(web_api): Return images in api

parent 635418b6
import json import json
import os import os
from base64 import b64encode
from glob import glob
from io import StringIO from io import StringIO
from typing import Tuple, Union from typing import Tuple, Union
...@@ -136,6 +138,12 @@ def process_pdf( ...@@ -136,6 +138,12 @@ def process_pdf(
return infer_result, pipe_result return infer_result, pipe_result
def encode_image(image_path: str) -> str:
    """Encode the contents of an image file as a base64 string.

    The file is opened in binary mode and read in full, so the result is
    the base64 representation of the exact bytes on disk.

    Args:
        image_path: Path of the file to read.

    Returns:
        The base64-encoded file contents as a ``str`` (no ``data:`` prefix).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as f:
        raw = f.read()
    # b64encode returns bytes; its output is pure ASCII, so decode() is safe.
    return b64encode(raw).decode()
@app.post( @app.post(
"/pdf_parse", "/pdf_parse",
tags=["projects"], tags=["projects"],
...@@ -150,6 +158,7 @@ async def pdf_parse( ...@@ -150,6 +158,7 @@ async def pdf_parse(
return_layout: bool = False, return_layout: bool = False,
return_info: bool = False, return_info: bool = False,
return_content_list: bool = False, return_content_list: bool = False,
return_images: bool = False,
): ):
""" """
Execute the process of converting PDF to JSON and MD, outputting MD and JSON files Execute the process of converting PDF to JSON and MD, outputting MD and JSON files
...@@ -243,6 +252,14 @@ async def pdf_parse( ...@@ -243,6 +252,14 @@ async def pdf_parse(
data["info"] = middle_json data["info"] = middle_json
if return_content_list: if return_content_list:
data["content_list"] = content_list data["content_list"] = content_list
if return_images:
image_paths = glob(f"{output_image_path}/*.jpg")
data["images"] = {
os.path.basename(
image_path
): f"data:image/jpeg;base64,{encode_image(image_path)}"
for image_path in image_paths
}
data["md_content"] = md_content # md_content is always returned data["md_content"] = md_content # md_content is always returned
# Clean up memory writers # Clean up memory writers
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment