Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
3b98efbb
Unverified
Commit
3b98efbb
authored
Feb 14, 2025
by
shniubobo
Browse files
feat(web_api): Return images in api
parent
635418b6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
0 deletions
+17
-0
projects/web_api/app.py
projects/web_api/app.py
+17
-0
No files found.
projects/web_api/app.py
View file @
3b98efbb
import
json
import
os
from
base64
import
b64encode
from
glob
import
glob
from
io
import
StringIO
from
typing
import
Tuple
,
Union
...
...
@@ -136,6 +138,12 @@ def process_pdf(
return
infer_result
,
pipe_result
def encode_image(image_path: str) -> str:
    """Encode the contents of an image file as a base64 string.

    The file is opened in binary mode, read in full, base64-encoded and the
    resulting bytes are decoded to ``str`` (base64 output is plain ASCII).

    Args:
        image_path: Path of the file to read.

    Returns:
        The base64 text of the file's bytes; an empty string for an empty file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Read inside the context manager so the handle is closed before encoding.
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    return b64encode(raw_bytes).decode()
@
app
.
post
(
"/pdf_parse"
,
tags
=
[
"projects"
],
...
...
@@ -150,6 +158,7 @@ async def pdf_parse(
return_layout
:
bool
=
False
,
return_info
:
bool
=
False
,
return_content_list
:
bool
=
False
,
return_images
:
bool
=
False
,
):
"""
Execute the process of converting PDF to JSON and MD, outputting MD and JSON files
...
...
@@ -243,6 +252,14 @@ async def pdf_parse(
data
[
"info"
]
=
middle_json
if
return_content_list
:
data
[
"content_list"
]
=
content_list
if
return_images
:
image_paths
=
glob
(
f
"
{
output_image_path
}
/*.jpg"
)
data
[
"images"
]
=
{
os
.
path
.
basename
(
image_path
):
f
"data:image/jpeg;base64,
{
encode_image
(
image_path
)
}
"
for
image_path
in
image_paths
}
data
[
"md_content"
]
=
md_content
# md_content is always returned
# Clean up memory writers
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment