Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
a55c47f1
Commit
a55c47f1
authored
Jul 04, 2025
by
myhloli
Browse files
refactor: add GZip middleware and refactor get_infer_result function in fast_api.py
parent
275e662e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
18 deletions
+17
-18
mineru/cli/fast_api.py
mineru/cli/fast_api.py
+17
-18
No files found.
mineru/cli/fast_api.py
View file @
a55c47f1
import
uuid
import
uuid
import
os
import
os
from
base64
import
b64encode
import
uvicorn
import
uvicorn
import
argparse
import
argparse
from
pathlib
import
Path
from
pathlib
import
Path
from
glob
import
glob
from
glob
import
glob
from
fastapi
import
FastAPI
,
UploadFile
,
File
,
Form
from
fastapi
import
FastAPI
,
UploadFile
,
File
,
Form
from
fastapi.middleware.gzip
import
GZipMiddleware
from
fastapi.responses
import
JSONResponse
from
fastapi.responses
import
JSONResponse
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
from
loguru
import
logger
from
loguru
import
logger
from
base64
import
b64encode
from
mineru.cli.common
import
aio_do_parse
,
read_fn
from
mineru.cli.common
import
aio_do_parse
,
read_fn
from
mineru.version
import
__version__
from
mineru.version
import
__version__
app
=
FastAPI
()
app
=
FastAPI
()
app
.
add_middleware
(
GZipMiddleware
,
minimum_size
=
1000
)
def
encode_image
(
image_path
:
str
)
->
str
:
def
encode_image
(
image_path
:
str
)
->
str
:
"""Encode image using base64"""
"""Encode image using base64"""
...
@@ -24,6 +23,15 @@ def encode_image(image_path: str) -> str:
...
@@ -24,6 +23,15 @@ def encode_image(image_path: str) -> str:
return
b64encode
(
f
.
read
()).
decode
()
return
b64encode
(
f
.
read
()).
decode
()
def
get_infer_result
(
file_suffix_identifier
:
str
,
pdf_name
:
str
,
parse_dir
:
str
)
->
Optional
[
str
]:
"""从结果文件中读取推理结果"""
result_file_path
=
os
.
path
.
join
(
parse_dir
,
f
"
{
pdf_name
}{
file_suffix_identifier
}
"
)
if
os
.
path
.
exists
(
result_file_path
):
with
open
(
result_file_path
,
"r"
,
encoding
=
"utf-8"
)
as
fp
:
return
fp
.
read
()
return
None
@
app
.
post
(
path
=
"/file_parse"
,)
@
app
.
post
(
path
=
"/file_parse"
,)
async
def
parse_pdf
(
async
def
parse_pdf
(
files
:
List
[
UploadFile
]
=
File
(...),
files
:
List
[
UploadFile
]
=
File
(...),
...
@@ -118,27 +126,18 @@ async def parse_pdf(
...
@@ -118,27 +126,18 @@ async def parse_pdf(
else
:
else
:
parse_dir
=
os
.
path
.
join
(
unique_dir
,
pdf_name
,
"vlm"
)
parse_dir
=
os
.
path
.
join
(
unique_dir
,
pdf_name
,
"vlm"
)
def
get_infer_result
(
file_suffix_identifier
:
str
):
"""从结果文件中读取推理结果"""
result_file_path
=
os
.
path
.
join
(
parse_dir
,
f
"
{
pdf_name
}{
file_suffix_identifier
}
"
)
if
os
.
path
.
exists
(
result_file_path
):
with
open
(
result_file_path
,
"r"
,
encoding
=
"utf-8"
)
as
fp
:
return
fp
.
read
()
return
None
if
os
.
path
.
exists
(
parse_dir
):
if
os
.
path
.
exists
(
parse_dir
):
if
return_md
:
if
return_md
:
data
[
"md_content"
]
=
get_infer_result
(
".md"
)
data
[
"md_content"
]
=
get_infer_result
(
".md"
,
pdf_name
,
parse_dir
)
if
return_middle_json
:
if
return_middle_json
:
data
[
"middle_json"
]
=
get_infer_result
(
"_middle.json"
)
data
[
"middle_json"
]
=
get_infer_result
(
"_middle.json"
,
pdf_name
,
parse_dir
)
if
return_model_output
:
if
return_model_output
:
if
backend
.
startswith
(
"pipeline"
):
if
backend
.
startswith
(
"pipeline"
):
data
[
"model_output"
]
=
get_infer_result
(
"_model.json"
)
data
[
"model_output"
]
=
get_infer_result
(
"_model.json"
,
pdf_name
,
parse_dir
)
else
:
else
:
data
[
"model_output"
]
=
get_infer_result
(
"_model_output.txt"
)
data
[
"model_output"
]
=
get_infer_result
(
"_model_output.txt"
,
pdf_name
,
parse_dir
)
if
return_content_list
:
if
return_content_list
:
data
[
"content_list"
]
=
get_infer_result
(
"_content_list.json"
)
data
[
"content_list"
]
=
get_infer_result
(
"_content_list.json"
,
pdf_name
,
parse_dir
)
if
return_images
:
if
return_images
:
image_paths
=
glob
(
f
"
{
parse_dir
}
/images/*.jpg"
)
image_paths
=
glob
(
f
"
{
parse_dir
}
/images/*.jpg"
)
data
[
"images"
]
=
{
data
[
"images"
]
=
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment