Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
14f45075
"git@developer.sourcefind.cn:OpenDAS/autoawq.git" did not exist on "98d874d1c4a16e7654b6f9ba1da685bb6e617229"
Commit
14f45075
authored
Jul 10, 2024
by
myhloli
Browse files
small fix
parent
831db2e0
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
4 deletions
+3
-4
magic_pdf/model/doc_analyze_by_custom_model.py
magic_pdf/model/doc_analyze_by_custom_model.py
+2
-3
magic_pdf/model/pdf_extract_kit.py
magic_pdf/model/pdf_extract_kit.py
+1
-1
No files found.
magic_pdf/model/doc_analyze_by_custom_model.py
View file @
14f45075
...
@@ -44,9 +44,8 @@ def load_images_from_pdf(pdf_bytes: bytes, dpi=200) -> list:
...
@@ -44,9 +44,8 @@ def load_images_from_pdf(pdf_bytes: bytes, dpi=200) -> list:
return
images
return
images
def
doc_analyze
(
pdf_bytes
:
bytes
,
ocr
:
bool
=
False
,
show_log
:
bool
=
False
,
model
=
MODEL
.
Paddle
,
def
doc_analyze
(
pdf_bytes
:
bytes
,
ocr
:
bool
=
False
,
show_log
:
bool
=
False
,
model
=
MODEL
.
PEK
,
model_type
=
MODEL_TYPE
.
SINGLE_PAGE
):
model_type
=
MODEL_TYPE
.
MULTI_PAGE
):
custom_model
=
None
if
model_config
.
__use_inside_model__
:
if
model_config
.
__use_inside_model__
:
if
model
==
MODEL
.
Paddle
:
if
model
==
MODEL
.
Paddle
:
from
magic_pdf.model.pp_structure_v2
import
CustomPaddleModel
from
magic_pdf.model.pp_structure_v2
import
CustomPaddleModel
...
...
magic_pdf/model/pdf_extract_kit.py
View file @
14f45075
...
@@ -152,8 +152,8 @@ class CustomPEKModel:
...
@@ -152,8 +152,8 @@ class CustomPEKModel:
b
=
time
.
time
()
b
=
time
.
time
()
logger
.
info
(
f
"formula nums:
{
len
(
mf_image_list
)
}
, mfr time:
{
round
(
b
-
a
,
2
)
}
"
)
logger
.
info
(
f
"formula nums:
{
len
(
mf_image_list
)
}
, mfr time:
{
round
(
b
-
a
,
2
)
}
"
)
# ocr识别
if
self
.
apply_ocr
:
if
self
.
apply_ocr
:
# ocr识别
for
idx
,
img_dict
in
enumerate
(
images
):
for
idx
,
img_dict
in
enumerate
(
images
):
image
=
img_dict
[
"img"
]
image
=
img_dict
[
"img"
]
pil_img
=
Image
.
fromarray
(
image
)
pil_img
=
Image
.
fromarray
(
image
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment