Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhougaofeng
magic_pdf
Commits
c2ad4c75
"examples/vscode:/vscode.git/clone" did not exist on "a83cc0c0bc2c5f4bbb55beb0132de03e222dd199"
Commit
c2ad4c75
authored
Oct 25, 2024
by
zhougaofeng
Browse files
Update pdf_extract_kit.py
parent
0135861f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
5 deletions
+9
-5
magic_pdf/model/pdf_extract_kit.py
magic_pdf/model/pdf_extract_kit.py
+9
-5
No files found.
magic_pdf/model/pdf_extract_kit.py
View file @
c2ad4c75
...
@@ -247,7 +247,7 @@ class CustomPEKModel:
...
@@ -247,7 +247,7 @@ class CustomPEKModel:
logger
.
info
(
'DocAnalysis init done!'
)
logger
.
info
(
'DocAnalysis init done!'
)
def
__call__
(
self
,
image
):
def
__call__
(
self
,
image
,
index
,
end_page_id
):
latex_filling_list
=
[]
latex_filling_list
=
[]
mf_image_list
=
[]
mf_image_list
=
[]
...
@@ -256,8 +256,8 @@ class CustomPEKModel:
...
@@ -256,8 +256,8 @@ class CustomPEKModel:
layout_start
=
time
.
time
()
layout_start
=
time
.
time
()
layout_res
=
self
.
layout_model
(
image
,
ignore_catids
=
[])
layout_res
=
self
.
layout_model
(
image
,
ignore_catids
=
[])
layout_cost
=
round
(
time
.
time
()
-
layout_start
,
2
)
layout_cost
=
round
(
time
.
time
()
-
layout_start
,
2
)
logger
.
info
(
f
"layout detection cost:
{
layout_cost
}
"
)
#
logger.info(f"layout detection cost: {layout_cost}")
total_cost
=
layout_cost
if
self
.
apply_formula
:
if
self
.
apply_formula
:
# 公式检测
# 公式检测
mfd_res
=
self
.
mfd_model
.
predict
(
image
,
imgsz
=
1888
,
conf
=
0.25
,
iou
=
0.45
,
verbose
=
True
)[
0
]
mfd_res
=
self
.
mfd_model
.
predict
(
image
,
imgsz
=
1888
,
conf
=
0.25
,
iou
=
0.45
,
verbose
=
True
)[
0
]
...
@@ -286,7 +286,7 @@ class CustomPEKModel:
...
@@ -286,7 +286,7 @@ class CustomPEKModel:
for
res
,
latex
in
zip
(
latex_filling_list
,
mfr_res
):
for
res
,
latex
in
zip
(
latex_filling_list
,
mfr_res
):
res
[
'latex'
]
=
latex_rm_whitespace
(
latex
)
res
[
'latex'
]
=
latex_rm_whitespace
(
latex
)
mfr_cost
=
round
(
time
.
time
()
-
mfr_start
,
2
)
mfr_cost
=
round
(
time
.
time
()
-
mfr_start
,
2
)
logger
.
info
(
f
"formula nums:
{
len
(
mf_image_list
)
}
, mfr time:
{
mfr_cost
}
"
)
#
logger.info(f"formula nums: {len(mf_image_list)}, mfr time: {mfr_cost}")
# Select regions for OCR / formula regions / table regions
# Select regions for OCR / formula regions / table regions
ocr_res_list
=
[]
ocr_res_list
=
[]
...
@@ -369,7 +369,11 @@ class CustomPEKModel:
...
@@ -369,7 +369,11 @@ class CustomPEKModel:
})
})
ocr_cost
=
round
(
time
.
time
()
-
ocr_start
,
2
)
ocr_cost
=
round
(
time
.
time
()
-
ocr_start
,
2
)
logger
.
info
(
f
"ocr cost:
{
ocr_cost
}
"
)
# logger.info(f"ocr cost: {ocr_cost}")
total_cost
=
total_cost
+
ocr_cost
index
=
index
+
1
end_page_id
=
end_page_id
+
1
logger
.
info
(
f
'当前解析第【
{
index
}
/
{
end_page_id
}
】页, 耗时:
{
total_cost
}
'
)
#logger.info(f'是否表格识别:{self.apply_table}')
#logger.info(f'是否表格识别:{self.apply_table}')
# 表格识别 table recognition
# 表格识别 table recognition
if
self
.
apply_table
:
if
self
.
apply_table
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment