Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
a0eff3be
Commit
a0eff3be
authored
Oct 28, 2024
by
liukaiwen
Browse files
feat: table model update with paddle recognition v4
parent
51f56aa3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
3 deletions
+19
-3
magic_pdf/libs/Constants.py
magic_pdf/libs/Constants.py
+6
-0
magic_pdf/model/pdf_extract_kit.py
magic_pdf/model/pdf_extract_kit.py
+13
-3
No files found.
magic_pdf/libs/Constants.py
View file @
a0eff3be
...
@@ -37,4 +37,10 @@ REC_MODEL_DIR = "ch_PP-OCRv4_rec_infer"
...
@@ -37,4 +37,10 @@ REC_MODEL_DIR = "ch_PP-OCRv4_rec_infer"
# pp rec char dict path
# pp rec char dict path
REC_CHAR_DICT
=
"ppocr_keys_v1.txt"
REC_CHAR_DICT
=
"ppocr_keys_v1.txt"
# pp rec copy rec directory
PP_REC_DIRECTORY
=
".paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer"
# pp rec copy det directory
PP_DET_DIRECTORY
=
".paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer"
magic_pdf/model/pdf_extract_kit.py
View file @
a0eff3be
from
loguru
import
logger
from
loguru
import
logger
import
os
import
os
import
time
import
time
from
pathlib
import
Path
import
shutil
from
magic_pdf.libs.Constants
import
*
from
magic_pdf.libs.Constants
import
*
from
magic_pdf.libs.clean_memory
import
clean_memory
from
magic_pdf.libs.clean_memory
import
clean_memory
from
magic_pdf.model.model_list
import
AtomicModel
from
magic_pdf.model.model_list
import
AtomicModel
...
@@ -271,6 +272,17 @@ class CustomPEKModel:
...
@@ -271,6 +272,17 @@ class CustomPEKModel:
device
=
self
.
device
device
=
self
.
device
)
)
home_directory
=
Path
.
home
()
det_source
=
os
.
path
.
join
(
models_dir
,
table_model_dir
,
DETECT_MODEL_DIR
)
rec_source
=
os
.
path
.
join
(
models_dir
,
table_model_dir
,
REC_MODEL_DIR
)
det_dest_dir
=
os
.
path
.
join
(
home_directory
,
PP_DET_DIRECTORY
)
rec_dest_dir
=
os
.
path
.
join
(
home_directory
,
PP_REC_DIRECTORY
)
if
not
os
.
path
.
exists
(
det_dest_dir
):
shutil
.
copytree
(
det_source
,
det_dest_dir
)
if
not
os
.
path
.
exists
(
rec_dest_dir
):
shutil
.
copytree
(
rec_source
,
rec_dest_dir
)
logger
.
info
(
'DocAnalysis init done!'
)
logger
.
info
(
'DocAnalysis init done!'
)
def
__call__
(
self
,
image
):
def
__call__
(
self
,
image
):
...
@@ -433,7 +445,5 @@ class CustomPEKModel:
...
@@ -433,7 +445,5 @@ class CustomPEKModel:
logger
.
info
(
f
"-----page total time:
{
round
(
time
.
time
()
-
page_start
,
2
)
}
-----"
)
logger
.
info
(
f
"-----page total time:
{
round
(
time
.
time
()
-
page_start
,
2
)
}
-----"
)
return
layout_res
return
layout_res
if
__name__
==
'__main__'
:
print
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment