Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
2c1f06b3
Unverified
Commit
2c1f06b3
authored
Jan 09, 2025
by
Xiaomeng Zhao
Committed by
GitHub
Jan 09, 2025
Browse files
Merge pull request #1459 from myhloli/dev
Dev
parents
aa535316
a80ff051
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
11 additions
and
11 deletions
+11
-11
magic_pdf/model/sub_modules/language_detection/utils.py
magic_pdf/model/sub_modules/language_detection/utils.py
+4
-8
magic_pdf/pdf_parse_union_core_v2.py
magic_pdf/pdf_parse_union_core_v2.py
+5
-0
magic_pdf/resources/model_config/model_configs.yaml
magic_pdf/resources/model_config/model_configs.yaml
+1
-2
magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt
magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt
+0
-0
projects/gradio_app/examples/complex_layout.pdf
projects/gradio_app/examples/complex_layout.pdf
+0
-0
setup.py
setup.py
+1
-1
No files found.
magic_pdf/model/sub_modules/language_detection/utils.py
View file @
2c1f06b3
...
...
@@ -24,11 +24,11 @@ def get_model_config():
config_path
=
os
.
path
.
join
(
model_config_dir
,
'model_configs.yaml'
)
with
open
(
config_path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
configs
=
yaml
.
load
(
f
,
Loader
=
yaml
.
FullLoader
)
return
local_models_dir
,
device
,
configs
return
root_dir
,
local_models_dir
,
device
,
configs
def
get_text_images
(
simple_images
):
local_models_dir
,
device
,
configs
=
get_model_config
()
_
,
local_models_dir
,
device
,
configs
=
get_model_config
()
atom_model_manager
=
AtomModelSingleton
()
temp_layout_model
=
atom_model_manager
.
get_atom_model
(
atom_model_name
=
AtomicModel
.
Layout
,
...
...
@@ -69,15 +69,11 @@ def model_init(model_name: str):
atom_model_manager
=
AtomModelSingleton
()
if
model_name
==
MODEL_NAME
.
YOLO_V11_LangDetect
:
local_models
_dir
,
device
,
configs
=
get_model_config
()
root
_dir
,
_
,
device
,
_
=
get_model_config
()
model
=
atom_model_manager
.
get_atom_model
(
atom_model_name
=
AtomicModel
.
LangDetect
,
langdetect_model_name
=
MODEL_NAME
.
YOLO_V11_LangDetect
,
langdetect_model_weight
=
str
(
os
.
path
.
join
(
local_models_dir
,
configs
[
'weights'
][
MODEL_NAME
.
YOLO_V11_LangDetect
]
)
),
langdetect_model_weight
=
str
(
os
.
path
.
join
(
root_dir
,
'resources'
,
'yolov11-langdetect'
,
'yolo_v11_ft.pt'
)),
device
=
device
,
)
else
:
...
...
magic_pdf/pdf_parse_union_core_v2.py
View file @
2c1f06b3
...
...
@@ -768,6 +768,11 @@ def parse_page_core(
"""重排block"""
sorted_blocks
=
sorted
(
fix_blocks
,
key
=
lambda
b
:
b
[
'index'
])
"""block内重排(img和table的block内多个caption或footnote的排序)"""
for
block
in
sorted_blocks
:
if
block
[
'type'
]
in
[
BlockType
.
Image
,
BlockType
.
Table
]:
block
[
'blocks'
]
=
sorted
(
block
[
'blocks'
],
key
=
lambda
b
:
b
[
'index'
])
"""获取QA需要外置的list"""
images
,
tables
,
interline_equations
=
get_qa_need_list_v2
(
sorted_blocks
)
...
...
magic_pdf/resources/model_config/model_configs.yaml
View file @
2c1f06b3
...
...
@@ -5,5 +5,4 @@ weights:
unimernet_small
:
MFR/unimernet_small
struct_eqtable
:
TabRec/StructEqTable
tablemaster
:
TabRec/TableMaster
rapid_table
:
TabRec/RapidTable
yolo_v11n_langdetect
:
LangDetect/YOLO/yolo_v11_ft.pt
\ No newline at end of file
rapid_table
:
TabRec/RapidTable
\ No newline at end of file
magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt
0 → 100644
View file @
2c1f06b3
File added
projects/gradio_app/examples/complex_layout.pdf
100755 → 100644
View file @
2c1f06b3
No preview for this file type
setup.py
View file @
2c1f06b3
...
...
@@ -51,7 +51,7 @@ if __name__ == '__main__':
"doclayout_yolo==0.0.2"
,
# doclayout_yolo
"rapidocr-paddle"
,
# rapidocr-paddle
"rapidocr_onnxruntime"
,
"rapid_table"
,
# rapid_table
"rapid_table
==0.3.0
"
,
# rapid_table
"PyYAML"
,
# yaml
"openai"
,
# openai SDK
"detectron2"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment