Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
a1df670e
"tools/python/vscode:/vscode.git/clone" did not exist on "9be82fdc378e884aaccadf654cb7dc1589b9140f"
Unverified
Commit
a1df670e
authored
Apr 14, 2025
by
Xiaomeng Zhao
Committed by
GitHub
Apr 14, 2025
Browse files
Merge pull request #2225 from opendatalab/release-1.3.3
Release 1.3.3
parents
47d287a2
a67de492
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
5 additions
and
5 deletions
+5
-5
magic_pdf/model/doc_analyze_by_custom_model.py
magic_pdf/model/doc_analyze_by_custom_model.py
+1
-1
magic_pdf/pre_proc/ocr_detect_all_bboxes.py
magic_pdf/pre_proc/ocr_detect_all_bboxes.py
+2
-2
setup.py
setup.py
+2
-2
No files found.
magic_pdf/model/doc_analyze_by_custom_model.py
View file @
a1df670e
...
@@ -147,7 +147,7 @@ def doc_analyze(
...
@@ -147,7 +147,7 @@ def doc_analyze(
images
.
append
(
img_dict
[
'img'
])
images
.
append
(
img_dict
[
'img'
])
page_wh_list
.
append
((
img_dict
[
'width'
],
img_dict
[
'height'
]))
page_wh_list
.
append
((
img_dict
[
'width'
],
img_dict
[
'height'
]))
images_with_extra_info
=
[(
images
[
index
],
ocr
,
dataset
.
_lang
)
for
index
in
range
(
len
(
dataset
))]
images_with_extra_info
=
[(
images
[
index
],
ocr
,
dataset
.
_lang
)
for
index
in
range
(
len
(
images
))]
if
len
(
images
)
>=
MIN_BATCH_INFERENCE_SIZE
:
if
len
(
images
)
>=
MIN_BATCH_INFERENCE_SIZE
:
batch_size
=
MIN_BATCH_INFERENCE_SIZE
batch_size
=
MIN_BATCH_INFERENCE_SIZE
...
...
magic_pdf/pre_proc/ocr_detect_all_bboxes.py
View file @
a1df670e
...
@@ -99,11 +99,11 @@ def ocr_prepare_bboxes_for_layout_split_v2(
...
@@ -99,11 +99,11 @@ def ocr_prepare_bboxes_for_layout_split_v2(
all_discarded_blocks
=
[]
all_discarded_blocks
=
[]
add_bboxes
(
discarded_blocks
,
BlockType
.
Discarded
,
all_discarded_blocks
)
add_bboxes
(
discarded_blocks
,
BlockType
.
Discarded
,
all_discarded_blocks
)
"""footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半
5
0%区域的"""
"""footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半
3
0%区域的"""
footnote_blocks
=
[]
footnote_blocks
=
[]
for
discarded
in
discarded_blocks
:
for
discarded
in
discarded_blocks
:
x0
,
y0
,
x1
,
y1
=
discarded
[
'bbox'
]
x0
,
y0
,
x1
,
y1
=
discarded
[
'bbox'
]
if
(
x1
-
x0
)
>
(
page_w
/
3
)
and
(
y1
-
y0
)
>
10
and
y0
>
(
page_h
/
2
):
if
(
x1
-
x0
)
>
(
page_w
/
3
)
and
(
y1
-
y0
)
>
10
and
y0
>
(
page_h
*
0.7
):
footnote_blocks
.
append
([
x0
,
y0
,
x1
,
y1
])
footnote_blocks
.
append
([
x0
,
y0
,
x1
,
y1
])
"""移除在footnote下面的任何框"""
"""移除在footnote下面的任何框"""
...
...
setup.py
View file @
a1df670e
...
@@ -43,7 +43,7 @@ if __name__ == '__main__':
...
@@ -43,7 +43,7 @@ if __name__ == '__main__':
"matplotlib>=3.10,<4"
,
"matplotlib>=3.10,<4"
,
"ultralytics>=8.3.48,<9"
,
# yolov8,公式检测
"ultralytics>=8.3.48,<9"
,
# yolov8,公式检测
"doclayout_yolo==0.0.2b1"
,
# doclayout_yolo
"doclayout_yolo==0.0.2b1"
,
# doclayout_yolo
"dill>=0.3.
9
,<1"
,
# doclayout_yolo
"dill>=0.3.
8
,<1"
,
# doclayout_yolo
"rapid_table>=1.0.5,<2.0.0"
,
# rapid_table
"rapid_table>=1.0.5,<2.0.0"
,
# rapid_table
"PyYAML>=6.0.2,<7"
,
# yaml
"PyYAML>=6.0.2,<7"
,
# yaml
"ftfy>=6.3.1,<7"
,
# unimernet_hf
"ftfy>=6.3.1,<7"
,
# unimernet_hf
...
@@ -56,7 +56,7 @@ if __name__ == '__main__':
...
@@ -56,7 +56,7 @@ if __name__ == '__main__':
"matplotlib>=3.10,<=3.10.1"
,
"matplotlib>=3.10,<=3.10.1"
,
"ultralytics>=8.3.48,<=8.3.104"
,
# yolov8,公式检测
"ultralytics>=8.3.48,<=8.3.104"
,
# yolov8,公式检测
"doclayout_yolo==0.0.2b1"
,
# doclayout_yolo
"doclayout_yolo==0.0.2b1"
,
# doclayout_yolo
"dill==0.3.
9
"
,
# doclayout_yolo
"dill==0.3.
8
"
,
# doclayout_yolo
"PyYAML==6.0.2"
,
# yaml
"PyYAML==6.0.2"
,
# yaml
"ftfy==6.3.1"
,
# unimernet_hf
"ftfy==6.3.1"
,
# unimernet_hf
"openai==1.71.0"
,
# openai SDK
"openai==1.71.0"
,
# openai SDK
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment