Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
07abba71
Commit
07abba71
authored
Mar 13, 2024
by
赵小蒙
Browse files
draw_bbox工具类逻辑更新
parent
63969109
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
13 deletions
+18
-13
magic_pdf/libs/draw_bbox.py
magic_pdf/libs/draw_bbox.py
+18
-13
No files found.
magic_pdf/libs/draw_bbox.py
View file @
07abba71
from
magic_pdf.libs.commons
import
fitz
# PyMuPDF
def
draw_bbox
(
i
,
bbox_list
,
page
,
rgb_config
):
def
draw_bbox
_without_number
(
i
,
bbox_list
,
page
,
rgb_config
):
new_rgb
=
[]
for
item
in
rgb_config
:
item
=
float
(
item
)
/
255
...
...
@@ -12,6 +12,19 @@ def draw_bbox(i, bbox_list, page, rgb_config):
page
.
draw_rect
(
rect_coords
,
color
=
new_rgb
,
fill
=
None
,
width
=
0.5
,
overlay
=
True
)
# Draw the rectangle
def draw_bbox_with_number(i, bbox_list, page, rgb_config):
    """Outline the boxes of one page and label each with its 1-based index.

    Args:
        i: Index into ``bbox_list`` selecting the boxes for this page.
        bbox_list: Indexable collection; ``bbox_list[i]`` is an iterable of
            4-item boxes unpacked as ``(x0, y0, x1, y1)``.
        page: Object exposing ``draw_rect`` and ``insert_text``
            (presumably a PyMuPDF page -- confirm against callers).
        rgb_config: Iterable of colour channel values; each is converted
            with ``float`` and divided by 255 (callers visible in this
            file pass values such as ``[255, 0, 0]``).
    """
    # Scale every channel by 1/255 before handing the colour to the page.
    stroke_color = [float(channel) / 255 for channel in rgb_config]

    boxes_on_page = bbox_list[i]
    for number, box in enumerate(boxes_on_page, start=1):
        x0, y0, x1, y1 = box
        outline = fitz.Rect(x0, y0, x1, y1)
        # Unfilled thin outline drawn with overlay=True.
        page.draw_rect(
            outline,
            color=stroke_color,
            fill=None,
            width=0.5,
            overlay=True,
        )
        # Put the running number at the rectangle's (x0, y0) corner.
        page.insert_text(
            (x0, y0),
            str(number),
            fontsize=10,
            color=stroke_color,
        )
def
draw_layout_bbox
(
pdf_info_dict
,
input_path
,
out_path
):
layout_bbox_list
=
[]
for
page
in
pdf_info_dict
.
values
():
...
...
@@ -22,13 +35,7 @@ def draw_layout_bbox(pdf_info_dict, input_path, out_path):
doc
=
fitz
.
open
(
input_path
)
for
i
,
page
in
enumerate
(
doc
):
# 获取当前页面的数据
page_data
=
layout_bbox_list
[
i
]
for
j
,
bbox
in
enumerate
(
page_data
):
x0
,
y0
,
x1
,
y1
=
bbox
rect_coords
=
fitz
.
Rect
(
x0
,
y0
,
x1
,
y1
)
# Define the rectangle
page
.
draw_rect
(
rect_coords
,
color
=
(
1
,
0
,
0
),
fill
=
None
,
width
=
0.5
,
overlay
=
True
)
# Draw the rectangle
page
.
insert_text
((
x0
,
y0
),
str
(
j
+
1
),
fontsize
=
10
,
color
=
(
1
,
0
,
0
))
# Insert the index at the top left corner of the rectangle
draw_bbox_with_number
(
i
,
layout_bbox_list
,
page
,
[
255
,
0
,
0
])
# Save the PDF
doc
.
save
(
f
"
{
out_path
}
/layout.pdf"
)
...
...
@@ -56,11 +63,9 @@ def draw_text_bbox(pdf_info_dict, input_path, out_path):
doc
=
fitz
.
open
(
input_path
)
for
i
,
page
in
enumerate
(
doc
):
# 获取当前页面的数据
draw_bbox
(
i
,
text_list
,
page
,
[
255
,
0
,
0
])
draw_bbox
(
i
,
inline_equation_list
,
page
,
[
0
,
255
,
0
])
draw_bbox
(
i
,
displayed_equation_list
,
page
,
[
0
,
0
,
255
])
draw_bbox_without_number
(
i
,
text_list
,
page
,
[
255
,
0
,
0
])
draw_bbox_without_number
(
i
,
inline_equation_list
,
page
,
[
0
,
255
,
0
])
draw_bbox_without_number
(
i
,
displayed_equation_list
,
page
,
[
0
,
0
,
255
])
# Save the PDF
doc
.
save
(
f
"
{
out_path
}
/text.pdf"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment