Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
bedefd8d
Unverified
Commit
bedefd8d
authored
Oct 28, 2024
by
Xiaomeng Zhao
Committed by
GitHub
Oct 28, 2024
Browse files
Merge pull request #797 from icecraft/feat/new_table_caption_match
Feat/new table caption match
parents
d68b3d90
f09148b9
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
14 deletions
+19
-14
magic_pdf/model/magic_model.py
magic_pdf/model/magic_model.py
+19
-14
No files found.
magic_pdf/model/magic_model.py
View file @
bedefd8d
...
@@ -594,7 +594,7 @@ class MagicModel:
...
@@ -594,7 +594,7 @@ class MagicModel:
self
,
page_no
,
subject_category_id
,
object_category_id
self
,
page_no
,
subject_category_id
,
object_category_id
):
):
AXIS_MULPLICITY
=
3
AXIS_MULPLICITY
=
0.5
subjects
=
self
.
__reduct_overlap
(
subjects
=
self
.
__reduct_overlap
(
list
(
list
(
map
(
map
(
...
@@ -640,10 +640,10 @@ class MagicModel:
...
@@ -640,10 +640,10 @@ class MagicModel:
axis_unit
=
min
(
l_x_axis
,
l_y_axis
)
axis_unit
=
min
(
l_x_axis
,
l_y_axis
)
for
j
,
sub
in
enumerate
(
subjects
):
for
j
,
sub
in
enumerate
(
subjects
):
bbox1
,
bbox2
,
_
=
_remove_overlap_between_bbox
(
objects
[
i
][
'bbox'
],
subjects
[
j
][
'bbox'
])
bbox1
,
bbox2
,
_
=
_remove_overlap_between_bbox
(
left
,
right
,
bottom
,
top
=
bbox_relative_pos
(
objects
[
i
][
'bbox'
],
subjects
[
j
][
'bbox'
]
bbox1
,
bbox2
)
)
left
,
right
,
bottom
,
top
=
bbox_relative_pos
(
bbox1
,
bbox2
)
flags
=
[
left
,
right
,
bottom
,
top
]
flags
=
[
left
,
right
,
bottom
,
top
]
if
sum
([
1
if
v
else
0
for
v
in
flags
])
>
1
:
if
sum
([
1
if
v
else
0
for
v
in
flags
])
>
1
:
continue
continue
...
@@ -680,7 +680,6 @@ class MagicModel:
...
@@ -680,7 +680,6 @@ class MagicModel:
j
,
j
,
bbox_distance
(
obj
[
'bbox'
],
sub
[
'bbox'
]),
bbox_distance
(
obj
[
'bbox'
],
sub
[
'bbox'
]),
]
]
if
dis_by_directions
[
'left'
][
i
][
1
]
!=
float
(
'inf'
)
or
dis_by_directions
[
if
dis_by_directions
[
'left'
][
i
][
1
]
!=
float
(
'inf'
)
or
dis_by_directions
[
'right'
'right'
][
i
][
1
]
!=
float
(
'inf'
):
][
i
][
1
]
!=
float
(
'inf'
):
...
@@ -701,15 +700,18 @@ class MagicModel:
...
@@ -701,15 +700,18 @@ class MagicModel:
left_sub_bbox_y_axis
=
left_sub_bbox
[
3
]
-
left_sub_bbox
[
1
]
left_sub_bbox_y_axis
=
left_sub_bbox
[
3
]
-
left_sub_bbox
[
1
]
right_sub_bbox_y_axis
=
right_sub_bbox
[
3
]
-
right_sub_bbox
[
1
]
right_sub_bbox_y_axis
=
right_sub_bbox
[
3
]
-
right_sub_bbox
[
1
]
if
abs
(
left_sub_bbox_y_axis
-
l_y_axis
)
>
abs
(
if
(
right_sub_bbox_y_axis
-
l_y_axis
abs
(
left_sub_bbox_y_axis
-
l_y_axis
)
+
dis_by_directions
[
'left'
][
i
][
0
]
>
abs
(
right_sub_bbox_y_axis
-
l_y_axis
)
+
dis_by_directions
[
'right'
][
i
][
0
]
):
):
left_or_right
=
dis_by_directions
[
'right'
][
i
]
left_or_right
=
dis_by_directions
[
'right'
][
i
]
else
:
else
:
left_or_right
=
dis_by_directions
[
'left'
][
i
]
left_or_right
=
dis_by_directions
[
'left'
][
i
]
else
:
else
:
left_or_right
=
dis_by_directions
[
'left'
][
i
]
left_or_right
=
dis_by_directions
[
'left'
][
i
]
if
left_or_right
[
1
]
==
float
(
'inf'
)
:
if
left_or_right
[
1
]
>
dis_by_directions
[
'right'
][
i
][
1
]
:
left_or_right
=
dis_by_directions
[
'right'
][
i
]
left_or_right
=
dis_by_directions
[
'right'
][
i
]
else
:
else
:
left_or_right
=
dis_by_directions
[
'left'
][
i
]
left_or_right
=
dis_by_directions
[
'left'
][
i
]
...
@@ -733,15 +735,15 @@ class MagicModel:
...
@@ -733,15 +735,15 @@ class MagicModel:
top_bottom_x_axis
=
top_bottom
[
2
]
-
top_bottom
[
0
]
top_bottom_x_axis
=
top_bottom
[
2
]
-
top_bottom
[
0
]
bottom_top_x_axis
=
bottom_top
[
2
]
-
bottom_top
[
0
]
bottom_top_x_axis
=
bottom_top
[
2
]
-
bottom_top
[
0
]
if
abs
(
top_bottom_x_axis
-
l_x_axis
)
>
abs
(
if
abs
(
top_bottom_x_axis
-
l_x_axis
)
+
dis_by_directions
[
'bottom'
][
i
][
1
]
>
abs
(
bottom_top_x_axis
-
l_x_axis
bottom_top_x_axis
-
l_x_axis
):
)
+
dis_by_directions
[
'top'
][
i
][
1
]:
top_or_bottom
=
dis_by_directions
[
'bottom'
][
i
]
else
:
top_or_bottom
=
dis_by_directions
[
'top'
][
i
]
top_or_bottom
=
dis_by_directions
[
'top'
][
i
]
else
:
top_or_bottom
=
dis_by_directions
[
'bottom'
][
i
]
else
:
else
:
top_or_bottom
=
dis_by_directions
[
'top'
][
i
]
top_or_bottom
=
dis_by_directions
[
'top'
][
i
]
if
top_or_bottom
[
1
]
==
float
(
'inf'
)
:
if
top_or_bottom
[
1
]
>
dis_by_directions
[
'bottom'
][
i
][
1
]
:
top_or_bottom
=
dis_by_directions
[
'bottom'
][
i
]
top_or_bottom
=
dis_by_directions
[
'bottom'
][
i
]
else
:
else
:
top_or_bottom
=
dis_by_directions
[
'top'
][
i
]
top_or_bottom
=
dis_by_directions
[
'top'
][
i
]
...
@@ -782,7 +784,10 @@ class MagicModel:
...
@@ -782,7 +784,10 @@ class MagicModel:
for
i
in
sub_obj_map_h
.
keys
():
for
i
in
sub_obj_map_h
.
keys
():
ret
.
append
(
ret
.
append
(
{
{
'sub_bbox'
:
{
'bbox'
:
subjects
[
i
][
'bbox'
],
'score'
:
subjects
[
i
][
'score'
]},
'sub_bbox'
:
{
'bbox'
:
subjects
[
i
][
'bbox'
],
'score'
:
subjects
[
i
][
'score'
],
},
'obj_bboxes'
:
[
'obj_bboxes'
:
[
{
'score'
:
objects
[
j
][
'score'
],
'bbox'
:
objects
[
j
][
'bbox'
]}
{
'score'
:
objects
[
j
][
'score'
],
'bbox'
:
objects
[
j
][
'bbox'
]}
for
j
in
sub_obj_map_h
[
i
]
for
j
in
sub_obj_map_h
[
i
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment