Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
037a3ae6
Unverified
Commit
037a3ae6
authored
Jun 23, 2025
by
Xiaomeng Zhao
Committed by
GitHub
Jun 23, 2025
Browse files
Merge pull request #2763 from herryqg/master
encapsulate prediction parsing logic in DocLayoutYOLOModel
parents
af7dee49
4c52a05b
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
110 additions
and
77 deletions
+110
-77
mineru/model/layout/doclayout_yolo.py
mineru/model/layout/doclayout_yolo.py
+58
-51
mineru/model/mfd/yolo_v8.py
mineru/model/mfd/yolo_v8.py
+44
-26
signatures/version1/cla.json
signatures/version1/cla.json
+8
-0
No files found.
mineru/model/layout/doclayout_yolo.py
View file @
037a3ae6
from
typing
import
List
,
Dict
,
Union
from
doclayout_yolo
import
YOLOv10
from
doclayout_yolo
import
YOLOv10
from
tqdm
import
tqdm
from
tqdm
import
tqdm
import
numpy
as
np
from
PIL
import
Image
class
DocLayoutYOLOModel
(
object
):
class
DocLayoutYOLOModel
:
def
__init__
(
self
,
weight
,
device
):
def
__init__
(
self
.
model
=
YOLOv10
(
weight
)
self
,
weight
:
str
,
device
:
str
=
"cuda"
,
imgsz
:
int
=
1280
,
conf
:
float
=
0.1
,
iou
:
float
=
0.45
,
):
self
.
model
=
YOLOv10
(
weight
).
to
(
device
)
self
.
device
=
device
self
.
device
=
device
self
.
imgsz
=
imgsz
self
.
conf
=
conf
self
.
iou
=
iou
def
predict
(
self
,
image
)
:
def
_parse_
predict
ion
(
self
,
prediction
)
->
List
[
Dict
]
:
layout_res
=
[]
layout_res
=
[]
doclayout_yolo_res
=
self
.
model
.
predict
(
image
,
# 容错处理
imgsz
=
1280
,
if
not
hasattr
(
prediction
,
"boxes"
)
or
prediction
.
boxes
is
None
:
conf
=
0.10
,
return
layout_res
iou
=
0.45
,
verbose
=
False
,
device
=
self
.
device
for
xyxy
,
conf
,
cls
in
zip
(
)[
0
]
prediction
.
boxes
.
xyxy
.
cpu
(),
for
xyxy
,
conf
,
cla
in
zip
(
prediction
.
boxes
.
conf
.
cpu
(),
doclayout_yolo_res
.
boxes
.
xyxy
.
cpu
(),
prediction
.
boxes
.
cls
.
cpu
(),
doclayout_yolo_res
.
boxes
.
conf
.
cpu
(),
doclayout_yolo_res
.
boxes
.
cls
.
cpu
(),
):
):
xmin
,
ymin
,
xmax
,
ymax
=
[
int
(
p
.
item
())
for
p
in
xyxy
]
coords
=
list
(
map
(
int
,
xyxy
.
tolist
()))
new_item
=
{
xmin
,
ymin
,
xmax
,
ymax
=
coords
"category_id"
:
int
(
cla
.
item
()),
layout_res
.
append
({
"category_id"
:
int
(
cls
.
item
()),
"poly"
:
[
xmin
,
ymin
,
xmax
,
ymin
,
xmax
,
ymax
,
xmin
,
ymax
],
"poly"
:
[
xmin
,
ymin
,
xmax
,
ymin
,
xmax
,
ymax
,
xmin
,
ymax
],
"score"
:
round
(
float
(
conf
.
item
()),
3
),
"score"
:
round
(
float
(
conf
.
item
()),
3
),
}
})
layout_res
.
append
(
new_item
)
return
layout_res
return
layout_res
def
batch_predict
(
self
,
images
:
list
,
batch_size
:
int
)
->
list
:
def
predict
(
self
,
image
:
Union
[
np
.
ndarray
,
Image
.
Image
])
->
List
[
Dict
]:
images_layout_res
=
[]
prediction
=
self
.
model
.
predict
(
# for index in range(0, len(images), batch_size):
image
,
for
index
in
tqdm
(
range
(
0
,
len
(
images
),
batch_size
),
desc
=
"Layout Predict"
):
imgsz
=
self
.
imgsz
,
doclayout_yolo_res
=
[
conf
=
self
.
conf
,
image_res
.
cpu
()
iou
=
self
.
iou
,
for
image_res
in
self
.
model
.
predict
(
verbose
=
False
images
[
index
:
index
+
batch_size
],
)[
0
]
imgsz
=
1280
,
return
self
.
_parse_prediction
(
prediction
)
conf
=
0.10
,
iou
=
0.45
,
def
batch_predict
(
self
,
images
:
List
[
Union
[
np
.
ndarray
,
Image
.
Image
]],
batch_size
:
int
=
4
)
->
List
[
List
[
Dict
]]:
results
=
[]
for
idx
in
tqdm
(
range
(
0
,
len
(
images
),
batch_size
),
desc
=
"Layout Predict"
):
batch
=
images
[
idx
:
idx
+
batch_size
]
predictions
=
self
.
model
.
predict
(
batch
,
imgsz
=
self
.
imgsz
,
conf
=
self
.
conf
,
iou
=
self
.
iou
,
verbose
=
False
,
verbose
=
False
,
device
=
self
.
device
,
)
)
]
for
pred
in
predictions
:
for
image_res
in
doclayout_yolo_res
:
results
.
append
(
self
.
_parse_prediction
(
pred
))
layout_res
=
[]
return
results
for
xyxy
,
conf
,
cla
in
zip
(
\ No newline at end of file
image_res
.
boxes
.
xyxy
,
image_res
.
boxes
.
conf
,
image_res
.
boxes
.
cls
,
):
xmin
,
ymin
,
xmax
,
ymax
=
[
int
(
p
.
item
())
for
p
in
xyxy
]
new_item
=
{
"category_id"
:
int
(
cla
.
item
()),
"poly"
:
[
xmin
,
ymin
,
xmax
,
ymin
,
xmax
,
ymax
,
xmin
,
ymax
],
"score"
:
round
(
float
(
conf
.
item
()),
3
),
}
layout_res
.
append
(
new_item
)
images_layout_res
.
append
(
layout_res
)
return
images_layout_res
mineru/model/mfd/yolo_v8.py
View file @
037a3ae6
from
typing
import
List
,
Union
from
tqdm
import
tqdm
from
tqdm
import
tqdm
from
ultralytics
import
YOLO
from
ultralytics
import
YOLO
import
numpy
as
np
from
PIL
import
Image
class
YOLOv8MFDModel
(
object
):
class
YOLOv8MFDModel
:
def
__init__
(
self
,
weight
,
device
=
"cpu"
):
def
__init__
(
self
.
mfd_model
=
YOLO
(
weight
)
self
,
weight
:
str
,
device
:
str
=
"cpu"
,
imgsz
:
int
=
1888
,
conf
:
float
=
0.25
,
iou
:
float
=
0.45
,
):
self
.
model
=
YOLO
(
weight
).
to
(
device
)
self
.
device
=
device
self
.
device
=
device
self
.
imgsz
=
imgsz
self
.
conf
=
conf
self
.
iou
=
iou
def
predict
(
self
,
image
):
def
_run_predict
(
mfd_res
=
self
.
mfd_model
.
predict
(
self
,
image
,
imgsz
=
1888
,
conf
=
0.25
,
iou
=
0.45
,
verbose
=
False
,
device
=
self
.
device
inputs
:
Union
[
np
.
ndarray
,
Image
.
Image
,
List
],
)[
0
]
is_batch
:
bool
=
False
return
mfd_res
)
->
List
:
preds
=
self
.
model
.
predict
(
def
batch_predict
(
self
,
images
:
list
,
batch_size
:
int
)
->
list
:
inputs
,
images_mfd_res
=
[]
imgsz
=
self
.
imgsz
,
# for index in range(0, len(images), batch_size):
conf
=
self
.
conf
,
for
index
in
tqdm
(
range
(
0
,
len
(
images
),
batch_size
),
desc
=
"MFD Predict"
):
iou
=
self
.
iou
,
mfd_res
=
[
image_res
.
cpu
()
for
image_res
in
self
.
mfd_model
.
predict
(
images
[
index
:
index
+
batch_size
],
imgsz
=
1888
,
conf
=
0.25
,
iou
=
0.45
,
verbose
=
False
,
verbose
=
False
,
device
=
self
.
device
,
device
=
self
.
device
)
)
]
return
[
pred
.
cpu
()
for
pred
in
preds
]
if
is_batch
else
preds
[
0
].
cpu
()
for
image_res
in
mfd_res
:
images_mfd_res
.
append
(
image_res
)
def
predict
(
self
,
image
:
Union
[
np
.
ndarray
,
Image
.
Image
]):
return
images_mfd_res
return
self
.
_run_predict
(
image
)
def
batch_predict
(
self
,
images
:
List
[
Union
[
np
.
ndarray
,
Image
.
Image
]],
batch_size
:
int
=
4
)
->
List
:
results
=
[]
for
idx
in
tqdm
(
range
(
0
,
len
(
images
),
batch_size
),
desc
=
"MFD Predict"
):
batch
=
images
[
idx
:
idx
+
batch_size
]
batch_preds
=
self
.
_run_predict
(
batch
,
is_batch
=
True
)
results
.
extend
(
batch_preds
)
return
results
\ No newline at end of file
signatures/version1/cla.json
View file @
037a3ae6
...
@@ -343,6 +343,14 @@
...
@@ -343,6 +343,14 @@
"created_at"
:
"2025-06-18T11:27:23Z"
,
"created_at"
:
"2025-06-18T11:27:23Z"
,
"repoId"
:
765083837
,
"repoId"
:
765083837
,
"pullRequestNo"
:
2727
"pullRequestNo"
:
2727
},
{
"name"
:
"QIN2DIM"
,
"id"
:
62018067
,
"comment_id"
:
2992279796
,
"created_at"
:
"2025-06-20T17:04:59Z"
,
"repoId"
:
765083837
,
"pullRequestNo"
:
2758
}
}
]
]
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment