Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
e36a083d
"...python/git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "de1350ea20530e0744b48d0d50415fa2ff5122cd"
Commit
e36a083d
authored
Apr 07, 2025
by
icecraft
Browse files
fix: image dataset add lang field
parent
f442adfc
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
1 deletion
+12
-1
magic_pdf/data/dataset.py
magic_pdf/data/dataset.py
+12
-1
No files found.
magic_pdf/data/dataset.py
View file @
e36a083d
...
@@ -232,7 +232,7 @@ class PymuDocDataset(Dataset):
...
@@ -232,7 +232,7 @@ class PymuDocDataset(Dataset):
self
.
_records
[
i
].
set_image
(
images
[
i
])
self
.
_records
[
i
].
set_image
(
images
[
i
])
class
ImageDataset
(
Dataset
):
class
ImageDataset
(
Dataset
):
def
__init__
(
self
,
bits
:
bytes
):
def
__init__
(
self
,
bits
:
bytes
,
lang
=
None
):
"""Initialize the dataset, which wraps the pymudoc documents.
"""Initialize the dataset, which wraps the pymudoc documents.
Args:
Args:
...
@@ -244,6 +244,17 @@ class ImageDataset(Dataset):
...
@@ -244,6 +244,17 @@ class ImageDataset(Dataset):
self
.
_raw_data
=
bits
self
.
_raw_data
=
bits
self
.
_data_bits
=
pdf_bytes
self
.
_data_bits
=
pdf_bytes
if
lang
==
''
:
self
.
_lang
=
None
elif
lang
==
'auto'
:
from
magic_pdf.model.sub_modules.language_detection.utils
import
\
auto_detect_lang
self
.
_lang
=
auto_detect_lang
(
bits
)
logger
.
info
(
f
'lang:
{
lang
}
, detect_lang:
{
self
.
_lang
}
'
)
else
:
self
.
_lang
=
lang
logger
.
info
(
f
'lang:
{
lang
}
'
)
def
__len__
(
self
)
->
int
:
def
__len__
(
self
)
->
int
:
"""The length of the dataset."""
"""The length of the dataset."""
return
len
(
self
.
_records
)
return
len
(
self
.
_records
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment