Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
8fb6794b
Unverified
Commit
8fb6794b
authored
Apr 17, 2025
by
Xiaomeng Zhao
Committed by
GitHub
Apr 17, 2025
Browse files
Merge pull request #2265 from opendatalab/release-1.3.5
Release 1.3.5
parents
a2b07bfd
af53a463
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
24 deletions
+16
-24
docker/ascend_npu/Dockerfile
docker/ascend_npu/Dockerfile
+1
-1
magic_pdf/utils/office_to_pdf.py
magic_pdf/utils/office_to_pdf.py
+13
-22
tests/unittest/test_table/test_rapidtable.py
tests/unittest/test_table/test_rapidtable.py
+2
-1
No files found.
docker/ascend_npu/Dockerfile
View file @
8fb6794b
...
...
@@ -36,7 +36,7 @@ RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/m
source /opt/mineru_venv/bin/activate &&
\
pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple &&
\
pip3 install torch==2.3.1 torchvision==0.18.1 -i https://mirrors.aliyun.com/pypi/simple &&
\
pip3 install -U magic-pdf[full] -i https://mirrors.aliyun.com/pypi/simple &&
\
pip3 install -U magic-pdf[full]
'numpy<2' decorator attrs absl-py cloudpickle ml-dtypes tornado einops
-i https://mirrors.aliyun.com/pypi/simple &&
\
wget https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.3.1/torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl &&
\
pip3 install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"
...
...
magic_pdf/utils/office_to_pdf.py
View file @
8fb6794b
...
...
@@ -4,6 +4,8 @@ import platform
from
pathlib
import
Path
import
shutil
from
loguru
import
logger
class
ConvertToPdfError
(
Exception
):
def
__init__
(
self
,
msg
):
...
...
@@ -11,35 +13,24 @@ class ConvertToPdfError(Exception):
super
().
__init__
(
self
.
msg
)
# Chinese font list
REQUIRED_CHS_FONTS
=
[
'SimSun'
,
'Microsoft YaHei'
,
'Noto Sans CJK SC'
]
def
check_fonts_installed
():
"""Check if required Chinese fonts are installed."""
system_type
=
platform
.
system
()
if
system_type
==
'Windows'
:
# Windows: check fonts via registry or system font folder
font_dir
=
Path
(
"C:/Windows/Fonts"
)
installed_fonts
=
[
f
.
name
for
f
in
font_dir
.
glob
(
"*.ttf"
)]
if
any
(
font
for
font
in
REQUIRED_CHS_FONTS
if
any
(
font
in
f
for
f
in
installed_fonts
)):
return
True
raise
EnvironmentError
(
f
"Missing Chinese font. Please install at least one of:
{
', '
.
join
(
REQUIRED_CHS_FONTS
)
}
"
)
if
system_type
in
[
'Windows'
,
'Darwin'
]:
pass
else
:
# Linux
/macOS
: use fc-list
# Linux: use fc-list
try
:
output
=
subprocess
.
check_output
([
'fc-list'
,
':lang=zh'
],
encoding
=
'utf-8'
)
for
font
in
REQUIRED_CHS_FONTS
:
if
font
in
output
:
if
output
.
strip
():
# 只要有任何输出(非空)
return
True
raise
EnvironmentError
(
f
"Missing Chinese font. Please install at least one of:
{
', '
.
join
(
REQUIRED_CHS_FONTS
)
}
"
else
:
logger
.
warning
(
f
"No Chinese fonts were detected, the converted document may not display Chinese content properly."
)
except
Exception
as
e
:
raise
EnvironmentError
(
f
"Font detection failed. Please install 'fontconfig' and fonts:
{
str
(
e
)
}
"
)
except
Exception
:
pass
def
get_soffice_command
():
...
...
tests/unittest/test_table/test_rapidtable.py
View file @
8fb6794b
import
unittest
import
os
from
PIL
import
Image
from
lxml
import
etree
...
...
@@ -8,7 +9,7 @@ from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import RapidTableM
class
TestppTableModel
(
unittest
.
TestCase
):
def
test_image2html
(
self
):
img
=
Image
.
open
(
"assets/table.jpg"
)
img
=
Image
.
open
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"assets/table.jpg"
)
)
atom_model_manager
=
AtomModelSingleton
()
ocr_engine
=
atom_model_manager
.
get_atom_model
(
atom_model_name
=
'ocr'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment