Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
af53a463
Unverified
Commit
af53a463
authored
Apr 17, 2025
by
Xiaomeng Zhao
Committed by
GitHub
Apr 17, 2025
Browse files
Merge pull request #2264 from myhloli/dev
refactor(office_to_pdf): simplify font checking and add logging
parents
4bd3381c
2e5e55cf
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
23 deletions
+14
-23
docker/ascend_npu/Dockerfile
docker/ascend_npu/Dockerfile
+1
-1
magic_pdf/utils/office_to_pdf.py
magic_pdf/utils/office_to_pdf.py
+13
-22
No files found.
docker/ascend_npu/Dockerfile
View file @
af53a463
...
...
@@ -36,7 +36,7 @@ RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/m
source /opt/mineru_venv/bin/activate &&
\
pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple &&
\
pip3 install torch==2.3.1 torchvision==0.18.1 -i https://mirrors.aliyun.com/pypi/simple &&
\
pip3 install -U magic-pdf[full] -i https://mirrors.aliyun.com/pypi/simple &&
\
pip3 install -U magic-pdf[full]
'numpy<2' decorator attrs absl-py cloudpickle ml-dtypes tornado einops
-i https://mirrors.aliyun.com/pypi/simple &&
\
wget https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.3.1/torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl &&
\
pip3 install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"
...
...
magic_pdf/utils/office_to_pdf.py
View file @
af53a463
...
...
@@ -4,6 +4,8 @@ import platform
from
pathlib
import
Path
import
shutil
from
loguru
import
logger
class
ConvertToPdfError
(
Exception
):
def
__init__
(
self
,
msg
):
...
...
@@ -11,35 +13,24 @@ class ConvertToPdfError(Exception):
super
().
__init__
(
self
.
msg
)
# Chinese font list
REQUIRED_CHS_FONTS
=
[
'SimSun'
,
'Microsoft YaHei'
,
'Noto Sans CJK SC'
]
def
check_fonts_installed
():
"""Check if required Chinese fonts are installed."""
system_type
=
platform
.
system
()
if
system_type
==
'Windows'
:
# Windows: check fonts via registry or system font folder
font_dir
=
Path
(
"C:/Windows/Fonts"
)
installed_fonts
=
[
f
.
name
for
f
in
font_dir
.
glob
(
"*.ttf"
)]
if
any
(
font
for
font
in
REQUIRED_CHS_FONTS
if
any
(
font
in
f
for
f
in
installed_fonts
)):
return
True
raise
EnvironmentError
(
f
"Missing Chinese font. Please install at least one of:
{
', '
.
join
(
REQUIRED_CHS_FONTS
)
}
"
)
if
system_type
in
[
'Windows'
,
'Darwin'
]:
pass
else
:
# Linux
/macOS
: use fc-list
# Linux: use fc-list
try
:
output
=
subprocess
.
check_output
([
'fc-list'
,
':lang=zh'
],
encoding
=
'utf-8'
)
for
font
in
REQUIRED_CHS_FONTS
:
if
font
in
output
:
if
output
.
strip
():
# 只要有任何输出(非空)
return
True
raise
EnvironmentError
(
f
"Missing Chinese font. Please install at least one of:
{
', '
.
join
(
REQUIRED_CHS_FONTS
)
}
"
else
:
logger
.
warning
(
f
"No Chinese fonts were detected, the converted document may not display Chinese content properly."
)
except
Exception
as
e
:
raise
EnvironmentError
(
f
"Font detection failed. Please install 'fontconfig' and fonts:
{
str
(
e
)
}
"
)
except
Exception
:
pass
def
get_soffice_command
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment