Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
8fb6794b
"vscode:/vscode.git/clone" did not exist on "6a510ffc93e890008dab58eb7d722bd43997a956"
Unverified
Commit
8fb6794b
authored
Apr 17, 2025
by
Xiaomeng Zhao
Committed by
GitHub
Apr 17, 2025
Browse files
Merge pull request #2265 from opendatalab/release-1.3.5
Release 1.3.5
parents
a2b07bfd
af53a463
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
24 deletions
+16
-24
docker/ascend_npu/Dockerfile
docker/ascend_npu/Dockerfile
+1
-1
magic_pdf/utils/office_to_pdf.py
magic_pdf/utils/office_to_pdf.py
+13
-22
tests/unittest/test_table/test_rapidtable.py
tests/unittest/test_table/test_rapidtable.py
+2
-1
No files found.
docker/ascend_npu/Dockerfile
View file @
8fb6794b
...
...
@@ -36,7 +36,7 @@ RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/m
source /opt/mineru_venv/bin/activate &&
\
pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple &&
\
pip3 install torch==2.3.1 torchvision==0.18.1 -i https://mirrors.aliyun.com/pypi/simple &&
\
pip3 install -U magic-pdf[full] -i https://mirrors.aliyun.com/pypi/simple &&
\
pip3 install -U magic-pdf[full]
'numpy<2' decorator attrs absl-py cloudpickle ml-dtypes tornado einops
-i https://mirrors.aliyun.com/pypi/simple &&
\
wget https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.3.1/torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl &&
\
pip3 install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"
...
...
magic_pdf/utils/office_to_pdf.py
View file @
8fb6794b
...
...
@@ -4,6 +4,8 @@ import platform
from
pathlib
import
Path
import
shutil
from
loguru
import
logger
class
ConvertToPdfError
(
Exception
):
def
__init__
(
self
,
msg
):
...
...
@@ -11,35 +13,24 @@ class ConvertToPdfError(Exception):
super
().
__init__
(
self
.
msg
)
# Chinese font list
REQUIRED_CHS_FONTS
=
[
'SimSun'
,
'Microsoft YaHei'
,
'Noto Sans CJK SC'
]
def
check_fonts_installed
():
"""Check if required Chinese fonts are installed."""
system_type
=
platform
.
system
()
if
system_type
==
'Windows'
:
# Windows: check fonts via registry or system font folder
font_dir
=
Path
(
"C:/Windows/Fonts"
)
installed_fonts
=
[
f
.
name
for
f
in
font_dir
.
glob
(
"*.ttf"
)]
if
any
(
font
for
font
in
REQUIRED_CHS_FONTS
if
any
(
font
in
f
for
f
in
installed_fonts
)):
return
True
raise
EnvironmentError
(
f
"Missing Chinese font. Please install at least one of:
{
', '
.
join
(
REQUIRED_CHS_FONTS
)
}
"
)
if
system_type
in
[
'Windows'
,
'Darwin'
]:
pass
else
:
# Linux/macOS: use fc-list
# Linux: use fc-list
try
:
output
=
subprocess
.
check_output
([
'fc-list'
,
':lang=zh'
],
encoding
=
'utf-8'
)
for
font
in
REQUIRED_CHS_FONTS
:
if
font
in
output
:
return
True
raise
EnvironmentError
(
f
"Missing
Chinese font
. Please install at least one of:
{
', '
.
join
(
REQUIRED_CHS_FONTS
)
}
"
)
except
Exception
as
e
:
raise
EnvironmentError
(
f
"Font detection failed. Please install 'fontconfig' and fonts:
{
str
(
e
)
}
"
)
if
output
.
strip
():
# Any non-empty output means at least one Chinese font is available
return
True
else
:
logger
.
warning
(
f
"No
Chinese font
s were detected, the converted document may not display Chinese content properly.
"
)
except
Exception
:
pass
def
get_soffice_command
():
...
...
tests/unittest/test_table/test_rapidtable.py
View file @
8fb6794b
import
unittest
import
os
from
PIL
import
Image
from
lxml
import
etree
...
...
@@ -8,7 +9,7 @@ from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import RapidTableM
class
TestppTableModel
(
unittest
.
TestCase
):
def
test_image2html
(
self
):
img
=
Image
.
open
(
"assets/table.jpg"
)
img
=
Image
.
open
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"assets/table.jpg"
)
)
atom_model_manager
=
AtomModelSingleton
()
ocr_engine
=
atom_model_manager
.
get_atom_model
(
atom_model_name
=
'ocr'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment