Unverified commit 8fb6794b, authored by Xiaomeng Zhao, committed by GitHub
Browse files

Merge pull request #2265 from opendatalab/release-1.3.5

Release 1.3.5
parents a2b07bfd af53a463
......@@ -36,7 +36,7 @@ RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/m
source /opt/mineru_venv/bin/activate && \
pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
pip3 install torch==2.3.1 torchvision==0.18.1 -i https://mirrors.aliyun.com/pypi/simple && \
pip3 install -U magic-pdf[full] -i https://mirrors.aliyun.com/pypi/simple && \
pip3 install -U magic-pdf[full] 'numpy<2' decorator attrs absl-py cloudpickle ml-dtypes tornado einops -i https://mirrors.aliyun.com/pypi/simple && \
wget https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.3.1/torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl && \
pip3 install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"
......
......@@ -4,6 +4,8 @@ import platform
from pathlib import Path
import shutil
from loguru import logger
class ConvertToPdfError(Exception):
def __init__(self, msg):
......@@ -11,35 +13,24 @@ class ConvertToPdfError(Exception):
super().__init__(self.msg)
# Chinese font list
REQUIRED_CHS_FONTS = ['SimSun', 'Microsoft YaHei', 'Noto Sans CJK SC']
def check_fonts_installed():
"""Check if required Chinese fonts are installed."""
system_type = platform.system()
if system_type == 'Windows':
# Windows: check fonts via registry or system font folder
font_dir = Path("C:/Windows/Fonts")
installed_fonts = [f.name for f in font_dir.glob("*.ttf")]
if any(font for font in REQUIRED_CHS_FONTS if any(font in f for f in installed_fonts)):
return True
raise EnvironmentError(
f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
)
if system_type in ['Windows', 'Darwin']:
pass
else:
# Linux/macOS: use fc-list
# Linux: use fc-list
try:
output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
for font in REQUIRED_CHS_FONTS:
if font in output:
if output.strip(): # any non-empty output means at least one Chinese font is installed
return True
raise EnvironmentError(
f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
else:
logger.warning(
f"No Chinese fonts were detected, the converted document may not display Chinese content properly."
)
except Exception as e:
raise EnvironmentError(f"Font detection failed. Please install 'fontconfig' and fonts: {str(e)}")
except Exception:
pass
def get_soffice_command():
......
import unittest
import os
from PIL import Image
from lxml import etree
......@@ -8,7 +9,7 @@ from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import RapidTableM
class TestppTableModel(unittest.TestCase):
def test_image2html(self):
img = Image.open("assets/table.jpg")
img = Image.open(os.path.join(os.path.dirname(__file__), "assets/table.jpg"))
atom_model_manager = AtomModelSingleton()
ocr_engine = atom_model_manager.get_atom_model(
atom_model_name='ocr',
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment