Unverified commit af53a463, authored by Xiaomeng Zhao and committed by GitHub
Browse files

Merge pull request #2264 from myhloli/dev

refactor(office_to_pdf): simplify font checking and add logging
parents 4bd3381c 2e5e55cf
...@@ -36,7 +36,7 @@ RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/m ...@@ -36,7 +36,7 @@ RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/m
source /opt/mineru_venv/bin/activate && \ source /opt/mineru_venv/bin/activate && \
pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \ pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
pip3 install torch==2.3.1 torchvision==0.18.1 -i https://mirrors.aliyun.com/pypi/simple && \ pip3 install torch==2.3.1 torchvision==0.18.1 -i https://mirrors.aliyun.com/pypi/simple && \
pip3 install -U magic-pdf[full] -i https://mirrors.aliyun.com/pypi/simple && \ pip3 install -U magic-pdf[full] 'numpy<2' decorator attrs absl-py cloudpickle ml-dtypes tornado einops -i https://mirrors.aliyun.com/pypi/simple && \
wget https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.3.1/torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl && \ wget https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.3.1/torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl && \
pip3 install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" pip3 install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"
......
...@@ -4,6 +4,8 @@ import platform ...@@ -4,6 +4,8 @@ import platform
from pathlib import Path from pathlib import Path
import shutil import shutil
from loguru import logger
class ConvertToPdfError(Exception): class ConvertToPdfError(Exception):
def __init__(self, msg): def __init__(self, msg):
...@@ -11,35 +13,24 @@ class ConvertToPdfError(Exception): ...@@ -11,35 +13,24 @@ class ConvertToPdfError(Exception):
super().__init__(self.msg) super().__init__(self.msg)
# Chinese font list
REQUIRED_CHS_FONTS = ['SimSun', 'Microsoft YaHei', 'Noto Sans CJK SC']
def check_fonts_installed(): def check_fonts_installed():
"""Check if required Chinese fonts are installed.""" """Check if required Chinese fonts are installed."""
system_type = platform.system() system_type = platform.system()
if system_type == 'Windows': if system_type in ['Windows', 'Darwin']:
# Windows: check fonts via registry or system font folder pass
font_dir = Path("C:/Windows/Fonts")
installed_fonts = [f.name for f in font_dir.glob("*.ttf")]
if any(font for font in REQUIRED_CHS_FONTS if any(font in f for f in installed_fonts)):
return True
raise EnvironmentError(
f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
)
else: else:
# Linux/macOS: use fc-list # Linux: use fc-list
try: try:
output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8') output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
for font in REQUIRED_CHS_FONTS: if output.strip(): # 只要有任何输出(非空)
if font in output: return True
return True else:
raise EnvironmentError( logger.warning(
f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}" f"No Chinese fonts were detected, the converted document may not display Chinese content properly."
) )
except Exception as e: except Exception:
raise EnvironmentError(f"Font detection failed. Please install 'fontconfig' and fonts: {str(e)}") pass
def get_soffice_command(): def get_soffice_command():
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment