# office_to_pdf.py - convert office documents to PDF with headless LibreOffice.
1
2
import os
import subprocess
3
import platform
4
5
6
7
8
9
10
11
12
from pathlib import Path


class ConvertToPdfError(Exception):
    """Error raised when LibreOffice is unavailable or a PDF conversion fails.

    The human-readable message is passed to ``Exception`` and is also kept
    on the ``msg`` attribute.
    """

    def __init__(self, msg):
        super().__init__(msg)
        self.msg = msg


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Chinese-capable font families; the font check passes when any one is found.
REQUIRED_CHS_FONTS = [
    'SimSun',
    'Microsoft YaHei',
    'Noto Sans CJK SC',
]


def _installed_windows_font_names():
    """Return font display names (registry) plus font file names on Windows."""
    names = []

    try:
        import winreg  # Only available on Windows builds of Python.
    except ImportError:
        winreg = None

    if winreg is not None:
        fonts_key = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Fonts"
        # Machine-wide fonts live under HKLM, per-user installs under HKCU.
        for root in (winreg.HKEY_LOCAL_MACHINE, winreg.HKEY_CURRENT_USER):
            try:
                with winreg.OpenKey(root, fonts_key) as key:
                    value_count = winreg.QueryInfoKey(key)[1]
                    names.extend(
                        winreg.EnumValue(key, index)[0]
                        for index in range(value_count)
                    )
            except OSError:
                # Key missing or unreadable: fall back to the other sources.
                continue

    font_dir = Path("C:/Windows/Fonts")
    if font_dir.is_dir():
        # Include every file type (.ttf, .ttc, .otf, ...), not only *.ttf.
        names.extend(entry.name for entry in font_dir.iterdir())

    return names


def check_fonts_installed():
    """Check that at least one required Chinese font is installed.

    On Windows the font display names registered in the registry and the
    file names in ``C:/Windows/Fonts`` are searched case-insensitively.
    On other platforms the output of ``fc-list :lang=zh`` is searched.

    Returns:
        True when any font from ``REQUIRED_CHS_FONTS`` is found.

    Raises:
        EnvironmentError: if none of the required fonts is found, or (on
            non-Windows platforms) if ``fc-list`` cannot be run.
    """
    missing_msg = (
        f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
    )

    if platform.system() == 'Windows':
        installed = [name.casefold() for name in _installed_windows_font_names()]
        for font in REQUIRED_CHS_FONTS:
            wanted = font.casefold()
            if any(wanted in name for name in installed):
                return True
        raise EnvironmentError(missing_msg)

    # Linux/macOS: use fc-list. Only the subprocess call is guarded, so a
    # genuinely missing font is not misreported as a detection failure.
    try:
        output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError) as e:
        raise EnvironmentError(
            f"Font detection failed. Please install 'fontconfig' and fonts: {str(e)}"
        ) from e

    if any(font in output for font in REQUIRED_CHS_FONTS):
        return True
    raise EnvironmentError(missing_msg)


def get_soffice_command():
    """Locate the LibreOffice ``soffice`` executable for the current platform.

    Returns:
        On Windows, the first existing standard install path as a string;
        on any other platform, the bare command name ``'soffice'``.

    Raises:
        ConvertToPdfError: on Windows, when no standard install path exists.
    """
    if platform.system() != 'Windows':
        return 'soffice'  # Assume it's in PATH on Linux/macOS

    candidates = (
        Path("C:/Program Files/LibreOffice/program/soffice.exe"),
        Path("C:/Program Files (x86)/LibreOffice/program/soffice.exe"),
    )
    found = next((candidate for candidate in candidates if candidate.exists()), None)
    if found is None:
        raise ConvertToPdfError(
            "LibreOffice not found. Please install LibreOffice and ensure soffice.exe is located in a standard path."
        )
    return str(found)


61
def convert_file_to_pdf(input_path, output_dir):
    """Convert a single document (ppt, doc, etc.) to PDF.

    Runs LibreOffice headlessly with ``--convert-to pdf`` and writes the
    result into ``output_dir``, which is created if it does not exist.

    Args:
        input_path: Path of the document to convert.
        output_dir: Directory passed to LibreOffice as ``--outdir``.

    Raises:
        FileNotFoundError: if ``input_path`` is not an existing file.
        EnvironmentError: if the Chinese font check fails.
        ConvertToPdfError: if LibreOffice cannot be located (Windows) or
            exits with a non-zero status.
    """
    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"The input file {input_path} does not exist.")

    os.makedirs(output_dir, exist_ok=True)

    check_fonts_installed()

    soffice_cmd = get_soffice_command()

    cmd = [
        soffice_cmd,
        '--headless',
        '--norestore',
        '--invisible',
        '--convert-to', 'pdf',
        '--outdir', str(output_dir),
        str(input_path)
    ]

    process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if process.returncode != 0:
        # stderr may not be UTF-8 (e.g. a GBK console); decode leniently so
        # the real failure is reported instead of a UnicodeDecodeError.
        stderr_text = process.stderr.decode(errors='replace')
        raise ConvertToPdfError(f"LibreOffice convert failed: {stderr_text}")