office_to_pdf.py 4.45 KB
Newer Older
1
2
import os
import subprocess
3
import platform
4
from pathlib import Path
5
import shutil
6
7
8
9
10
11
12
13


class ConvertToPdfError(Exception):
    """Error raised when LibreOffice cannot be located or a conversion fails.

    Attributes:
        msg: Human-readable description of the failure. The same text is
            handed to ``Exception`` so ``str(exc)`` returns it as well.
    """

    def __init__(self, msg):
        # Initialise the base class first, then expose the text as ``.msg``.
        super().__init__(msg)
        self.msg = msg


14
15
16
17
18
19
20
21
22
23
# Chinese font family names that check_fonts_installed() searches for in the
# `fc-list` output; finding any one of them is enough for the check to pass.
REQUIRED_CHS_FONTS = ['SimSun', 'Microsoft YaHei', 'Noto Sans CJK SC']


def check_fonts_installed():
    """Check that at least one required Chinese font is installed.

    On Windows the check is currently disabled: nothing is inspected and the
    function returns ``None``. On every other platform the output of
    ``fc-list :lang=zh`` is searched for each name in ``REQUIRED_CHS_FONTS``.

    Returns:
        ``True`` when one of the required fonts is listed by ``fc-list``;
        ``None`` on Windows, where no check is performed.

    Raises:
        EnvironmentError: If ``fc-list`` cannot be run or its output cannot
            be decoded ("Font detection failed ..."), or if none of the
            required fonts is listed ("Missing Chinese font ...").
    """
    if platform.system() == 'Windows':
        # Font detection is not implemented on Windows. A scan of
        # C:/Windows/Fonts for *.ttf files matching REQUIRED_CHS_FONTS was
        # sketched here previously but is disabled, so the check is skipped.
        return None

    # Linux/macOS: ask fontconfig for the fonts that cover Chinese.
    # Only the external call is guarded, so that the "missing font" error
    # raised below is not mistaken for a failure to run fc-list.
    try:
        output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # OSError: fc-list missing or not executable; SubprocessError:
        # non-zero exit; ValueError: output was not valid UTF-8.
        raise EnvironmentError(
            f"Font detection failed. Please install 'fontconfig' and fonts: {e}"
        ) from e

    if any(font in output for font in REQUIRED_CHS_FONTS):
        return True

    raise EnvironmentError(
        f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
    )


def get_soffice_command():
    """Return the path to LibreOffice's ``soffice`` executable.

    Lookup order:
      1. ``soffice`` on ``PATH`` (via ``shutil.which``).
      2. On Windows: ``LibreOffice/program/soffice.exe`` under the
         ``PROGRAMFILES`` / ``PROGRAMFILES(X86)`` directories, their default
         ``C:`` locations, and the root of drives ``C:`` through ``H:``.
      3. On other platforms: a fixed list of common Linux and macOS
         install locations.

    Returns:
        str: Path of the first candidate that exists.

    Raises:
        ConvertToPdfError: If no candidate exists. The message contains
            installation instructions for the current platform family.
    """
    # An executable on PATH always wins over the hard-coded guesses.
    soffice_path = shutil.which('soffice')
    if soffice_path:
        return soffice_path

    if platform.system() == 'Windows':
        # The environment variables are tried first, then the literal default
        # directories in case the variables point somewhere else.
        program_dirs = [
            os.environ.get('PROGRAMFILES', 'C:/Program Files'),
            os.environ.get('PROGRAMFILES(X86)', 'C:/Program Files (x86)'),
            'C:/Program Files',
            'C:/Program Files (x86)',
        ]
        candidates = [
            Path(program_dir) / 'LibreOffice/program/soffice.exe'
            for program_dir in program_dirs
        ]
        # Installations placed directly at the root of a drive.
        candidates.extend(
            Path(f"{drive}/LibreOffice/program/soffice.exe")
            for drive in ['C:', 'D:', 'E:', 'F:', 'G:', 'H:']
        )

        for candidate in candidates:
            if candidate.exists():
                return str(candidate)

        raise ConvertToPdfError(
            "LibreOffice not found. Please install LibreOffice from https://www.libreoffice.org/ "
            "or ensure soffice.exe is in your PATH environment variable."
        )

    # Linux/macOS: probe the standard install locations.
    possible_paths = [
        '/usr/bin/soffice',
        '/usr/local/bin/soffice',
        '/opt/libreoffice/program/soffice',
        '/Applications/LibreOffice.app/Contents/MacOS/soffice',
    ]
    for path in possible_paths:
        if os.path.exists(path):
            return path

    # Raised directly: wrapping this in a broad ``except Exception`` would
    # only re-raise it with a redundant "Error locating LibreOffice" prefix.
    raise ConvertToPdfError(
        "LibreOffice not found. Please install it:\n"
        "  - Ubuntu/Debian: sudo apt-get install libreoffice\n"
        "  - CentOS/RHEL: sudo yum install libreoffice\n"
        "  - macOS: brew install libreoffice or download from https://www.libreoffice.org/\n"
        "  - Or ensure soffice is in your PATH environment variable."
    )
99
100


101
def convert_file_to_pdf(input_path, output_dir):
    """Convert a single document (ppt, doc, etc.) to PDF with LibreOffice.

    Args:
        input_path: Path of the document to convert; must be an existing file.
        output_dir: Directory that receives the PDF. It is created if it
            does not exist yet.

    Raises:
        FileNotFoundError: If ``input_path`` is not an existing file.
        EnvironmentError: Propagated from ``check_fonts_installed()``.
        ConvertToPdfError: If LibreOffice cannot be located, exits with a
            non-zero status, or exits without leaving ``<input stem>.pdf``
            in ``output_dir``.
    """
    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"The input file {input_path} does not exist.")

    os.makedirs(output_dir, exist_ok=True)

    check_fonts_installed()

    soffice_cmd = get_soffice_command()

    # Argument list (no shell), so paths with spaces need no quoting.
    cmd = [
        soffice_cmd,
        '--headless',
        '--norestore',
        '--invisible',
        '--convert-to', 'pdf',
        '--outdir', str(output_dir),
        str(input_path),
    ]

    process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # The console encoding of soffice is not guaranteed to be UTF-8; decode
    # leniently so a decoding error never hides the real failure.
    stderr_text = process.stderr.decode(errors='replace')

    if process.returncode != 0:
        raise ConvertToPdfError(f"LibreOffice convert failed: {stderr_text}")

    # NOTE(review): soffice is reported to exit with status 0 in some cases
    # where no file is written (e.g. the document could not be loaded), so
    # the exit code alone is not trusted. With ``--convert-to pdf --outdir``
    # the result is expected at <output_dir>/<input stem>.pdf.
    expected_pdf = Path(output_dir) / (Path(input_path).stem + '.pdf')
    if not expected_pdf.is_file():
        details = stderr_text or process.stdout.decode(errors='replace')
        raise ConvertToPdfError(
            f"LibreOffice convert failed: expected output {expected_pdf} was not created. {details}"
        )