# office_to_pdf.py
import os
import subprocess
3
import platform
4
from pathlib import Path
5
import shutil
6

7
8
from loguru import logger

9
10
11
12
13
14
15

class ConvertToPdfError(Exception):
    """Error raised when converting a document to PDF fails.

    The message is available both as ``str(exc)`` and through the ``msg``
    attribute.
    """

    def __init__(self, msg):
        # Hand the message to Exception first so args/str() are populated,
        # then keep a named copy for callers that read ``exc.msg``.
        super().__init__(msg)
        self.msg = msg


16
17
18
19
def check_fonts_installed():
    """Check whether fontconfig lists any font for Chinese (``:lang=zh``).

    The probe is only performed on platforms other than Windows and macOS,
    where it runs ``fc-list :lang=zh``. This is a best-effort check: it never
    raises because of a missing or failing ``fc-list``.

    Returns:
        ``True`` when ``fc-list`` printed at least one matching font,
        ``False`` when it ran but printed nothing (a warning is logged),
        ``None`` when the check was skipped (Windows/macOS) or ``fc-list``
        could not be run.
    """
    if platform.system() in ('Windows', 'Darwin'):
        # No probe is implemented for these platforms.
        return None

    try:
        listing = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError) as exc:
        # fc-list missing, exiting non-zero, or emitting undecodable output
        # must not abort the caller; record why the check was skipped instead
        # of swallowing the failure silently.
        logger.debug(f"Skipping Chinese font check, fc-list could not be used: {exc}")
        return None

    # Any non-blank output means at least one matching font was listed.
    if listing.strip():
        return True

    logger.warning(
        "No Chinese fonts were detected, the converted document may not display Chinese content properly."
    )
    return False
34
35
36
37


def get_soffice_command():
    """Return the path to LibreOffice's ``soffice`` executable.

    Lookup order:
      1. ``soffice`` on ``PATH`` (via ``shutil.which``).
      2. On Windows: ``LibreOffice/program/soffice.exe`` under
         ``%PROGRAMFILES%`` and ``%PROGRAMFILES(X86)%``, under the default
         ``C:/Program Files`` directories, and directly under the root of
         drives ``C:`` through ``H:``.
      3. On other platforms: a fixed list of common Linux/macOS locations.

    Returns:
        str: Path of the first candidate that is an existing file.

    Raises:
        ConvertToPdfError: If no candidate exists. The message contains
            platform-specific installation instructions.
    """
    # A soffice reachable through PATH always wins.
    on_path = shutil.which('soffice')
    if on_path:
        return on_path

    if platform.system() == 'Windows':
        relative_exe = 'LibreOffice/program/soffice.exe'
        candidates = [
            Path(os.environ.get('PROGRAMFILES', 'C:/Program Files')) / relative_exe,
            Path(os.environ.get('PROGRAMFILES(X86)', 'C:/Program Files (x86)')) / relative_exe,
            Path('C:/Program Files') / relative_exe,
            Path('C:/Program Files (x86)') / relative_exe,
        ]
        # Also look for installs placed directly at the root of a drive.
        candidates.extend(
            Path(f"{drive}/{relative_exe}")
            for drive in ('C:', 'D:', 'E:', 'F:', 'G:', 'H:')
        )
        not_found_message = (
            "LibreOffice not found. Please install LibreOffice from https://www.libreoffice.org/ "
            "or ensure soffice.exe is in your PATH environment variable."
        )
    else:
        candidates = [
            Path('/usr/bin/soffice'),
            Path('/usr/local/bin/soffice'),
            Path('/opt/libreoffice/program/soffice'),
            Path('/Applications/LibreOffice.app/Contents/MacOS/soffice'),
        ]
        not_found_message = (
            "LibreOffice not found. Please install it:\n"
            "  - Ubuntu/Debian: sudo apt-get install libreoffice\n"
            "  - CentOS/RHEL: sudo yum install libreoffice\n"
            "  - macOS: brew install libreoffice or download from https://www.libreoffice.org/\n"
            "  - Or ensure soffice is in your PATH environment variable."
        )

    for candidate in candidates:
        # os.path.isfile returns False instead of raising for missing or
        # inaccessible paths, so probing absent drives cannot abort the search.
        if os.path.isfile(candidate):
            return str(candidate)

    # Raised directly: wrapping this in a broad try/except would catch our own
    # error and re-wrap it with a misleading prefix.
    raise ConvertToPdfError(not_found_message)
89
90


91
def convert_file_to_pdf(input_path, output_dir):
    """Convert a single document (ppt, doc, etc.) to PDF using LibreOffice.

    Args:
        input_path: Path of the document to convert; must be an existing file.
        output_dir: Directory passed to LibreOffice's ``--outdir``; it is
            created if it does not exist.

    Raises:
        FileNotFoundError: If ``input_path`` is not an existing file.
        ConvertToPdfError: If the LibreOffice process exits with a non-zero
            status; the message carries its stderr output. Errors raised by
            ``get_soffice_command`` while locating the executable propagate
            unchanged.
    """
    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"The input file {input_path} does not exist.")

    os.makedirs(output_dir, exist_ok=True)

    # Return value is intentionally ignored here.
    check_fonts_installed()

    cmd = [
        get_soffice_command(),
        '--headless',
        '--norestore',
        '--invisible',
        '--convert-to', 'pdf',
        '--outdir', str(output_dir),
        str(input_path),
    ]

    process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if process.returncode != 0:
        # stderr is raw bytes and is not guaranteed to be valid UTF-8;
        # replace undecodable bytes so a UnicodeDecodeError cannot mask the
        # real conversion failure.
        stderr_text = process.stderr.decode(errors='replace')
        raise ConvertToPdfError(f"LibreOffice convert failed: {stderr_text}")