# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.

"""Installation script."""

import ctypes
import os
import subprocess
import sys
import sysconfig
import copy

from pathlib import Path
from subprocess import CalledProcessError
from typing import List, Optional, Type

import setuptools

from .utils import (
    cmake_bin,
    debug_build_enabled,
    found_ninja,
    get_frameworks,
    cuda_path,
    get_max_jobs_for_parallel_build,
)


class CMakeExtension(setuptools.Extension):
    """CMake extension module.

    A placeholder setuptools extension (no sources) whose actual build is
    driven by CMake via :meth:`_build_cmake`.
    """

    def __init__(
        self,
        name: str,
        cmake_path: Path,
        cmake_flags: Optional[List[str]] = None,
    ) -> None:
        """
        Arguments:
            name: Extension module name.
            cmake_path: Directory containing the project's CMakeLists.txt.
            cmake_flags: Extra arguments appended to the CMake configure
                command (optional).
        """
        super().__init__(name, sources=[])  # No work for base class
        self.cmake_path: Path = cmake_path
        self.cmake_flags: List[str] = [] if cmake_flags is None else cmake_flags

    def _build_cmake(self, build_dir: Path, install_dir: Path) -> None:
        """Configure, build, and install the extension with CMake.

        Arguments:
            build_dir: CMake build directory (must already exist).
            install_dir: CMake install prefix.

        Raises:
            RuntimeError: If any CMake command fails or cannot be launched.
        """
        # Make sure paths are str
        _cmake_bin = str(cmake_bin())
        cmake_path = str(self.cmake_path)
        build_dir = str(build_dir)
        install_dir = str(install_dir)

        # CMake configure command
        build_type = "Debug" if debug_build_enabled() else "Release"
        configure_command = [
            _cmake_bin,
            "-S",
            cmake_path,
            "-B",
            build_dir,
            f"-DPython_EXECUTABLE={sys.executable}",
            f"-DPython_INCLUDE_DIR={sysconfig.get_path('include')}",
            f"-DCMAKE_BUILD_TYPE={build_type}",
            f"-DCMAKE_INSTALL_PREFIX={install_dir}",
        ]
        configure_command += self.cmake_flags

        # Point CMake at the pybind11 CMake package shipped inside the
        # installed pybind11 Python package.
        import pybind11

        pybind11_dir = Path(pybind11.__file__).resolve().parent
        pybind11_dir = pybind11_dir / "share" / "cmake" / "pybind11"
        configure_command.append(f"-Dpybind11_DIR={pybind11_dir}")

        # CMake build and install commands
        build_command = [_cmake_bin, "--build", build_dir]
        install_command = [_cmake_bin, "--install", build_dir]

        # Check whether parallel build is restricted
        max_jobs = get_max_jobs_for_parallel_build()
        if found_ninja():
            configure_command.append("-GNinja")
        build_command.append("--parallel")
        if max_jobs > 0:
            # `--parallel N` caps the job count; bare `--parallel` uses
            # the generator's default parallelism.
            build_command.append(str(max_jobs))

        # Run CMake commands
        for command in [configure_command, build_command, install_command]:
            print(f"Running command {' '.join(command)}")
            try:
                subprocess.run(command, cwd=build_dir, check=True)
            except (CalledProcessError, OSError) as e:
                # Chain the original exception (`from e`) so the real CMake
                # failure is preserved as the cause in the traceback.
                raise RuntimeError(f"Error when running CMake: {e}") from e


def get_build_ext(extension_cls: Type[setuptools.Extension]):
    """Create a build_ext command class with CMake support.

    Returns a subclass of ``extension_cls`` (the build_ext command from
    setuptools, PyTorch, or PaddlePaddle) that builds ``CMakeExtension``
    entries with CMake and delegates all other extensions to the base class.
    """

    class _CMakeBuildExtension(extension_cls):
        """Setuptools command with support for CMake extension modules"""

        def run(self) -> None:
            # Build CMake extensions
            for ext in self.extensions:
                package_path = Path(self.get_ext_fullpath(ext.name))
                install_dir = package_path.resolve().parent
                if isinstance(ext, CMakeExtension):
                    print(f"Building CMake extension {ext.name}")
                    # Set up incremental builds for CMake extensions:
                    # reuse a persistent build dir under the repo root.
                    setup_dir = Path(__file__).resolve().parent.parent
                    build_dir = setup_dir / "build" / "cmake"

                    # Ensure the directory exists
                    build_dir.mkdir(parents=True, exist_ok=True)

                    ext._build_cmake(
                        build_dir=build_dir,
                        install_dir=install_dir,
                    )

            # Build non-CMake extensions as usual: temporarily hide the
            # CMake extensions from the base class, then restore the list.
            all_extensions = self.extensions
            self.extensions = [
                ext for ext in self.extensions if not isinstance(ext, CMakeExtension)
            ]
            super().run()
            self.extensions = all_extensions

            # Find the Paddle extension, if any, by name.
            paddle_ext = None
            if "paddle" in get_frameworks():
                for ext in self.extensions:
                    if "paddle" in ext.name:
                        paddle_ext = ext
                        break

            # Manually write stub file for Paddle extension
            if paddle_ext is not None:
                # Load libtransformer_engine.so to avoid linker errors
                if not bool(int(os.getenv("NVTE_RELEASE_BUILD", "0"))):
                    # Source compilation from top-level (--editable)
                    search_paths = list(Path(__file__).resolve().parent.parent.iterdir())
                    # Source compilation from top-level
                    search_paths.extend(list(Path(self.build_lib).iterdir()))

                    # Dynamically load required_libs.
                    from transformer_engine.common import _load_cudnn, _load_nvrtc

                    _load_cudnn()
                    _load_nvrtc()
                else:
                    # Only during release bdist build for paddlepaddle.
                    import transformer_engine

                    search_paths = list(Path(transformer_engine.__path__[0]).iterdir())
                    del transformer_engine

                # Locate the shared library among the candidate paths
                # (last match wins if there are several).
                common_so_path = ""
                for path in search_paths:
                    if path.name.startswith("libtransformer_engine."):
                        common_so_path = str(path)
                assert common_so_path, "Could not find libtransformer_engine"
                # RTLD_GLOBAL makes its symbols visible to the Paddle
                # extension loaded later.
                ctypes.CDLL(common_so_path, mode=ctypes.RTLD_GLOBAL)

                # Figure out stub file path
                module_name = paddle_ext.name
                assert module_name.endswith(
                    "_pd_"
                ), "Expected Paddle extension module to end with '_pd_'"
                stub_name = module_name[:-4]  # remove '_pd_'
                stub_path = os.path.join(self.build_lib, "transformer_engine", stub_name + ".py")
                Path(stub_path).parent.mkdir(exist_ok=True, parents=True)

                # Figure out library name
                # Note: This library doesn't actually exist. Paddle
                # internally reinserts the '_pd_' suffix.
                so_path = self.get_ext_fullpath(module_name)
                _, so_ext = os.path.splitext(so_path)
                lib_name = stub_name + so_ext

                # Write stub file
                print(f"Writing Paddle stub for {lib_name} into file {stub_path}")
                from paddle.utils.cpp_extension.extension_utils import custom_write_stub

                custom_write_stub(lib_name, stub_path)

            # Ensure that binaries are not in global package space.
            # NOTE(review): install_dir is the value left over from the last
            # iteration of the loop at the top of run(); this assumes
            # self.extensions is non-empty and that all extensions install
            # into the same parent directory — confirm.
            target_dir = install_dir / "transformer_engine"
            target_dir.mkdir(exist_ok=True, parents=True)

            for ext in Path(self.build_lib).glob("*.so"):
                self.copy_file(ext, target_dir)
                os.remove(ext)

            # For paddle, the stub file needs to be copied to the install location.
            if paddle_ext is not None:
                stub_path = Path(self.build_lib) / "transformer_engine"
                for stub in stub_path.glob("transformer_engine_paddle.py"):
                    self.copy_file(stub, target_dir)

        def build_extensions(self):
            # BuildExtensions from PyTorch and PaddlePaddle already handle CUDA files correctly
            # so we don't need to modify their compiler. Only the pybind11 build_ext needs to be fixed.
            if "pytorch" not in get_frameworks() and "paddle" not in get_frameworks():
                # Ensure at least an empty list of flags for 'cxx' and 'nvcc' when
                # extra_compile_args is a dict.
                for ext in self.extensions:
                    if isinstance(ext.extra_compile_args, dict):
                        for target in ["cxx", "nvcc"]:
                            if target not in ext.extra_compile_args.keys():
                                ext.extra_compile_args[target] = []

                # Define new _compile method that redirects to NVCC for .cu and .cuh files.
                original_compile_fn = self.compiler._compile
                self.compiler.src_extensions += [".cu", ".cuh"]

                def _compile_fn(obj, src, ext, cc_args, extra_postargs, pp_opts) -> None:
                    # Copy before we make any modifications.
                    cflags = copy.deepcopy(extra_postargs)
                    original_compiler = self.compiler.compiler_so
                    try:
                        _, nvcc_bin = cuda_path()
                        # NOTE(review): original_compiler is re-assigned the
                        # same value here — redundant but harmless.
                        original_compiler = self.compiler.compiler_so

                        if os.path.splitext(src)[1] in [".cu", ".cuh"]:
                            # CUDA sources: swap the compiler for nvcc.
                            self.compiler.set_executable("compiler_so", str(nvcc_bin))
                            if isinstance(cflags, dict):
                                cflags = cflags["nvcc"]

                            # Add -fPIC if not already specified
                            if not any("-fPIC" in flag for flag in cflags):
                                cflags.extend(["--compiler-options", "'-fPIC'"])

                            # Forward unknown options
                            if not any("--forward-unknown-opts" in flag for flag in cflags):
                                cflags.append("--forward-unknown-opts")

                        elif isinstance(cflags, dict):
                            cflags = cflags["cxx"]

                        # Append -std=c++17 if not already in flags
                        if not any(flag.startswith("-std=") for flag in cflags):
                            cflags.append("-std=c++17")

                        return original_compile_fn(obj, src, ext, cc_args, cflags, pp_opts)

                    finally:
                        # Put the original compiler back in place.
                        self.compiler.set_executable("compiler_so", original_compiler)

                self.compiler._compile = _compile_fn

            super().build_extensions()

    return _CMakeBuildExtension