"vllm/vscode:/vscode.git/clone" did not exist on "6b0511a57bdba85efe2b4d5588dd16280c8fdc78"
builder.py 8.58 KB
Newer Older
# This code has been adapted from the DeepSpeed library.
# Copyright (c) Microsoft Corporation.

# Licensed under the MIT License.
import importlib
import os
import time
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Optional

from .utils import check_cuda_availability, check_system_pytorch_cuda_match, print_rank_0


class Builder(ABC):
    """
    Builder is the base class to build extensions for PyTorch.

    A concrete builder describes one kernel (its sources, include directories
    and compiler flags). The kernel can then either be imported from a
    pre-built module, JIT-compiled at runtime via :meth:`load`, or turned into
    a ``CUDAExtension`` for ``setup.py`` via :meth:`builder`.

    Args:
        name (str): the name of the kernel to be built
        prebuilt_import_path (str): the path where the extension is installed during pip install
    """

    def __init__(self, name: str, prebuilt_import_path: str):
        self.name = name
        self.prebuilt_import_path = prebuilt_import_path
        self.version_dependent_macros = ["-DVERSION_GE_1_1", "-DVERSION_GE_1_3", "-DVERSION_GE_1_5"]

        # we store the op as an attribute to avoid repeated building and loading
        self.cached_op_module = None

        assert prebuilt_import_path.startswith(
            "colossalai._C"
        ), f"The prebuilt_import_path should start with colossalai._C, but got {self.prebuilt_import_path}"

    def relative_to_abs_path(self, code_path: str) -> str:
        """
        Convert a path relative to the colossalai root directory into an absolute path.

        Args:
            code_path (str): a path relative to the colossalai root directory.

        Returns:
            str: ``code_path`` joined onto the colossalai root directory.
        """
        op_builder_module_path = Path(__file__).parent

        # if we install from source
        # the current file path will be op_builder/builder.py
        # if we install via pip install colossalai
        # the current file path will be colossalai/kernel/op_builder/builder.py
        # this is because the op_builder inside colossalai is a symlink
        # this symlink will be replaced with actual files if we install via pypi
        # thus we cannot tell the colossalai root directory by checking whether the op_builder
        # is a symlink, we can only tell whether it is inside or outside colossalai
        #
        # Compare path components rather than the string form so that the check
        # is independent of the platform's path separator and cannot be fooled
        # by a directory whose name merely ends with "colossalai".
        if op_builder_module_path.parts[-3:] == ("colossalai", "kernel", "op_builder"):
            root_path = op_builder_module_path.parent.parent
        else:
            root_path = op_builder_module_path.parent.joinpath("colossalai")

        return str(root_path.joinpath(code_path))

    def get_cuda_home_include(self):
        """
        Return the ``include`` directory inside the CUDA home.

        Raises:
            RuntimeError: if ``torch.utils.cpp_extension.CUDA_HOME`` is ``None``.
        """
        from torch.utils.cpp_extension import CUDA_HOME

        if CUDA_HOME is None:
            raise RuntimeError("CUDA_HOME is None, please set CUDA_HOME to compile C++/CUDA kernels in ColossalAI.")
        return os.path.join(CUDA_HOME, "include")

    def csrc_abs_path(self, path):
        """
        Return the absolute path of ``path`` located under ``kernel/cuda_native/csrc``.
        """
        return os.path.join(self.relative_to_abs_path("kernel/cuda_native/csrc"), path)

    # functions that must be overridden: begin
    @abstractmethod
    def sources_files(self) -> List[str]:
        """
        This function should return a list of source files for extensions.
        """
        raise NotImplementedError

    @abstractmethod
    def include_dirs(self) -> List[str]:
        """
        This function should return a list of include directories for extensions.
        """

    @abstractmethod
    def cxx_flags(self) -> List[str]:
        """
        This function should return a list of cxx compilation flags for extensions.
        """

    @abstractmethod
    def nvcc_flags(self) -> List[str]:
        """
        This function should return a list of nvcc compilation flags for extensions.
        """

    # functions that must be overridden: end

    def strip_empty_entries(self, args):
        """
        Drop any empty entries (empty strings or ``None``) from a list of
        source files, include directories, or compile and link flags.
        """
        return [x for x in args if x]

    def import_op(self):
        """
        Import the pre-built op module by its string name (``prebuilt_import_path``).

        Raises:
            ImportError: if the module cannot be imported.
        """
        return importlib.import_module(self.prebuilt_import_path)

    def check_runtime_build_environment(self):
        """
        Check whether the system environment is ready for extension compilation.

        Raises:
            ModuleNotFoundError: if PyTorch cannot be imported.
            RuntimeError: if ``CUDA_HOME`` is not set or CUDA is reported as unavailable.
        """
        try:
            from torch.utils.cpp_extension import CUDA_HOME
        except ImportError as err:
            raise ModuleNotFoundError(
                "PyTorch is not found. You need to install PyTorch first in order to build CUDA extensions"
            ) from err

        if CUDA_HOME is None:
            raise RuntimeError(
                "CUDA_HOME is not found. You need to export CUDA_HOME environment variable or install CUDA Toolkit first in order to build CUDA extensions"
            )

        # make sure CUDA is available for compilation during runtime
        if not check_cuda_availability():
            raise RuntimeError("CUDA is not available on your system as torch.cuda.is_available() returns False.")

        # make sure system CUDA and pytorch CUDA match, an error will be raised inside the function if not
        check_system_pytorch_cuda_match(CUDA_HOME)

    def load(self, verbose: Optional[bool] = None):
        """
        Load the kernel during runtime. If the kernel is not built during pip install, it will build the kernel.
        If the kernel is built during runtime, it will be stored in `~/.cache/colossalai/torch_extensions/`. If the
        kernel is built during pip install, it can be accessed through `colossalai._C`.

        Warning: do not load this kernel repeatedly during model execution as it could slow down the training process.

        Args:
            verbose (bool, optional): show detailed info. Defaults to None, in which case detailed
                info is shown only when the environment variable ``CAI_KERNEL_VERBOSE`` is set to ``"1"``.

        Returns:
            The imported or JIT-built op module. The module is cached on the builder,
            so subsequent calls return the same object.
        """
        if verbose is None:
            verbose = os.environ.get("CAI_KERNEL_VERBOSE", "0") == "1"

        # if the kernel has been compiled and cached, we directly use it
        if self.cached_op_module is not None:
            return self.cached_op_module

        try:
            # if the kernel has been pre-built during installation
            # we just directly import it
            op_module = self.import_op()
        except ImportError:
            # check environment
            self.check_runtime_build_environment()

            # time the kernel compilation
            start_build = time.perf_counter()

            # construct the build directory
            import torch
            from torch.utils.cpp_extension import load

            torch_version_major, torch_version_minor = torch.__version__.split(".")[:2]
            torch_cuda_version = torch.version.cuda
            home_directory = os.path.expanduser("~")
            extension_directory = f".cache/colossalai/torch_extensions/torch{torch_version_major}.{torch_version_minor}_cu{torch_cuda_version}"
            build_directory = os.path.join(home_directory, extension_directory)
            Path(build_directory).mkdir(parents=True, exist_ok=True)

            if verbose:
                print_rank_0(f"[extension] Compiling or loading the JIT-built {self.name} kernel during runtime now")

            # load the kernel
            # empty flags are stripped here as well so that the JIT build receives
            # the same flag lists as the ahead-of-time build in `builder()`
            op_module = load(
                name=self.name,
                sources=self.strip_empty_entries(self.sources_files()),
                extra_include_paths=self.strip_empty_entries(self.include_dirs()),
                extra_cflags=self.strip_empty_entries(self.cxx_flags()),
                extra_cuda_cflags=self.strip_empty_entries(self.nvcc_flags()),
                extra_ldflags=[],
                build_directory=build_directory,
                verbose=verbose,
            )

            build_duration = time.perf_counter() - start_build

            # log jit compilation time
            if verbose:
                print_rank_0(f"[extension] Time to compile or load {self.name} op: {build_duration} seconds")
        else:
            # only reached when the pre-built module was imported successfully
            if verbose:
                print_rank_0(
                    f"[extension] OP {self.prebuilt_import_path} has been compiled ahead of time, skip building."
                )

        # cache the built/loaded kernel
        self.cached_op_module = op_module

        return op_module

    def builder(self) -> "CUDAExtension":
        """
        Get a CUDAExtension instance used for setup.py.
        """
        from torch.utils.cpp_extension import CUDAExtension

        return CUDAExtension(
            name=self.prebuilt_import_path,
            sources=self.strip_empty_entries(self.sources_files()),
            include_dirs=self.strip_empty_entries(self.include_dirs()),
            extra_compile_args={
                "cxx": self.strip_empty_entries(self.cxx_flags()),
                "nvcc": self.strip_empty_entries(self.nvcc_flags()),
            },
        )