__init__.py 10.2 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
import logging
4
import os
5
6
import traceback
from itertools import chain
7
from typing import TYPE_CHECKING
8

9
from vllm import envs
10
from vllm.plugins import PLATFORM_PLUGINS_GROUP, load_plugins_by_group
11
from vllm.utils.import_utils import resolve_obj_by_qualname
12
from vllm.utils.torch_utils import supports_xccl
13
14

from .interface import CpuArchEnum, Platform, PlatformEnum
15

16
logger = logging.getLogger(__name__)
17

18

19
20
21
22
23
def vllm_version_matches_substr(substr: str) -> bool:
    """Report whether the installed vLLM version string contains ``substr``.

    The version is read from the installed distribution metadata of the
    ``vllm`` package.

    Args:
        substr: Text to look for inside the version string (for example
            ``"cpu"``).

    Returns:
        ``True`` if ``substr`` occurs anywhere in the version string.

    Raises:
        PackageNotFoundError: If no ``vllm`` distribution metadata can be
            found. A warning is logged before the error propagates.
    """
    import importlib.metadata as metadata

    try:
        installed_version = metadata.version("vllm")
    except metadata.PackageNotFoundError as e:
        logger.warning(
            "The vLLM package was not found, so its version could not be "
            "inspected. This may cause platform detection to fail."
        )
        raise e
    else:
        return substr in installed_version


36
def tpu_platform_plugin() -> str | None:
    """Probe for TPU support and return the matching platform class path.

    Returns:
        The qualified name of the TPU platform class to use, or ``None``
        when neither the Pathways setting nor an importable ``libtpu`` is
        found.
    """
    logger.debug("Checking if TPU platform is available.")

    # The Pathways proxy setting is checked first and wins over libtpu.
    if envs.VLLM_TPU_USING_PATHWAYS:
        logger.debug("Confirmed TPU platform is available via Pathways proxy.")
        return "tpu_inference.platforms.tpu_platform.TpuPlatform"

    # libtpu can in principle be installed on a host without TPUs, but that
    # is rare enough that an importable libtpu is treated as proof that the
    # machine has TPUs.
    try:
        import libtpu  # noqa: F401
    except Exception as e:
        logger.debug("TPU platform is not available because: %s", str(e))
        return None

    logger.debug("Confirmed TPU platform is available.")
    return "vllm.platforms.tpu.TpuPlatform"
58
59


60
def cuda_platform_plugin() -> str | None:
    """Probe for an NVIDIA CUDA device and return the CUDA platform class path.

    Detection goes through NVML: CUDA is selected when NVML reports at least
    one device and the installed vLLM version string does not contain "cpu".
    If the probe fails with an NVML-related error, the host is still treated
    as CUDA-capable when Jetson marker files are present.

    Returns:
        ``"vllm.platforms.cuda.CudaPlatform"`` when CUDA is detected,
        otherwise ``None``.

    Raises:
        Exception: Any error raised during the probe whose class name does
            not contain "nvml" (case-insensitive) is re-raised unchanged.
    """
    is_cuda = False
    logger.debug("Checking if CUDA platform is available.")
    try:
        from vllm.utils.import_utils import import_pynvml

        pynvml = import_pynvml()
        pynvml.nvmlInit()
        try:
            # NOTE: Edge case: vllm cpu build on a GPU machine.
            # Third-party pynvml can be imported in cpu build,
            # we need to check if vllm is built with cpu too.
            # Otherwise, vllm will always activate cuda plugin
            # on a GPU machine, even if in a cpu build.
            #
            # Each fact is queried exactly once and reused for both the
            # decision and the diagnostics below.
            has_gpu = pynvml.nvmlDeviceGetCount() > 0
            is_cpu_build = vllm_version_matches_substr("cpu")
            is_cuda = has_gpu and not is_cpu_build

            if not has_gpu:
                logger.debug("CUDA platform is not available because no GPU is found.")
            if is_cpu_build:
                logger.debug(
                    "CUDA platform is not available because vLLM is built with CPU."
                )
            if is_cuda:
                logger.debug("Confirmed CUDA platform is available.")
        finally:
            # Always pair a successful nvmlInit with nvmlShutdown.
            pynvml.nvmlShutdown()
    except Exception as e:
        logger.debug("Exception happens when checking CUDA platform: %s", str(e))
        if "nvml" not in e.__class__.__name__.lower():
            # If the error is not related to NVML, re-raise it with its
            # original traceback.
            raise

        # CUDA is supported on Jetson, but NVML may not be.
        on_jetson = os.path.isfile("/etc/nv_tegra_release") or os.path.exists(
            "/sys/class/tegra-firmware"
        )
        if on_jetson:
            logger.debug("Confirmed CUDA platform is available on Jetson.")
            is_cuda = True
        else:
            logger.debug("CUDA platform is not available because: %s", str(e))

    return "vllm.platforms.cuda.CudaPlatform" if is_cuda else None


111
def rocm_platform_plugin() -> str | None:
    """Probe for an AMD GPU through amdsmi and return the ROCm platform path.

    Returns:
        ``"vllm.platforms.rocm.RocmPlatform"`` when amdsmi reports at least
        one processor handle, otherwise ``None``. Any ``Exception`` raised
        while probing is logged at debug level instead of propagating.
    """
    logger.debug("Checking if ROCm platform is available.")
    detected = False
    try:
        import amdsmi

        amdsmi.amdsmi_init()
        try:
            handles = amdsmi.amdsmi_get_processor_handles()
            detected = len(handles) > 0
            logger.debug(
                "Confirmed ROCm platform is available."
                if detected
                else "ROCm platform is not available because no GPU is found."
            )
        finally:
            # Always pair a successful init with a shutdown.
            amdsmi.amdsmi_shut_down()
    except Exception as e:
        logger.debug("ROCm platform is not available because: %s", str(e))

    if detected:
        return "vllm.platforms.rocm.RocmPlatform"
    return None


132
def xpu_platform_plugin() -> str | None:
    """Probe for an XPU device via torch and return the XPU platform path.

    Side effect: when ``supports_xccl()`` is true, ``XPUPlatform.dist_backend``
    is set to ``"xccl"`` before the device check runs.

    Returns:
        ``"vllm.platforms.xpu.XPUPlatform"`` when ``torch.xpu`` exists and
        reports an available device, otherwise ``None``. Any ``Exception``
        raised while probing is logged at debug level instead of propagating.
    """
    logger.debug("Checking if XPU platform is available.")
    found_xpu = False
    try:
        import torch

        if supports_xccl():
            from vllm.platforms.xpu import XPUPlatform

            XPUPlatform.dist_backend = "xccl"
            logger.debug("Confirmed %s backend is available.", XPUPlatform.dist_backend)

        if hasattr(torch, "xpu") and torch.xpu.is_available():
            found_xpu = True
            logger.debug("Confirmed XPU platform is available.")
    except Exception as e:
        logger.debug("XPU platform is not available because: %s", str(e))

    if not found_xpu:
        return None
    return "vllm.platforms.xpu.XPUPlatform"


154
155
156
157
158
159
160
161
162
def _is_amd_zen_cpu(cpuinfo_path: str = "/proc/cpuinfo") -> bool:
    """Detect an AMD CPU with AVX-512 by scanning a cpuinfo file.

    Args:
        cpuinfo_path: File to inspect. Defaults to ``/proc/cpuinfo``.

    Returns:
        ``True`` when the file can be read and its text contains both
        "AuthenticAMD" and "avx512"; ``False`` otherwise, including when the
        file is missing or cannot be read.
    """
    # Open directly rather than checking for existence first: this avoids a
    # check-then-use race and also covers paths that exist but are unreadable
    # (permissions, a directory, ...), which must not abort detection.
    try:
        with open(cpuinfo_path, encoding="utf-8", errors="replace") as f:
            cpuinfo = f.read()
    except OSError:
        return False
    return "AuthenticAMD" in cpuinfo and "avx512" in cpuinfo


163
def cpu_platform_plugin() -> str | None:
    """Decide whether the CPU platform applies and which CPU class to use.

    The CPU platform is selected when the installed vLLM version string
    contains "cpu" or, failing that, when ``sys.platform`` starts with
    "darwin". An ``Exception`` during that check is logged at debug level and
    treated as "not CPU".

    Returns:
        ``None`` when the CPU platform does not apply. Otherwise
        ``"vllm.platforms.zen_cpu.ZenCpuPlatform"`` if ``_is_amd_zen_cpu()``
        is true and ``zentorch`` can be imported, else
        ``"vllm.platforms.cpu.CpuPlatform"``.
    """
    logger.debug("Checking if CPU platform is available.")
    use_cpu = False
    try:
        use_cpu = vllm_version_matches_substr("cpu")
        if use_cpu:
            logger.debug(
                "Confirmed CPU platform is available because vLLM is built with CPU."
            )
        else:
            import sys

            use_cpu = sys.platform.startswith("darwin")
            if use_cpu:
                logger.debug(
                    "Confirmed CPU platform is available because the machine is MacOS."
                )
    except Exception as e:
        logger.debug("CPU platform is not available because: %s", str(e))

    if not use_cpu:
        return None

    generic_cpu = "vllm.platforms.cpu.CpuPlatform"
    if not _is_amd_zen_cpu():
        return generic_cpu

    # Zen-specific platform is only usable when zentorch is importable.
    try:
        import zentorch  # noqa: F401
    except ImportError:
        logger.debug(
            "AMD Zen CPU detected but zentorch not installed, "
            "falling back to CpuPlatform."
        )
        return generic_cpu

    logger.debug(
        "AMD Zen CPU detected with zentorch installed, using ZenCpuPlatform."
    )
    return "vllm.platforms.zen_cpu.ZenCpuPlatform"
202
203
204


# Built-in platform detectors, keyed by platform name. Each value is a
# zero-argument function that returns the qualified name of the platform
# class to use, or None when that platform is not detected on this host.
builtin_platform_plugins = {
    "tpu": tpu_platform_plugin,
    "cuda": cuda_platform_plugin,
    "rocm": rocm_platform_plugin,
    "xpu": xpu_platform_plugin,
    "cpu": cpu_platform_plugin,
}


def _probe_platform_plugins(plugins) -> dict[str, str]:
    """Run each detection function once and collect the ones that activate.

    Args:
        plugins: Mapping of plugin name to a zero-argument detection
            function returning a platform class qualname or ``None``.

    Returns:
        Mapping of plugin name to the qualname it returned, in probing
        order, for every plugin that returned a non-``None`` value.
    """
    activated: dict[str, str] = {}
    for name, func in plugins.items():
        if not callable(func):
            logger.debug("Platform plugin %s is not callable; skipping it.", name)
            continue
        try:
            qualname = func()
        except Exception:
            # Detection is best-effort: one failing probe must not prevent
            # the others from running, but the failure should be visible.
            logger.debug(
                "Platform plugin %s raised during detection; skipping it.",
                name,
                exc_info=True,
            )
            continue
        if qualname is not None:
            activated[name] = qualname
    return activated


def resolve_current_platform_cls_qualname() -> str:
    """Pick the platform class to use and return its qualified name.

    Built-in detectors are probed first, then the out-of-tree plugins loaded
    from ``PLATFORM_PLUGINS_GROUP``. Each detector is called exactly once and
    its result is reused. An activated out-of-tree plugin takes precedence
    over any built-in platform.

    Returns:
        The qualname reported by the single activated plugin, or
        ``"vllm.platforms.interface.UnspecifiedPlatform"`` when nothing is
        detected.

    Raises:
        RuntimeError: If two or more out-of-tree plugins activate, or if no
            out-of-tree plugin activates and two or more built-in ones do.
    """
    platform_plugins = load_plugins_by_group(PLATFORM_PLUGINS_GROUP)

    # Probe the two groups separately so that an out-of-tree plugin sharing
    # a name with a built-in one cannot be confused with it.
    activated_builtin = _probe_platform_plugins(builtin_platform_plugins)
    activated_oot = _probe_platform_plugins(platform_plugins)

    if len(activated_oot) >= 2:
        raise RuntimeError(
            "Only one platform plugin can be activated, but got: "
            f"{list(activated_oot)}"
        )
    if len(activated_oot) == 1:
        ((name, platform_cls_qualname),) = activated_oot.items()
        logger.info("Platform plugin %s is activated", name)
        return platform_cls_qualname

    if len(activated_builtin) >= 2:
        raise RuntimeError(
            "Only one platform plugin can be activated, but got: "
            f"{list(activated_builtin)}"
        )
    if len(activated_builtin) == 1:
        ((name, platform_cls_qualname),) = activated_builtin.items()
        logger.debug("Automatically detected platform %s.", name)
        return platform_cls_qualname

    logger.debug("No platform detected, vLLM is running on UnspecifiedPlatform")
    return "vllm.platforms.interface.UnspecifiedPlatform"


# Cached platform instance. Stays None until `current_platform` is first
# resolved by the module-level __getattr__ or assigned through __setattr__.
_current_platform = None
257
# Formatted call stack captured when `current_platform` was first
# initialized by __getattr__; empty until then.
_init_trace: str = ""
258
259
260
261
262
263

if TYPE_CHECKING:
    # Declared for static type checkers only; at runtime the attribute is
    # produced lazily by the module-level __getattr__.
    current_platform: Platform


def __getattr__(name: str):
    """Serve module attributes lazily, creating ``current_platform`` on demand.

    Args:
        name: Attribute being looked up on this module.

    Returns:
        For ``"current_platform"``, the cached platform instance (created on
        first access). For any other name present in the module globals, that
        global's value.

    Raises:
        AttributeError: If ``name`` is neither ``"current_platform"`` nor a
            module global.
    """
    global _current_platform, _init_trace

    if name != "current_platform":
        module_vars = globals()
        if name in module_vars:
            return module_vars[name]
        raise AttributeError(f"No attribute named '{name}' exists in {__name__}.")

    # `current_platform` is initialized lazily, for two reasons:
    # 1. Out-of-tree platform plugins do `from vllm.platforms import Platform`
    #    in order to subclass `Platform`, so `current_platform` cannot be
    #    resolved while `vllm.platforms` itself is still being imported.
    # 2. With out-of-tree plugins, a plain `import vllm` may cause internal
    #    vllm code to touch `current_platform` during import; resolution must
    #    happen only after the plugins are loaded (there are tests for this,
    #    and violating it shows up as test failures).
    if _current_platform is None:
        qualname = resolve_current_platform_cls_qualname()
        platform_cls = resolve_obj_by_qualname(qualname)
        _current_platform = platform_cls()
        # Remember where the first initialization was triggered from.
        _init_trace = "".join(traceback.format_stack())
    return _current_platform
287
288


289
290
291
292
293
294
295
296
297
298
def __setattr__(name: str, value):
    """Assign a module attribute, routing ``current_platform`` to its cache.

    NOTE(review): PEP 562 only defines module-level ``__getattr__`` and
    ``__dir__``; the interpreter does not dispatch ``module.attr = value``
    to a module-level ``__setattr__``, so this presumably runs only when
    called explicitly. Confirm against callers.

    Args:
        name: Attribute to assign.
        value: New value.

    Raises:
        AttributeError: If ``name`` is neither ``"current_platform"`` nor an
            existing module global.
    """
    global _current_platform

    if name == "current_platform":
        _current_platform = value
        return

    module_vars = globals()
    if name not in module_vars:
        raise AttributeError(f"No attribute named '{name}' exists in {__name__}.")
    module_vars[name] = value


299
300
301
302
303
304
305
306
# Names exported by a star-import of this module. `current_platform` has no
# runtime global here (it is only declared under TYPE_CHECKING), so it is
# supplied by the module-level __getattr__ when accessed.
__all__ = [
    "Platform",
    "PlatformEnum",
    "current_platform",
    "CpuArchEnum",
    "_init_trace",
    "_is_amd_zen_cpu",
]