cpu_worker.py 4.22 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
import os
4
import sys
5
from typing import Any
6
7
8
9
10

import torch

from vllm.config import VllmConfig
from vllm.logger import init_logger
11
from vllm.platforms import CpuArchEnum, current_platform
12
from vllm.profiler.wrapper import TorchProfilerWrapper
13
from vllm.utils.torch_utils import set_random_seed
14
from vllm.v1.worker.cpu_model_runner import CPUModelRunner
15
from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment
16
17
18
19
20

logger = init_logger(__name__)


class CPUWorker(Worker):
    """Worker that runs the model through ``CPUModelRunner`` on the CPU device.

    Built on the ``Worker`` class imported from ``gpu_worker``; this class
    supplies the CPU-specific construction, device initialisation, memory
    sizing, warm-up, sleep/wake-up and profiling entry points.
    """

    def __init__(
        self,
        vllm_config: VllmConfig,
        local_rank: int,
        rank: int,
        distributed_init_method: str,
        is_driver_worker: bool = False,
    ) -> None:
        """Set up the base worker, then apply the CPU-specific settings.

        Args:
            vllm_config: Full vLLM configuration; also the source of
                ``profiler_config`` and ``instance_id``.
            local_rank: Forwarded to the base class.
            rank: Forwarded to the base class.
            distributed_init_method: Forwarded to the base class.
            is_driver_worker: Forwarded to the base class.
        """
        super().__init__(
            vllm_config,
            local_rank,
            rank,
            distributed_init_method,
            is_driver_worker=is_driver_worker,
        )

        # Custom all-reduce is always switched off for this worker.
        self.parallel_config.disable_custom_all_reduce = True

        # The profiler stays ``None`` unless profiler_config asks for "torch".
        self.profiler: Any | None = None
        profiler_config = vllm_config.profiler_config
        if profiler_config.profiler != "torch":
            return

        worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
        self.profiler = TorchProfilerWrapper(
            profiler_config,
            worker_name=worker_name,
            local_rank=self.local_rank,
            activities=["CPU"],
        )

    def init_device(self) -> None:
        """Prepare the process for CPU execution and build the model runner.

        In order: warn about libraries missing from ``LD_PRELOAD`` (Linux
        only), replace ``torch.set_num_threads`` with a warning-only stub,
        export ``VLLM_DIST_IDENT``, initialise the distributed environment,
        seed the RNG and create ``self.model_runner``.
        """

        def _warn_if_not_preloaded(lib_name: str) -> None:
            # Plain substring test against the raw LD_PRELOAD value.
            if lib_name in os.environ.get("LD_PRELOAD", ""):
                return
            logger.warning(
                "%s is not found in LD_PRELOAD. "
                "For best performance, please follow the section "
                "`set LD_PRELOAD` in "
                "https://docs.vllm.ai/en/latest/getting_started/installation/cpu/ "
                "to setup required pre-loaded libraries.",
                lib_name,
            )

        def _ignore_set_num_threads(x: int) -> None:
            # Stand-in for torch.set_num_threads: only logs, never changes
            # the thread count.
            logger.warning(
                "CPU backend doesn't allow to use "
                "`torch.set_num_threads` after the thread binding, skip it."
            )

        # Preload checks are only performed on Linux; libiomp is additionally
        # expected on x86.
        if sys.platform.startswith("linux"):
            _warn_if_not_preloaded("libtcmalloc")
            on_x86 = current_platform.get_cpu_architecture() == CpuArchEnum.X86
            if on_x86:
                _warn_if_not_preloaded("libiomp")

        # From here on, any caller of torch.set_num_threads in this
        # interpreter gets the warning-only stub.
        torch.set_num_threads = _ignore_set_num_threads

        # Unique identifier for creating the allreduce shared memory: the
        # text after the last ":" of the init method (the whole string when
        # it contains no ":").
        dist_ident = self.distributed_init_method.rsplit(":", 1)[-1]
        os.environ["VLLM_DIST_IDENT"] = dist_ident

        # Bring up the distributed environment with the platform's backend.
        init_worker_distributed_environment(
            self.vllm_config,
            self.rank,
            self.distributed_init_method,
            self.local_rank,
            current_platform.dist_backend,
        )

        # Seed the RNG before the model runner is created.
        set_random_seed(self.model_config.seed)

        cpu_device = torch.device("cpu")
        self.model_runner: CPUModelRunner = CPUModelRunner(
            self.vllm_config, cpu_device
        )

    def sleep(self, level: int = 1) -> None:
        """Log that sleep mode is unsupported on CPU; ``level`` is ignored."""
        logger.warning("sleep mode is not supported on CPU, ignore it.")

    def wake_up(self, tags: list[str] | None = None) -> None:
        """Log that sleep mode is unsupported on CPU; ``tags`` is ignored."""
        logger.warning("sleep mode is not supported on CPU, ignore it.")

    def determine_available_memory(self) -> int:
        """Return ``cache_config.cpu_kvcache_space_bytes``, or 0 if it is falsy."""
        configured_bytes = self.cache_config.cpu_kvcache_space_bytes
        return configured_bytes if configured_bytes else 0

    def compile_or_warm_up_model(self) -> float:
        """Warm up the model and return ``compilation_config.compilation_time``."""
        # Re-seed first so that the random state is not affected by the model
        # initialization and profiling that happened earlier.
        set_random_seed(self.model_config.seed)
        self.model_runner.warming_up_model()
        return self.compilation_config.compilation_time

    def profile(
        self, is_start: bool = True, profile_prefix: str | None = None
    ) -> None:
        """Start or stop the profiler created in ``__init__``.

        Args:
            is_start: ``True`` calls ``start()`` on the profiler, ``False``
                calls ``stop()``.
            profile_prefix: Accepted but not used by this implementation.

        Raises:
            RuntimeError: If no profiler was configured.
        """
        profiler = self.profiler
        if profiler is None:
            raise RuntimeError("Profiler is not enabled.")

        toggle = profiler.start if is_start else profiler.stop
        toggle()