cpu_worker.py 4.41 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
import os
4
import sys
5
from typing import Any
6
7
8
9
10

import torch

from vllm.config import VllmConfig
from vllm.logger import init_logger
11
from vllm.platforms import CpuArchEnum, current_platform
12
from vllm.profiler.wrapper import TorchProfilerWrapper
13
from vllm.utils.torch_utils import set_random_seed
14
from vllm.v1.worker.cpu_model_runner import CPUModelRunner
15
from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment
16
from vllm.v1.worker.worker_base import CompilationTimes
17
18
19
20
21

# Module-level logger; CPUWorker uses it for its warning messages.
logger = init_logger(__name__)


class CPUWorker(Worker):
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
    def __init__(
        self,
        vllm_config: VllmConfig,
        local_rank: int,
        rank: int,
        distributed_init_method: str,
        is_driver_worker: bool = False,
    ):
        """Build the base worker, then apply the CPU-specific setup.

        All arguments are forwarded unchanged to the parent ``Worker``
        constructor. Afterwards custom all-reduce is disabled on the
        parallel config and, when ``vllm_config.profiler_config.profiler``
        equals ``"torch"``, a ``TorchProfilerWrapper`` limited to CPU
        activities is created; otherwise ``self.profiler`` stays ``None``.
        """
        super().__init__(
            vllm_config,
            local_rank,
            rank,
            distributed_init_method,
            is_driver_worker=is_driver_worker,
        )

        self.parallel_config.disable_custom_all_reduce = True

        # Torch profiler: remains None unless profiler_config selects "torch".
        self.profiler: Any | None = None
        profiler_cfg = vllm_config.profiler_config
        if profiler_cfg.profiler != "torch":
            return

        self.profiler = TorchProfilerWrapper(
            profiler_cfg,
            worker_name=f"{vllm_config.instance_id}-rank-{self.rank}",
            local_rank=self.local_rank,
            activities=["CPU"],
        )

52
    def init_device(self):
        """Prepare this process for CPU inference.

        Steps, in order:
          1. On Linux, warn when expected libraries are absent from
             ``LD_PRELOAD`` (``libtcmalloc`` always, ``libiomp`` on x86).
          2. Replace ``torch.set_num_threads`` with a stub that only logs a
             warning.
          3. Export ``VLLM_DIST_IDENT`` and initialize the distributed
             environment.
          4. Seed the RNGs and construct the ``CPUModelRunner``.
        """

        def _warn_if_not_preloaded(lib_name: str):
            # Substring match against the raw LD_PRELOAD value.
            if lib_name in os.environ.get("LD_PRELOAD", ""):
                return
            logger.warning(
                "%s is not found in LD_PRELOAD. "
                "For best performance, please follow the section "
                "`set LD_PRELOAD` in "
                "https://docs.vllm.ai/en/latest/getting_started/installation/cpu/ "
                "to setup required pre-loaded libraries.",
                lib_name,
            )

        # Check whether critical libraries are loaded (Linux only).
        if sys.platform.startswith("linux"):
            _warn_if_not_preloaded("libtcmalloc")
            on_x86 = current_platform.get_cpu_architecture() == CpuArchEnum.X86
            if on_x86:
                _warn_if_not_preloaded("libiomp")

        def skip_set_num_threads(x: int):
            # Stand-in for torch.set_num_threads: ignores the request and
            # only reports that it was skipped.
            logger.warning(
                "CPU backend doesn't allow to use "
                "`torch.set_num_threads` after the thread binding, skip it."
            )

        torch.set_num_threads = skip_set_num_threads

        # Note: unique identifier for creating allreduce shared memory.
        # It is the text after the last ":" of the init method (or the whole
        # string when it contains no ":").
        dist_ident = self.distributed_init_method.rsplit(":", 1)[-1]
        os.environ["VLLM_DIST_IDENT"] = dist_ident

        # Initialize the distributed environment.
        init_worker_distributed_environment(
            self.vllm_config,
            self.rank,
            self.distributed_init_method,
            self.local_rank,
            current_platform.dist_backend,
        )

        # Set random seed.
        set_random_seed(self.model_config.seed)

        # Construct the model runner bound to the CPU device.
        self.model_runner: CPUModelRunner = CPUModelRunner(
            self.vllm_config,
            torch.device("cpu"),
        )
96
97
98
99
100

    def sleep(self, level: int = 1) -> None:
        """Do nothing except log a warning; ``level`` is ignored."""
        logger.warning("sleep mode is not supported on CPU, ignore it.")

101
    def wake_up(self, tags: list[str] | None = None) -> None:
        """Do nothing except log a warning; ``tags`` is ignored."""
        logger.warning("sleep mode is not supported on CPU, ignore it.")

    def determine_available_memory(self) -> int:
        """Return ``cache_config.cpu_kvcache_space_bytes``, or 0 when it is
        unset or otherwise falsy."""
        kv_space_bytes = self.cache_config.cpu_kvcache_space_bytes
        return kv_space_bytes if kv_space_bytes else 0
107

108
    def compile_or_warm_up_model(self) -> CompilationTimes:
        """Warm up the model and report the recorded compilation times.

        Returns:
            A ``CompilationTimes`` built from the compilation config's
            ``compilation_time`` and ``encoder_compilation_time``, read
            after the warm-up has run.
        """
        # Re-seed first so the random state does not depend on whatever
        # model initialization and profiling did beforehand.
        set_random_seed(self.model_config.seed)
        self.model_runner.warming_up_model()

        compilation_cfg = self.compilation_config
        return CompilationTimes(
            language_model=compilation_cfg.compilation_time,
            encoder=compilation_cfg.encoder_compilation_time,
        )
117

118
    def profile(self, is_start: bool = True, profile_prefix: str | None = None):
        """Start or stop the profiler created in ``__init__``.

        Args:
            is_start: Truthy to call ``start()``, falsy to call ``stop()``.
            profile_prefix: Accepted but not used by this implementation.

        Raises:
            RuntimeError: If no profiler was configured for this worker.
        """
        active_profiler = self.profiler
        if active_profiler is None:
            raise RuntimeError("Profiler is not enabled.")

        toggle = active_profiler.start if is_start else active_profiler.stop
        toggle()