openvino.py 1.63 KB
Newer Older
1
2
from typing import TYPE_CHECKING

3
4
5
import torch

import vllm.envs as envs
6
from vllm.logger import init_logger
7

8
from .interface import Platform, PlatformEnum, _Backend
9

10
11
12
13
14
# VllmConfig is imported only while static type checkers analyse this module.
# At runtime the name is bound to None so that annotations referring to it
# (e.g. in check_and_update_config) still evaluate without importing
# vllm.config.
if TYPE_CHECKING:
    from vllm.config import VllmConfig
else:
    VllmConfig = None

15
16
logger = init_logger(__name__)

17
18
19

class OpenVinoPlatform(Platform):
    """Platform description for running vLLM on the OpenVINO backend."""

    _enum = PlatformEnum.OPENVINO
    device_type: str = "openvino"
    dispatch_key: str = "CPU"

    @classmethod
    def get_default_attn_backend(cls, selected_backend: _Backend) -> _Backend:
        """Return the attention backend to use, which is always OPENVINO.

        If the caller asked for any other backend, an info message is logged
        and the request is overridden.
        """
        if selected_backend != _Backend.OPENVINO:
            logger.info("Cannot use %s backend on OpenVINO.", selected_backend)
        return _Backend.OPENVINO

    @classmethod
    def get_device_name(cls, device_id: int = 0) -> str:
        """Return the device name; ``device_id`` is accepted but ignored."""
        return "openvino"

    @classmethod
    def inference_mode(cls):
        """Return a ``torch.inference_mode`` context manager (enabled)."""
        return torch.inference_mode(mode=True)

    @classmethod
    def is_openvino_cpu(cls) -> bool:
        """Return True if ``envs.VLLM_OPENVINO_DEVICE`` contains "CPU"."""
        return "CPU" in envs.VLLM_OPENVINO_DEVICE

    @classmethod
    def is_openvino_gpu(cls) -> bool:
        """Return True if ``envs.VLLM_OPENVINO_DEVICE`` contains "GPU"."""
        return "GPU" in envs.VLLM_OPENVINO_DEVICE

    @classmethod
    def is_pin_memory_available(cls) -> bool:
        """Report that pinned memory is unavailable (always False).

        A warning is logged on every call.
        """
        logger.warning("Pin memory is not supported on OpenVINO.")
        return False

    @classmethod
    def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
        """Validate and adjust ``vllm_config`` in place for OpenVINO.

        Requires ``parallel_config.world_size == 1`` and, when the worker
        class is left as ``"auto"``, selects the OpenVINO worker.

        Raises:
            AssertionError: if ``world_size`` is not 1.
        """
        parallel_config = vllm_config.parallel_config
        # NOTE: kept as an assert so the exception type seen by callers is
        # unchanged; be aware the check is skipped under ``python -O``.
        assert (
            parallel_config.world_size == 1
        ), "OpenVINOExecutor only supports single CPU socket currently."

        # Only fill in the worker class when the user has not chosen one.
        if parallel_config.worker_cls == "auto":
            parallel_config.worker_cls = \
                "vllm.worker.openvino_worker.OpenVINOWorker"