xpu_executor.py 1.24 KB
Newer Older
1
from typing import List, Optional, Union
2
3
4
5

from vllm.executor.executor_base import ExecutorAsyncBase
from vllm.executor.gpu_executor import GPUExecutor
from vllm.logger import init_logger
6
7
from vllm.model_executor.layers.sampler import SamplerOutput
from vllm.sequence import ExecuteModelRequest, PoolerOutput
8
9
10
11
12
13
14
from vllm.utils import make_async

logger = init_logger(__name__)


class XPUExecutor(GPUExecutor):
    """Executor for the ``"xpu"`` device type, built on ``GPUExecutor``.

    Initialization validates the configuration and then defers to
    ``GPUExecutor._init_executor``; model execution is forwarded to
    ``self.driver_worker``.
    """

    # NOTE(review): presumably signals that this executor does not rely on
    # Ray -- confirm against the code that reads this flag.
    uses_ray: bool = False

    def _init_executor(self) -> None:
        """Check the XPU-specific preconditions, then run the GPU setup.

        Raises:
            AssertionError: If ``self.device_config.device_type`` is not
                ``"xpu"`` or ``self.speculative_config`` is set (asserts are
                skipped when Python runs with ``-O``).
        """
        assert self.device_config.device_type == "xpu"
        assert self.speculative_config is None, (
            "Speculative decoding not yet supported for XPU backend")

        # Called explicitly on GPUExecutor (rather than via super()) exactly
        # as the original code does.
        GPUExecutor._init_executor(self)

    def execute_model(
        self, execute_model_req: ExecuteModelRequest
    ) -> Optional[List[Union[SamplerOutput, PoolerOutput]]]:
        """Forward the request to the driver worker and return its output.

        Args:
            execute_model_req: The request passed straight through to
                ``self.driver_worker.execute_model``.

        Returns:
            Whatever ``self.driver_worker.execute_model`` returns, unchanged.
        """
        return self.driver_worker.execute_model(execute_model_req)


class XPUExecutorAsync(XPUExecutor, ExecutorAsyncBase):
    """Asynchronous counterpart of ``XPUExecutor``."""

    async def execute_model_async(
        self,
        execute_model_req: ExecuteModelRequest,
    ) -> List[SamplerOutput]:
        """Await the driver worker's ``execute_model`` for the given request.

        Args:
            execute_model_req: The request, passed by keyword to the wrapped
                ``self.driver_worker.execute_model``.

        Returns:
            The awaited result of the wrapped call, unchanged.
        """
        # Wrap the worker call with make_async so that it can be awaited.
        awaitable_execute = make_async(self.driver_worker.execute_model)
        return await awaitable_execute(execute_model_req=execute_model_req)