from abc import ABC, abstractmethod
from typing import Dict, List, Optional

from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
                         ParallelConfig, SchedulerConfig, VisionLanguageConfig)
from vllm.lora.request import LoRARequest
from vllm.sequence import SamplerOutput, SequenceGroupMetadata


class ExecutorBase(ABC):
    """Abstract interface that every executor implements.

    An executor runs the model on one particular kind of device (CPU, GPU,
    Neuron, ...). It may also be a distributed executor that drives the
    model across several devices.

    Every method here is abstract: concrete executors must override all of
    them, and the bodies in this class only raise ``NotImplementedError``.
    """

    @abstractmethod
    def __init__(
        self,
        model_config: ModelConfig,
        cache_config: CacheConfig,
        parallel_config: ParallelConfig,
        scheduler_config: SchedulerConfig,
        device_config: DeviceConfig,
        lora_config: Optional[LoRAConfig],
        vision_language_config: Optional[VisionLanguageConfig],
    ) -> None:
        """Build the executor from the engine configuration objects.

        ``lora_config`` and ``vision_language_config`` are optional and may
        be ``None``; the remaining configs are required.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    @abstractmethod
    def execute_model(
        self,
        seq_group_metadata_list: List[SequenceGroupMetadata],
        blocks_to_swap_in: Dict[int, int],
        blocks_to_swap_out: Dict[int, int],
        blocks_to_copy: Dict[int, List[int]],
    ) -> SamplerOutput:
        """Run a single model step over the supplied sequence groups.

        The three ``blocks_to_*`` arguments are int-keyed mappings passed in
        by the caller.
        NOTE(review): presumably these describe cache-block swap/copy
        operations to perform for this step -- confirm with implementations.

        Returns:
            The ``SamplerOutput`` produced by the step.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    @abstractmethod
    def add_lora(self, lora_request: LoRARequest) -> bool:
        """Add the LoRA described by ``lora_request``.

        NOTE(review): the bool result presumably signals whether the LoRA
        was added -- confirm with implementations.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    @abstractmethod
    def remove_lora(self, lora_id: int) -> bool:
        """Remove the LoRA identified by ``lora_id``.

        NOTE(review): the bool result presumably signals whether a LoRA was
        removed -- confirm with implementations.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    @abstractmethod
    def list_loras(self) -> List[int]:
        """List LoRAs as integers.

        NOTE(review): presumably the ids of the currently registered
        LoRAs -- confirm with implementations.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    @abstractmethod
    def check_health(self) -> None:
        """Verify that the executor is healthy.

        Implementations should raise an exception when it is not.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError


class ExecutorAsyncBase(ExecutorBase):
    """Executor interface extended with coroutine entry points.

    Adds abstract ``async`` counterparts for model execution and health
    checking on top of everything required by ``ExecutorBase``.
    """

    @abstractmethod
    async def execute_model_async(
        self,
        seq_group_metadata_list: List[SequenceGroupMetadata],
        blocks_to_swap_in: Dict[int, int],
        blocks_to_swap_out: Dict[int, int],
        blocks_to_copy: Dict[int, List[int]],
    ) -> SamplerOutput:
        """Run a single model step over the supplied sequence groups.

        Coroutine form taking the same arguments as ``execute_model``.

        Returns:
            The ``SamplerOutput`` produced by the step.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    @abstractmethod
    async def check_health_async(self) -> None:
        """Verify that the executor is healthy.

        Implementations should raise an exception when it is not.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError