__init__.py 2.04 KB
Newer Older
1
2
import enum
from dataclasses import dataclass
3
from typing import List, Optional, Union
4
5
6
7

import msgspec

from vllm.lora.request import LoRARequest
8
from vllm.multimodal import MultiModalKwargs, MultiModalPlaceholderDict
9
from vllm.sampling_params import SamplingParams
10
11


12
13
@dataclass
class EngineCoreRequest:
    """Fields describing a single request as declared for the engine core.

    Every field is required (none has a default), so the generated
    ``__init__`` takes them positionally in the order declared below.

    NOTE(review): presumably this is the payload that accompanies
    ``EngineCoreRequestType.ADD`` -- confirm against the caller.
    """

    # NOTE: prompt and prompt_token_ids should be DecoderOnlyInput,
    # but this object is currently not playing well with msgspec
    # due to circular imports and typing we have in data.py

    request_id: str
    # NOTE(Nick): I don't think we need to pass prompt here since it should
    # always be tokenized?
    prompt: Optional[str]
    prompt_token_ids: List[int]
    # Multimodal fields; each may be None.
    # NOTE(review): presumably None for text-only requests -- confirm.
    mm_inputs: Optional[List[Optional[MultiModalKwargs]]]
    mm_hashes: Optional[List[str]]
    mm_placeholders: Optional[MultiModalPlaceholderDict]
    sampling_params: SamplingParams
    eos_token_id: Optional[int]
    arrival_time: float
    lora_request: Optional[LoRARequest]


33
34
35
36
37
class EngineCoreOutput(
        msgspec.Struct,
        array_like=True,  # type: ignore[call-arg]
        omit_defaults=True,  # type: ignore[call-arg]
        gc=False):  # type: ignore[call-arg]
    """Output record for one request, declared as a ``msgspec.Struct``.

    Struct options used here (see the msgspec documentation):
      * ``array_like=True``   -- instances are encoded as arrays rather than
        maps, so the field order below is part of the wire format.
      * ``omit_defaults=True`` -- fields equal to their default are left out
        when encoding.
      * ``gc=False``          -- instances are not tracked by the garbage
        collector, so they must never take part in a reference cycle.
    """

    request_id: str
    new_token_ids: List[int]
    finished: bool
    # Both reasons default to None.
    # NOTE(review): presumably only set once ``finished`` is True -- confirm.
    finish_reason: Optional[str] = None
    stop_reason: Union[int, str, None] = None


46
47
48
49
50
class EngineCoreOutputs(
        msgspec.Struct,
        array_like=True,  # type: ignore[call-arg]
        omit_defaults=True,  # type: ignore[call-arg]
        gc=False):  # type: ignore[call-arg]
    """Container struct holding a list of ``EngineCoreOutput`` records.

    Uses the same ``msgspec.Struct`` options as ``EngineCoreOutput``:
    array-style encoding, default-valued fields omitted, and no garbage
    collector tracking (so instances must not form reference cycles).
    """

    # NOTE(Nick): We could consider ways to make this more compact,
    # e.g. columnwise layout and using an int enum for finish/stop reason

    # [num_reqs]
    outputs: List[EngineCoreOutput]


59
60
61
62
63
@dataclass
class EngineCoreProfile:
    """Profiling message that carries a single boolean flag."""

    # NOTE(review): presumably True starts profiling and False stops it --
    # confirm against the code that consumes this message.
    is_start: bool


64
65
66
67
68
69
70
class EngineCoreRequestType(enum.Enum):
    """
    Request types defined as hex byte strings, so it can be sent over sockets
    without separate encoding step.

    Each member's ``value`` is a one-byte ``bytes`` object, so a received
    byte can be mapped back with ``EngineCoreRequestType(raw_byte)``.
    """
    ADD = b'\x00'
    ABORT = b'\x01'
    PROFILE = b'\x02'
72
73
74


# Type alias covering the three payload shapes declared for engine-core
# requests: an EngineCoreRequest, an EngineCoreProfile, or a list of strings.
# NOTE(review): the List[str] variant is presumably the request ids sent with
# EngineCoreRequestType.ABORT -- confirm against the caller.
EngineCoreRequestUnion = Union[EngineCoreRequest, EngineCoreProfile, List[str]]