"requirements/common.txt" did not exist on "8db1b9d0a178c8c04f4e14d994a50e3b88e0b1ae"
__init__.py 2.75 KB
Newer Older
1
2
# SPDX-License-Identifier: Apache-2.0

3
4
import enum
from dataclasses import dataclass
5
from typing import TYPE_CHECKING, List, Optional, Union
6
7
8

import msgspec

9
10
from vllm.v1.metrics.stats import SchedulerStats

11
12
13
14
15
if TYPE_CHECKING:
    from vllm.lora.request import LoRARequest
    from vllm.multimodal import MultiModalKwargs
    from vllm.multimodal.inputs import PlaceholderRange
    from vllm.sampling_params import SamplingParams
16
17


18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class RequestFinishedReason(enum.IntEnum):
    """
    Why a request finished: stop, length, or abort.

    Members:
        STOP: a stop string was emitted.
        LENGTH: max_tokens was consumed, or max_model_len was reached.
        ABORT: the request was aborted for another reason.

    ``str()`` of a member gives its lower-cased name ("stop", "length",
    "abort") instead of the integer value.
    """
    STOP = 0
    LENGTH = 1
    ABORT = 2

    def __str__(self) -> str:
        # Render the member by name so it reads as a label, not a number.
        label = self.name
        return label.lower()


35
36
@dataclass
class EngineCoreRequest:
    """Data describing a single request passed to the engine core.

    Every field is required (none has a default), so the declaration order
    below is also the positional-argument order of the generated
    ``__init__``. Types that are imported only under ``TYPE_CHECKING`` are
    written as string annotations so they are never evaluated at runtime.
    """

    # NOTE: prompt and prompt_token_ids should be DecoderOnlyInput,
    # but this object is currently not playing well with msgspec
    # due to circular imports and typing we have in data.py

    request_id: str
    # NOTE(ywang96): original text prompt is needed when a request is added to
    # Detokenizer, but set to None when it is added to EngineCoreClient.
    prompt: Optional[str]
    prompt_token_ids: List[int]
    mm_inputs: Optional[List[Optional["MultiModalKwargs"]]]
    mm_hashes: Optional[List[str]]
    mm_placeholders: Optional[List["PlaceholderRange"]]
    sampling_params: "SamplingParams"
    eos_token_id: Optional[int]
    arrival_time: float
    lora_request: Optional["LoRARequest"]
54
55


56
57
58
59
60
class EngineCoreOutput(
        msgspec.Struct,
        array_like=True,  # type: ignore[call-arg]
        omit_defaults=True,  # type: ignore[call-arg]
        gc=False):  # type: ignore[call-arg]
    """Output record for one request, defined as a msgspec struct.

    Struct options (see the msgspec documentation):
      * ``array_like=True``  - encode instances as arrays instead of
        key/value maps.
      * ``omit_defaults=True`` - leave out fields equal to their default
        when encoding.
      * ``gc=False`` - instances are not tracked by the garbage collector.

    ``finish_reason`` and ``stop_reason`` both default to ``None``.
    """

    request_id: str
    new_token_ids: List[int]
    finished: bool
    finish_reason: Optional[RequestFinishedReason] = None
    # NOTE(review): presumably the stop token id (int) or stop string (str)
    # that ended generation - confirm against the producer of this field.
    stop_reason: Union[int, str, None] = None


69
70
71
72
73
class EngineCoreOutputs(
        msgspec.Struct,
        array_like=True,  # type: ignore[call-arg]
        omit_defaults=True,  # type: ignore[call-arg]
        gc=False):  # type: ignore[call-arg]
    """Container pairing a list of ``EngineCoreOutput`` with scheduler stats.

    Declared with the same msgspec struct options as ``EngineCoreOutput``
    (``array_like``, ``omit_defaults``, ``gc=False``). Both fields are
    required.
    """

    # NOTE(Nick): We could consider ways to make this more compact,
    # e.g. columnwise layout

    # [num_reqs]
    outputs: List[EngineCoreOutput]
    scheduler_stats: SchedulerStats
81
82


83
84
85
86
87
@dataclass
class EngineCoreProfile:
    """Request object carrying a single boolean flag, ``is_start``."""

    # NOTE(review): presumably True starts profiling and False stops it -
    # confirm against the code that handles this request.
    is_start: bool


88
89
90
91
92
@dataclass
class EngineCoreResetPrefixCache:
    """Field-less marker request; it carries no data of its own."""


93
94
95
96
97
98
99
class EngineCoreRequestType(enum.Enum):
    """
    Request types defined as single-byte strings, so they can be sent over
    sockets without a separate encoding step.
    """
    ADD = b'\x00'
    ABORT = b'\x01'
    PROFILE = b'\x02'
    RESET_PREFIX_CACHE = b'\x03'
102
103


104
105
# Type alias covering the three request dataclasses plus a plain list of
# strings.
# NOTE(review): the List[str] member is presumably a list of request ids
# (e.g. for an abort) - confirm against the callers.
EngineCoreRequestUnion = Union[EngineCoreRequest, EngineCoreProfile,
                               EngineCoreResetPrefixCache, List[str]]