io_struct.py 5.95 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
"""
Copyright 2023-2024 SGLang Team
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

Lianmin Zheng's avatar
Lianmin Zheng committed
16
17
18
19
20
"""
The definition of objects transferred between different
processes (TokenizerManager, DetokenizerManager, Controller).
"""

Lianmin Zheng's avatar
Lianmin Zheng committed
21
22
23
24
import uuid
from dataclasses import dataclass
from typing import Dict, List, Optional, Union

25
from sglang.srt.managers.controller.infer_batch import BaseFinishReason
26
from sglang.srt.sampling_params import SamplingParams
Lianmin Zheng's avatar
Lianmin Zheng committed
27
28
29
30


@dataclass
class GenerateReqInput:
    """A generation request (single prompt or a batch) before tokenization.

    Every option may be given as one scalar value or as a list with one entry
    per request.  Call ``post_init`` explicitly after construction: the method
    is named ``post_init`` (not ``__post_init__``), so the dataclass machinery
    does not run it automatically.
    """

    # The input prompt. It can be a single prompt or a batch of prompts.
    text: Optional[Union[List[str], str]] = None
    # The token ids for text; one can either specify text or input_ids.
    input_ids: Optional[Union[List[List[int]], List[int]]] = None
    # The image input. It can be a file name, a url, or base64 encoded string.
    # See also python/sglang/srt/utils.py:load_image.
    image_data: Optional[Union[List[str], str]] = None
    # The sampling_params. See descriptions below.
    sampling_params: Optional[Union[List[Dict], Dict]] = None
    # The request id.
    rid: Optional[Union[List[str], str]] = None
    # Whether to return logprobs.
    return_logprob: Optional[Union[List[bool], bool]] = None
    # The start location of the prompt for return_logprob.
    logprob_start_len: Optional[Union[List[int], int]] = None
    # The number of top logprobs to return.
    top_logprobs_num: Optional[Union[List[int], int]] = None
    # Whether to detokenize tokens in text in the returned logprobs.
    return_text_in_logprobs: bool = False
    # Whether to stream output.
    stream: bool = False

    def post_init(self):
        """Validate the request and normalize every option in place.

        Sets ``self.is_single``.  For a single request, missing options are
        replaced by scalar defaults.  Otherwise ``self.batch_size`` is set and
        every option is turned into a list with one entry per generated
        request (options that are already lists are left untouched).

        Raises:
            ValueError: if neither or both of ``text`` / ``input_ids`` are
                given, if ``rid`` is provided for a batch but is not a list,
                or if a list of ``sampling_params`` disagrees on ``"n"``.
        """
        # Exactly one of text / input_ids must be provided.
        if (self.text is None) == (self.input_ids is None):
            raise ValueError("Either text or input_ids should be provided.")

        # A dict asking for n != 1 samples always takes the batch path, even
        # when only one prompt is given.
        parallel_requested = (
            isinstance(self.sampling_params, dict)
            and self.sampling_params.get("n", 1) != 1
        )
        if parallel_requested:
            is_single = False
        elif self.text is not None:
            is_single = isinstance(self.text, str)
        else:
            is_single = isinstance(self.input_ids[0], int)
        self.is_single = is_single

        if is_single:
            if self.sampling_params is None:
                self.sampling_params = {}
            if self.rid is None:
                self.rid = uuid.uuid4().hex
            if self.return_logprob is None:
                self.return_logprob = False
            if self.logprob_start_len is None:
                self.logprob_start_len = 0
            if self.top_logprobs_num is None:
                self.top_logprobs_num = 0
            return

        parallel_sample_num = self._parallel_sample_num()

        if parallel_sample_num != 1:
            # Support batch operation: the batch may arrive either as a list
            # of prompts or as a list of token-id lists.
            if self.text is not None:
                prompts = self.text
                is_batch = isinstance(prompts, list)
            else:
                prompts = self.input_ids
                is_batch = bool(prompts) and isinstance(prompts[0], list)
            self.batch_size = len(prompts) if is_batch else 1
            # parallel sampling +1 represents the original prefill stage
            num = (parallel_sample_num + 1) * self.batch_size
        else:
            # Support select operation: one request per prompt.
            num = len(self.text) if self.text is not None else len(self.input_ids)
            self.batch_size = num

        self.image_data = self._per_request(self.image_data, None, num)

        if self.sampling_params is None:
            # Independent dicts, so mutating one request's params cannot leak
            # into the others.
            self.sampling_params = [{} for _ in range(num)]
        elif not isinstance(self.sampling_params, list):
            self.sampling_params = [self.sampling_params] * num

        if self.rid is None:
            self.rid = [uuid.uuid4().hex for _ in range(num)]
        elif not isinstance(self.rid, list):
            raise ValueError("The rid should be a list.")

        self.return_logprob = self._per_request(self.return_logprob, False, num)
        self.logprob_start_len = self._per_request(self.logprob_start_len, 0, num)
        self.top_logprobs_num = self._per_request(self.top_logprobs_num, 0, num)

    def _parallel_sample_num(self):
        """Return the requested ``"n"`` for whatever shape sampling_params has."""
        params = self.sampling_params
        if params is None:
            return 1
        if isinstance(params, dict):
            return params.get("n", 1)
        if not params:
            return 1
        # A list of per-request dicts: a single count is derived from it, so
        # all entries have to agree.
        counts = {entry.get("n", 1) for entry in params}
        if len(counts) != 1:
            raise ValueError(
                'The "n" in sampling_params should be the same for all requests.'
            )
        return counts.pop()

    @staticmethod
    def _per_request(value, default, num):
        """Expand a scalar (or missing) option into a list of ``num`` entries."""
        if value is None:
            return [default] * num
        if not isinstance(value, list):
            return [value] * num
        return value

Lianmin Zheng's avatar
Lianmin Zheng committed
130
131
132
133

@dataclass
class TokenizedGenerateReqInput:
    """A single generation request carrying both its text and its token ids.

    All fields are required and scalar per request (no list-or-scalar unions
    as in GenerateReqInput).
    """

    # The request id.
    rid: str
    # The prompt text.
    input_text: str
    # The token ids of the prompt.
    input_ids: List[int]
    # Image-related inputs.
    # NOTE(review): presumably the preprocessed image, a hash identifying it,
    # and its original size -- confirm against the code that builds this object.
    pixel_values: List[float]
    image_hash: int
    image_size: List[int]
    # Sampling parameters as a SamplingParams object (not a raw dict).
    sampling_params: SamplingParams
    # Whether to return logprobs.
    return_logprob: bool
    # The start location of the prompt for return_logprob.
    logprob_start_len: int
    # The number of top logprobs to return.
    top_logprobs_num: int
    # Whether to stream output.
    stream: bool


@dataclass
class BatchTokenIDOut:
    """Token-id level output for a batch of requests.

    Every field is a list.
    NOTE(review): the lists are presumably index-aligned, one entry per
    request in ``rids`` -- confirm against the producer.
    """

    # The request ids.
    rids: List[str]
    # NOTE(review): meaning of "vids" is not visible in this file -- confirm.
    vids: List[int]
    # NOTE(review): presumably the text already decoded for each request.
    decoded_texts: List[str]
    # NOTE(review): presumably token ids still to be decoded, together with
    # the offsets from which decoding resumes -- confirm against the consumer.
    decode_ids: List[int]
    read_offsets: List[int]
    # Detokenization options.
    skip_special_tokens: List[bool]
    spaces_between_special_tokens: List[bool]
    # Metadata dictionaries.
    meta_info: List[Dict]
    # Finish reasons (BaseFinishReason objects).
    finished_reason: List[BaseFinishReason]
Lianmin Zheng's avatar
Lianmin Zheng committed
157

Liangsheng Yin's avatar
Liangsheng Yin committed
158

Lianmin Zheng's avatar
Lianmin Zheng committed
159
160
161
@dataclass
class BatchStrOut:
    """String level output for a batch of requests.

    Every field is a list.
    NOTE(review): the lists are presumably index-aligned, one entry per
    request in ``rids`` -- confirm against the producer.
    """

    # The request ids.
    rids: List[str]
    # The output strings.
    output_strs: List[str]
    # Metadata dictionaries.
    meta_info: List[Dict]
    # Finish reasons (BaseFinishReason objects).
    finished_reason: List[BaseFinishReason]
Liangsheng Yin's avatar
Liangsheng Yin committed
165
166
167
168
169


@dataclass
class FlushCacheReq:
    """Payload-free request message.

    NOTE(review): judging by the name, it asks the receiver to flush its
    cache -- confirm against the handler.
    """
Cody Yu's avatar
Cody Yu committed
170

171

172
173
174
175
176
@dataclass
class AbortReq:
    """Request message identifying one request by its id.

    NOTE(review): judging by the name, it asks the receiver to abort that
    request -- confirm against the handler.
    """

    # The request id.
    rid: str


Cody Yu's avatar
Cody Yu committed
177
178
@dataclass
class DetokenizeReqInput:
    """Request message carrying token ids.

    NOTE(review): judging by the name, the ids are to be converted back to
    text -- confirm against the handler.
    """

    # The token ids.
    input_ids: List[int]