# io_struct.py
# Request and batched-output dataclasses for text generation.
import uuid
from dataclasses import dataclass
from typing import Dict, List, Optional, Union

from sglang.srt.sampling_params import SamplingParams


@dataclass
class GenerateReqInput:
    """Generation request describing either one prompt or a batch of prompts.

    Exactly one of ``text`` and ``input_ids`` must be set. ``post_init`` is a
    regular method, not the dataclass ``__post_init__`` hook, so it does not
    run automatically: call it after construction to validate the prompt
    fields and fill in defaults for the optional ones.
    """

    # The input prompt
    text: Optional[Union[List[str], str]] = None
    # The token ids for text; one can either specify text or input_ids
    input_ids: Optional[Union[List[List[int]], List[int]]] = None
    # The image input
    image_data: Optional[Union[List[str], str]] = None
    # The sampling_params
    sampling_params: Optional[Union[List[Dict], Dict]] = None
    # The request id
    rid: Optional[Union[List[str], str]] = None
    # Whether to return logprobs
    return_logprob: Optional[Union[List[bool], bool]] = None
    # The start location of the prompt for return_logprob
    logprob_start_len: Optional[Union[List[int], int]] = None
    # The number of top logprobs to return
    top_logprobs_num: Optional[Union[List[int], int]] = None
    # Whether to detokenize tokens in logprobs
    return_text_in_logprobs: bool = False
    # Whether to stream output
    stream: bool = False
    # TODO: make all parameters a Union[List[T], T] to allow for batched requests

    @staticmethod
    def _per_request(value, default, num):
        """Expand ``value`` to one entry per request; ``None`` becomes ``default``.

        A value that is already a list is returned unchanged (its length is
        not checked against ``num``).
        """
        if value is None:
            return [default] * num
        if not isinstance(value, list):
            return [value] * num
        return value

    def post_init(self):
        """Validate the prompt fields and normalize the optional fields in place.

        Sets ``self.is_single`` to ``True`` when the request holds one prompt
        (``text`` is a ``str``, or ``input_ids`` is a flat list of ints) and to
        ``False`` when it holds a batch.

        * Single request: missing ``sampling_params``, ``rid``,
          ``return_logprob``, ``logprob_start_len`` and ``top_logprobs_num``
          get scalar defaults (``{}``, a fresh uuid hex, ``False``, ``0``,
          ``0``). ``image_data`` is left untouched.
        * Batch request: every one of those fields, plus ``image_data``,
          becomes a list with one entry per prompt. Scalars are repeated for
          each prompt; lists are kept as given.

        Raises:
            AssertionError: if ``text`` and ``input_ids`` are both missing or
                both provided, or if a batch request supplies a non-list
                ``rid``.
        """
        if self.text is None:
            assert self.input_ids is not None, "Either text or input_ids should be provided"
        else:
            assert self.input_ids is None, "Either text or input_ids should be provided"

        if self.text is not None:
            is_single = isinstance(self.text, str)
        else:
            # Only the first element is inspected to tell a flat token list
            # from a batch, so an empty ``input_ids`` raises IndexError here.
            is_single = isinstance(self.input_ids[0], int)
        self.is_single = is_single

        if is_single:
            if self.sampling_params is None:
                self.sampling_params = {}
            if self.rid is None:
                self.rid = uuid.uuid4().hex
            if self.return_logprob is None:
                self.return_logprob = False
            if self.logprob_start_len is None:
                self.logprob_start_len = 0
            if self.top_logprobs_num is None:
                self.top_logprobs_num = 0
            return

        num = len(self.text) if self.text is not None else len(self.input_ids)

        self.image_data = self._per_request(self.image_data, None, num)

        if self.sampling_params is None:
            # Build a distinct dict per request: ``[{}] * num`` would make
            # every entry the same object, so editing one request's params
            # would silently change all of them.
            self.sampling_params = [{} for _ in range(num)]
        elif not isinstance(self.sampling_params, list):
            # A caller-supplied dict is shared by reference across entries.
            self.sampling_params = [self.sampling_params] * num

        if self.rid is None:
            self.rid = [uuid.uuid4().hex for _ in range(num)]
        else:
            assert isinstance(self.rid, list)

        self.return_logprob = self._per_request(self.return_logprob, False, num)
        self.logprob_start_len = self._per_request(self.logprob_start_len, 0, num)
        self.top_logprobs_num = self._per_request(self.top_logprobs_num, 0, num)

@dataclass
class TokenizedGenerateReqInput:
    """A single generation request in tokenized form.

    All fields are required and are set positionally in the order declared
    below. NOTE(review): presumably built from one entry of a
    ``GenerateReqInput`` after tokenization; confirm against the caller.
    """

    # The request id
    rid: str
    # The prompt text
    input_text: str
    # The token ids of the prompt
    input_ids: List[int]
    # Image-related inputs. NOTE(review): exact layout and meaning of these
    # three fields is not visible in this file; confirm with the producer.
    pixel_values: List[float]
    image_hash: int
    image_size: List[int]
    # Sampling parameters as a SamplingParams object (not a raw dict)
    sampling_params: SamplingParams
    # Whether to return logprobs
    return_logprob: bool
    # The start location of the prompt for return_logprob
    logprob_start_len: int
    # The number of top logprobs to return
    top_logprobs_num: int
    # Whether to stream output
    stream: bool


@dataclass
class BatchTokenIDOut:
    """Batched token-level outputs for a group of requests.

    Every field is a list. NOTE(review): the lists are presumably
    index-aligned, one entry per request id in ``rids``; confirm with the
    producer.
    """

    # Request ids
    rids: List[str]
    # Output token ids, one list per request
    output_tokens: List[List[int]]
    # NOTE(review): presumably the output text including jump-forward
    # segments, going by the name; confirm.
    output_and_jump_forward_strs: List[str]
    # Stop string that was hit for each request, or None
    hit_stop_str: List[Optional[str]]
    # Per-request detokenization flags. NOTE(review): these look like options
    # forwarded to the tokenizer's decode call; confirm.
    skip_special_tokens: List[bool]
    spaces_between_special_tokens: List[bool]
    # Per-request metadata dictionaries
    meta_info: List[Dict]
    # Per-request finished flags
    finished: List[bool]


@dataclass
class BatchStrOut:
    """Batched string outputs for a group of requests.

    Every field is a list. NOTE(review): the lists are presumably
    index-aligned, one entry per request id in ``rids``; confirm with the
    producer.
    """

    # Request ids
    rids: List[str]
    # Output string for each request
    output_str: List[str]
    # Per-request metadata dictionaries
    meta_info: List[Dict]
    # Per-request finished flags
    finished: List[bool]

@dataclass
class FlushCacheReq:
    """Request message that carries no fields.

    NOTE(review): presumably signals that a cache should be flushed, going by
    the name; the handler is not visible in this file.
    """

    pass


@dataclass
class DetokenizeReqInput:
    """Request carrying a list of token ids.

    NOTE(review): presumably the ids are to be converted back to text, going
    by the name; the handler is not visible in this file.
    """

    # The token ids
    input_ids: List[int]