test_openai_server.py 6.6 KB
Newer Older
1
"""
First run the following command to launch the server.
Note that TinyLlama adopts different chat templates in different versions.
For v0.4, the chat template is chatml.

python3 -m sglang.launch_server --model-path TinyLlama/TinyLlama-1.1B-Chat-v0.4 \
--port 30000 --chat-template chatml

Output example:
The capital of France is Paris.
The capital of the United States is Washington, D.C.
The capital of Canada is Ottawa.
The capital of Japan is Tokyo
"""

import argparse
17
import json
18
19
20
21

import openai


Cody Yu's avatar
Cody Yu committed
22
def test_completion(args, echo, logprobs):
    """Exercise the non-streaming completions endpoint and sanity-check the reply.

    Sends a fixed prompt with ``temperature=0`` and ``max_tokens=32``, prints
    the returned text, and asserts on the response's text, logprobs and
    usage fields.

    Args:
        args: Parsed command-line namespace; only ``args.base_url`` is read.
        echo: Forwarded as the ``echo`` request parameter. When truthy, the
            returned text must start with the prompt.
        logprobs: Forwarded as the ``logprobs`` request parameter. When
            truthy, the logprobs payload must be present and its first token
            logprob must be ``None`` if ``echo`` is set, non-``None`` otherwise.

    Raises:
        AssertionError: If any of the checks on the response fails.
    """
    prompt = "The capital of France is"
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)
    response = client.completions.create(
        model="default",
        prompt=prompt,
        temperature=0,
        max_tokens=32,
        echo=echo,
        logprobs=logprobs,
    )
    choice = response.choices[0]
    text = choice.text
    print(text)
    if echo:
        assert text.startswith(prompt)
    if logprobs:
        print(choice.logprobs.top_logprobs)
        assert choice.logprobs
        first_token_logprob = choice.logprobs.token_logprobs[0]
        # Compare against the None singleton by identity, not equality.
        if echo:
            assert first_token_logprob is None
        else:
            assert first_token_logprob is not None
    assert response.id
    assert response.created
    assert response.usage.prompt_tokens > 0
    assert response.usage.completion_tokens > 0
    assert response.usage.total_tokens > 0
    print("=" * 100)
49
50


Cody Yu's avatar
Cody Yu committed
51
def test_completion_stream(args, echo, logprobs):
    """Exercise the streaming completions endpoint chunk by chunk.

    Sends the same fixed prompt as the non-streaming test with
    ``stream=True`` and iterates over the returned chunks.

    Args:
        args: Parsed command-line namespace; only ``args.base_url`` is read.
        echo: Forwarded as the ``echo`` request parameter. When truthy, the
            first chunk's text must start with the prompt.
        logprobs: Forwarded as the ``logprobs`` request parameter. When
            truthy, each chunk's text is printed alongside its token
            logprobs and top logprobs; otherwise only the text is printed.

    Raises:
        AssertionError: If a chunk lacks an id or has non-positive usage
            counters, or if the echo check on the first chunk fails.
    """
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)
    stream = client.completions.create(
        model="default",
        prompt="The capital of France is",
        temperature=0,
        max_tokens=32,
        stream=True,
        echo=echo,
        logprobs=logprobs,
    )
    for index, chunk in enumerate(stream):
        choice = chunk.choices[0]
        # Only the very first chunk is checked for the echoed prompt.
        if index == 0 and echo:
            assert choice.text.startswith("The capital of France is")
        if not logprobs:
            print(choice.text, end="", flush=True)
        else:
            print(
                f"{choice.text:12s}\t{choice.logprobs.token_logprobs}",
                flush=True,
            )
            print(choice.logprobs.top_logprobs)
        assert chunk.id
        assert chunk.usage.prompt_tokens > 0
        assert chunk.usage.completion_tokens > 0
        assert chunk.usage.total_tokens > 0
    print("=" * 100)
81
82


Cody Yu's avatar
Cody Yu committed
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def test_chat_completion(args):
    """Exercise the non-streaming chat completions endpoint.

    Sends a system message plus one user question, prints the reply content,
    and asserts that the response has an id, a creation timestamp and
    positive usage counters.

    Args:
        args: Parsed command-line namespace; only ``args.base_url`` is read.

    Raises:
        AssertionError: If any of the checks on the response fails.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful AI assistant"},
        {"role": "user", "content": "What is the capital of France?"},
    ]
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)
    response = client.chat.completions.create(
        model="default",
        messages=conversation,
        temperature=0,
        max_tokens=32,
    )
    reply = response.choices[0].message.content
    print(reply)
    assert response.id
    assert response.created
    for counter in ("prompt_tokens", "completion_tokens", "total_tokens"):
        assert getattr(response.usage, counter) > 0
    print("=" * 100)
Cody Yu's avatar
Cody Yu committed
101
102


103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def test_chat_completion_image(args):
    """Exercise the chat completions endpoint with a text + image user message.

    The user message carries a text part and an ``image_url`` part. The reply
    content is printed, and the response must have an id, a creation
    timestamp and positive usage counters.

    Args:
        args: Parsed command-line namespace; only ``args.base_url`` is read.

    Raises:
        AssertionError: If any of the checks on the response fails.
    """
    text_part = {"type": "text", "text": "Describe this image"}
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/mixtral_8x7b.jpg"
        },
    }
    conversation = [
        {"role": "system", "content": "You are a helpful AI assistant"},
        {"role": "user", "content": [text_part, image_part]},
    ]
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)
    response = client.chat.completions.create(
        model="default",
        messages=conversation,
        temperature=0,
        max_tokens=32,
    )
    reply = response.choices[0].message.content
    print(reply)
    assert response.id
    assert response.created
    for counter in ("prompt_tokens", "completion_tokens", "total_tokens"):
        assert getattr(response.usage, counter) > 0
    print("=" * 100)
132
133


Cody Yu's avatar
Cody Yu committed
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def test_chat_completion_stream(args):
    """Exercise the streaming chat completions endpoint.

    The first chunk is only checked for ``delta.role == "assistant"``; its
    content is not printed. For every later chunk, non-empty delta content is
    printed without a trailing newline.

    Args:
        args: Parsed command-line namespace; only ``args.base_url`` is read.

    Raises:
        AssertionError: If the first chunk's delta role is not ``"assistant"``.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful AI assistant"},
        {"role": "user", "content": "List 3 countries and their capitals."},
    ]
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)
    stream = client.chat.completions.create(
        model="default",
        messages=conversation,
        temperature=0,
        max_tokens=64,
        stream=True,
    )
    for position, chunk in enumerate(stream):
        delta = chunk.choices[0].delta
        if position == 0:
            # Opening chunk: verify the role only, never print its content.
            assert delta.role == "assistant"
        elif delta.content:
            print(delta.content, end="", flush=True)
    print("=" * 100)
Cody Yu's avatar
Cody Yu committed
158
159


160
161
162
def test_regex(args):
    """Exercise chat completion with a ``regex`` passed through ``extra_body``.

    The pattern describes a small JSON object with ``name`` and
    ``population`` fields. The reply content is parsed with ``json.loads``
    and the parsed value is printed, so a reply that is not valid JSON makes
    the test fail.
    NOTE(review): presumably the server constrains generation to the regex;
    confirm against the server implementation.

    Args:
        args: Parsed command-line namespace; only ``args.base_url`` is read.

    Raises:
        json.JSONDecodeError: If the reply content is not valid JSON.
    """
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)

    # One pattern fragment per expected output line, concatenated in order.
    pattern_lines = (
        r"""\{\n""",
        r"""   "name": "[\w]+",\n""",
        r"""   "population": [\w\d\s]+\n""",
        r"""\}""",
    )
    regex = "".join(pattern_lines)

    conversation = [
        {"role": "system", "content": "You are a helpful AI assistant"},
        {"role": "user", "content": "Introduce the capital of France."},
    ]
    response = client.chat.completions.create(
        model="default",
        messages=conversation,
        temperature=0,
        max_tokens=128,
        extra_body={"regex": regex},
    )
    parsed = json.loads(response.choices[0].message.content)
    print(parsed)
    print("=" * 100)
183
184


185
186
187
if __name__ == "__main__":
    # Command-line entry point: parse options, then run every test in order
    # against the server at --base-url.
    parser = argparse.ArgumentParser()
    parser.add_argument("--base-url", type=str, default="http://127.0.0.1:30000/v1")
    parser.add_argument(
        "--test-image", action="store_true", help="Enables testing image inputs"
    )
    args = parser.parse_args()

    # Each logprobs setting is tried with echo off, then echo on. All
    # non-streaming runs finish before the streaming runs start.
    for completion_test in (test_completion, test_completion_stream):
        for logprobs_setting in (False, True, 3):
            for echo_setting in (False, True):
                completion_test(args, echo=echo_setting, logprobs=logprobs_setting)

    test_chat_completion(args)
    test_chat_completion_stream(args)
    test_regex(args)

    # The image test is opt-in via --test-image.
    if args.test_image:
        test_chat_completion_image(args)