"vscode:/vscode.git/clone" did not exist on "05c6c3c5ecf9fd73c131c5894465d15dc975840f"
test_httpserver_reuse.py 1.15 KB
Newer Older
Lianmin Zheng's avatar
Lianmin Zheng committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
"""
Usage (start the server first, then run this script against it):
python3 -m sglang.launch_server --model-path TinyLlama/TinyLlama-1.1B-Chat-v0.4 --port 30000

Output:
The capital of France is Paris.\nThe capital of the United States is Washington, D.C.\nThe capital of Canada is Ottawa.\nThe capital of Japan is Tokyo
"""

import argparse

import requests

if __name__ == "__main__":
    # Command-line options pick the server to talk to; the defaults point at
    # a server listening on the local machine, port 30000.
    cli = argparse.ArgumentParser()
    cli.add_argument("--host", type=str, default="http://127.0.0.1")
    cli.add_argument("--port", type=int, default=30000)
    args = cli.parse_args()

    # Both requests go to the same "/generate" endpoint.
    endpoint = f"{args.host}:{args.port}" + "/generate"

    # Prompts are sent one after another, in this order. The second prompt
    # starts with the exact text of the first one and then continues with a
    # new sentence.
    prompts = (
        "The capital of France is",
        "The capital of France is Paris.\nThe capital of the United States is",
    )

    for prompt in prompts:
        # temperature 0 and a 32-token cap are applied to every request.
        payload = {
            "text": prompt,
            "sampling_params": {
                "temperature": 0,
                "max_new_tokens": 32,
            },
        }
        reply = requests.post(endpoint, json=payload)
        # Print the decoded JSON body of each reply as-is.
        print(reply.json())