"""
Usage:
python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --port 30000
python3 test_httpserver_llava.py

Output:
The image features a man standing on the back of a yellow taxi cab, holding
"""

import argparse
import asyncio
import json

import aiohttp
import requests


async def send_request(url, data, delay=0):
    """POST ``data`` as JSON to ``url`` and return the decoded JSON reply.

    Args:
        url: Full endpoint URL to post to.
        data: JSON-serializable request body.
        delay: Seconds to sleep before the request is sent.

    Returns:
        The response body parsed as JSON.
    """
    await asyncio.sleep(delay)
    # A fresh session per call; both the session and the response are
    # released when the combined context exits.
    async with aiohttp.ClientSession() as client, client.post(url, json=data) as reply:
        return await reply.json()


async def test_concurrent(args):
    """Send eight identical image-description requests at once and print the replies.

    Args:
        args: Parsed command-line namespace; ``args.host`` and ``args.port``
            are joined as ``"{host}:{port}"`` to form the server base URL.

    Each request is posted to ``/generate`` with greedy sampling
    (``temperature`` 0) and at most 16 new tokens. The ``"text"`` field of
    every reply is printed, in the order the requests were created.
    """
    url = f"{args.host}:{args.port}"

    # The payload is the same for every request, so build it once.
    # NOTE(review): "image_data" is passed as a bare file name; presumably the
    # server resolves it relative to its own working directory — confirm.
    payload = {
        "text": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: <image>\nDescribe this picture ASSISTANT:",
        "image_data": "example_image.png",
        "sampling_params": {
            "temperature": 0,
            "max_new_tokens": 16,
        },
    }

    # Creating the coroutines does not start them; asyncio.gather schedules
    # them concurrently and returns results in submission order.
    pending = [send_request(url + "/generate", payload) for _ in range(8)]

    rets = await asyncio.gather(*pending)
    for ret in rets:
        print(ret["text"])


def test_streaming(args):
    """Request a streamed image description and print it incrementally.

    Args:
        args: Parsed command-line namespace; ``args.host`` and ``args.port``
            are joined as ``"{host}:{port}"`` to form the server base URL.

    The request is posted to ``/generate`` with ``"stream": True``, greedy
    sampling (``temperature`` 0) and at most 128 new tokens. Response lines
    starting with ``data:`` are parsed as JSON until the ``data: [DONE]``
    sentinel is seen; only the not-yet-printed tail of each event's
    ``"text"`` is written to stdout.
    """
    url = f"{args.host}:{args.port}"

    # Use the response as a context manager so the streamed connection is
    # released even though the loop exits early on the [DONE] sentinel.
    with requests.post(
        url + "/generate",
        json={
            "text": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: <image>\nDescribe this picture ASSISTANT:",
            "image_data": "example_image.png",
            "sampling_params": {
                "temperature": 0,
                "max_new_tokens": 128,
            },
            "stream": True,
        },
        stream=True,
    ) as response:
        # Number of characters of the generated text already printed.
        prev = 0
        for chunk in response.iter_lines(decode_unicode=False):
            chunk = chunk.decode("utf-8")
            if chunk and chunk.startswith("data:"):
                if chunk == "data: [DONE]":
                    break
                # Drop the "data:" prefix; the remainder is a JSON document.
                data = json.loads(chunk[5:].strip("\n"))
                # The code treats each event's text as the full output so far,
                # so only the new suffix is printed.
                output = data["text"].strip()
                print(output[prev:], end="", flush=True)
                prev = len(output)
    print("")


if __name__ == "__main__":
    # Command-line options: where the already-running server is listening.
    cli = argparse.ArgumentParser()
    cli.add_argument("--host", type=str, default="http://127.0.0.1")
    cli.add_argument("--port", type=int, default=30000)
    args = cli.parse_args()

    # Run the concurrent batch first, then the streaming request.
    asyncio.run(test_concurrent(args))

    test_streaming(args)