# Copyright note follows; non-code viewer artifacts removed.
# Copyright (c) OpenMMLab. All rights reserved.
import os

import fire

from lmdeploy.serve.turbomind.chatbot import Chatbot
lvhan028's avatar
lvhan028 committed
7
8
9


def input_prompt():
lvhan028's avatar
lvhan028 committed
10
    """Input a prompt in the console interface."""
lvhan028's avatar
lvhan028 committed
11
12
13
14
15
    print('\ndouble enter to end input >>> ', end='')
    sentinel = ''  # ends when this string is seen
    return '\n'.join(iter(input, sentinel))




def main(tritonserver_addr: str, model_name: str, session_id: int = 1):
    """An example to communicate with inference server through the command
    line interface.

    Type a prompt (double enter to submit); type ``end`` to close the
    current session, or ``exit`` to quit the program.

    Args:
        tritonserver_addr (str): the address in format "ip:port" of
          triton inference server
        model_name (str): the name of the deployed model
        session_id (int): the identical id of a session
    """
    chatbot = Chatbot(
        tritonserver_addr,
        model_name,
        log_level=os.environ.get('SERVICE_LOG_LEVEL', 'WARNING'),
        display=True)
    round_no = 1
    while True:
        prompt = input_prompt()
        if prompt == 'exit':
            exit(0)
        elif prompt == 'end':
            chatbot.end(session_id)
        else:
            # display=True makes the chatbot print tokens itself, so we
            # only need to drain the streaming generator here.
            for _ in chatbot.stream_infer(
                    session_id,
                    prompt,
                    request_id=f'{session_id}-{round_no}',
                    request_output_len=512):
                pass
        round_no += 1


# Expose `main` as a command-line interface via python-fire.
if __name__ == '__main__':
    fire.Fire(main)