# Copyright (c) OpenMMLab. All rights reserved.
import os

from lmdeploy.serve.turbomind.chatbot import Chatbot
def input_prompt(model_name):
    """Input a prompt in the consolo interface."""
    if model_name == 'codellama':
        print('\nenter !! to end the input >>>\n', end='')
        sentinel = '!!'
    else:
        print('\ndouble enter to end input >>> ', end='')
        sentinel = ''  # ends when this string is seen
lvhan028's avatar
lvhan028 committed
15
16
17
    return '\n'.join(iter(input, sentinel))


18
19
def main(tritonserver_addr: str,
         session_id: int = 1,
Lyu Han's avatar
Lyu Han committed
20
21
22
         cap: str = 'chat',
         stream_output: bool = True,
         **kwargs):
lvhan028's avatar
lvhan028 committed
23
24
25
26
27
28
29
    """An example to communicate with inference server through the command line
    interface.

    Args:
        tritonserver_addr (str): the address in format "ip:port" of
          triton inference server
        session_id (int): the identical id of a session
Lyu Han's avatar
Lyu Han committed
30
31
        cap (str): the capability of a model. For example, codellama has
            the ability among ['completion', 'infill', 'instruct', 'python']
32
        stream_output (bool): indicator for streaming output or not
Lyu Han's avatar
Lyu Han committed
33
        **kwargs (dict): other arguments for initializing model's chat template
lvhan028's avatar
lvhan028 committed
34
    """
35
    log_level = os.environ.get('SERVICE_LOG_LEVEL', 'WARNING')
36
    kwargs.update(capability=cap)
37
38
    chatbot = Chatbot(tritonserver_addr,
                      log_level=log_level,
Lyu Han's avatar
Lyu Han committed
39
40
                      display=stream_output,
                      **kwargs)
lvhan028's avatar
lvhan028 committed
41
42
    nth_round = 1
    while True:
Lyu Han's avatar
Lyu Han committed
43
        prompt = input_prompt(chatbot.model_name)
lvhan028's avatar
lvhan028 committed
44
45
46
47
48
49
        if prompt == 'exit':
            exit(0)
        elif prompt == 'end':
            chatbot.end(session_id)
        else:
            request_id = f'{session_id}-{nth_round}'
50
51
52
53
54
55
56
57
58
59
60
61
62
            if stream_output:
                for status, res, n_token in chatbot.stream_infer(
                        session_id,
                        prompt,
                        request_id=request_id,
                        request_output_len=512):
                    continue
            else:
                status, res, n_token = chatbot.infer(session_id,
                                                     prompt,
                                                     request_id=request_id,
                                                     request_output_len=512)
                print(res)
lvhan028's avatar
lvhan028 committed
63
64
65
66
        nth_round += 1


if __name__ == '__main__':
67
68
    import fire

lvhan028's avatar
lvhan028 committed
69
    fire.Fire(main)