chat.py 3.28 KB
Newer Older
1
# Copyright (c) OpenMMLab. All rights reserved.
q.yao's avatar
q.yao committed
2
import os
3
import os.path as osp
4
5
import random

q.yao's avatar
q.yao committed
6
import fire
7
8
from transformers import AutoTokenizer

q.yao's avatar
q.yao committed
9
10
11
from lmdeploy import turbomind as tm
from lmdeploy.model import MODELS

q.yao's avatar
q.yao committed
12
13
# Module-level side effect: runs at import time, i.e. before main() builds
# tm.TurboMind. Presumably limits the backend's logging to errors only --
# TODO confirm against the TurboMind documentation.
os.environ['TM_LOG_LEVEL'] = 'ERROR'

q.yao's avatar
q.yao committed
14
15
16
17
18
19
20

def input_prompt():
    """Read a multi-line prompt from standard input.

    A hint is printed first, then lines are collected with ``input()``
    until an empty line is entered. The empty line itself is not part of
    the result.

    Returns:
        str: The collected lines joined with ``'\\n'``; an empty string
        when the very first line is empty.
    """
    print('\ndouble enter to end input >>> ', end='')
    collected = []
    while True:
        line = input()
        # an empty line terminates the prompt
        if line == '':
            break
        collected.append(line)
    return '\n'.join(collected)


q.yao's avatar
q.yao committed
21
22
23
24
25
26
27
28
29
def valid_str(string, coding='utf-8'):
    """Return ``string`` without Unicode replacement characters.

    The text is encoded with ``coding``, every occurrence of the byte
    sequence ``EF BF BD`` (the UTF-8 form of U+FFFD) is dropped, and the
    remaining bytes are decoded again. Bytes that cannot be decoded are
    skipped instead of raising.

    Args:
        string (str): Text to clean.
        coding (str): Codec used for both encoding and decoding.

    Returns:
        str: The cleaned text.
    """
    replacement_char = b'\xef\xbf\xbd'
    cleaned = bytes(string, coding).replace(replacement_char, b'')
    return cleaned.decode(encoding=coding, errors='ignore')


30
def main(model_name, model_path, session_id: int = 1):
    """Run an interactive, multi-round chat with a TurboMind model.

    Prompts are read with ``input_prompt`` in an endless loop. Two inputs
    are treated as commands instead of being sent as chat text:

    - ``exit``: terminate the program with exit status 0.
    - ``end``: issue a final request with ``sequence_end=True`` for the
      session, then reset the round counter, the step offset and the
      random seed so that the next prompt starts a new sequence.

    Args:
        model_name: Key looked up in the ``MODELS`` registry; the created
            object supplies ``stop_words`` and ``get_prompt``.
        model_path: Path handed to ``tm.TurboMind``. The tokenizer is
            loaded from its ``triton_models/tokenizer`` sub-directory.
        session_id: Identifier passed to every ``stream_infer`` call.

    Raises:
        SystemExit: When the user enters ``exit``.
    """
    # One model object is enough: it provides both the stop words for the
    # engine and the prompt template used in the loop below.
    model = MODELS.get(model_name)()
    tm_model = tm.TurboMind(model_path, stop_words=model.stop_words)
    generator = tm_model.create_instance()
    tokenizer_model_path = osp.join(model_path, 'triton_models', 'tokenizer')
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_model_path,
                                              trust_remote_code=True)

    nth_round = 1
    step = 0
    seed = random.getrandbits(64)

    while True:
        prompt = input_prompt()

        if prompt == 'exit':
            # ``exit`` is injected by the ``site`` module for interactive
            # use and may be missing (``python -S``, frozen builds), so
            # raise SystemExit directly.
            raise SystemExit(0)

        if prompt == 'end':
            prompt = model.get_prompt('', nth_round == 1)
            input_ids = tokenizer.encode(prompt, add_special_tokens=False)
            # Drain the stream; only the sequence_end=True request matters,
            # its outputs are discarded.
            for _ in generator.stream_infer(session_id=session_id,
                                            input_ids=[input_ids],
                                            request_output_len=512,
                                            sequence_start=False,
                                            sequence_end=True):
                pass
            nth_round = 1
            step = 0
            seed = random.getrandbits(64)
            continue

        prompt = model.get_prompt(prompt, nth_round == 1)
        input_ids = tokenizer.encode(prompt, add_special_tokens=False)
        print(f'session {session_id}')
        print(f'{prompt}', end='', flush=True)
        response_size = 0
        # Stays 0 when the stream yields nothing, so the step update after
        # the loop never reads an unbound name.
        tokens = 0
        for outputs in generator.stream_infer(
                session_id=session_id,
                input_ids=[input_ids],
                stream_output=True,
                request_output_len=512,
                sequence_start=(nth_round == 1),
                sequence_end=False,
                step=step,
                stop=False,
                top_k=40,
                top_p=0.8,
                temperature=0.8,
                repetition_penalty=1.05,
                ignore_eos=False,
                random_seed=seed if nth_round == 1 else None):
            res, tokens = outputs[0]
            # Decode the ids received so far and keep only the text after
            # what has already been printed (``res`` is presumably
            # cumulative -- confirm against stream_infer).
            response = tokenizer.decode(
                res, skip_special_tokens=True)[response_size:]
            response = valid_str(response)
            print(f'{response}', end='', flush=True)
            # Count only the cleaned text, so characters dropped by
            # valid_str are decoded again on the next iteration.
            response_size += len(response)

        # Advance the step by the prompt length plus the token count
        # reported with the last streamed output.
        step += len(input_ids) + tokens
        print()

        nth_round += 1
q.yao's avatar
q.yao committed
93
94
95
96


if __name__ == '__main__':
    # Expose main() as a command-line interface via python-fire.
    fire.Fire(main)