### Modifications (Qwen1.5)
1.requirements/runtime.txt: change the pin to transformers==4.38.2<br>
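After updating the pin and reinstalling, a minimal sanity check (a sketch, assuming transformers is already installed from the updated requirements) confirms the expected version is active before running the conversion steps below:<br>
```python
# Minimal sanity check: verify the pinned transformers version is installed
# before converting Qwen1.5 (qwen2) weights with the steps below.
import transformers

assert transformers.__version__ == "4.38.2", transformers.__version__
```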
2.lmdeploy/turbomind/deploy/source_model/qwen.py<br>
Replace the file content with the following, which adds the weight-loading mapping for the Qwen models:<br>
```python
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os.path as osp

import torch

from .base import INPUT_MODELS
from .llama import LlamaModel, LlamaReader


class QwenReader(LlamaReader):
    """QwenReader."""

    attn_layer_patten = r'transformer.h.([0-9]+).'
    tok_embeddings_key = 'transformer.wte.weight'
    norm_weight_key = 'transformer.ln_f.weight'
    output_weight_key = 'lm_head.weight'

    def __init__(self, new_params: dict, unused_params: dict, last_bin: bool):
        super().__init__(new_params, unused_params, last_bin)

    def _attn(self, i: int, kind: str, size_dim: int, dim: int = 0):
        """Get q, k, v, o kind for layer i."""
        qkv = self.params[f'transformer.h.{i}.attn.c_attn.{kind}']
        q, k, v = torch.split(qkv, qkv.size(size_dim) // 3, dim=dim)
        o = self.params.get(f'transformer.h.{i}.attn.c_proj.{kind}', None)
        if o is None:
            o = torch.zeros_like(q)
        return q, k, v, o

    def attn(self, i: int):
        """Get q, k, v, o weight for layer i."""
        return self._attn(i, 'weight', 0, 0)

    def attn_bias(self, i: int):
        """Get q, k, v, o bias for layer i."""
        return self._attn(i, 'bias', -1, 0)

    def attn_zero(self, i: int):
        """Get q, k, v, o zero point for layer i."""
        return (None, ) * 4

    def attn_scale(self, i: int):
        """Get q, k, v, o scale for layer i."""
        return (None, ) * 4

    def attn_norm(self, i: int):
        """Get attn norm for layer i."""
        return self.params[f'transformer.h.{i}.ln_1.weight']

    def _ffn(self, i: int, kind: str):
        """Get ffn kind for layer i."""
        result = []
        for key in ['w2', 'c_proj', 'w1']:
            tensor = self.params[f'transformer.h.{i}.mlp.{key}.{kind}']
            result.append(tensor)
        return (*result, )

    def ffn(self, i: int):
        """Get ffn weight for layer i."""
        return self._ffn(i, 'weight')

    def ffn_zero(self, i: int):
        """Get ffn zero point for layer i."""
        return (None, ) * 3

    def ffn_scale(self, i: int):
        """Get ffn scale for layer i."""
        return (None, ) * 3

    def ffn_norm(self, i: int):
        """Get ffn norm for layer i."""
        return self.params[f'transformer.h.{i}.ln_2.weight']


@INPUT_MODELS.register_module(name='qwen')
class QwenModel(LlamaModel):
    """Qwen model in hf format."""

    Reader = QwenReader

    def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
        super().__init__(model_path, tokenizer_path, **kwargs)

    def tokenizer_info(self):
        """Read tokenizer info."""
        n_words = 151851
        bos_id = 0
        eos_id = 151643
        return n_words, bos_id, eos_id

    def model_info(self):
        """Read model info."""
        params_path = osp.join(self.model_path, 'config.json')
        with open(params_path) as f:
            config = json.load(f)
            num_layer = config['num_hidden_layers']
            norm_eps = config['layer_norm_epsilon']
            rope_theta = float(config.get('rotary_emb_base', 10000.0))
            if 'num_key_value_heads' in config:
                kv_head_num = config['num_key_value_heads']
            else:
                kv_head_num = config['num_attention_heads']
            seq_length = config['seq_length']
            use_dynamic_ntk = int(config['use_dynamic_ntk'])
            use_logn_attn = int(config['use_logn_attn'])
        return dict(num_layer=num_layer,
                    norm_eps=norm_eps,
                    kv_head_num=kv_head_num,
                    rope_theta=rope_theta,
                    max_position_embeddings=seq_length,
                    use_dynamic_ntk=int(use_dynamic_ntk),
                    use_logn_attn=use_logn_attn)


class Qwen2Reader(LlamaReader):
    """read qwen2 model weights.

    The weight names of the qwen2 model are similar to llama's, except that its
    attention bias doesn't include an o_proj bias. Therefore, we make a dummy
    zero o_proj bias so it complies with the turbomind llama format.
    """

    def __init__(self, new_params: dict, unused_params: dict, last_bin: bool):
        super().__init__(new_params, unused_params, last_bin)

    def attn_bias(self, i: int):
        """Get q, k, v bias for layer i."""
        result = []

        for key in ['q', 'k', 'v']:
            tensor = self.params.get(
                f'model.layers.{i}.self_attn.{key}_proj.bias')
            assert tensor is not None
            result.append(tensor)

        tensor = self.params.get(f'model.layers.{i}.self_attn.o_proj.weight')
        dummy_oproj_bias = tensor.new_zeros(tensor.shape[0])
        result.append(dummy_oproj_bias)
        return (*result, )


@INPUT_MODELS.register_module(name='qwen2')
class Qwen2Model(LlamaModel):
    """Qwen model in hf format."""

    Reader = Qwen2Reader

    def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
        super().__init__(model_path, tokenizer_path, **kwargs)

    def tokenizer_info(self):
        """set tokenizer info.

        Refer to https://huggingface.co/Qwen/Qwen1.5-7B-Chat/blob/main/generation_config.json
        """  # noqa: E501
        n_words = 152064
        bos_id = 151643
        eos_id = 151645
        return n_words, bos_id, eos_id

```
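The core of `QwenReader` is `_attn`, which splits the fused `c_attn` tensor into equal q/k/v slices and falls back to zeros when a `c_proj` tensor is missing. A minimal sketch of that split, using a made-up hidden size and a random stand-in tensor rather than a real checkpoint weight:<br>
```python
import torch

# Stand-in for transformer.h.0.attn.c_attn.weight (made-up hidden size).
hidden = 4096
fused = torch.randn(3 * hidden, hidden)

# Same split as QwenReader._attn: three equal chunks along the chosen dim.
q, k, v = torch.split(fused, fused.size(0) // 3, dim=0)
print(q.shape, k.shape, v.shape)  # each torch.Size([4096, 4096])

# If transformer.h.{i}.attn.c_proj.{kind} is absent, _attn substitutes zeros
# for o so the output still matches the turbomind llama layout.
o = torch.zeros_like(q)
```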
3.lmdeploy/turbomind/deploy/converter.py<br>
Modify the section below to add the qwen2 entry:<br>
```python
supported_formats = ['llama', 'hf', 'awq', None]
special_input_model_map = {
    'qwen2': 'qwen2',
    'qwen': 'qwen',
    'baichuan': 'baichuan',
    'baichuan2': 'baichuan2'
}


def get_package_root_path():
    """Get lmdeploy root path."""
    import lmdeploy
```
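For reference, a hedged sketch of how a name-keyed map like `special_input_model_map` can select the registered input model. This is not the actual converter logic, and `guess_input_model` is a hypothetical helper; it simply prefers the longest matching key so that `qwen2` wins over `qwen`:<br>
```python
special_input_model_map = {
    'qwen2': 'qwen2',
    'qwen': 'qwen',
    'baichuan': 'baichuan',
    'baichuan2': 'baichuan2',
}


def guess_input_model(model_name: str, default: str = 'hf') -> str:
    """Hypothetical helper: map the longest matching key to its input model,
    falling back to the default when nothing matches."""
    matches = [k for k in special_input_model_map if k in model_name.lower()]
    return special_input_model_map[max(matches, key=len)] if matches else default


print(guess_input_model('qwen2-7b'))   # -> 'qwen2'
print(guess_input_model('llama2-7b'))  # -> 'hf'
```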
4.lmdeploy/model.py<br>
Modify the code below to add the qwen2 chat templates (prompts):<br>
```python
@MODELS.register_module(name='qwen2-110b')
@MODELS.register_module(name='qwen2-72b')
@MODELS.register_module(name='qwen2-14b')
@MODELS.register_module(name='qwen2-7b')
@MODELS.register_module(name='qwen-72b')
@MODELS.register_module(name='qwen-14b')
@MODELS.register_module(name='qwen-7b')
class Qwen7BChat(BaseModel):
    """Chat template for Qwen-7B-Chat."""

    def __init__(self,
                 session_len=8192,
                 top_p=0.5,
                 top_k=40,
```
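Once registered, the new qwen2 names resolve to the same `Qwen7BChat` template. A minimal usage sketch, assuming lmdeploy is installed with the modification above:<br>
```python
from lmdeploy.model import MODELS

# Fetch the chat-template class registered under one of the new qwen2 names.
template = MODELS.get('qwen2-7b')()
print(template.session_len)  # 8192, as configured in Qwen7BChat.__init__
```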