### Modified files (Qwen1.5)

1. requirements/runtime.txt: pin `transformers==4.38.2`
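A quick way to confirm that the pinned version is the one actually installed in your environment (optional sanity check, nothing lmdeploy-specific):

```python
# Sanity check: the environment should pick up the transformers version pinned above.
import transformers

assert transformers.__version__ == '4.38.2', (
    f'expected transformers 4.38.2, got {transformers.__version__}')
```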
2. lmdeploy/turbomind/deploy/source_model/qwen.py
Replace the file contents with the following, which adds the weight-loading mapping for Qwen models:
```python
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os.path as osp

import torch

from .base import INPUT_MODELS
from .llama import LlamaModel, LlamaReader


class QwenReader(LlamaReader):
    """QwenReader."""

    attn_layer_patten = r'transformer.h.([0-9]+).'
    tok_embeddings_key = 'transformer.wte.weight'
    norm_weight_key = 'transformer.ln_f.weight'
    output_weight_key = 'lm_head.weight'

    def __init__(self, new_params: dict, unused_params: dict, last_bin: bool):
        super().__init__(new_params, unused_params, last_bin)

    def _attn(self, i: int, kind: str, size_dim: int, dim: int = 0):
        """Get q, k, v, o kind for layer i."""
        qkv = self.params[f'transformer.h.{i}.attn.c_attn.{kind}']
        q, k, v = torch.split(qkv, qkv.size(size_dim) // 3, dim=dim)
        o = self.params.get(f'transformer.h.{i}.attn.c_proj.{kind}', None)
        if o is None:
            o = torch.zeros_like(q)
        return q, k, v, o

    def attn(self, i: int):
        """Get q, k, v, o weight for layer i."""
        return self._attn(i, 'weight', 0, 0)

    def attn_bias(self, i: int):
        """Get q, k, v, o bias for layer i."""
        return self._attn(i, 'bias', -1, 0)

    def attn_zero(self, i: int):
        """Get q, k, v, o zero point for layer i."""
        return (None, ) * 4

    def attn_scale(self, i: int):
        """Get q, k, v, o scale for layer i."""
        return (None, ) * 4

    def attn_norm(self, i: int):
        """Get attn norm for layer i."""
        return self.params[f'transformer.h.{i}.ln_1.weight']

    def _ffn(self, i: int, kind: str):
        """Get ffn kind for layer i."""
        result = []
        for key in ['w2', 'c_proj', 'w1']:
            tensor = self.params[f'transformer.h.{i}.mlp.{key}.{kind}']
            result.append(tensor)
        return (*result, )

    def ffn(self, i: int):
        """Get ffn weight for layer i."""
        return self._ffn(i, 'weight')

    def ffn_zero(self, i: int):
        """Get ffn zero point for layer i."""
        return (None, ) * 3

    def ffn_scale(self, i: int):
        """Get ffn scale for layer i."""
        return (None, ) * 3

    def ffn_norm(self, i: int):
        """Get ffn norm for layer i."""
        return self.params[f'transformer.h.{i}.ln_2.weight']


@INPUT_MODELS.register_module(name='qwen')
class QwenModel(LlamaModel):
    """Qwen model in hf format."""

    Reader = QwenReader

    def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
        super().__init__(model_path, tokenizer_path, **kwargs)

    def tokenizer_info(self):
        """Read tokenizer info."""
        n_words = 151851
        bos_id = 0
        eos_id = 151643
        return n_words, bos_id, eos_id

    def model_info(self):
        """Read model info."""
        params_path = osp.join(self.model_path, 'config.json')
        with open(params_path) as f:
            config = json.load(f)
            num_layer = config['num_hidden_layers']
            norm_eps = config['layer_norm_epsilon']
            rope_theta = float(config.get('rotary_emb_base', 10000.0))
            if 'num_key_value_heads' in config:
                kv_head_num = config['num_key_value_heads']
            else:
                kv_head_num = config['num_attention_heads']
            seq_length = config['seq_length']
            use_dynamic_ntk = int(config['use_dynamic_ntk'])
            use_logn_attn = int(config['use_logn_attn'])
        return dict(num_layer=num_layer,
                    norm_eps=norm_eps,
                    kv_head_num=kv_head_num,
                    rope_theta=rope_theta,
                    max_position_embeddings=seq_length,
                    use_dynamic_ntk=int(use_dynamic_ntk),
                    use_logn_attn=use_logn_attn)


class Qwen2Reader(LlamaReader):
    """Read qwen2 model weights.

    The weight names of the qwen2 model are similar to llama, except its
    attention bias doesn't include an o_proj bias. Therefore, we make a dummy
    zero o_proj bias so that it complies with the turbomind llama format.
    """

    def __init__(self, new_params: dict, unused_params: dict, last_bin: bool):
        super().__init__(new_params, unused_params, last_bin)

    def attn_bias(self, i: int):
        """Get q, k, v bias for layer i."""
        result = []
        for key in ['q', 'k', 'v']:
            tensor = self.params.get(
                f'model.layers.{i}.self_attn.{key}_proj.bias')
            assert tensor is not None
            result.append(tensor)
        tensor = self.params.get(f'model.layers.{i}.self_attn.o_proj.weight')
        dummy_oproj_bias = tensor.new_zeros(tensor.shape[0])
        result.append(dummy_oproj_bias)
        return (*result, )


@INPUT_MODELS.register_module(name='qwen2')
class Qwen2Model(LlamaModel):
    """Qwen model in hf format."""

    Reader = Qwen2Reader

    def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
        super().__init__(model_path, tokenizer_path, **kwargs)

    def tokenizer_info(self):
        """Set tokenizer info.

        Refer to https://huggingface.co/Qwen/Qwen1.5-7B-Chat/blob/main/generation_config.json
        """  # noqa: E501
        n_words = 152064
        bos_id = 151643
        eos_id = 151645
        return n_words, bos_id, eos_id
```
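The part worth calling out is `Qwen2Reader.attn_bias`: Qwen1.5 checkpoints ship q/k/v biases but no o_proj bias, so a zero vector is appended to satisfy turbomind's llama layout. A minimal standalone sketch of that trick, using made-up tensor shapes instead of a real checkpoint, looks like this:

```python
# Standalone illustration of the dummy o_proj bias trick used by Qwen2Reader.attn_bias.
# Shapes are made up for the example; a real checkpoint supplies these tensors.
import torch

hidden = 8
params = {
    'model.layers.0.self_attn.q_proj.bias': torch.randn(hidden),
    'model.layers.0.self_attn.k_proj.bias': torch.randn(hidden),
    'model.layers.0.self_attn.v_proj.bias': torch.randn(hidden),
    # qwen2 checkpoints ship no o_proj bias, only the o_proj weight
    'model.layers.0.self_attn.o_proj.weight': torch.randn(hidden, hidden),
}

q, k, v = (params[f'model.layers.0.self_attn.{key}_proj.bias'] for key in 'qkv')
o_weight = params['model.layers.0.self_attn.o_proj.weight']
# zero bias with one entry per output row, matching the q/k/v bias shapes
dummy_o_bias = o_weight.new_zeros(o_weight.shape[0])
print([t.shape for t in (q, k, v, dummy_o_bias)])  # four equal-length bias vectors
```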
3. lmdeploy/turbomind/deploy/converter.py

Modify the section below to add the qwen2 entry:
```python
supported_formats = ['llama', 'hf', 'awq', None]
special_input_model_map = {
    'qwen2': 'qwen2',
    'qwen': 'qwen',
    'baichuan': 'baichuan',
    'baichuan2': 'baichuan2'
}


def get_package_root_path():
    """Get lmdeploy root path."""
    import lmdeploy
```
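The map keys are matched against the model name, so `qwen2` needs its own entry to keep Qwen1.5 checkpoints from falling back to the old `qwen` reader. The snippet below is an illustrative sketch of that kind of prefix lookup, not lmdeploy's actual converter code; `resolve_input_model` is a hypothetical helper:

```python
# Illustrative sketch (not lmdeploy's actual code) of how a prefix map like
# special_input_model_map can resolve a registered reader from a model name.
special_input_model_map = {
    'qwen2': 'qwen2',   # qwen1.5 checkpoints use the llama-style weight layout
    'qwen': 'qwen',
    'baichuan': 'baichuan',
    'baichuan2': 'baichuan2',
}


def resolve_input_model(model_name: str, default: str = 'hf') -> str:
    """Pick the registered input model whose prefix matches model_name."""
    # longest prefix first, so 'qwen2-7b' resolves to 'qwen2' rather than 'qwen'
    for prefix in sorted(special_input_model_map, key=len, reverse=True):
        if model_name.startswith(prefix):
            return special_input_model_map[prefix]
    return default


print(resolve_input_model('qwen2-7b'))   # qwen2
print(resolve_input_model('qwen-14b'))   # qwen
print(resolve_input_model('internlm2'))  # hf
```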
4. lmdeploy/model.py

Modify the code below to register the qwen2 model names against the existing Qwen chat template:

```python
@MODELS.register_module(name='qwen2-110b')
@MODELS.register_module(name='qwen2-72b')
@MODELS.register_module(name='qwen2-14b')
@MODELS.register_module(name='qwen2-7b')
@MODELS.register_module(name='qwen-72b')
@MODELS.register_module(name='qwen-14b')
@MODELS.register_module(name='qwen-7b')
class Qwen7BChat(BaseModel):
    """Chat template for Qwen-7B-Chat."""

    def __init__(self,
                 session_len=8192,
                 top_p=0.5,
                 top_k=40,
```
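Once the names are registered, the template should resolve from the registry like any other built-in one. A small check, assuming the `MODELS` registry and `get_prompt` API from upstream lmdeploy are unchanged:

```python
# Assumes lmdeploy's MODELS registry and BaseModel.get_prompt are unchanged upstream.
from lmdeploy.model import MODELS

chat_template = MODELS.get('qwen2-7b')()
print(chat_template.get_prompt('hello'))  # should print a ChatML-style prompt
```

With all four changes in place, converting a Qwen1.5 checkpoint should work by pointing the converter at one of the new names, e.g. `lmdeploy convert qwen2-7b <path-to-Qwen1.5-7B-Chat>`; the exact CLI flags depend on your lmdeploy version.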