add qwen72

0189f17c · zhouxiang · ee33e2e7 · 0189f17c · 0189f17c
Commit 0189f17c authored Dec 19, 2023 by zhouxiang
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 2 deletions

lmdeploy/model.py +1 -0

lmdeploy/serve/turbomind/deploy.py +4 -2

No files found.
--- a/lmdeploy/model.py
+++ b/lmdeploy/model.py
@@ -448,6 +448,7 @@ If a question does not make any sense, or is not factually coherent, explain why
        return ret


+@MODELS.register_module(name='qwen-72b')
 @MODELS.register_module(name='qwen-14b')
 @MODELS.register_module(name='qwen-7b')
 class Qwen7BChat(BaseModel):

--- a/lmdeploy/serve/turbomind/deploy.py
+++ b/lmdeploy/serve/turbomind/deploy.py
@@ -864,9 +864,11 @@ def deploy_qwen(model_name: str, model_path: str, tokenizer_path: str,
    def get_tensor(name, trans=True):
        """return a transposed tensor according its name."""
        if trans:
-            return _params[name].cuda().t()
+            # return _params[name].cuda().t()
+            return _params[name].t()
        else:
-            return _params[name].cuda()
+            # return _params[name].cuda()
+            return _params[name]

    for i in range(num_layer):
        print(i)