Unverified Commit 69b6eabe authored by lvhan028, committed by GitHub

set chunk_size=1 and export tp to config.ini (#94)

parent 4db08045
@@ -146,9 +146,10 @@ def export(model_name: str,
         session_len=2056,
         step_length=1,
         cache_max_entry_count=48,
-        cache_chunk_size=8,
+        cache_chunk_size=1,
         use_context_fmha=1,
-        quant_policy=0))
+        quant_policy=0,
+        tensor_para_size=tp))
     config = configparser.ConfigParser()
     for section, key_values in cfg.items():
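
For context, a minimal sketch of how a section dict like the one above can be written to config.ini with configparser. The section name 'llama', the concrete values, and the output path are illustrative assumptions; only the keys visible in the diff (including the new cache_chunk_size=1 and tensor_para_size) come from the change itself.

import configparser

# Hypothetical section dict; mirrors the keys touched by this commit.
cfg = {
    'llama': {
        'session_len': 2056,
        'cache_max_entry_count': 48,
        'cache_chunk_size': 1,    # lowered from 8 to 1 in this commit
        'use_context_fmha': 1,
        'quant_policy': 0,
        'tensor_para_size': 2,    # tp is now exported to config.ini (value assumed)
    }
}

config = configparser.ConfigParser()
for section, key_values in cfg.items():
    # configparser stores option values as strings
    config[section] = {k: str(v) for k, v in key_values.items()}

with open('config.ini', 'w') as f:
    config.write(f)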
@@ -333,7 +334,10 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
         # attention weights
         for suffix in _suffixes:
             if w_pack:
-                _qkvo = [f'model.layers.{i}.self_attn.{t}' for t in ['W_pack', 'o_proj']]
+                _qkvo = [
+                    f'model.layers.{i}.self_attn.{t}'
+                    for t in ['W_pack', 'o_proj']
+                ]
                 qkv, o = map(get_tensor_transposed,
                              map(('{}.' + suffix).format, _qkvo))
@@ -346,7 +350,9 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
                 v = _qkv[2]
             else:
-                _qkvo = [f'model.layers.{i}.self_attn.{t}_proj' for t in 'qkvo']
+                _qkvo = [
+                    f'model.layers.{i}.self_attn.{t}_proj' for t in 'qkvo'
+                ]
                 q, k, v, o = map(get_tensor_transposed,
                                  map(('{}.' + suffix).format, _qkvo))
                 if q is None:
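
A small, self-contained illustration of how the list comprehensions above expand into full tensor names once combined with a suffix. The values of i and suffix are assumptions; in the real script they come from the layer loop and _suffixes.

i = 0
suffix = 'weight'

# Same construction as the else-branch above: q/k/v/o projection names for layer i.
_qkvo = [f'model.layers.{i}.self_attn.{t}_proj' for t in 'qkvo']

# ('{}.' + suffix).format appends the suffix to every name before loading.
full_names = list(map(('{}.' + suffix).format, _qkvo))
# full_names == ['model.layers.0.self_attn.q_proj.weight',
#                'model.layers.0.self_attn.k_proj.weight',
#                'model.layers.0.self_attn.v_proj.weight',
#                'model.layers.0.self_attn.o_proj.weight']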