Unverified Commit 69b6eabe authored by lvhan028, committed by GitHub

set chunk_size=1 and export tp to config.ini (#94)

parent 4db08045
@@ -146,9 +146,10 @@ def export(model_name: str,
         session_len=2056,
         step_length=1,
         cache_max_entry_count=48,
-        cache_chunk_size=8,
+        cache_chunk_size=1,
         use_context_fmha=1,
-        quant_policy=0))
+        quant_policy=0,
+        tensor_para_size=tp))
     config = configparser.ConfigParser()
     for section, key_values in cfg.items():
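
For context, a minimal sketch of how a section dict like the one above can be written to config.ini with configparser. The section name 'llama', the concrete values, and the output path are illustrative assumptions; only the keys visible in the diff (including the new cache_chunk_size=1 and tensor_para_size) come from the change itself.

import configparser

# Hypothetical section dict; mirrors the keys touched by this commit.
cfg = {
    'llama': {
        'session_len': 2056,
        'cache_max_entry_count': 48,
        'cache_chunk_size': 1,    # lowered from 8 to 1 in this commit
        'use_context_fmha': 1,
        'quant_policy': 0,
        'tensor_para_size': 2,    # tp is now exported to config.ini (value assumed)
    }
}

config = configparser.ConfigParser()
for section, key_values in cfg.items():
    # configparser stores option values as strings
    config[section] = {k: str(v) for k, v in key_values.items()}

with open('config.ini', 'w') as f:
    config.write(f)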
@@ -333,7 +334,10 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
         # attention weights
         for suffix in _suffixes:
             if w_pack:
-                _qkvo = [f'model.layers.{i}.self_attn.{t}' for t in ['W_pack', 'o_proj']]
+                _qkvo = [
+                    f'model.layers.{i}.self_attn.{t}'
+                    for t in ['W_pack', 'o_proj']
+                ]
                 qkv, o = map(get_tensor_transposed,
                              map(('{}.' + suffix).format, _qkvo))
@@ -346,7 +350,9 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
                 v = _qkv[2]
             else:
-                _qkvo = [f'model.layers.{i}.self_attn.{t}_proj' for t in 'qkvo']
+                _qkvo = [
+                    f'model.layers.{i}.self_attn.{t}_proj' for t in 'qkvo'
+                ]
                 q, k, v, o = map(get_tensor_transposed,
                                  map(('{}.' + suffix).format, _qkvo))
                 if q is None:
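
A small, self-contained illustration of how the list comprehensions above expand into full tensor names once combined with a suffix. The values of i and suffix are assumptions; in the real script they come from the layer loop and _suffixes.

i = 0
suffix = 'weight'

# Same construction as the else-branch above: q/k/v/o projection names for layer i.
_qkvo = [f'model.layers.{i}.self_attn.{t}_proj' for t in 'qkvo']

# ('{}.' + suffix).format appends the suffix to every name before loading.
full_names = list(map(('{}.' + suffix).format, _qkvo))
# full_names == ['model.layers.0.self_attn.q_proj.weight',
#                'model.layers.0.self_attn.k_proj.weight',
#                'model.layers.0.self_attn.v_proj.weight',
#                'model.layers.0.self_attn.o_proj.weight']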