Unverified Commit 69b6eabe authored by lvhan028, committed by GitHub

set chunk_size=1 and export tp to config.ini (#94)

parent 4db08045
@@ -146,9 +146,10 @@ def export(model_name: str,
         session_len=2056,
         step_length=1,
         cache_max_entry_count=48,
-        cache_chunk_size=8,
+        cache_chunk_size=1,
         use_context_fmha=1,
-        quant_policy=0))
+        quant_policy=0,
+        tensor_para_size=tp))
     config = configparser.ConfigParser()
     for section, key_values in cfg.items():
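The hunk above only shows the cfg dict being extended; as a minimal sketch, here is how the configparser loop presumably finishes and what the exported config.ini then carries. The llama section name, the str() coercion, and the output path are assumptions, not part of this diff:

    import configparser

    tp = 2  # assumed: the tensor-parallel degree passed into export()
    cfg = dict(llama=dict(session_len=2056,
                          step_length=1,
                          cache_max_entry_count=48,
                          cache_chunk_size=1,
                          use_context_fmha=1,
                          quant_policy=0,
                          tensor_para_size=tp))

    config = configparser.ConfigParser()
    for section, key_values in cfg.items():
        # configparser rejects non-string values, so coerce with str()
        config[section] = {k: str(v) for k, v in key_values.items()}

    with open('config.ini', 'w') as f:  # hypothetical output path
        config.write(f)
    # config.ini now contains the new key, e.g.:
    #   [llama]
    #   ...
    #   cache_chunk_size = 1
    #   tensor_para_size = 2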
@@ -323,7 +324,7 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
         if name not in _params and name.find('bias'):
             return None
         return _params[name].t()

     w_pack = False
     if 'model.layers.0.self_attn.W_pack.weight' in _params:
         w_pack = True
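For context on the w_pack check: Baichuan-style checkpoints fuse the q/k/v projections into a single W_pack weight stacked along the output dimension, so probing the state dict for that key is enough to pick the right load path. A hedged sketch of the equivalence (hidden size and tensors are illustrative only, not taken from this diff):

    import torch

    hidden = 4096  # illustrative hidden size

    # separate projections, as in a vanilla LLaMA checkpoint
    wq, wk, wv = (torch.randn(hidden, hidden) for _ in range(3))

    # a packed checkpoint stores the same parameters as one
    # [3 * hidden, hidden] tensor stacked along the output dim
    w_pack = torch.cat([wq, wk, wv], dim=0)
    assert w_pack.shape == (3 * hidden, hidden)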
@@ -333,9 +334,12 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
         # attention weights
         for suffix in _suffixes:
             if w_pack:
-                _qkvo = [f'model.layers.{i}.self_attn.{t}' for t in ['W_pack', 'o_proj']]
+                _qkvo = [
+                    f'model.layers.{i}.self_attn.{t}'
+                    for t in ['W_pack', 'o_proj']
+                ]
                 qkv, o = map(get_tensor_transposed,
                              map(('{}.' + suffix).format, _qkvo))
                 if qkv is None:
                     continue
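The qkv loaded here is the packed weight after get_tensor_transposed, and the _qkv indexing visible in the next hunk suggests it is split into three equal chunks along the last dimension. A minimal sketch of that step, with shapes assumed rather than taken from the diff:

    import torch

    hidden = 4096
    # W_pack.weight.t() -> [hidden, 3 * hidden] (assumed shape)
    qkv = torch.randn(hidden, 3 * hidden)

    # three [hidden, hidden] chunks: q, k, v
    _qkv = torch.split(qkv, qkv.size(-1) // 3, dim=-1)
    q, k, v = _qkv  # v = _qkv[2], matching the context line below
    assert v.shape == (hidden, hidden)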
@@ -346,9 +350,11 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
                 v = _qkv[2]
             else:
-                _qkvo = [f'model.layers.{i}.self_attn.{t}_proj' for t in 'qkvo']
+                _qkvo = [
+                    f'model.layers.{i}.self_attn.{t}_proj' for t in 'qkvo'
+                ]
                 q, k, v, o = map(get_tensor_transposed,
                                  map(('{}.' + suffix).format, _qkvo))
                 if q is None:
                     continue
                 # q, k has different layout for fb & hf, convert to fb's
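On the trailing comment: HF's LLaMA conversion permutes q/k rows so its rotary embedding can split each head's dimensions into halves, whereas the original Meta ('fb') layout interleaves the rotary pairs, so converting back is a pure reshuffle. A hedged sketch of that inverse permutation for an untransposed [dim, dim] weight (head count and size are illustrative; the deploy script works on transposed tensors, so its exact dims will differ):

    import torch

    n_heads, dim = 32, 4096
    head_dim = dim // n_heads

    def permute_to_hf(w):
        # fb (interleaved rotary pairs) -> hf (split-half layout)
        return w.view(n_heads, head_dim // 2, 2,
                      dim).transpose(1, 2).reshape(dim, dim)

    def unpermute_to_fb(w):
        # hf -> fb: the inverse reshuffle hinted at by the comment above
        return w.view(n_heads, 2, head_dim // 2,
                      dim).transpose(1, 2).reshape(dim, dim)

    w = torch.randn(dim, dim)
    assert torch.equal(unpermute_to_fb(permute_to_hf(w)), w)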