Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Lmdeploy
Commits
69b6eabe
You need to sign in or sign up before continuing.
Unverified
Commit
69b6eabe
authored
Jul 11, 2023
by
lvhan028
Committed by
GitHub
Jul 11, 2023
Browse files
set chunk_size=1 and export tp to config.ini (#94)
parent
4db08045
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
7 deletions
+13
-7
lmdeploy/serve/turbomind/deploy.py
lmdeploy/serve/turbomind/deploy.py
+13
-7
No files found.
lmdeploy/serve/turbomind/deploy.py
View file @
69b6eabe
...
@@ -146,9 +146,10 @@ def export(model_name: str,
...
@@ -146,9 +146,10 @@ def export(model_name: str,
session_len
=
2056
,
session_len
=
2056
,
step_length
=
1
,
step_length
=
1
,
cache_max_entry_count
=
48
,
cache_max_entry_count
=
48
,
cache_chunk_size
=
8
,
cache_chunk_size
=
1
,
use_context_fmha
=
1
,
use_context_fmha
=
1
,
quant_policy
=
0
))
quant_policy
=
0
,
tensor_para_size
=
tp
))
config
=
configparser
.
ConfigParser
()
config
=
configparser
.
ConfigParser
()
for
section
,
key_values
in
cfg
.
items
():
for
section
,
key_values
in
cfg
.
items
():
...
@@ -323,7 +324,7 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
...
@@ -323,7 +324,7 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
if
name
not
in
_params
and
name
.
find
(
'bias'
):
if
name
not
in
_params
and
name
.
find
(
'bias'
):
return
None
return
None
return
_params
[
name
].
t
()
return
_params
[
name
].
t
()
w_pack
=
False
w_pack
=
False
if
'model.layers.0.self_attn.W_pack.weight'
in
_params
:
if
'model.layers.0.self_attn.W_pack.weight'
in
_params
:
w_pack
=
True
w_pack
=
True
...
@@ -333,9 +334,12 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
...
@@ -333,9 +334,12 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
# attention weights
# attention weights
for
suffix
in
_suffixes
:
for
suffix
in
_suffixes
:
if
w_pack
:
if
w_pack
:
_qkvo
=
[
f
'model.layers.
{
i
}
.self_attn.
{
t
}
'
for
t
in
[
'W_pack'
,
'o_proj'
]]
_qkvo
=
[
f
'model.layers.
{
i
}
.self_attn.
{
t
}
'
for
t
in
[
'W_pack'
,
'o_proj'
]
]
qkv
,
o
=
map
(
get_tensor_transposed
,
qkv
,
o
=
map
(
get_tensor_transposed
,
map
((
'{}.'
+
suffix
).
format
,
_qkvo
))
map
((
'{}.'
+
suffix
).
format
,
_qkvo
))
if
qkv
is
None
:
if
qkv
is
None
:
continue
continue
...
@@ -346,9 +350,11 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
...
@@ -346,9 +350,11 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
v
=
_qkv
[
2
]
v
=
_qkv
[
2
]
else
:
else
:
_qkvo
=
[
f
'model.layers.
{
i
}
.self_attn.
{
t
}
_proj'
for
t
in
'qkvo'
]
_qkvo
=
[
f
'model.layers.
{
i
}
.self_attn.
{
t
}
_proj'
for
t
in
'qkvo'
]
q
,
k
,
v
,
o
=
map
(
get_tensor_transposed
,
q
,
k
,
v
,
o
=
map
(
get_tensor_transposed
,
map
((
'{}.'
+
suffix
).
format
,
_qkvo
))
map
((
'{}.'
+
suffix
).
format
,
_qkvo
))
if
q
is
None
:
if
q
is
None
:
continue
continue
# q, k has different layout for fb & hf, convert to fb's
# q, k has different layout for fb & hf, convert to fb's
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment