Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1428c17d
Commit
1428c17d
authored
Oct 30, 2024
by
zhuwenwen
Browse files
auto convert lm_head layout of llama
parent
85def94c
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
5 deletions
+7
-5
vllm/model_executor/layers/vocab_parallel_embedding.py
vllm/model_executor/layers/vocab_parallel_embedding.py
+1
-2
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama.py
+6
-3
No files found.
vllm/model_executor/layers/vocab_parallel_embedding.py
View file @
1428c17d
...
@@ -22,7 +22,6 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
...
@@ -22,7 +22,6 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
def
__init__
(
self
):
def
__init__
(
self
):
self
.
use_llama_nn
=
os
.
environ
.
get
(
'LLAMA_NN'
)
==
'1'
self
.
use_llama_nn
=
os
.
environ
.
get
(
'LLAMA_NN'
)
==
'1'
self
.
use_lm_nn
=
os
.
environ
.
get
(
'LM_NN'
)
==
'1'
def
create_weights
(
self
,
layer
:
torch
.
nn
.
Module
,
def
create_weights
(
self
,
layer
:
torch
.
nn
.
Module
,
input_size_per_partition
:
int
,
input_size_per_partition
:
int
,
...
@@ -42,7 +41,7 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
...
@@ -42,7 +41,7 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
layer
:
torch
.
nn
.
Module
,
layer
:
torch
.
nn
.
Module
,
x
:
torch
.
Tensor
,
x
:
torch
.
Tensor
,
bias
:
Optional
[
torch
.
Tensor
]
=
None
)
->
torch
.
Tensor
:
bias
:
Optional
[
torch
.
Tensor
]
=
None
)
->
torch
.
Tensor
:
if
self
.
use_llama_nn
and
self
.
use_lm_nn
:
if
self
.
use_llama_nn
and
os
.
environ
[
'LM_NN'
]
==
'1'
:
if
bias
is
not
None
:
if
bias
is
not
None
:
if
len
(
x
.
shape
)
==
2
:
if
len
(
x
.
shape
)
==
2
:
return
torch
.
addmm
(
bias
,
x
,
layer
.
weight
)
return
torch
.
addmm
(
bias
,
x
,
layer
.
weight
)
...
...
vllm/model_executor/models/llama.py
View file @
1428c17d
...
@@ -455,7 +455,6 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA):
...
@@ -455,7 +455,6 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA):
self
.
quant_config
=
quant_config
self
.
quant_config
=
quant_config
self
.
use_llama_nn
=
os
.
environ
.
get
(
'LLAMA_NN'
)
==
'1'
self
.
use_llama_nn
=
os
.
environ
.
get
(
'LLAMA_NN'
)
==
'1'
self
.
use_lm_nn
=
os
.
environ
.
get
(
'LM_NN'
)
==
'1'
self
.
use_gemm_pad
=
os
.
environ
.
get
(
'GEMM_PAD'
)
==
'1'
self
.
use_gemm_pad
=
os
.
environ
.
get
(
'GEMM_PAD'
)
==
'1'
self
.
use_fa_pad
=
os
.
environ
.
get
(
'FA_PAD'
)
==
'1'
self
.
use_fa_pad
=
os
.
environ
.
get
(
'FA_PAD'
)
==
'1'
self
.
use_awq_pad
=
os
.
environ
.
get
(
'AWQ_PAD'
)
==
'1'
self
.
use_awq_pad
=
os
.
environ
.
get
(
'AWQ_PAD'
)
==
'1'
...
@@ -574,8 +573,8 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA):
...
@@ -574,8 +573,8 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA):
"self_attn.qkv_proj.weight"
,
"self_attn.qkv_proj.weight"
,
"self_attn.o_proj.weight"
,
"self_attn.o_proj.weight"
,
"mlp.gate_up_proj.weight"
,
"mlp.gate_up_proj.weight"
,
"mlp.down_proj.weight"
"mlp.down_proj.weight"
,
#
"lm_head.weight"
"lm_head.weight"
]
]
if
self
.
use_lm_nn
:
if
self
.
use_lm_nn
:
...
@@ -587,6 +586,10 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA):
...
@@ -587,6 +586,10 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA):
qkv_words
=
"|"
.
join
(
lay_qkv_words
)
qkv_words
=
"|"
.
join
(
lay_qkv_words
)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
,
weight
in
params_dict
.
items
():
if
"lm_head.weight"
in
layername
:
os
.
environ
[
'LM_NN'
]
=
'1'
else
:
os
.
environ
[
'LM_NN'
]
=
'0'
matches
=
re
.
findall
(
combined_words
,
layername
)
matches
=
re
.
findall
(
combined_words
,
layername
)
if
matches
:
if
matches
:
if
self
.
use_gemm_pad
and
gemm_bank_conf
(
weight
.
data
.
shape
[
0
]):
if
self
.
use_gemm_pad
and
gemm_bank_conf
(
weight
.
data
.
shape
[
0
]):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment