Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e35a9e99
Commit
e35a9e99
authored
Oct 15, 2025
by
zhuwenwen
Browse files
update linear to matmul
parent
7e5aa0c5
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
7 deletions
+14
-7
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/linear.py
+10
-3
vllm/model_executor/layers/vocab_parallel_embedding.py
vllm/model_executor/layers/vocab_parallel_embedding.py
+4
-4
No files found.
vllm/model_executor/layers/linear.py
View file @
e35a9e99
...
@@ -268,7 +268,6 @@ class UnquantizedLinearMethod(LinearMethodBase):
...
@@ -268,7 +268,6 @@ class UnquantizedLinearMethod(LinearMethodBase):
if
self
.
use_llama_nn
:
if
self
.
use_llama_nn
:
# if os.environ['GEMM_PAD'] == '1' and gemm_bank_conf(layer.weight.shape[1] - 32):
# if os.environ['GEMM_PAD'] == '1' and gemm_bank_conf(layer.weight.shape[1] - 32):
# layer.weight = layer.weight[:,:-32]
# layer.weight = layer.weight[:,:-32]
if
bias
is
not
None
:
if
bias
is
not
None
:
if
len
(
x
.
shape
)
==
2
:
if
len
(
x
.
shape
)
==
2
:
return
torch
.
addmm
(
bias
,
x
,
layer
.
weight
)
return
torch
.
addmm
(
bias
,
x
,
layer
.
weight
)
...
@@ -277,8 +276,16 @@ class UnquantizedLinearMethod(LinearMethodBase):
...
@@ -277,8 +276,16 @@ class UnquantizedLinearMethod(LinearMethodBase):
else
:
else
:
return
torch
.
matmul
(
x
,
layer
.
weight
)
return
torch
.
matmul
(
x
,
layer
.
weight
)
else
:
else
:
if
envs
.
VLLM_USE_NN
and
x
.
shape
[
-
1
]
==
layer
.
weight
.
shape
[
0
]:
# if envs.VLLM_USE_NN and x.shape[-1] == layer.weight.shape[0]:
return
dispatch_unquantized_gemm
()(
layer
,
x
,
layer
.
weight
.
t
(),
bias
)
# return dispatch_unquantized_gemm()(layer, x, layer.weight.t(), bias)
if
envs
.
VLLM_USE_NN
:
if
bias
is
not
None
:
if
len
(
x
.
shape
)
==
2
:
return
torch
.
addmm
(
bias
,
x
,
layer
.
weight
)
else
:
return
torch
.
matmul
(
x
,
layer
.
weight
)
+
bias
else
:
return
torch
.
matmul
(
x
,
layer
.
weight
)
else
:
else
:
return
dispatch_unquantized_gemm
()(
layer
,
x
,
layer
.
weight
,
bias
)
return
dispatch_unquantized_gemm
()(
layer
,
x
,
layer
.
weight
,
bias
)
...
...
vllm/model_executor/layers/vocab_parallel_embedding.py
View file @
e35a9e99
...
@@ -75,9 +75,9 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
...
@@ -75,9 +75,9 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
else
:
else
:
return
torch
.
matmul
(
x
,
layer
.
weight
)
return
torch
.
matmul
(
x
,
layer
.
weight
)
else
:
else
:
if
envs
.
VLLM_USE_NN
and
x
.
shape
[
-
1
]
==
layer
.
weight
.
shape
[
0
]:
#
if envs.VLLM_USE_NN and x.shape[-1] == layer.weight.shape[0]:
return
dispatch_unquantized_gemm
()(
layer
,
x
,
layer
.
weight
.
t
(),
bias
)
#
return dispatch_unquantized_gemm()(layer, x, layer.weight.t(), bias)
else
:
#
else:
return
dispatch_unquantized_gemm
()(
layer
,
x
,
layer
.
weight
,
bias
)
return
dispatch_unquantized_gemm
()(
layer
,
x
,
layer
.
weight
,
bias
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment