Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7e5aa0c5
Commit
7e5aa0c5
authored
Oct 14, 2025
by
zhuwenwen
Browse files
update linear apply
parent
b35a518a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
9 additions
and
9 deletions
+9
-9
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/linear.py
+4
-4
vllm/model_executor/layers/utils.py
vllm/model_executor/layers/utils.py
+3
-3
vllm/model_executor/layers/vocab_parallel_embedding.py
vllm/model_executor/layers/vocab_parallel_embedding.py
+2
-2
No files found.
vllm/model_executor/layers/linear.py
View file @
7e5aa0c5
...
@@ -266,8 +266,8 @@ class UnquantizedLinearMethod(LinearMethodBase):
...
@@ -266,8 +266,8 @@ class UnquantizedLinearMethod(LinearMethodBase):
x
:
torch
.
Tensor
,
x
:
torch
.
Tensor
,
bias
:
Optional
[
torch
.
Tensor
]
=
None
)
->
torch
.
Tensor
:
bias
:
Optional
[
torch
.
Tensor
]
=
None
)
->
torch
.
Tensor
:
if
self
.
use_llama_nn
:
if
self
.
use_llama_nn
:
if
gemm_bank_conf
(
layer
.
weight
.
shape
[
1
]
-
32
)
and
os
.
environ
[
'GEMM_PAD'
]
==
'1'
:
# if os.environ['GEMM_PAD'] == '1' and
gemm_bank_conf(layer.weight.shape[1] - 32):
layer
.
weight
=
layer
.
weight
[:,:
-
32
]
#
layer.weight = layer.weight[:,:-32]
if
bias
is
not
None
:
if
bias
is
not
None
:
if
len
(
x
.
shape
)
==
2
:
if
len
(
x
.
shape
)
==
2
:
...
@@ -278,9 +278,9 @@ class UnquantizedLinearMethod(LinearMethodBase):
...
@@ -278,9 +278,9 @@ class UnquantizedLinearMethod(LinearMethodBase):
return
torch
.
matmul
(
x
,
layer
.
weight
)
return
torch
.
matmul
(
x
,
layer
.
weight
)
else
:
else
:
if
envs
.
VLLM_USE_NN
and
x
.
shape
[
-
1
]
==
layer
.
weight
.
shape
[
0
]:
if
envs
.
VLLM_USE_NN
and
x
.
shape
[
-
1
]
==
layer
.
weight
.
shape
[
0
]:
return
dispatch_unquantized_gemm
()(
x
,
layer
.
weight
.
t
(),
bias
)
return
dispatch_unquantized_gemm
()(
layer
,
x
,
layer
.
weight
.
t
(),
bias
)
else
:
else
:
return
dispatch_unquantized_gemm
()(
x
,
layer
.
weight
,
bias
)
return
dispatch_unquantized_gemm
()(
layer
,
x
,
layer
.
weight
,
bias
)
class
LinearBase
(
CustomOp
):
class
LinearBase
(
CustomOp
):
...
...
vllm/model_executor/layers/utils.py
View file @
7e5aa0c5
...
@@ -187,10 +187,10 @@ def cpu_unquantized_gemm(layer: torch.nn.Module,
...
@@ -187,10 +187,10 @@ def cpu_unquantized_gemm(layer: torch.nn.Module,
def
dispatch_unquantized_gemm
()
->
Callable
[...,
torch
.
Tensor
]:
def
dispatch_unquantized_gemm
()
->
Callable
[...,
torch
.
Tensor
]:
if
current_platform
.
is_rocm
():
#
if current_platform.is_rocm():
# return rocm_unquantized_gemm
# return rocm_unquantized_gemm
return
torch
.
nn
.
functional
.
linear
#
return torch.nn.functional.linear
el
if
current_platform
.
is_cpu
():
if
current_platform
.
is_cpu
():
return
cpu_unquantized_gemm
return
cpu_unquantized_gemm
else
:
else
:
return
default_unquantized_gemm
return
default_unquantized_gemm
vllm/model_executor/layers/vocab_parallel_embedding.py
View file @
7e5aa0c5
...
@@ -76,9 +76,9 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
...
@@ -76,9 +76,9 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
return
torch
.
matmul
(
x
,
layer
.
weight
)
return
torch
.
matmul
(
x
,
layer
.
weight
)
else
:
else
:
if
envs
.
VLLM_USE_NN
and
x
.
shape
[
-
1
]
==
layer
.
weight
.
shape
[
0
]:
if
envs
.
VLLM_USE_NN
and
x
.
shape
[
-
1
]
==
layer
.
weight
.
shape
[
0
]:
return
dispatch_unquantized_gemm
()(
x
,
layer
.
weight
.
t
(),
bias
)
return
dispatch_unquantized_gemm
()(
layer
,
x
,
layer
.
weight
.
t
(),
bias
)
else
:
else
:
return
dispatch_unquantized_gemm
()(
x
,
layer
.
weight
,
bias
)
return
dispatch_unquantized_gemm
()(
layer
,
x
,
layer
.
weight
,
bias
)
def
embedding
(
self
,
layer
:
torch
.
nn
.
Module
,
def
embedding
(
self
,
layer
:
torch
.
nn
.
Module
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment