Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b364c176
Commit
b364c176
authored
Nov 13, 2025
by
zhuwenwen
Browse files
add VLLM_USE_OPT_ZEROS to replace triton_ (torch.zeros)
parent
c340a5df
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
3 deletions
+13
-3
vllm/attention/layer.py
vllm/attention/layer.py
+8
-3
vllm/envs.py
vllm/envs.py
+5
-0
No files found.
vllm/attention/layer.py
View file @
b364c176
...
@@ -219,9 +219,14 @@ class Attention(nn.Module):
...
@@ -219,9 +219,14 @@ class Attention(nn.Module):
if
self
.
use_output
:
if
self
.
use_output
:
output_shape
=
(
output_shape
output_shape
=
(
output_shape
if
output_shape
is
not
None
else
query
.
shape
)
if
output_shape
is
not
None
else
query
.
shape
)
output
=
torch
.
zeros
(
output_shape
,
if
envs
.
VLLM_USE_OPT_ZEROS
:
dtype
=
query
.
dtype
,
output
=
torch
.
empty
(
output_shape
,
device
=
query
.
device
)
dtype
=
query
.
dtype
,
device
=
query
.
device
)
else
:
output
=
torch
.
zeros
(
output_shape
,
dtype
=
query
.
dtype
,
device
=
query
.
device
)
hidden_size
=
output_shape
[
-
1
]
hidden_size
=
output_shape
[
-
1
]
# We skip reshaping query, key and value tensors for the MLA
# We skip reshaping query, key and value tensors for the MLA
# backend since these tensors have different semantics and are
# backend since these tensors have different semantics and are
...
...
vllm/envs.py
View file @
b364c176
...
@@ -165,6 +165,7 @@ if TYPE_CHECKING:
...
@@ -165,6 +165,7 @@ if TYPE_CHECKING:
VLLM_USE_APEX_RN
:
bool
=
False
VLLM_USE_APEX_RN
:
bool
=
False
VLLM_USE_GLOBAL_CACHE13
:
bool
=
False
VLLM_USE_GLOBAL_CACHE13
:
bool
=
False
VLLM_USE_LIGHTOP
:
bool
=
False
VLLM_USE_LIGHTOP
:
bool
=
False
VLLM_USE_OPT_ZEROS
:
bool
=
False
VLLM_USE_OPT_CAT
:
bool
=
False
VLLM_USE_OPT_CAT
:
bool
=
False
VLLM_USE_OPT_MOE_SUM
:
bool
=
False
VLLM_USE_OPT_MOE_SUM
:
bool
=
False
VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
:
bool
=
False
VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
:
bool
=
False
...
@@ -1105,6 +1106,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
...
@@ -1105,6 +1106,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_USE_LIGHTOP"
:
"VLLM_USE_LIGHTOP"
:
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_LIGHTOP"
,
"False"
).
lower
()
in
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_LIGHTOP"
,
"False"
).
lower
()
in
(
"true"
,
"1"
)),
(
"true"
,
"1"
)),
# vLLM will allocate the attention output with torch.empty instead of torch.zeros (replaces the triton_ zero-fill)
"VLLM_USE_OPT_ZEROS"
:
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_OPT_ZEROS"
,
"False"
).
lower
()
in
(
"true"
,
"1"
)),
# vLLM will use opt cat for deepseek-v3
# vLLM will use opt cat for deepseek-v3
"VLLM_USE_OPT_CAT"
:
"VLLM_USE_OPT_CAT"
:
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_OPT_CAT"
,
"False"
).
lower
()
in
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_OPT_CAT"
,
"False"
).
lower
()
in
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment