Commit b364c176 authored by zhuwenwen's avatar zhuwenwen
Browse files

Add VLLM_USE_OPT_ZEROS to replace triton_ (torch.zeros) with torch.empty when allocating the attention output buffer

parent c340a5df
...@@ -219,9 +219,14 @@ class Attention(nn.Module): ...@@ -219,9 +219,14 @@ class Attention(nn.Module):
if self.use_output: if self.use_output:
output_shape = (output_shape output_shape = (output_shape
if output_shape is not None else query.shape) if output_shape is not None else query.shape)
output = torch.zeros(output_shape, if envs.VLLM_USE_OPT_ZEROS:
dtype=query.dtype, output = torch.empty(output_shape,
device=query.device) dtype=query.dtype,
device=query.device)
else:
output = torch.zeros(output_shape,
dtype=query.dtype,
device=query.device)
hidden_size = output_shape[-1] hidden_size = output_shape[-1]
# We skip reshaping query, key and value tensors for the MLA # We skip reshaping query, key and value tensors for the MLA
# backend since these tensors have different semantics and are # backend since these tensors have different semantics and are
......
...@@ -165,6 +165,7 @@ if TYPE_CHECKING: ...@@ -165,6 +165,7 @@ if TYPE_CHECKING:
VLLM_USE_APEX_RN: bool = False VLLM_USE_APEX_RN: bool = False
VLLM_USE_GLOBAL_CACHE13: bool = False VLLM_USE_GLOBAL_CACHE13: bool = False
VLLM_USE_LIGHTOP: bool = False VLLM_USE_LIGHTOP: bool = False
VLLM_USE_OPT_ZEROS: bool = False
VLLM_USE_OPT_CAT: bool = False VLLM_USE_OPT_CAT: bool = False
VLLM_USE_OPT_MOE_SUM: bool = False VLLM_USE_OPT_MOE_SUM: bool = False
VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD: bool = False VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD: bool = False
...@@ -1105,6 +1106,10 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1105,6 +1106,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_USE_LIGHTOP": "VLLM_USE_LIGHTOP":
lambda: (os.environ.get("VLLM_USE_LIGHTOP", "False").lower() in lambda: (os.environ.get("VLLM_USE_LIGHTOP", "False").lower() in
("true", "1")), ("true", "1")),
# vLLM will use elementwise, not triton_: allocate the attention output with torch.empty instead of torch.zeros
"VLLM_USE_OPT_ZEROS":
lambda: (os.environ.get("VLLM_USE_OPT_ZEROS", "False").lower() in
("true", "1")),
# vLLM will use opt cat for deepseek-v3 # vLLM will use opt cat for deepseek-v3
"VLLM_USE_OPT_CAT": "VLLM_USE_OPT_CAT":
lambda: (os.environ.get("VLLM_USE_OPT_CAT", "False").lower() in lambda: (os.environ.get("VLLM_USE_OPT_CAT", "False").lower() in
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment