Commit 80a6b121 authored by zhuwenwen
Browse files

Change the default of VLLM_USE_FUSED_RMS_ROPE to 0 (disabled)

For qwen3, VLLM_USE_FUSED_RMS_ROPE is set to 1 by default unless the user has set it explicitly
parent ba73bd6f
...@@ -1686,7 +1686,7 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1686,7 +1686,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
("true", "1")), ("true", "1")),
# vLLM will use fused RMS + RoPE kernel # vLLM will use fused RMS + RoPE kernel
"VLLM_USE_FUSED_RMS_ROPE": "VLLM_USE_FUSED_RMS_ROPE":
lambda: (os.environ.get("VLLM_USE_FUSED_RMS_ROPE", "True").lower() in lambda: (os.environ.get("VLLM_USE_FUSED_RMS_ROPE", "False").lower() in
("true", "1")), ("true", "1")),
# vLLM will use fast token id copy # vLLM will use fast token id copy
"VLLM_V1_FAST_TOKEN_ID_COPY": "VLLM_V1_FAST_TOKEN_ID_COPY":
......
...@@ -210,6 +210,8 @@ def _get_model_architecture( ...@@ -210,6 +210,8 @@ def _get_model_architecture(
os.environ['VLLM_USE_FUSE_SILU_AND_MUL'] = '1' os.environ['VLLM_USE_FUSE_SILU_AND_MUL'] = '1'
if not envs.is_set("VLLM_USE_OPT_RESHAPE_AND_CACHE"): if not envs.is_set("VLLM_USE_OPT_RESHAPE_AND_CACHE"):
os.environ['VLLM_USE_OPT_RESHAPE_AND_CACHE'] = '1' os.environ['VLLM_USE_OPT_RESHAPE_AND_CACHE'] = '1'
if not envs.is_set("VLLM_USE_FUSED_RMS_ROPE"):
os.environ['VLLM_USE_FUSED_RMS_ROPE'] = '1'
if architectures in [['DeepseekV32ForCausalLM']]: if architectures in [['DeepseekV32ForCausalLM']]:
if not envs.is_set("VLLM_USE_V32_ENCODE"): if not envs.is_set("VLLM_USE_V32_ENCODE"):
...@@ -237,6 +239,8 @@ def _get_model_architecture( ...@@ -237,6 +239,8 @@ def _get_model_architecture(
os.environ['VLLM_USE_FUSE_SILU_AND_MUL'] = '1' os.environ['VLLM_USE_FUSE_SILU_AND_MUL'] = '1'
if not envs.is_set("VLLM_USE_OPT_RESHAPE_AND_CACHE"): if not envs.is_set("VLLM_USE_OPT_RESHAPE_AND_CACHE"):
os.environ['VLLM_USE_OPT_RESHAPE_AND_CACHE'] = '1' os.environ['VLLM_USE_OPT_RESHAPE_AND_CACHE'] = '1'
if not envs.is_set("VLLM_USE_FUSED_RMS_ROPE"):
os.environ['VLLM_USE_FUSED_RMS_ROPE'] = '1'
if architectures in [['DeepseekV32ForCausalLM']]: if architectures in [['DeepseekV32ForCausalLM']]:
if not envs.is_set("VLLM_USE_V32_ENCODE"): if not envs.is_set("VLLM_USE_V32_ENCODE"):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment