Commit d4df43b0 authored by zhuwenwen
Browse files

set VLLM_USE_FUSED_RMS_ROPE=1

parent 30559839
......@@ -1297,7 +1297,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
# vLLM will use fused RMS + RoPE kernel
"VLLM_USE_FUSED_RMS_ROPE":
-lambda: (os.environ.get("VLLM_USE_FUSED_RMS_ROPE", "False").lower() in
+lambda: (os.environ.get("VLLM_USE_FUSED_RMS_ROPE", "True").lower() in
("true", "1")),
# vLLM will use Marlin W16A16 kernel for MoE experts
"VLLM_USE_MARLIN_W16A16_MOE":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment