Unverified commit 66c1751d, authored by Jason Li, committed by GitHub
Browse files

[compile] Cleanup: Remove unnecessary +rms_norm forcing for sequence parallelism (#35410)


Signed-off-by: jasonlizhengjian <jasonlizhengjian@gmail.com>
parent 6467b635
...@@ -860,7 +860,7 @@ class VllmConfig: ...@@ -860,7 +860,7 @@ class VllmConfig:
self.compilation_config.pass_config.fuse_gemm_comms = False self.compilation_config.pass_config.fuse_gemm_comms = False
else: else:
# Compute SP threshold early; disable if None (model too # Compute SP threshold early; disable if None (model too
# small) before +rms_norm gets forced into custom_ops. # small for SP to be beneficial).
pass_config = self.compilation_config.pass_config pass_config = self.compilation_config.pass_config
if pass_config.sp_min_token_num is None: if pass_config.sp_min_token_num is None:
from vllm.compilation.passes.fusion.sequence_parallelism import ( from vllm.compilation.passes.fusion.sequence_parallelism import (
...@@ -883,14 +883,6 @@ class VllmConfig: ...@@ -883,14 +883,6 @@ class VllmConfig:
self.compilation_config.pass_config.enable_sp = False self.compilation_config.pass_config.enable_sp = False
self.compilation_config.pass_config.fuse_gemm_comms = False self.compilation_config.pass_config.fuse_gemm_comms = False
if self.compilation_config.pass_config.enable_sp:
if "-rms_norm" in self.compilation_config.custom_ops:
logger.warning(
"RMS norm force disabled, sequence parallelism might break"
)
else:
self.compilation_config.custom_ops.append("+rms_norm")
if self.compilation_config.fast_moe_cold_start is None: if self.compilation_config.fast_moe_cold_start is None:
# resolve default behavior: try to be as safe as possible # resolve default behavior: try to be as safe as possible
# this config is unsafe if any spec decoding draft model has a MOE. # this config is unsafe if any spec decoding draft model has a MOE.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment