Unverified commit ccaf5ffa, authored by Chaojun Zhang, committed by GitHub
Browse files

[XPU] disable fusion pattern support on XPU platform (#39789)


Signed-off-by: Chaojun Zhang <chaojun.zhang@intel.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
parent 0283f303
@@ -213,6 +213,26 @@ class XPUPlatform(Platform):
"falling back to PIECEWISE graph mode on XPU platform." "falling back to PIECEWISE graph mode on XPU platform."
) )
# Disable fusion passes not yet supported on XPU.
pass_config = compilation_config.pass_config
fusion_passes_to_disable = {
"enable_sp": "Sequence parallelism",
"fuse_gemm_comms": "Async TP",
"fuse_allreduce_rms": "AllReduce + RMSNorm fusion",
"fuse_norm_quant": "RMSNorm + quant fusion",
"fuse_act_quant": "Activation + quant fusion",
"fuse_attn_quant": "Attention + quant fusion",
"fuse_act_padding": "Activation + padding fusion",
"fuse_rope_kvcache": "RoPE + KV cache fusion",
}
for flag, feature_name in fusion_passes_to_disable.items():
if getattr(pass_config, flag):
logger.warning(
"Feature %r is not yet supported on XPU and will be disabled.",
feature_name,
)
setattr(pass_config, flag, False)
# check and update parallel config # check and update parallel config
parallel_config = vllm_config.parallel_config parallel_config = vllm_config.parallel_config
# Only override worker_cls if it's still the default "auto" # Only override worker_cls if it's still the default "auto"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment