Unverified commit eb1051fb, authored by Ye (Charlotte) Qi and committed by GitHub
Browse files

[ROCm] Guard group quant RMS norm fusion patterns (#30239)

parent 80433e22
@@ -490,23 +490,25 @@ class RMSNormQuantFusionPass(VllmPatternMatcherPass):
         # as the latter is a subset of the former in torch ops
         for epsilon in [1e-5, 1e-6]:
             # Fuse fused_add_rms_norm + fp8 group quant
-            FusedAddRMSNormGroupQuantPattern(
-                epsilon, FP8_DTYPE, group_shape=GroupShape(1, 128)
-            ).register(self.patterns)
-
-            # Fuse rms_norm + fp8 group quant
-            RMSNormGroupQuantPattern(
-                epsilon, FP8_DTYPE, group_shape=GroupShape(1, 128)
-            ).register(self.patterns)
-
-            FusedAddRMSNormGroupQuantPattern(
-                epsilon, FP8_DTYPE, group_shape=GroupShape(1, 64)
-            ).register(self.patterns)
-
-            # Fuse rms_norm + fp8 group quant
-            RMSNormGroupQuantPattern(
-                epsilon, FP8_DTYPE, group_shape=GroupShape(1, 64)
-            ).register(self.patterns)
+            # Only register group quant patterns on CUDA where the C++ op exists
+            if current_platform.is_cuda():
+                FusedAddRMSNormGroupQuantPattern(
+                    epsilon, FP8_DTYPE, group_shape=GroupShape(1, 128)
+                ).register(self.patterns)
+
+                # Fuse rms_norm + fp8 group quant
+                RMSNormGroupQuantPattern(
+                    epsilon, FP8_DTYPE, group_shape=GroupShape(1, 128)
+                ).register(self.patterns)
+
+                FusedAddRMSNormGroupQuantPattern(
+                    epsilon, FP8_DTYPE, group_shape=GroupShape(1, 64)
+                ).register(self.patterns)
+
+                # Fuse rms_norm + fp8 group quant
+                RMSNormGroupQuantPattern(
+                    epsilon, FP8_DTYPE, group_shape=GroupShape(1, 64)
+                ).register(self.patterns)
 
             # Fuse fused_add_rms_norm + static fp8 quant
             FusedAddRMSNormStaticQuantPattern(epsilon, FP8_DTYPE).register(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment