Unverified commit 3628bcaa, authored by Zhiwei, committed by GitHub
Browse files

[ROCm][MXFP4] Infer w4a4 quant method in rocm aiter fused moe (#29775)


Signed-off-by: ZhiweiYan-96 <zhiwei.yan@amd.com>
parent b73b158a
...@@ -345,6 +345,10 @@ class FusedMoEQuantConfig: ...@@ -345,6 +345,10 @@ class FusedMoEQuantConfig:
def use_mxfp4_w4a16(self) -> bool: def use_mxfp4_w4a16(self) -> bool:
return self._a1.dtype is None and self._w1.dtype == "mxfp4" return self._a1.dtype is None and self._w1.dtype == "mxfp4"
@property
def use_mxfp4_w4a4(self) -> bool:
return self._a1.dtype == "mxfp4" and self._w1.dtype == "mxfp4"
@property @property
def use_nvfp4_w4a4(self) -> bool: def use_nvfp4_w4a4(self) -> bool:
return self.quant_dtype == "nvfp4" return self.quant_dtype == "nvfp4"
......
...@@ -221,8 +221,8 @@ def rocm_aiter_fused_experts( ...@@ -221,8 +221,8 @@ def rocm_aiter_fused_experts(
else: else:
quant_method = QuantMethod.NO.value quant_method = QuantMethod.NO.value
# quark moe for mxfp4 w_dtype # quark moe for mxfp4 w_dtype mxfp4 a_dtype
if quant_config.use_mxfp4_w4a16: if quant_config.use_mxfp4_w4a4:
quant_method = QuantMethod.BLOCK_1X32.value quant_method = QuantMethod.BLOCK_1X32.value
# w8a8 block-scaled # w8a8 block-scaled
if quant_config.block_shape is not None and quant_config.use_fp8_w8a8: if quant_config.block_shape is not None and quant_config.use_fp8_w8a8:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment