Unverified commit 3628bcaa, authored by Zhiwei, committed by GitHub
Browse files

[ROCm][MXFP4] Infer w4a4 quant method in rocm aiter fused moe (#29775)


Signed-off-by: ZhiweiYan-96 <zhiwei.yan@amd.com>
parent b73b158a
......@@ -345,6 +345,10 @@ class FusedMoEQuantConfig:
def use_mxfp4_w4a16(self) -> bool:
return self._a1.dtype is None and self._w1.dtype == "mxfp4"
# Read-only flag: True only when `self._a1.dtype` and `self._w1.dtype` are
# both the string "mxfp4".
# NOTE(review): `_a1` / `_w1` are presumably the activation and weight
# descriptors (per the "w4a4" name) -- confirm against the class fields.
@property
def use_mxfp4_w4a4(self) -> bool:
return self._a1.dtype == "mxfp4" and self._w1.dtype == "mxfp4"
# Read-only flag: True when `self.quant_dtype` equals the string "nvfp4".
# NOTE(review): how `quant_dtype` is derived is not visible in this excerpt;
# confirm it reflects the activation dtype before relying on the "a4" part
# of the name.
@property
def use_nvfp4_w4a4(self) -> bool:
return self.quant_dtype == "nvfp4"
......
......@@ -221,8 +221,8 @@ def rocm_aiter_fused_experts(
else:
quant_method = QuantMethod.NO.value
# quark moe for mxfp4 w_dtype
if quant_config.use_mxfp4_w4a16:
# quark moe for mxfp4 w_dtype mxfp4 a_dtype
if quant_config.use_mxfp4_w4a4:
quant_method = QuantMethod.BLOCK_1X32.value
# w8a8 block-scaled
if quant_config.block_shape is not None and quant_config.use_fp8_w8a8:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment