Unverified commit daa05bf3, authored by EdalatiAli and committed by GitHub
Browse files

[Bugfix] Fix AttributeError when serving MXFP8 models with DeepGEMM installed (#37358)


Signed-off-by: EdalatiAli <aliedalati@cohere.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent 7769b583
......@@ -19,6 +19,7 @@ from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
)
from vllm.model_executor.layers.linear import LinearBase
from vllm.model_executor.layers.quantization.fp8 import Fp8LinearMethod
from vllm.model_executor.layers.quantization.mxfp8 import Mxfp8OnlineLinearMethod
from vllm.tracing import instrument
from vllm.utils.deep_gemm import (
fp8_gemm_nt,
......@@ -136,8 +137,9 @@ def _fp8_linear_may_use_deep_gemm(module: torch.nn.Module) -> bool:
if not (
isinstance(module, LinearBase)
and isinstance(module.quant_method, Fp8LinearMethod)
and module.quant_method.block_quant
and not module.quant_method.use_marlin
and not isinstance(module.quant_method, Mxfp8OnlineLinearMethod)
and getattr(module.quant_method, "block_quant", False)
and not getattr(module.quant_method, "use_marlin", True)
):
return False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment