Unverified commit f72061a1, authored by Michael Goin and committed by GitHub
Browse files

[UX] More descriptive reasons in is_supported_config for MoE (#34908)


Signed-off-by: mgoin <mgoin64@gmail.com>
parent 662205d3
...@@ -129,25 +129,28 @@ def is_supported_config_trtllm_fp8( ...@@ -129,25 +129,28 @@ def is_supported_config_trtllm_fp8(
return f"kernel does not support {reason}" return f"kernel does not support {reason}"
if not _supports_current_device(): if not _supports_current_device():
return False, _make_reason("current device") return False, _make_reason(f"current device {current_platform.device_name}")
elif not (moe_config.is_act_and_mul or _supports_no_act_and_mul()): elif not (moe_config.is_act_and_mul or _supports_no_act_and_mul()):
return False, _make_reason("no act_and_mul MLP layer") return False, _make_reason("no act_and_mul MLP layer")
elif not _supports_activation(moe_config.activation): elif not _supports_activation(moe_config.activation):
return False, _make_reason(f"{moe_config.activation} activation") return False, _make_reason(f"{moe_config.activation} activation")
elif not _supports_quant_scheme(weight_key, activation_key): elif not _supports_quant_scheme(weight_key, activation_key):
return False, _make_reason("quantization scheme") return False, _make_reason(f"quantization scheme {weight_key}x{activation_key}")
elif not _supports_parallel_config(moe_config.moe_parallel_config): elif not _supports_parallel_config(moe_config.moe_parallel_config):
return False, _make_reason("parallel config") return False, _make_reason(f"parallel config {moe_config.moe_parallel_config}")
elif not _supports_routing_method( elif not _supports_routing_method(
weight_key, activation_key, moe_config.routing_method weight_key, activation_key, moe_config.routing_method
): ):
return False, _make_reason("routing method") return False, _make_reason(f"routing method {moe_config.routing_method}")
elif activation_format != mk.FusedMoEActivationFormat.Standard: elif activation_format != mk.FusedMoEActivationFormat.Standard:
return False, _make_reason("activation format") return False, _make_reason(f"activation format {activation_format}")
elif not _supports_router_logits_dtype( elif not _supports_router_logits_dtype(
moe_config.router_logits_dtype, moe_config.routing_method moe_config.router_logits_dtype, moe_config.routing_method
): ):
return False, _make_reason("float32 router_logits with non-DeepSeekV3 routing") return False, _make_reason(
"float32 router_logits with non-DeepSeekV3 routing "
f"{moe_config.router_logits_dtype}x{moe_config.routing_method}"
)
return True, None return True, None
...@@ -165,17 +168,17 @@ def is_supported_config_trtllm_bf16( ...@@ -165,17 +168,17 @@ def is_supported_config_trtllm_bf16(
return f"kernel does not support {reason}" return f"kernel does not support {reason}"
if not _supports_current_device(): if not _supports_current_device():
return False, _make_reason("current device") return False, _make_reason(f"current device {current_platform.device_name}")
elif not (moe_config.is_act_and_mul or _supports_no_act_and_mul()): elif not (moe_config.is_act_and_mul or _supports_no_act_and_mul()):
return False, _make_reason("no act_and_mul MLP layer") return False, _make_reason("no act_and_mul MLP layer")
elif not _supports_activation(moe_config.activation): elif not _supports_activation(moe_config.activation):
return False, _make_reason(f"{moe_config.activation} activation") return False, _make_reason(f"{moe_config.activation} activation")
elif not _supports_parallel_config(moe_config.moe_parallel_config): elif not _supports_parallel_config(moe_config.moe_parallel_config):
return False, _make_reason("parallel config") return False, _make_reason(f"parallel config {moe_config.moe_parallel_config}")
elif not _supports_routing_method_bf16(moe_config.routing_method): elif not _supports_routing_method_bf16(moe_config.routing_method):
return False, _make_reason("routing method") return False, _make_reason(f"routing method {moe_config.routing_method}")
elif activation_format != mk.FusedMoEActivationFormat.Standard: elif activation_format != mk.FusedMoEActivationFormat.Standard:
return False, _make_reason("activation format") return False, _make_reason(f"activation format {activation_format}")
return True, None return True, None
......
...@@ -29,6 +29,7 @@ from vllm.model_executor.layers.fused_moe.utils import ( ...@@ -29,6 +29,7 @@ from vllm.model_executor.layers.fused_moe.utils import (
from vllm.model_executor.layers.quantization.utils.quant_utils import ( from vllm.model_executor.layers.quantization.utils.quant_utils import (
QuantKey, QuantKey,
) )
from vllm.platforms import current_platform
from vllm.utils.math_utils import cdiv from vllm.utils.math_utils import cdiv
from vllm.v1.worker.ubatching import ( from vllm.v1.worker.ubatching import (
dbo_enabled, dbo_enabled,
...@@ -498,15 +499,19 @@ class FusedMoEPermuteExpertsUnpermute(ABC): ...@@ -498,15 +499,19 @@ class FusedMoEPermuteExpertsUnpermute(ABC):
return f"kernel does not support {reason}" return f"kernel does not support {reason}"
if not cls._supports_current_device(): if not cls._supports_current_device():
return False, _make_reason("current device") return False, _make_reason(f"current device {current_platform.device_name}")
elif not (moe_config.is_act_and_mul or cls._supports_no_act_and_mul()): elif not (moe_config.is_act_and_mul or cls._supports_no_act_and_mul()):
return False, _make_reason("no act_and_mul MLP layer") return False, _make_reason("no act_and_mul MLP layer")
elif not cls._supports_activation(moe_config.activation): elif not cls._supports_activation(moe_config.activation):
return False, _make_reason(f"{moe_config.activation} activation") return False, _make_reason(f"{moe_config.activation} activation")
elif not cls._supports_quant_scheme(weight_key, activation_key): elif not cls._supports_quant_scheme(weight_key, activation_key):
return False, _make_reason("quantization scheme") return False, _make_reason(
f"quantization scheme {weight_key}x{activation_key}"
)
elif not cls._supports_parallel_config(moe_config.moe_parallel_config): elif not cls._supports_parallel_config(moe_config.moe_parallel_config):
return False, _make_reason("parallel config") return False, _make_reason(
f"parallel config {moe_config.moe_parallel_config}"
)
elif activation_format != cls.activation_format(): elif activation_format != cls.activation_format():
return False, _make_reason(f"{activation_format.value} activation format") return False, _make_reason(f"{activation_format.value} activation format")
return True, None return True, None
......
...@@ -109,21 +109,23 @@ def is_supported_config_trtllm( ...@@ -109,21 +109,23 @@ def is_supported_config_trtllm(
return f"kernel does not support {reason}" return f"kernel does not support {reason}"
if not _supports_current_device(): if not _supports_current_device():
return False, _make_reason("current device") return False, _make_reason(f"current device {current_platform.device_name}")
elif not (moe_config.is_act_and_mul or _supports_no_act_and_mul()): elif not (moe_config.is_act_and_mul or _supports_no_act_and_mul()):
return False, _make_reason("no act_and_mul MLP layer") return False, _make_reason("no act_and_mul MLP layer")
elif not _supports_activation(moe_config.activation): elif not _supports_activation(moe_config.activation):
return False, _make_reason(f"{moe_config.activation} activation") return False, _make_reason(f"{moe_config.activation} activation")
elif not _supports_quant_scheme(weight_key, activation_key): elif not _supports_quant_scheme(weight_key, activation_key):
return False, _make_reason("quantization scheme") return False, _make_reason(f"quantization scheme {weight_key}x{activation_key}")
elif not _supports_parallel_config(moe_config.moe_parallel_config): elif not _supports_parallel_config(moe_config.moe_parallel_config):
return False, _make_reason("parallel config") return False, _make_reason(f"parallel config {moe_config.moe_parallel_config}")
elif not _supports_routing_method(moe_config.routing_method): elif not _supports_routing_method(moe_config.routing_method):
return False, _make_reason("routing method") return False, _make_reason(f"routing method {moe_config.routing_method}")
elif activation_format != mk.FusedMoEActivationFormat.Standard: elif activation_format != mk.FusedMoEActivationFormat.Standard:
return False, _make_reason("activation format") return False, _make_reason(f"activation format {activation_format}")
elif moe_config.hidden_dim % 512 != 0: elif moe_config.hidden_dim % 512 != 0:
return False, _make_reason("hidden_dim must be divisible by 512") return False, _make_reason(
f"hidden_dim must be divisible by 512, found {moe_config.hidden_dim}"
)
return True, None return True, None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment