Unverified commit de02b07d, authored by Michael Goin, committed by GitHub
Browse files

[Bugfix] Lazy import gpt_oss_triton_kernels_moe for mxfp4 (#23678)


Signed-off-by: mgoin <mgoin64@gmail.com>
parent eb199516
......@@ -10,8 +10,6 @@ from vllm.config import get_current_vllm_config
from vllm.logger import init_logger
from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEConfig,
FusedMoEMethodBase)
from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
triton_kernel_moe_forward)
from vllm.model_executor.layers.linear import (LinearBase,
UnquantizedLinearMethod)
from vllm.model_executor.layers.quantization import QuantizationMethods
......@@ -557,6 +555,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
)[0]
return trtllm_gen_output
else:
from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import ( # noqa: E501
triton_kernel_moe_forward)
return triton_kernel_moe_forward(
hidden_states=x,
w1=self.w13_weight_triton_tensor,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment