Commit 2dc182c0 authored by laibao
Browse files

[BUGFIX] 为 Marlin MoE 路径新增 silu_and_mul_opt 优先执行及回退机制

parent 3842b316
......@@ -375,11 +375,17 @@ def fused_experts_impl_w16a16_marlin(hidden_states: torch.Tensor,
top_k_num,
config_marlin_0,
)
if (envs.VLLM_USE_FUSE_SILU_AND_MUL
and intermediate_cache1.dtype == intermediate_cache2.dtype
== torch.float16):
from lightop import fuse_silu_and_mul
fuse_silu_and_mul(intermediate_cache1, intermediate_cache2)
if envs.VLLM_USE_FUSE_SILU_AND_MUL:
try:
op.silu_and_mul_opt(intermediate_cache2, intermediate_cache1)
except Exception:
try:
from lightop import fuse_silu_and_mul
fuse_silu_and_mul(intermediate_cache1,
intermediate_cache2)
except Exception:
torch.ops._C.silu_and_mul(intermediate_cache2,intermediate_cache1)
else:
torch.ops._C.silu_and_mul(intermediate_cache2, intermediate_cache1)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment