Commit 2dc182c0 authored by laibao
Browse files

[BUGFIX] 为 Marlin MoE 路径新增 silu_and_mul_opt 优先执行及回退机制

parent 3842b316
...@@ -375,11 +375,17 @@ def fused_experts_impl_w16a16_marlin(hidden_states: torch.Tensor, ...@@ -375,11 +375,17 @@ def fused_experts_impl_w16a16_marlin(hidden_states: torch.Tensor,
top_k_num, top_k_num,
config_marlin_0, config_marlin_0,
) )
if (envs.VLLM_USE_FUSE_SILU_AND_MUL if envs.VLLM_USE_FUSE_SILU_AND_MUL:
and intermediate_cache1.dtype == intermediate_cache2.dtype try:
== torch.float16): op.silu_and_mul_opt(intermediate_cache2, intermediate_cache1)
from lightop import fuse_silu_and_mul except Exception:
fuse_silu_and_mul(intermediate_cache1, intermediate_cache2) try:
from lightop import fuse_silu_and_mul
fuse_silu_and_mul(intermediate_cache1,
intermediate_cache2)
except Exception:
torch.ops._C.silu_and_mul(intermediate_cache2,intermediate_cache1)
else: else:
torch.ops._C.silu_and_mul(intermediate_cache2, intermediate_cache1) torch.ops._C.silu_and_mul(intermediate_cache2, intermediate_cache1)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment