Commit a014d6a5 authored by zhuwenwen
Browse files

update qwen3_moe of layernorm and activation

parent 8d6b0b0a
......@@ -77,7 +77,7 @@ class SiluAndMul(CustomOp):
"""PyTorch-native implementation equivalent to forward()."""
if not torch.compiler.is_compiling() and envs.VLLM_ENABLE_TBO:
return self.forward_cuda(x)
-elif envs.VLLM_USE_OPT_OP:
+elif not torch.compiler.is_compiling() and envs.VLLM_USE_OPT_OP:
return self.forward_cuda(x)
else:
d = x.shape[-1] // 2
......
......@@ -167,7 +167,7 @@ class RMSNorm(CustomOp):
) -> Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
if not torch.compiler.is_compiling() and envs.VLLM_ENABLE_TBO:
return self.forward_cuda(x, residual)
-elif envs.VLLM_USE_OPT_OP:
+elif not torch.compiler.is_compiling() and envs.VLLM_USE_OPT_OP:
return self.forward_cuda(x, residual)
else:
orig_dtype = x.dtype
......
......@@ -234,7 +234,7 @@ class Qwen3MoeAttention(nn.Module):
if envs.VLLM_USE_APEX_RN:
q_by_head = self.q_norm.forward_apex(q_by_head)
else:
-q_by_head = self.q_norm(q_by_head)
+q_by_head = self.q_norm.forward_cuda(q_by_head)
q = q_by_head.view(q.shape)
k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim,
......@@ -242,7 +242,7 @@ class Qwen3MoeAttention(nn.Module):
if envs.VLLM_USE_APEX_RN:
k_by_head = self.k_norm.forward_apex(k_by_head)
else:
-k_by_head = self.k_norm(k_by_head)
+k_by_head = self.k_norm.forward_cuda(k_by_head)
k = k_by_head.view(k.shape)
q, k = self.rotary_emb(positions, q, k)
attn_output = self.attn(q, k, v)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment