Commit c9733a54 authored by zhuwenwen
Browse files

Merge branch 'v0.15.1-dev-qwen3-vl-fused-rope' into 'v0.15.1-dev'

fix(qwen3): 修复VL场景fused RoPE分支条件,并更正qwen3_moe中rms_mrope参数顺序

See merge request dcutoolkit/deeplearing/vllm!451
parents 1ce0a9a2 02b0d6ba
......@@ -196,7 +196,7 @@ class Qwen3Attention(nn.Module):
) -> torch.Tensor:
qkv, _ = self.qkv_proj(hidden_states)
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
if envs.VLLM_USE_FUSED_RMS_ROPE:
if envs.VLLM_USE_FUSED_RMS_ROPE and positions.ndim == 1:
# Fused RMSNorm + RoPE path through custom op.
cos_sin_cache = self.rotary_emb.cos_sin_cache
if (cos_sin_cache.device != q.device
......
......@@ -444,9 +444,9 @@ class Qwen3MoeAttention(nn.Module):
mrope_interleaved,
q_weight,
k_weight,
epsilon,
q_residual,
k_residual,
epsilon,
)
def rms_mrope_fuse_fake(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment