Commit ef8dd155 authored by zhuwenwen
Browse files

Merge branch 'v0.15.1-dev' of http://10.16.6.30/dcutoolkit/deeplearing/vllm into v0.15.1-dev

parents d71496bf c9733a54
......@@ -196,7 +196,7 @@ class Qwen3Attention(nn.Module):
) -> torch.Tensor:
qkv, _ = self.qkv_proj(hidden_states)
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
-if envs.VLLM_USE_FUSED_RMS_ROPE:
+if envs.VLLM_USE_FUSED_RMS_ROPE and positions.ndim == 1:
# Fused RMSNorm + RoPE path through custom op.
cos_sin_cache = self.rotary_emb.cos_sin_cache
if (cos_sin_cache.device != q.device
......
......@@ -444,9 +444,9 @@ class Qwen3MoeAttention(nn.Module):
mrope_interleaved,
q_weight,
k_weight,
epsilon,
q_residual,
k_residual,
epsilon,
)
def rms_mrope_fuse_fake(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment