"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "8dd2419fa96488bc862ad60f7fc7ffffa39728e7"
Commit ef8dd155 authored by zhuwenwen
Browse files

Merge branch 'v0.15.1-dev' of http://10.16.6.30/dcutoolkit/deeplearing/vllm into v0.15.1-dev

parents d71496bf c9733a54
...@@ -196,7 +196,7 @@ class Qwen3Attention(nn.Module): ...@@ -196,7 +196,7 @@ class Qwen3Attention(nn.Module):
) -> torch.Tensor: ) -> torch.Tensor:
qkv, _ = self.qkv_proj(hidden_states) qkv, _ = self.qkv_proj(hidden_states)
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1) q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
if envs.VLLM_USE_FUSED_RMS_ROPE: if envs.VLLM_USE_FUSED_RMS_ROPE and positions.ndim == 1:
# Fused RMSNorm + RoPE path through custom op. # Fused RMSNorm + RoPE path through custom op.
cos_sin_cache = self.rotary_emb.cos_sin_cache cos_sin_cache = self.rotary_emb.cos_sin_cache
if (cos_sin_cache.device != q.device if (cos_sin_cache.device != q.device
......
...@@ -444,9 +444,9 @@ class Qwen3MoeAttention(nn.Module): ...@@ -444,9 +444,9 @@ class Qwen3MoeAttention(nn.Module):
mrope_interleaved, mrope_interleaved,
q_weight, q_weight,
k_weight, k_weight,
epsilon,
q_residual, q_residual,
k_residual, k_residual,
epsilon,
) )
def rms_mrope_fuse_fake( def rms_mrope_fuse_fake(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment