Commit ef33478d authored by zhuwenwen
Browse files

update common.py

parent ffaad3df
...@@ -913,6 +913,10 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]): ...@@ -913,6 +913,10 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
assert attn_metadata.prefill is not None assert attn_metadata.prefill is not None
has_context = attn_metadata.prefill.chunked_context is not None has_context = attn_metadata.prefill.chunked_context is not None
if envs.VLLM_HAS_CONTEXT_DEFAULT:
has_context = attn_metadata.prefill.chunked_context is not None
else:
has_context = False
kv_nope = self.kv_b_proj(kv_c_normed)[0].view(\ kv_nope = self.kv_b_proj(kv_c_normed)[0].view(\
-1, self.num_heads, self.qk_nope_head_dim + self.v_head_dim) -1, self.num_heads, self.qk_nope_head_dim + self.v_head_dim)
k_nope, v = kv_nope\ k_nope, v = kv_nope\
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment