Remove unused MLA utils.py

b01efa0b · zhuwenwen · 4a19cdf5 · b01efa0b · 4a19cdf5
Commit b01efa0b authored Apr 14, 2025 by zhuwenwen
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 549 deletions

vllm/attention/backends/mla/common.py vllm/attention/backends/mla/common.py +4 -2

vllm/attention/backends/mla/utils.py vllm/attention/backends/mla/utils.py +0 -547

No files found.
--- a/vllm/attention/backends/mla/common.py
+++ b/vllm/attention/backends/mla/common.py
@@ -1297,7 +1297,7 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
            output = self.flash_attn_varlen_func(
                q=q,
                k=k,
-                v=v_padded,
+                v=v,
                cu_seqlens_q=prefill_metadata.query_start_loc,
                cu_seqlens_k=prefill_metadata.query_start_loc,
                max_seqlen_q=prefill_metadata.max_prefill_seq_len,
@@ -1323,8 +1323,10 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
            )
        # slice by `:v.shape[-1]` in order to remove v headdim padding
+        # output = output\
+        #     .view(-1, self.num_heads, q.shape[-1])[..., :v.shape[-1]]\
+        #         .reshape(-1, self.num_heads * v.shape[-1])
        output = output\
-            .view(-1, self.num_heads, q.shape[-1])[..., :v.shape[-1]]\
                .reshape(-1, self.num_heads * v.shape[-1])
        return self.o_proj(output)[0]

--- a/vllm/attention/backends/mla/utils.py
+++ b/vllm/attention/backends/mla/utils.py