Unverified Commit 28c3f121 authored by Woosuk Kwon's avatar Woosuk Kwon Committed by GitHub
Browse files

[Minor] Remove unused code in attention (#2384)

parent c8848191
...@@ -156,7 +156,6 @@ class PagedAttention(nn.Module): ...@@ -156,7 +156,6 @@ class PagedAttention(nn.Module):
output = out.view_as(query) output = out.view_as(query)
else: else:
# Decoding run. # Decoding run.
if key_cache is not None and value_cache is not None:
output = _paged_attention( output = _paged_attention(
query, query,
key_cache, key_cache,
...@@ -166,10 +165,6 @@ class PagedAttention(nn.Module): ...@@ -166,10 +165,6 @@ class PagedAttention(nn.Module):
self.scale, self.scale,
self.alibi_slopes, self.alibi_slopes,
) )
else:
# This happens during the initial memory profiling run for
# CUDA graphs.
output = torch.zeros_like(query)
# Reshape the output tensor. # Reshape the output tensor.
return output.view(batch_size, seq_len, hidden_size) return output.view(batch_size, seq_len, hidden_size)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment