Unverified Commit 28c3f121 authored by Woosuk Kwon's avatar Woosuk Kwon Committed by GitHub
Browse files

[Minor] Remove unused code in attention (#2384)

parent c8848191
...@@ -156,7 +156,6 @@ class PagedAttention(nn.Module): ...@@ -156,7 +156,6 @@ class PagedAttention(nn.Module):
output = out.view_as(query) output = out.view_as(query)
else: else:
# Decoding run. # Decoding run.
if key_cache is not None and value_cache is not None:
output = _paged_attention( output = _paged_attention(
query, query,
key_cache, key_cache,
...@@ -166,10 +165,6 @@ class PagedAttention(nn.Module): ...@@ -166,10 +165,6 @@ class PagedAttention(nn.Module):
self.scale, self.scale,
self.alibi_slopes, self.alibi_slopes,
) )
else:
# This happens during the initial memory profiling run for
# CUDA graphs.
output = torch.zeros_like(query)
# Reshape the output tensor. # Reshape the output tensor.
return output.view(batch_size, seq_len, hidden_size) return output.view(batch_size, seq_len, hidden_size)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment