Fix a bug in attention kernel (#68): compute logits_size with sizeof(float) instead of sizeof(T)

130d5fd8 · Woosuk Kwon · GitHub · e070829a · 130d5fd8
Unverified commit 130d5fd8, authored May 04, 2023 by Woosuk Kwon; committed by GitHub on May 04, 2023.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

csrc/attention/attention_kernels.cu +1 -1

No files found.
--- a/csrc/attention/attention_kernels.cu
+++ b/csrc/attention/attention_kernels.cu
@@ -345,7 +345,7 @@ void single_query_cached_kv_attention_launcher(

  constexpr int NUM_WARPS = NUM_THREADS / WARP_SIZE;
  int padded_max_context_len = ((max_context_len + BLOCK_SIZE - 1) / BLOCK_SIZE) * BLOCK_SIZE;
-  int logits_size = padded_max_context_len * sizeof(T);
+  int logits_size = padded_max_context_len * sizeof(float);
  int outputs_size = (NUM_WARPS / 2) * head_size * sizeof(float);
  int shared_mem_size = std::max(logits_size, outputs_size);