Change the default NUM_THREADS of paged attention v1 (paged_attention_v1_launcher) from 256 to 128

bbf9488b · zhuwenwen · 8ee4ae1f · bbf9488b
Commit bbf9488b authored Jul 02, 2024 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

csrc/attention/attention_kernels.cu +1 -1

No files found.
--- a/csrc/attention/attention_kernels.cu
+++ b/csrc/attention/attention_kernels.cu
@@ -689,7 +689,7 @@ __global__ void paged_attention_v2_reduce_kernel(
 // TODO(woosuk): Tune NUM_THREADS.
 template <typename T, typename CACHE_T, int BLOCK_SIZE,
          vllm::Fp8KVCacheDataType KV_DTYPE, bool IS_BLOCK_SPARSE,
-          int NUM_THREADS = 256>
+          int NUM_THREADS = 128>
 void paged_attention_v1_launcher(
    torch::Tensor& out, torch::Tensor& query, torch::Tensor& key_cache,
    torch::Tensor& value_cache, int num_kv_heads, float scale,