update attention kernels and version

0e640807 · zhuwenwen · 69341fde · 0e640807 · 0e640807
Commit 0e640807 authored Jul 25, 2024 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 16 additions and 16 deletions

csrc/attention/attention_kernels.cu csrc/attention/attention_kernels.cu +2 -2

setup.py setup.py +14 -14

No files found.
--- a/csrc/attention/attention_kernels.cu
+++ b/csrc/attention/attention_kernels.cu
@@ -91,7 +91,7 @@ __device__ void paged_attention_kernel(
    const int max_num_blocks_per_seq,
    const float* __restrict__ alibi_slopes,  // [num_heads]
    const int q_stride, const int kv_block_stride, const int kv_head_stride,
-    const float kv_scale, const int tp_rank, const int blocksparse_local_blocks,
+    const float k_scale, const float v_scale, const int tp_rank, const int blocksparse_local_blocks,
    const int blocksparse_vert_stride, const int blocksparse_block_size,
    const int blocksparse_head_sliding_step) {}
@@ -345,7 +345,7 @@ __device__ void paged_attention_kernel(
            Quant_vec k_vec_quant = *reinterpret_cast<const Quant_vec*>(
                k_ptr + offset1 * BLOCK_SIZE * x + offset2);
            k_vecs[j] = fp8::scaled_convert<K_vec, Quant_vec, KV_DTYPE>(
-                k_vec_quant, kv_scale);
+                k_vec_quant, k_scale);
          }
        }
      }

--- a/setup.py
+++ b/setup.py
@@ -394,21 +394,21 @@ def get_version_add(sha: Optional[str] = None) -> str:
    # torch version
    version += ".torch" + torch.__version__[:5]
-    new_version_content = f"""\  
+    new_version_content = f"""
-    import warnings  
+import warnings  
-    try:  
+try:  
-        import vllm.commit_id  
+    import vllm.commit_id  
-        __commit__ = vllm.commit_id.__commit__  
+    __commit__ = vllm.commit_id.__commit__  
-    except Exception as e:  
+except Exception as e:  
-        warnings.warn(f"Failed to read commit hash:\\n{e}",  
+    warnings.warn(f"Failed to read commit hash:\\n{{e}}",  
-                    RuntimeWarning,  
+                RuntimeWarning,  
-                    stacklevel=2)  
+                stacklevel=2)  
-        __commit__ = "COMMIT_HASH_PLACEHOLDER"  
+    __commit__ = "COMMIT_HASH_PLACEHOLDER"  
-    __version__ = "0.5.3.post1"  
+__version__ = "0.5.3.post1"  
-    __dcu_version__ = '0.5.3.post1+{version}'
+__dcu_version__ = f'0.5.3.post1+{version}'
-    """.format(version=version)
+"""
    with open(add_version_path, encoding="utf-8",mode="w") as file:
        file.write(new_version_content)