update version and deps

5a9c236d · zhuwenwen · 539aa992 · 5a9c236d · 5a9c236d · 5a9c236d
Commit 5a9c236d authored Sep 27, 2024 by zhuwenwen
Showing 4 changed files with 7 additions and 5 deletions

README.md README.md +1 -1

requirements-rocm.txt requirements-rocm.txt +1 -0

setup.py setup.py +1 -1

vllm/attention/backends/rocm_flash_attn.py vllm/attention/backends/rocm_flash_attn.py +4 -3

No files found.
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ VLLM_INSTALL_PUNICA_KERNELS=1 python3 setup.py install
 + 若使用 pip install 下载安装过慢，可添加源：-i https://pypi.tuna.tsinghua.edu.cn/simple/
 ## 验证
- python -c "import vllm; print(vllm.\_\_version__)"，版本号与官方版本同步，查询该软件的版本号，例如0.6.1.post2；
+- python -c "import vllm; print(vllm.\_\_version__)"，版本号与官方版本同步，查询该软件的版本号，例如0.6.2；
 ## Known Issue
 - 无

--- a/requirements-rocm.txt
+++ b/requirements-rocm.txt
@@ -9,6 +9,7 @@ ray >= 2.10.0
 peft
 pytest-asyncio
 tensorizer>=2.9.0
+setuptools_scm
 torch == 2.3.0
 triton == 2.1.0

--- a/setup.py
+++ b/setup.py
@@ -399,7 +399,7 @@ def get_version_add(sha: Optional[str] = None) -> str:
 try:
    __version__ = "0.6.2"
    __version_tuple__ = (0, 6, 2)
-    __dcu_version__ = f'0.6.2+{version}
+    __dcu_version__ = f'0.6.2+{version}' 
    from vllm.version import __version__, __version_tuple__, __dcu_version__
 except Exception as e:

--- a/vllm/attention/backends/rocm_flash_attn.py
+++ b/vllm/attention/backends/rocm_flash_attn.py
@@ -572,9 +572,10 @@ class ROCmFlashAttentionImpl(AttentionImpl):
            num_seqs, num_heads, head_size = decode_query.shape
            block_size = value_cache.shape[3]
            gqa_ratio = num_heads // self.num_kv_heads
-            use_custom = _use_rocm_custom_paged_attention(
+            # use_custom = _use_rocm_custom_paged_attention(
-                decode_query.dtype, head_size, block_size, gqa_ratio,
+            #     decode_query.dtype, head_size, block_size, gqa_ratio,
-                decode_meta.max_decode_seq_len)
+            #     decode_meta.max_decode_seq_len)
+            use_custom = False
            if use_custom:
                max_seq_len = decode_meta.max_decode_seq_len
                max_num_partitions = (