[ROCm] Fix broken import in platform attention backend dispatching (#30432)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>

[ROCm] Fix broken import in platform attention backend dispatching (#30432)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
b51255f3 · Andreas Karatzas · GitHub · b4054c8a · b51255f3
Unverified Commit b51255f3 authored Dec 10, 2025 by Andreas Karatzas Committed by GitHub Dec 11, 2025
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 15 additions and 1 deletion

vllm/platforms/rocm.py +15 -1

No files found.
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -403,6 +403,20 @@ class RocmPlatform(Platform):
                compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE

        if cache_config and cache_config.block_size is None:
+            if (
+                envs.VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION and envs.VLLM_ROCM_USE_AITER
+                # NOTE: This block has been deprecated
+                # or get_env_variable_attn_backend()
+                # == AttentionBackendEnum.ROCM_AITER_UNIFIED_ATTN
+                # TODO: monitor https://github.com/vllm-project/vllm/pull/30396
+                # to see how we can transition to the new way of selecting
+                # attention backends
+            ):
+                cache_config.block_size = 64
+                logger.warning(
+                    "[ROCM_AITER_UNIFIED_ATTN]: Setting kv cache block size to 64."
+                )
+            else:
                cache_config.block_size = 16

        if parallel_config.worker_cls == "auto":