add gfx938

4b4512fa · zhuwenwen · d4d251fe · 4b4512fa · 4b4512fa
Commit 4b4512fa authored Dec 02, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

CMakeLists.txt CMakeLists.txt +1 -1

vllm/utils/__init__.py vllm/utils/__init__.py +1 -1

No files found.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,7 +38,7 @@ install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)
 set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11" "3.12")

 # Supported AMD GPU architectures.
-set(HIP_SUPPORTED_ARCHS "gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx906;gfx926;gfx928;gfx936")
+set(HIP_SUPPORTED_ARCHS "gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx906;gfx926;gfx928;gfx936;gfx938")

 #
 # Supported/expected torch versions for CUDA/ROCm.

--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -86,7 +86,7 @@ POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768
 MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120

 GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName
-SUPPORT_TC = any(arch in GPU_ARCH for arch in ["gfx928", "gfx936"])
+SUPPORT_TC = any(arch in GPU_ARCH for arch in ["gfx928", "gfx936", "gfx938"])

 def _generate_random_int8(
    tensor: torch.Tensor,