[ROCm][CI] Run Kernels Core Operation Test On MI325 and mitigate flakiness (#38184)

Signed-off-by: Micah Williamson <micah.williamson@amd.com>

[ROCm][CI] Run Kernels Core Operation Test On MI325 and mitigate flakiness (#38184)
Signed-off-by: Micah Williamson <micah.williamson@amd.com>
9570654c · Micah Williamson · GitHub · d56e9522 · 9570654c · 9570654c
Unverified commit 9570654c, authored Apr 05, 2026 by Micah Williamson and committed by GitHub on Apr 06, 2026.
Showing 3 changed files with 15 additions and 2 deletions

.buildkite/test-amd.yaml +1 -1

tests/kernels/core/test_layernorm.py +9 -1

vllm/platforms/rocm.py +5 -0

No files found.
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -751,6 +751,7 @@ steps:
  timeout_in_minutes: 180
  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
  agent_pool: mi250_1
+  optional: true
  working_dir: "/vllm-workspace/tests"
  source_file_dependencies:
  - csrc/
@@ -2035,7 +2036,6 @@ steps:
  timeout_in_minutes: 38
  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
  agent_pool: mi325_1
-  optional: true
  working_dir: "/vllm-workspace/tests"
  source_file_dependencies:
  - csrc/

--- a/tests/kernels/core/test_layernorm.py
+++ b/tests/kernels/core/test_layernorm.py
@@ -7,12 +7,20 @@ import torch
 from tests.kernels.quant_utils import FP8_DTYPE
 from tests.kernels.utils import opcheck
 from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.platforms import current_platform
 from vllm.utils.torch_utils import set_random_seed
+if current_platform.is_rocm():
+    from vllm.platforms.rocm import on_gfx90a
+    on_mi250 = on_gfx90a()
+else:
+    on_mi250 = False
 DTYPES = [torch.half, torch.bfloat16, torch.float]
 NUM_TOKENS = [7, 83, 4096]  # Arbitrary values for testing
 HIDDEN_SIZES = [8, 768, 769, 5120, 5125, 8192]  # Arbitrary values for testing
-ADD_RESIDUAL = [False, True]
+ADD_RESIDUAL = [False, True] if not on_mi250 else [True]
 SEEDS = [0]
 CUDA_DEVICES = [
    f"cuda:{i}" for i in range(1 if torch.accelerator.device_count() == 1 else 2)

--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -182,6 +182,7 @@ _ON_GFX1X = any(arch in _GCN_ARCH for arch in ["gfx11", "gfx12"])
 _ON_GFX12X = any(arch in _GCN_ARCH for arch in ["gfx12"])
 _ON_MI3XX = any(arch in _GCN_ARCH for arch in ["gfx942", "gfx950"])
 _ON_GFX9 = any(arch in _GCN_ARCH for arch in ["gfx90a", "gfx942", "gfx950"])
+_ON_GFX90A = "gfx90a" in _GCN_ARCH
 _ON_GFX942 = "gfx942" in _GCN_ARCH
 _ON_GFX950 = "gfx950" in _GCN_ARCH
@@ -273,6 +274,10 @@ def on_gfx9() -> bool:
    return _ON_GFX9
+def on_gfx90a() -> bool:
+    return _ON_GFX90A
 def on_gfx942() -> bool:
    return _ON_GFX942