[ROCm][CI] Force max_num_seqs=1 on ROCm in test_sharded_state_loader to reduce flakiness (#33277)

Signed-off-by: Micah Williamson <micah.williamson@amd.com>

[ROCm][CI] Force max_num_seqs=1 on ROCm in test_sharded_state_loader to reduce flakiness (#33277)
Signed-off-by: Micah Williamson <micah.williamson@amd.com>
6c64c41b · Micah Williamson · GitHub · a2ef06e1 · 6c64c41b
Unverified commit 6c64c41b, authored Jan 30, 2026 by Micah Williamson; committed by GitHub Jan 31, 2026
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 8 additions and 0 deletions

tests/model_executor/model_loader/test_sharded_state_loader.py +8 -0

No files found.
--- a/tests/model_executor/model_loader/test_sharded_state_loader.py
+++ b/tests/model_executor/model_loader/test_sharded_state_loader.py
@@ -13,6 +13,7 @@ from huggingface_hub import snapshot_download
 from vllm import LLM, SamplingParams
 from vllm.model_executor.model_loader import ShardedStateLoader
+from vllm.platforms import current_platform
 prompts = [
    "Hello, my name is",
@@ -95,6 +96,10 @@ def test_sharded_state_loader(
    input_dir = llama_3p2_1b_files
    ctx = mp.get_context("spawn")
+    platform_args = {}
+    if current_platform.is_rocm():
+        platform_args["max_num_seqs"] = 1
    # Run in separate processes for memory & CUDA isolation
    with TemporaryDirectory() as output_dir:
        p = ctx.Process(
@@ -104,6 +109,7 @@ def test_sharded_state_loader(
                tensor_parallel_size=tp_size,
                gpu_memory_utilization=gpu_memory_utilization,
                enforce_eager=True,
+                **platform_args,
            ),
        )
        p.start()
@@ -118,6 +124,7 @@ def test_sharded_state_loader(
                enable_lora=enable_lora,
                gpu_memory_utilization=gpu_memory_utilization,
                tensor_parallel_size=tp_size,
+                **platform_args,
            ),
        )
        p.start()
@@ -141,6 +148,7 @@ def test_sharded_state_loader(
                gpu_memory_utilization=gpu_memory_utilization,
                tensor_parallel_size=tp_size,
                load_format="sharded_state",
+                **platform_args,
            ),
        )
        p.start()