[ROCm][CI] Fix flaky GPTQ compile correctness test (#38161)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>

[ROCm][CI] Fix flaky GPTQ compile correctness test (#38161)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
f2d16207 · Andreas Karatzas · GitHub · 37a83007 · f2d16207 · f2d16207
Unverified Commit f2d16207 authored Mar 26, 2026 by Andreas Karatzas Committed by GitHub Mar 26, 2026
Showing with 41 additions and 34 deletions

tests/compile/fullgraph/test_basic_correctness.py tests/compile/fullgraph/test_basic_correctness.py +2 -2

vllm/model_executor/model_loader/weight_utils.py vllm/model_executor/model_loader/weight_utils.py +39 -32

No files found.
--- a/tests/compile/fullgraph/test_basic_correctness.py
+++ b/tests/compile/fullgraph/test_basic_correctness.py
@@ -137,6 +137,7 @@ def test_compile_correctness(
            all_args.append(
                final_args + [f"-cc.mode={mode.name}", "-cc.backend=inductor"]
            )
+            all_envs.append({})

        # inductor will change the output, so we only compare if the output
        # is close, not exactly the same.
@@ -157,6 +158,5 @@ def test_compile_correctness(
    ]:
        all_args.append(final_args + [f"-cc.mode={mode.name}", "-cc.backend=eager"])
        all_envs.append({})
-        all_envs.append({})

-    compare_all_settings(model, all_args * 3, all_envs, method=method)
+    compare_all_settings(model, all_args, all_envs, method=method)
--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@@ -1348,40 +1348,47 @@ def initialize_single_dummy_weight(
    high: float = 1e-3,
    seed: int = 1234,
 ) -> None:
-    if torch.is_floating_point(param):
-        if current_platform.is_tpu():
-            generator = torch.Generator(device="cpu")
-            generator.manual_seed(seed)
-            # Note: The param.uniform_ function cannot be used in this
-            # context because it demands more TPU HBM than directly copying
-            # from a CPU tensor.
-            # Note: We avoid using torch.rank_like as it doesn't currently
-            # support the generator argument.
-            param.copy_(
-                (high - low)
-                * torch.rand(
-                    param.shape,
-                    generator=generator,
-                    dtype=param.dtype,
-                    layout=param.layout,
-                    requires_grad=param.requires_grad,
-                    device="cpu",
-                )
-                + low
-            )
-            torch._sync(param)
-            return
+    if not torch.is_floating_point(param):
+        if current_platform.is_rocm():
+            # On ROCm, integer params (e.g. GPTQ qweight/qzeros) keep their
+            # uninitialized torch.empty() contents, which are non-deterministic
+            # across processes. Zero them for reproducibility.
+            param.zero_()
+        return

-        generator = torch.Generator(device=param.data.device)
+    if current_platform.is_tpu():
+        generator = torch.Generator(device="cpu")
        generator.manual_seed(seed)
-        if torch.finfo(param.data.dtype).bits < 16:
-            # uniform_ doesn't support < 16-bit datatypes (FP8)
-            dtype = param.data.dtype
-            tmp_param = param.data.to(torch.float16)
-            tmp_param = tmp_param.uniform_(low, high, generator=generator).to(dtype)
-            param.data.copy_(tmp_param)
-        else:
-            param.uniform_(low, high, generator=generator)
+        # Note: The param.uniform_ function cannot be used in this
+        # context because it demands more TPU HBM than directly copying
+        # from a CPU tensor.
+        # Note: We avoid using torch.rand_like as it doesn't currently
+        # support the generator argument.
+        param.copy_(
+            (high - low)
+            * torch.rand(
+                param.shape,
+                generator=generator,
+                dtype=param.dtype,
+                layout=param.layout,
+                requires_grad=param.requires_grad,
+                device="cpu",
+            )
+            + low
+        )
+        torch._sync(param)
+        return
+
+    generator = torch.Generator(device=param.data.device)
+    generator.manual_seed(seed)
+    if torch.finfo(param.data.dtype).bits < 16:
+        # uniform_ doesn't support < 16-bit datatypes (FP8)
+        dtype = param.data.dtype
+        tmp_param = param.data.to(torch.float16)
+        tmp_param = tmp_param.uniform_(low, high, generator=generator).to(dtype)
+        param.data.copy_(tmp_param)
+    else:
+        param.uniform_(low, high, generator=generator)


 def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> str | None: