[Bugfix] [Tests] Enforce `out` tensor device in `tests/kernels/moe/test_cutedsl_moe.py` (#39644)

Signed-off-by: Yongye Zhu <zyy1102000@gmail.com>

[Bugfix] [Tests] Enforce `out` tensor device in `tests/kernels/moe/test_cutedsl_moe.py` (#39644)
Signed-off-by: Yongye Zhu <zyy1102000@gmail.com>
620e8924 · Yongye Zhu · GitHub · f00c5539 · 620e8924
Unverified commit 620e8924, authored Apr 12, 2026 by Yongye Zhu; committed by GitHub on Apr 12, 2026.
Show whitespace changes
Inline Side-by-side

Showing with 4 additions and 2 deletions

tests/kernels/moe/test_cutedsl_moe.py (+4 -2)

No files found.
--- a/tests/kernels/moe/test_cutedsl_moe.py
+++ b/tests/kernels/moe/test_cutedsl_moe.py
@@ -142,7 +142,9 @@ def prepare_inputs(
    # Initialize the hidden_states_3d with ones instead of empty to avoid nan
    # issue.
    hidden_states_3d = torch.ones(
-        (num_experts, max(masked_m), hidden_states.shape[1]), dtype=hidden_states.dtype
+        (num_experts, max(masked_m), hidden_states.shape[1]),
+        dtype=hidden_states.dtype,
+        device=hidden_states.device,
    )
    for i in range(num_experts):
        hidden_states_3d[i, : masked_m[i], :] = hidden_states[topk_idx.view(-1) == i]
@@ -426,7 +428,7 @@ def test_flashinfer_cutedsl_moe_masked(
    w1_alpha = 1.0 / (input_global_scale * w1_global_scale)
    w2_alpha = 1.0 / (a2_global_scale * w2_global_scale)

-    out = torch.empty_like(hidden_states_3d)
+    out = torch.empty_like(hidden_states_3d, device=hidden_states.device)
    # Note: the 1st dim shouldn't be bs
    wk = torch.empty(
        num_experts,