[ROCm] Fix AITER ops fake impl and minor bugs (#36092)

Signed-off-by: Li <chuali@amd.com>

[ROCm] Fix AITER ops fake impl and minor bugs (#36092)
Signed-off-by: Li <chuali@amd.com>
e0613702 · Chuan (Richard) Li · GitHub · 9853a3c1 · e0613702
Unverified commit e0613702, authored Apr 09, 2026 by Chuan (Richard) Li; committed by GitHub on Apr 09, 2026.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 8 additions and 4 deletions

vllm/_aiter_ops.py vllm/_aiter_ops.py +8 -4

No files found.
--- a/vllm/_aiter_ops.py
+++ b/vllm/_aiter_ops.py
@@ -336,9 +336,13 @@ def _rocm_aiter_fused_topk_fake(
    router_logits: torch.Tensor,
    top_k: int,
    gate_up: bool,
-) -> None:
+) -> tuple[torch.Tensor, torch.Tensor]:
-    # tuple[torch.Tensor, torch.Tensor]:
+    num_tokens = x.shape[0]
-    pass
+    topk_weights = torch.empty(
+        (num_tokens, top_k), dtype=torch.float32, device=x.device
+    )
+    topk_indices = torch.empty((num_tokens, top_k), dtype=torch.int32, device=x.device)
+    return topk_weights, topk_indices
 # Cache whether aiter supports FP8 MLA parameters
@@ -1918,7 +1922,7 @@ class rocm_aiter_ops:
    @staticmethod
    def shuffle_weight(
-        self, tensor: torch.Tensor, layout: tuple[int, int] = (16, 16)
+        tensor: torch.Tensor, layout: tuple[int, int] = (16, 16)
    ) -> torch.Tensor:
        from aiter.ops.shuffle import shuffle_weight