Remove CUDA-only `is_cuda` checks from the `apply_top_k_top_p_triton` path (#35011)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>

Remove CUDA-only `is_cuda` checks from the `apply_top_k_top_p_triton` path (#35011)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
92510edc · Kunshang Ji · GitHub · a6c13752 · 92510edc · 92510edc
Unverified Commit 92510edc authored Feb 25, 2026 by Kunshang Ji Committed by GitHub Feb 24, 2026
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 4 deletions

vllm/v1/sample/ops/topk_topp_sampler.py vllm/v1/sample/ops/topk_topp_sampler.py +1 -1

vllm/v1/sample/ops/topk_topp_triton.py vllm/v1/sample/ops/topk_topp_triton.py +2 -3

No files found.
--- a/vllm/v1/sample/ops/topk_topp_sampler.py
+++ b/vllm/v1/sample/ops/topk_topp_sampler.py
@@ -248,7 +248,7 @@ def apply_top_k_top_p(
    if p is None and k is None:
        return logits

-    if HAS_TRITON and logits.shape[0] >= 8 and logits.is_cuda:
+    if HAS_TRITON and logits.shape[0] >= 8:
        return apply_top_k_top_p_triton(logits, k, p)

    # Use pytorch sort implementation for small batch sizes.

--- a/vllm/v1/sample/ops/topk_topp_triton.py
+++ b/vllm/v1/sample/ops/topk_topp_triton.py
@@ -967,7 +967,6 @@ def apply_top_k_top_p_triton(
    """
    assert logits.ndim == 2
    assert logits.dtype == torch.float32
-    assert logits.is_cuda

    batch_size, vocab_size = logits.shape

@@ -978,13 +977,13 @@ def apply_top_k_top_p_triton(
        return logits

    if k is not None:
-        assert k.ndim == 1 and k.shape[0] == batch_size and k.is_cuda
+        assert k.ndim == 1 and k.shape[0] == batch_size
        k_ptr = k.to(torch.int32)
    else:
        k_ptr = logits  # Dummy pointer (won't be read)

    if p is not None:
-        assert p.ndim == 1 and p.shape[0] == batch_size and p.is_cuda
+        assert p.ndim == 1 and p.shape[0] == batch_size
        p_ptr = p.to(torch.float32)
    else:
        p_ptr = logits  # Dummy pointer (won't be read)