Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
92510edc
Unverified
Commit
92510edc
authored Feb 25, 2026 by Kunshang Ji
Committed by GitHub Feb 24, 2026
Browse files
remove cuda check in `top_k_top_p_triton` kernel (#35011)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
parent
a6c13752
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 3 additions and 4 deletions
+3
-4
vllm/v1/sample/ops/topk_topp_sampler.py
vllm/v1/sample/ops/topk_topp_sampler.py
+1
-1
vllm/v1/sample/ops/topk_topp_triton.py
vllm/v1/sample/ops/topk_topp_triton.py
+2
-3
No files found.
vllm/v1/sample/ops/topk_topp_sampler.py
View file @
92510edc
...
@@ -248,7 +248,7 @@ def apply_top_k_top_p(
...
@@ -248,7 +248,7 @@ def apply_top_k_top_p(
if
p
is
None
and
k
is
None
:
if
p
is
None
and
k
is
None
:
return
logits
return
logits
if
HAS_TRITON
and
logits
.
shape
[
0
]
>=
8
and
logits
.
is_cuda
:
if
HAS_TRITON
and
logits
.
shape
[
0
]
>=
8
:
return
apply_top_k_top_p_triton
(
logits
,
k
,
p
)
return
apply_top_k_top_p_triton
(
logits
,
k
,
p
)
# Use pytorch sort implementation for small batch sizes.
# Use pytorch sort implementation for small batch sizes.
...
...
vllm/v1/sample/ops/topk_topp_triton.py
View file @
92510edc
...
@@ -967,7 +967,6 @@ def apply_top_k_top_p_triton(
...
@@ -967,7 +967,6 @@ def apply_top_k_top_p_triton(
"""
"""
assert
logits
.
ndim
==
2
assert
logits
.
ndim
==
2
assert
logits
.
dtype
==
torch
.
float32
assert
logits
.
dtype
==
torch
.
float32
assert
logits
.
is_cuda
batch_size
,
vocab_size
=
logits
.
shape
batch_size
,
vocab_size
=
logits
.
shape
...
@@ -978,13 +977,13 @@ def apply_top_k_top_p_triton(
...
@@ -978,13 +977,13 @@ def apply_top_k_top_p_triton(
return
logits
return
logits
if
k
is
not
None
:
if
k
is
not
None
:
assert
k
.
ndim
==
1
and
k
.
shape
[
0
]
==
batch_size
and
k
.
is_cuda
assert
k
.
ndim
==
1
and
k
.
shape
[
0
]
==
batch_size
k_ptr
=
k
.
to
(
torch
.
int32
)
k_ptr
=
k
.
to
(
torch
.
int32
)
else
:
else
:
k_ptr
=
logits
# Dummy pointer (won't be read)
k_ptr
=
logits
# Dummy pointer (won't be read)
if
p
is
not
None
:
if
p
is
not
None
:
assert
p
.
ndim
==
1
and
p
.
shape
[
0
]
==
batch_size
and
p
.
is_cuda
assert
p
.
ndim
==
1
and
p
.
shape
[
0
]
==
batch_size
p_ptr
=
p
.
to
(
torch
.
float32
)
p_ptr
=
p
.
to
(
torch
.
float32
)
else
:
else
:
p_ptr
=
logits
# Dummy pointer (won't be read)
p_ptr
=
logits
# Dummy pointer (won't be read)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment