Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
32ef4983
Unverified
Commit
32ef4983
authored
Mar 13, 2025
by
Woosuk Kwon
Committed by
GitHub
Mar 13, 2025
Browse files
[V1] Temporarily disable FlashInfer Rejection Sampler (#14788)
Signed-off-by:
Woosuk Kwon
<
woosuk.kwon@berkeley.edu
>
parent
ad19c8a0
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
4 deletions
+13
-4
vllm/v1/sample/ops/topk_topp_sampler.py
vllm/v1/sample/ops/topk_topp_sampler.py
+1
-1
vllm/v1/sample/rejection_sampler.py
vllm/v1/sample/rejection_sampler.py
+12
-3
No files found.
vllm/v1/sample/ops/topk_topp_sampler.py
View file @
32ef4983
...
@@ -22,7 +22,7 @@ class TopKTopPSampler(nn.Module):
...
@@ -22,7 +22,7 @@ class TopKTopPSampler(nn.Module):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
if
current_platform
.
is_cuda
:
if
current_platform
.
is_cuda
()
:
if
is_flashinfer_available
:
if
is_flashinfer_available
:
if
envs
.
VLLM_USE_FLASHINFER_SAMPLER
is
not
False
:
if
envs
.
VLLM_USE_FLASHINFER_SAMPLER
is
not
False
:
# NOTE(woosuk): The V0 sampler doesn't use FlashInfer for
# NOTE(woosuk): The V0 sampler doesn't use FlashInfer for
...
...
vllm/v1/sample/rejection_sampler.py
View file @
32ef4983
...
@@ -24,9 +24,18 @@ class RejectionSampler(nn.Module):
...
@@ -24,9 +24,18 @@ class RejectionSampler(nn.Module):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
if
current_platform
.
is_cuda
:
if
current_platform
.
is_cuda
()
:
if
is_flashinfer_available
:
if
is_flashinfer_available
:
if
envs
.
VLLM_USE_FLASHINFER_SAMPLER
is
not
False
:
if
envs
.
VLLM_USE_FLASHINFER_SAMPLER
is
not
False
:
# FIXME(woosuk): Currently, we have errors when using
# FlashInfer for rejection sampling. As a workaround, we
# disable FlashInfer for rejection sampling by default.
logger
.
info
(
"Currently, FlashInfer rejection sampler is "
"disabled because of a bug. Falling back to "
"the PyTorch-native implementation of "
"rejection sampling."
)
self
.
forward_method
=
self
.
forward_native
# NOTE(woosuk): The V0 sampler doesn't use FlashInfer for
# NOTE(woosuk): The V0 sampler doesn't use FlashInfer for
# sampling unless VLLM_USE_FLASHINFER_SAMPLER=1 (i.e., by
# sampling unless VLLM_USE_FLASHINFER_SAMPLER=1 (i.e., by
# default it is unused). For backward compatibility, we set
# default it is unused). For backward compatibility, we set
...
@@ -35,8 +44,8 @@ class RejectionSampler(nn.Module):
...
@@ -35,8 +44,8 @@ class RejectionSampler(nn.Module):
# None means False, while in V1, None means True. This is
# None means False, while in V1, None means True. This is
# why we use the condition
# why we use the condition
# `envs.VLLM_USE_FLASHINFER_SAMPLER is not False` here.
# `envs.VLLM_USE_FLASHINFER_SAMPLER is not False` here.
logger
.
info
(
"Using FlashInfer for rejection sampling."
)
#
logger.info("Using FlashInfer for rejection sampling.")
self
.
forward_method
=
self
.
flashinfer_sample
#
self.forward_method = self.flashinfer_sample
else
:
else
:
logger
.
warning
(
logger
.
warning
(
"FlashInfer is available, but it is not enabled. "
"FlashInfer is available, but it is not enabled. "
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment