Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2bc4be4e
Unverified
Commit
2bc4be4e
authored
Mar 29, 2025
by
Woosuk Kwon
Committed by
GitHub
Mar 29, 2025
Browse files
[V1][Minor] Simplify rejection sampler's parse_output (#15741)
Signed-off-by:
Woosuk Kwon
<
woosuk.kwon@berkeley.edu
>
parent
c67abd61
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
11 deletions
+3
-11
vllm/v1/sample/rejection_sampler.py
vllm/v1/sample/rejection_sampler.py
+0
-7
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+3
-4
No files found.
vllm/v1/sample/rejection_sampler.py
View file @
2bc4be4e
...
@@ -107,7 +107,6 @@ class RejectionSampler(nn.Module):
...
@@ -107,7 +107,6 @@ class RejectionSampler(nn.Module):
@
staticmethod
@
staticmethod
def
parse_output
(
def
parse_output
(
output_token_ids
:
torch
.
Tensor
,
output_token_ids
:
torch
.
Tensor
,
ignored_req_idxs
:
list
[
int
],
vocab_size
:
int
,
vocab_size
:
int
,
)
->
list
[
list
[
int
]]:
)
->
list
[
list
[
int
]]:
"""Parse the output of the rejection sampler.
"""Parse the output of the rejection sampler.
...
@@ -117,9 +116,6 @@ class RejectionSampler(nn.Module):
...
@@ -117,9 +116,6 @@ class RejectionSampler(nn.Module):
[batch_size, max_spec_len + 1]. The rejected tokens are
[batch_size, max_spec_len + 1]. The rejected tokens are
replaced with `PLACEHOLDER_TOKEN_ID` by the rejection sampler
replaced with `PLACEHOLDER_TOKEN_ID` by the rejection sampler
and will be filtered out in this function.
and will be filtered out in this function.
ignored_req_idxs: The indices of the requests that should not be
sampled. This is usually because the request is still in the
prefill phase.
vocab_size: The size of the vocabulary.
vocab_size: The size of the vocabulary.
Returns:
Returns:
...
@@ -129,11 +125,8 @@ class RejectionSampler(nn.Module):
...
@@ -129,11 +125,8 @@ class RejectionSampler(nn.Module):
# Create mask for valid tokens.
# Create mask for valid tokens.
valid_mask
=
((
output_token_ids_np
!=
PLACEHOLDER_TOKEN_ID
)
&
valid_mask
=
((
output_token_ids_np
!=
PLACEHOLDER_TOKEN_ID
)
&
(
output_token_ids_np
<
vocab_size
))
(
output_token_ids_np
<
vocab_size
))
ignored_req_idx_set
=
set
(
ignored_req_idxs
)
outputs
=
[
outputs
=
[
row
[
valid_mask
[
i
]].
tolist
()
row
[
valid_mask
[
i
]].
tolist
()
if
i
not
in
ignored_req_idx_set
else
[]
for
i
,
row
in
enumerate
(
output_token_ids_np
)
for
i
,
row
in
enumerate
(
output_token_ids_np
)
]
]
return
outputs
return
outputs
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
2bc4be4e
...
@@ -1121,16 +1121,15 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -1121,16 +1121,15 @@ class GPUModelRunner(LoRAModelRunnerMixin):
if
max_gen_len
==
1
:
if
max_gen_len
==
1
:
# No spec decode tokens.
# No spec decode tokens.
valid_sampled_token_ids
=
sampled_token_ids
.
tolist
()
valid_sampled_token_ids
=
sampled_token_ids
.
tolist
()
# Mask out the sampled tokens that should not be sampled.
for
i
in
discard_sampled_tokens_req_indices
:
valid_sampled_token_ids
[
i
].
clear
()
else
:
else
:
# Includes spec decode tokens.
# Includes spec decode tokens.
valid_sampled_token_ids
=
self
.
rejection_sampler
.
parse_output
(
valid_sampled_token_ids
=
self
.
rejection_sampler
.
parse_output
(
sampled_token_ids
,
sampled_token_ids
,
discard_sampled_tokens_req_indices
,
self
.
input_batch
.
vocab_size
,
self
.
input_batch
.
vocab_size
,
)
)
# Mask out the sampled tokens that should not be sampled.
for
i
in
discard_sampled_tokens_req_indices
:
valid_sampled_token_ids
[
i
].
clear
()
if
not
self
.
use_spec_decode
:
if
not
self
.
use_spec_decode
:
spec_token_ids
=
None
spec_token_ids
=
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment