Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
efbe8544
Unverified
Commit
efbe8544
authored
Feb 18, 2025
by
Harry Mellor
Committed by
GitHub
Feb 17, 2025
Browse files
[Misc] Remove dangling references to `SamplingType.BEAM` (#13402)
parent
b3942e15
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
78 deletions
+0
-78
vllm/model_executor/layers/sampler.py
vllm/model_executor/layers/sampler.py
+0
-78
No files found.
vllm/model_executor/layers/sampler.py
View file @
efbe8544
...
@@ -68,7 +68,6 @@ class SampleResultArgsType:
...
@@ -68,7 +68,6 @@ class SampleResultArgsType:
sample_results_dict
:
SampleResultsDictType
sample_results_dict
:
SampleResultsDictType
sampling_metadata
:
SamplingMetadata
sampling_metadata
:
SamplingMetadata
greedy_samples
:
Optional
[
torch
.
Tensor
]
greedy_samples
:
Optional
[
torch
.
Tensor
]
beam_search_logprobs
:
Optional
[
torch
.
Tensor
]
# Union of non-deferred (single-step scheduling)
# Union of non-deferred (single-step scheduling)
...
@@ -510,74 +509,6 @@ def _random_sample(
...
@@ -510,74 +509,6 @@ def _random_sample(
return
results
return
results
def _beam_search_sample(
    selected_seq_groups: List[SequenceGroupToSample],
    logprobs: torch.Tensor,
) -> SampleResultType:
    """Run beam-search sampling on the given samples.

    Args:
        selected_seq_groups: A list of sequence groups batched.
        logprobs: (num_selected_samples, vocab_size,) A tensor of logprob
            on selected sample indices.

    Returns:
        A list of (next_token_ids, parent_ids) tuples, one per entry of
        selected_seq_groups and in the same order. If the corresponding
        seq_group has do_sample=False, the tuple is ([], []).
    """
    # We sample 2 * beam_width candidates to make sure that with high
    # probability we can get `beam_width` candidates in addition to
    # the finished sequences for the next iteration. See
    # https://github.com/tensorflow/tensor2tensor/blob/bafdc1b67730430d38d6ab802cbd51f9d053ba2e/tensor2tensor/utils/beam_search.py#L557-L563
    # for details. See also HF reference:
    # https://github.com/huggingface/transformers/blob/a4dd53d88e4852f023332d284ff07a01afcd5681/src/transformers/generation/utils.py#L3063-L3065
    #
    # NOTE: Beam search is not vectorized, so its speed can be slower than
    # other sampling methods.
    sample_idx = 0  # First row of `logprobs` owned by the current group.
    results: SampleResultType = []
    for seq_group in selected_seq_groups:
        if not seq_group.do_sample:
            # Nothing to sample for this group; it consumes no logprob rows.
            results.append(([], []))
            continue

        is_prompt = seq_group.is_prompt
        seq_ids, sampling_params = seq_group.seq_ids, seq_group.sampling_params
        num_parent_seqs = len(seq_ids)
        beam_width = sampling_params.n
        seq_group_logprobs = logprobs[sample_idx:sample_idx + num_parent_seqs]

        if is_prompt:
            # Prompt phase.
            assert num_parent_seqs == 1, (
                "Prompt input should have only one seq.")
            candidate_logprobs = seq_group_logprobs[0]
            # torch.topk raises when k exceeds the number of elements, so
            # never request more candidates than actually exist.
            num_candidates = min(2 * beam_width, candidate_logprobs.numel())
            parent_ids = [0] * num_candidates
            _, next_token_ids = torch.topk(candidate_logprobs, num_candidates)
            next_token_ids = next_token_ids.tolist()
        else:
            # Generation phase.
            cumulative_logprobs: List[float] = [
                seq_group.seq_data[seq_id].cumulative_logprob
                for seq_id in seq_ids
            ]
            cumulative_logprobs_tensor = torch.tensor(
                cumulative_logprobs,
                dtype=torch.float,
                device=seq_group_logprobs.device)
            # Add each parent's running score to every token of its row so
            # that candidates are ranked by the score of the whole sequence.
            seq_group_logprobs = (seq_group_logprobs +
                                  cumulative_logprobs_tensor.unsqueeze(dim=1))
            flat_logprobs = seq_group_logprobs.flatten()
            # Same clamp as in the prompt phase: keep k within the number of
            # (parent, token) pairs available.
            num_candidates = min(2 * beam_width, flat_logprobs.numel())
            _, topk_ids = torch.topk(flat_logprobs, num_candidates)
            topk_ids = topk_ids.tolist()
            vocab_size = seq_group_logprobs.size(-1)
            # A flat index encodes (parent row, token id) in row-major order.
            parent_ids = [i // vocab_size for i in topk_ids]
            next_token_ids = [i % vocab_size for i in topk_ids]

        results.append((next_token_ids, parent_ids))
        sample_idx += num_parent_seqs

    # Every row of `logprobs` must have been consumed by exactly one group.
    assert sample_idx == logprobs.size(0)
    return results
# torch.multinomial forces a GPU<->CPU sync.
# torch.multinomial forces a GPU<->CPU sync.
# Therefore, we use an optimized implementation instead.
# Therefore, we use an optimized implementation instead.
# Note that we always sample with replacement.
# Note that we always sample with replacement.
...
@@ -666,14 +597,12 @@ def get_pythonized_sample_results(
...
@@ -666,14 +597,12 @@ def get_pythonized_sample_results(
sampling_metadata
,
sampling_metadata
,
greedy_samples
,
greedy_samples
,
multinomial_samples
,
multinomial_samples
,
beam_search_logprobs
,
sample_results_dict
,
sample_results_dict
,
)
=
(
)
=
(
sample_result_args
.
sample_metadata
,
sample_result_args
.
sample_metadata
,
sample_result_args
.
sampling_metadata
,
sample_result_args
.
sampling_metadata
,
sample_result_args
.
greedy_samples
,
sample_result_args
.
greedy_samples
,
sample_result_args
.
multinomial_samples
,
sample_result_args
.
multinomial_samples
,
sample_result_args
.
beam_search_logprobs
,
sample_result_args
.
sample_results_dict
,
sample_result_args
.
sample_results_dict
,
)
)
...
@@ -686,9 +615,6 @@ def get_pythonized_sample_results(
...
@@ -686,9 +615,6 @@ def get_pythonized_sample_results(
elif
sampling_type
in
(
SamplingType
.
RANDOM
,
SamplingType
.
RANDOM_SEED
):
elif
sampling_type
in
(
SamplingType
.
RANDOM
,
SamplingType
.
RANDOM_SEED
):
sample_results
=
_random_sample
(
seq_groups
,
sample_results
=
_random_sample
(
seq_groups
,
multinomial_samples
[
sampling_type
])
multinomial_samples
[
sampling_type
])
elif
sampling_type
==
SamplingType
.
BEAM
:
sample_results
=
_beam_search_sample
(
seq_groups
,
beam_search_logprobs
)
sample_results_dict
.
update
(
zip
(
seq_group_id
,
sample_results
))
sample_results_dict
.
update
(
zip
(
seq_group_id
,
sample_results
))
return
[
return
[
...
@@ -731,7 +657,6 @@ def _sample_with_torch(
...
@@ -731,7 +657,6 @@ def _sample_with_torch(
sample_metadata
:
SampleMetadataType
=
{}
sample_metadata
:
SampleMetadataType
=
{}
multinomial_samples
:
MultinomialSamplesType
=
{}
multinomial_samples
:
MultinomialSamplesType
=
{}
greedy_samples
:
Optional
[
torch
.
Tensor
]
=
None
greedy_samples
:
Optional
[
torch
.
Tensor
]
=
None
beam_search_logprobs
:
Optional
[
torch
.
Tensor
]
=
None
# Create output tensor for sampled token ids.
# Create output tensor for sampled token ids.
if
include_gpu_probs_tensor
:
if
include_gpu_probs_tensor
:
...
@@ -800,8 +725,6 @@ def _sample_with_torch(
...
@@ -800,8 +725,6 @@ def _sample_with_torch(
sampled_token_ids_tensor
[
long_sample_indices
]
=
\
sampled_token_ids_tensor
[
long_sample_indices
]
=
\
multinomial_samples
[
sampling_type
].
to
(
torch
.
long
)
multinomial_samples
[
sampling_type
].
to
(
torch
.
long
)
elif
sampling_type
==
SamplingType
.
BEAM
:
beam_search_logprobs
=
logprobs
[
sample_indices
]
else
:
else
:
raise
ValueError
(
f
"Unsupported sampling type:
{
sampling_type
}
"
)
raise
ValueError
(
f
"Unsupported sampling type:
{
sampling_type
}
"
)
...
@@ -812,7 +735,6 @@ def _sample_with_torch(
...
@@ -812,7 +735,6 @@ def _sample_with_torch(
sample_metadata
=
sample_metadata
,
sample_metadata
=
sample_metadata
,
multinomial_samples
=
multinomial_samples
,
multinomial_samples
=
multinomial_samples
,
greedy_samples
=
greedy_samples
,
greedy_samples
=
greedy_samples
,
beam_search_logprobs
=
beam_search_logprobs
,
sample_results_dict
=
sample_results_dict
)
sample_results_dict
=
sample_results_dict
)
if
not
sampling_metadata
.
skip_sampler_cpu_output
:
if
not
sampling_metadata
.
skip_sampler_cpu_output
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment