Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
db351863
"examples/vscode:/vscode.git/clone" did not exist on "8b664706aa72315e436097d19257536228e01792"
Unverified
Commit
db351863
authored
Aug 02, 2024
by
Peng Guanwen
Committed by
GitHub
Aug 02, 2024
Browse files
[Core] Comment out unused code in sampler (#7023)
parent
660dea12
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
31 additions
and
27 deletions
+31
-27
vllm/model_executor/sampling_metadata.py
vllm/model_executor/sampling_metadata.py
+31
-27
No files found.
vllm/model_executor/sampling_metadata.py
View file @
db351863
...
...
@@ -13,6 +13,8 @@ from vllm.utils import (async_tensor_h2d, is_pin_memory_available,
_SAMPLING_EPS
=
1e-5
_SEED_0_REPLACEMENT
=
3403598558
# Some triton sampler related code is guarded before it is ready.
_USE_TRITON_SAMPLER
=
False
@
dataclass
...
...
@@ -347,11 +349,13 @@ class SamplingTensors:
repetition_penalties
:
List
[
float
]
=
[]
sampling_seeds
:
List
[
int
]
=
[]
sample_indices
:
List
[
int
]
=
[]
prompt_best_of
:
List
[
int
]
=
[]
do_penalties
=
False
do_top_p_top_k
=
False
do_min_p
=
False
if
_USE_TRITON_SAMPLER
:
prompt_best_of
:
List
[
int
]
=
[]
# We need one base seed per Triton slice.
seeds_to_generate
=
(
extra_seeds_to_generate
+
get_num_triton_sampler_splits
(
vocab_size
))
...
...
@@ -366,9 +370,6 @@ class SamplingTensors:
r
=
sampling_params
.
repetition_penalty
top_p
=
sampling_params
.
top_p
min_p
=
sampling_params
.
min_p
seed
=
sampling_params
.
seed
is_greedy
=
sampling_params
.
sampling_type
==
SamplingType
.
GREEDY
# k should not be greater than the vocab size.
top_k
=
min
(
sampling_params
.
top_k
,
vocab_size
)
...
...
@@ -389,8 +390,7 @@ class SamplingTensors:
do_penalties
=
True
is_prompt
=
seq_group
.
is_prompt
if
(
seq_group
.
is_prompt
and
sampling_params
.
prompt_logprobs
is
not
None
):
if
(
is_prompt
and
sampling_params
.
prompt_logprobs
is
not
None
):
# For tokens in the prompt that we only need to get
# their logprobs
query_len
=
seq_group
.
query_len
...
...
@@ -415,11 +415,15 @@ class SamplingTensors:
frequency_penalties
+=
[
f
]
*
len
(
seq_ids
)
repetition_penalties
+=
[
r
]
*
len
(
seq_ids
)
if
_USE_TRITON_SAMPLER
:
if
is_prompt
:
prompt_best_of
.
append
(
sampling_params
.
best_of
)
query_len
=
seq_group
.
query_len
assert
query_len
is
not
None
seed
=
sampling_params
.
seed
is_greedy
=
sampling_params
.
sampling_type
==
SamplingType
.
GREEDY
for
seq_id
in
seq_ids
:
seq_data
=
seq_group
.
seq_data
[
seq_id
]
extra_entropy
=
extra_entropy
or
()
...
...
@@ -549,7 +553,7 @@ class SamplingTensors:
device
=
"cpu"
,
dtype
=
torch
.
long
,
pin_memory
=
pin_memory
,
).
T
.
contiguous
()
).
t
()
.
contiguous
()
# Because the memory is pinned, we can do non-blocking
# transfer to device.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment