Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
190bc838
Unverified
Commit
190bc838
authored
May 09, 2024
by
Woosuk Kwon
Committed by
GitHub
May 09, 2024
Browse files
[Misc] Remove unnecessary ModelRunner imports (#4703)
parent
f12b20de
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
31 additions
and
73 deletions
+31
-73
tests/samplers/test_sampler.py
tests/samplers/test_sampler.py
+24
-57
tests/test_logits_processor.py
tests/test_logits_processor.py
+7
-16
No files found.
tests/samplers/test_sampler.py
View file @
190bc838
...
...
@@ -11,8 +11,7 @@ from vllm.model_executor.layers.sampler import Sampler
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.model_executor.utils
import
set_random_seed
from
vllm.sequence
import
SamplingParams
,
SequenceData
,
SequenceGroupMetadata
from
vllm.utils
import
Counter
from
vllm.worker.model_runner
import
ModelRunner
from
vllm.utils
import
Counter
,
is_pin_memory_available
class
MockLogitsSampler
(
Sampler
):
...
...
@@ -27,19 +26,13 @@ class MockLogitsSampler(Sampler):
def
_prepare_test
(
batch_size
:
int
)
->
Tuple
[
torch
.
Tensor
,
torch
.
Tensor
,
MockLogitsSampler
,
ModelRunner
]:
)
->
Tuple
[
torch
.
Tensor
,
torch
.
Tensor
,
MockLogitsSampler
]:
input_tensor
=
torch
.
rand
((
batch_size
,
1024
),
dtype
=
torch
.
float16
)
fake_logits
=
torch
.
full
((
batch_size
,
VOCAB_SIZE
),
1e-2
,
dtype
=
input_tensor
.
dtype
)
sampler
=
MockLogitsSampler
(
fake_logits
)
model_runner
=
ModelRunner
(
model_config
=
None
,
parallel_config
=
None
,
scheduler_config
=
None
,
device_config
=
None
,
load_config
=
None
,
lora_config
=
None
)
return
input_tensor
,
fake_logits
,
sampler
,
model_runner
return
input_tensor
,
fake_logits
,
sampler
VOCAB_SIZE
=
32000
...
...
@@ -53,7 +46,6 @@ def _do_sample(
batch_size
:
int
,
input_tensor
:
torch
.
Tensor
,
sampler
:
MockLogitsSampler
,
model_runner
:
ModelRunner
,
sampling_params
:
SamplingParams
,
device
:
str
,
):
...
...
@@ -75,7 +67,7 @@ def _do_sample(
seq_lens
,
query_lens
=
seq_lens
,
device
=
device
,
pin_memory
=
model_runner
.
pin_memory
)
pin_memory
=
is_pin_memory_available
()
)
return
sampler
(
logits
=
input_tensor
,
sampling_metadata
=
sampling_metadata
)
...
...
@@ -85,19 +77,16 @@ def test_sampler_all_greedy(seed: int, device: str):
set_random_seed
(
seed
)
torch
.
set_default_device
(
device
)
batch_size
=
random
.
randint
(
1
,
256
)
input_tensor
,
fake_logits
,
sampler
,
model_runner
=
_prepare_test
(
batch_size
)
input_tensor
,
fake_logits
,
sampler
=
_prepare_test
(
batch_size
)
sampling_params
=
SamplingParams
(
temperature
=
0
)
sampler_output
=
_do_sample
(
batch_size
,
fake_logits
,
sampler
,
model_runner
,
sampler_output
=
_do_sample
(
batch_size
,
fake_logits
,
sampler
,
sampling_params
,
device
)
expected
=
torch
.
argmax
(
fake_logits
,
dim
=-
1
)
for
i
,
sequence_output
in
enumerate
(
sampler_output
):
for
nth_output
in
sequence_output
.
samples
:
assert
nth_output
.
output_token
==
expected
[
i
].
item
()
del
model_runner
@
pytest
.
mark
.
parametrize
(
"seed"
,
RANDOM_SEEDS
)
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
...
...
@@ -105,8 +94,7 @@ def test_sampler_all_random(seed: int, device: str):
set_random_seed
(
seed
)
torch
.
set_default_device
(
device
)
batch_size
=
random
.
randint
(
1
,
256
)
input_tensor
,
fake_logits
,
sampler
,
model_runner
=
_prepare_test
(
batch_size
)
_
,
fake_logits
,
sampler
=
_prepare_test
(
batch_size
)
for
i
in
range
(
batch_size
):
fake_logits
[
i
,
i
]
=
1e2
...
...
@@ -115,15 +103,13 @@ def test_sampler_all_random(seed: int, device: str):
temperature
=
1.0
,
n
=
random
.
randint
(
1
,
10
),
)
sampler_output
=
_do_sample
(
batch_size
,
fake_logits
,
sampler
,
model_runner
,
sampler_output
=
_do_sample
(
batch_size
,
fake_logits
,
sampler
,
sampling_params
,
device
)
for
i
,
sequence_output
in
enumerate
(
sampler_output
):
for
nth_output
in
sequence_output
.
samples
:
assert
nth_output
.
output_token
==
i
del
model_runner
@
pytest
.
mark
.
parametrize
(
"seed"
,
RANDOM_SEEDS
)
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
...
...
@@ -131,7 +117,7 @@ def test_sampler_all_random_seed(seed: int, device: str):
set_random_seed
(
seed
)
torch
.
set_default_device
(
device
)
batch_size
=
random
.
randint
(
1
,
256
)
_
,
fake_logits
,
sampler
,
model_runner
=
_prepare_test
(
batch_size
)
_
,
fake_logits
,
sampler
=
_prepare_test
(
batch_size
)
for
i
in
range
(
batch_size
):
fake_logits
[
i
,
i
]
=
1e2
...
...
@@ -141,15 +127,13 @@ def test_sampler_all_random_seed(seed: int, device: str):
n
=
random
.
randint
(
1
,
10
),
seed
=
random
.
randint
(
0
,
10000
),
)
sampler_output
=
_do_sample
(
batch_size
,
fake_logits
,
sampler
,
model_runner
,
sampler_output
=
_do_sample
(
batch_size
,
fake_logits
,
sampler
,
sampling_params
,
device
)
for
i
,
sequence_output
in
enumerate
(
sampler_output
):
for
nth_output
in
sequence_output
.
samples
:
assert
nth_output
.
output_token
==
i
del
model_runner
@
pytest
.
mark
.
parametrize
(
"seed"
,
RANDOM_SEEDS
)
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
...
...
@@ -157,7 +141,7 @@ def test_sampler_all_random_seed_deterministic(seed: int, device: str):
set_random_seed
(
seed
)
torch
.
set_default_device
(
device
)
batch_size
=
random
.
randint
(
1
,
256
)
_
,
fake_logits
,
sampler
,
model_runner
=
_prepare_test
(
batch_size
)
_
,
fake_logits
,
sampler
=
_prepare_test
(
batch_size
)
sampling_params
=
SamplingParams
(
temperature
=
1.0
,
...
...
@@ -165,15 +149,13 @@ def test_sampler_all_random_seed_deterministic(seed: int, device: str):
seed
=
random
.
randint
(
0
,
10000
),
)
first_sampler_output
=
_do_sample
(
batch_size
,
fake_logits
,
sampler
,
model_runner
,
sampling_params
,
device
)
sampling_params
,
device
)
second_sampler_output
=
_do_sample
(
batch_size
,
fake_logits
,
sampler
,
model_runner
,
sampling_params
,
device
)
sampling_params
,
device
)
assert
first_sampler_output
==
second_sampler_output
del
model_runner
@
pytest
.
mark
.
parametrize
(
"seed"
,
RANDOM_SEEDS
)
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
...
...
@@ -181,20 +163,18 @@ def test_sampler_all_beam(seed: int, device: str):
set_random_seed
(
seed
)
torch
.
set_default_device
(
device
)
batch_size
=
random
.
randint
(
1
,
256
)
_
,
fake_logits
,
sampler
,
model_runner
=
_prepare_test
(
batch_size
)
_
,
fake_logits
,
sampler
=
_prepare_test
(
batch_size
)
sampling_params
=
SamplingParams
(
temperature
=
0
,
best_of
=
2
,
use_beam_search
=
True
,
)
_do_sample
(
batch_size
,
fake_logits
,
sampler
,
model_runner
,
sampling_params
,
device
)
_do_sample
(
batch_size
,
fake_logits
,
sampler
,
sampling_params
,
device
)
# no assertion here as I am not sure how to determine whether
# the outputs are expected - in other words, this just tests
# whether there are no exceptions in the sampler
# when handling an all-beam search case.
del
model_runner
@
pytest
.
mark
.
parametrize
(
"seed"
,
RANDOM_SEEDS
)
...
...
@@ -448,13 +428,13 @@ def test_sampler_min_tokens_penalty(seed: int, device: str):
(
"Invalid test case, expected_penalization does not match computed"
"batch size"
)
_
,
fake_logits
,
sampler
,
model_runner
=
_prepare_test
(
batch_size
)
_
,
fake_logits
,
sampler
=
_prepare_test
(
batch_size
)
sampling_metadata
=
SamplingMetadata
.
prepare
(
seq_group_metadata_list
,
seq_lens
=
seq_lens
if
seq_lens
else
None
,
query_lens
=
seq_lens
if
seq_lens
else
None
,
device
=
device
,
pin_memory
=
model_runner
.
pin_memory
)
pin_memory
=
is_pin_memory_available
()
)
# the logits tensor is modified in-place by the sampler
_
=
sampler
(
logits
=
fake_logits
,
sampling_metadata
=
sampling_metadata
)
...
...
@@ -480,8 +460,6 @@ def test_sampler_min_tokens_penalty(seed: int, device: str):
fake_logits
[
logits_idx
,
:]
==
-
float
(
'inf'
))
==
0
,
"No tokens should have been penalized"
del
model_runner
for
test_case
in
test_cases
:
run_test_case
(
**
test_case
)
...
...
@@ -492,8 +470,7 @@ def test_sampler_mixed(seed: int, device: str):
set_random_seed
(
seed
)
torch
.
set_default_device
(
device
)
batch_size
=
random
.
randint
(
1
,
256
)
input_tensor
,
fake_logits
,
sampler
,
model_runner
=
_prepare_test
(
batch_size
)
input_tensor
,
fake_logits
,
sampler
=
_prepare_test
(
batch_size
)
seq_group_metadata_list
=
[]
expected_tokens
:
List
[
Optional
[
List
[
int
]]]
=
[]
...
...
@@ -534,13 +511,13 @@ def test_sampler_mixed(seed: int, device: str):
))
seq_lens
.
append
(
seq_group_metadata_list
[
-
1
].
seq_data
[
0
].
get_len
())
def
test_sampling
(
model_runner
:
ModelRunner
):
def
test_sampling
():
sampling_metadata
=
SamplingMetadata
.
prepare
(
seq_group_metadata_list
,
seq_lens
,
query_lens
=
seq_lens
,
device
=
device
,
pin_memory
=
model_runner
.
pin_memory
)
pin_memory
=
is_pin_memory_available
()
)
sampler_output
=
sampler
(
logits
=
fake_logits
,
sampling_metadata
=
sampling_metadata
)
...
...
@@ -570,7 +547,7 @@ def test_sampler_mixed(seed: int, device: str):
assert
nth_output
.
output_token
in
expected_tokens
[
i
]
# Test batch
test_sampling
(
model_runner
)
test_sampling
()
# Shuffle the batch and resample
target_index
=
list
(
range
(
batch_size
))
...
...
@@ -583,9 +560,7 @@ def test_sampler_mixed(seed: int, device: str):
# This time, results of seeded random samples will be compared with
# the corresponding sample in the pre-shuffled batch
test_sampling
(
model_runner
)
del
model_runner
test_sampling
()
@
pytest
.
mark
.
parametrize
(
"seed"
,
RANDOM_SEEDS
)
...
...
@@ -605,12 +580,6 @@ def test_sampler_top_k_top_p(seed: int, device: str):
device
=
input_tensor
.
device
,
dtype
=
input_tensor
.
dtype
)
sampler
=
MockLogitsSampler
(
fake_logits
)
model_runner
=
ModelRunner
(
model_config
=
None
,
parallel_config
=
None
,
scheduler_config
=
None
,
device_config
=
None
,
load_config
=
None
,
lora_config
=
None
)
generation_model
=
GenerationMixin
()
generation_config
=
GenerationConfig
(
top_k
=
top_k
,
...
...
@@ -641,7 +610,7 @@ def test_sampler_top_k_top_p(seed: int, device: str):
seq_lens
,
query_lens
=
seq_lens
,
device
=
device
,
pin_memory
=
model_runner
.
pin_memory
)
pin_memory
=
is_pin_memory_available
()
)
sample_probs
=
None
...
...
@@ -657,5 +626,3 @@ def test_sampler_top_k_top_p(seed: int, device: str):
hf_probs
=
torch
.
softmax
(
hf_probs
,
dim
=-
1
,
dtype
=
torch
.
float
)
assert
torch
.
allclose
(
hf_probs
,
sample_probs
,
atol
=
1e-5
)
assert
torch
.
equal
(
hf_probs
.
eq
(
0
),
sample_probs
.
eq
(
0
))
del
model_runner
tests/test_logits_processor.py
View file @
190bc838
...
...
@@ -9,7 +9,7 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.model_executor.utils
import
set_random_seed
from
vllm.sequence
import
SamplingParams
,
SequenceData
,
SequenceGroupMetadata
from
vllm.
worker.model_runner
import
ModelRunner
from
vllm.
utils
import
is_pin_memory_available
class
MockLogitsProcessor
(
LogitsProcessor
):
...
...
@@ -31,20 +31,14 @@ class MockLogitsProcessor(LogitsProcessor):
def
_prepare_test
(
batch_size
:
int
)
->
Tuple
[
torch
.
Tensor
,
torch
.
Tensor
,
MockLogitsProcessor
,
ModelRunner
]:
)
->
Tuple
[
torch
.
Tensor
,
torch
.
Tensor
,
MockLogitsProcessor
]:
vocab_size
=
32000
input_tensor
=
torch
.
rand
((
batch_size
,
1024
),
dtype
=
torch
.
float16
)
fake_logits
=
torch
.
full
((
batch_size
,
vocab_size
),
1e-2
,
dtype
=
input_tensor
.
dtype
)
logits_processor
=
MockLogitsProcessor
(
32000
,
0.5
,
fake_logits
)
model_runner
=
ModelRunner
(
model_config
=
None
,
parallel_config
=
None
,
scheduler_config
=
None
,
device_config
=
None
,
load_config
=
None
,
lora_config
=
None
)
return
input_tensor
,
fake_logits
,
logits_processor
,
model_runner
return
input_tensor
,
fake_logits
,
logits_processor
RANDOM_SEEDS
=
list
(
range
(
128
))
...
...
@@ -59,8 +53,7 @@ def test_logits_processors(seed: int, device: str):
set_random_seed
(
seed
)
torch
.
set_default_device
(
device
)
batch_size
=
random
.
randint
(
1
,
256
)
input_tensor
,
fake_logits
,
logits_processor
,
model_runner
=
_prepare_test
(
batch_size
)
input_tensor
,
fake_logits
,
logits_processor
=
_prepare_test
(
batch_size
)
# This sample logits processor gives infinite score to the i-th token,
# where i is the length of the input sequence.
...
...
@@ -87,8 +80,8 @@ def test_logits_processors(seed: int, device: str):
seq_group_metadata_list
,
seq_lens
,
query_lens
=
seq_lens
,
device
=
model_runner
.
device
,
pin_memory
=
model_runner
.
pin_memory
)
device
=
device
,
pin_memory
=
is_pin_memory_available
()
)
logits_processor_output
=
logits_processor
(
embedding
=
None
,
hidden_states
=
input_tensor
,
...
...
@@ -99,5 +92,3 @@ def test_logits_processors(seed: int, device: str):
fake_logits
*=
logits_processor
.
scale
assert
torch
.
allclose
(
logits_processor_output
[:,
1
],
fake_logits
[:,
1
],
1e-4
)
del
model_runner
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment