Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a68aef25
Commit
a68aef25
authored
Jun 05, 2025
by
zhuwenwen
Browse files
[tests] fix v1, tokenization and runai_model_streamer_test
parent
d36deb1a
Changes
16
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
345 additions
and
322 deletions
+345
-322
tests/runai_model_streamer_test/test_weight_utils.py
tests/runai_model_streamer_test/test_weight_utils.py
+5
-3
tests/samplers/test_no_bad_words.py
tests/samplers/test_no_bad_words.py
+3
-2
tests/tensorizer_loader/test_tensorizer.py
tests/tensorizer_loader/test_tensorizer.py
+4
-3
tests/tokenization/test_detokenize.py
tests/tokenization/test_detokenize.py
+2
-2
tests/tokenization/test_tokenizer_group.py
tests/tokenization/test_tokenizer_group.py
+4
-2
tests/v1/core/test_scheduler.py
tests/v1/core/test_scheduler.py
+286
-286
tests/v1/e2e/test_correctness_sliding_window.py
tests/v1/e2e/test_correctness_sliding_window.py
+6
-4
tests/v1/e2e/test_spec_decode.py
tests/v1/e2e/test_spec_decode.py
+5
-0
tests/v1/e2e/untest_cascade_attention.py
tests/v1/e2e/untest_cascade_attention.py
+5
-4
tests/v1/engine/test_llm_engine.py
tests/v1/engine/test_llm_engine.py
+4
-2
tests/v1/engine/test_output_processor.py
tests/v1/engine/test_output_processor.py
+3
-2
tests/v1/entrypoints/llm/test_struct_output_generate.py
tests/v1/entrypoints/llm/test_struct_output_generate.py
+9
-7
tests/v1/sample/test_logprobs.py
tests/v1/sample/test_logprobs.py
+3
-1
tests/v1/sample/test_logprobs_e2e.py
tests/v1/sample/test_logprobs_e2e.py
+4
-3
tests/v1/sample/test_sampling_params_e2e.py
tests/v1/sample/test_sampling_params_e2e.py
+2
-1
tests/v1/spec_decode/untest_max_len.py
tests/v1/spec_decode/untest_max_len.py
+0
-0
No files found.
tests/runai_model_streamer_test/test_weight_utils.py
View file @
a68aef25
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
os
import
glob
import
glob
import
tempfile
import
tempfile
...
@@ -9,6 +10,7 @@ import torch
...
@@ -9,6 +10,7 @@ import torch
from
vllm.model_executor.model_loader.weight_utils
import
(
from
vllm.model_executor.model_loader.weight_utils
import
(
download_weights_from_hf
,
runai_safetensors_weights_iterator
,
download_weights_from_hf
,
runai_safetensors_weights_iterator
,
safetensors_weights_iterator
)
safetensors_weights_iterator
)
from
..utils
import
models_path_prefix
def
test_runai_model_loader
():
def
test_runai_model_loader
():
...
@@ -23,10 +25,10 @@ def test_runai_model_loader():
...
@@ -23,10 +25,10 @@ def test_runai_model_loader():
runai_model_streamer_tensors
=
{}
runai_model_streamer_tensors
=
{}
hf_safetensors_tensors
=
{}
hf_safetensors_tensors
=
{}
for
name
,
tensor
in
runai_safetensors_weights_iterator
(
safetensors
):
for
name
,
tensor
in
runai_safetensors_weights_iterator
(
safetensors
,
False
):
runai_model_streamer_tensors
[
name
]
=
tensor
runai_model_streamer_tensors
[
name
]
=
tensor
for
name
,
tensor
in
safetensors_weights_iterator
(
safetensors
):
for
name
,
tensor
in
safetensors_weights_iterator
(
safetensors
,
False
):
hf_safetensors_tensors
[
name
]
=
tensor
hf_safetensors_tensors
[
name
]
=
tensor
assert
len
(
runai_model_streamer_tensors
)
==
len
(
hf_safetensors_tensors
)
assert
len
(
runai_model_streamer_tensors
)
==
len
(
hf_safetensors_tensors
)
...
@@ -38,4 +40,4 @@ def test_runai_model_loader():
...
@@ -38,4 +40,4 @@ def test_runai_model_loader():
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
test_runai_model_loader
()
test_runai_model_loader
()
\ No newline at end of file
tests/samplers/test_no_bad_words.py
View file @
a68aef25
...
@@ -43,7 +43,8 @@ def _generate(
...
@@ -43,7 +43,8 @@ def _generate(
class
TestOneTokenBadWord
:
class
TestOneTokenBadWord
:
MODEL
=
os
.
path
.
join
(
models_path_prefix
,
"TheBloke/Llama-2-7B-fp16"
)
# MODEL = os.path.join(models_path_prefix, "TheBloke/Llama-2-7B-fp16")
MODEL
=
"TheBloke/Llama-2-7B-fp16"
PROMPT
=
"Hi! How are"
PROMPT
=
"Hi! How are"
TARGET_TOKEN
=
"you"
TARGET_TOKEN
=
"you"
...
@@ -191,4 +192,4 @@ class TestTwoTokenBadWord:
...
@@ -191,4 +192,4 @@ class TestTwoTokenBadWord:
prompt
:
str
,
prompt
:
str
,
add_special_tokens
:
bool
=
True
)
->
list
[
int
]:
add_special_tokens
:
bool
=
True
)
->
list
[
int
]:
return
self
.
tokenizer
(
prompt
,
return
self
.
tokenizer
(
prompt
,
add_special_tokens
=
add_special_tokens
).
input_ids
add_special_tokens
=
add_special_tokens
).
input_ids
\ No newline at end of file
tests/tensorizer_loader/test_tensorizer.py
View file @
a68aef25
...
@@ -7,16 +7,15 @@ import pathlib
...
@@ -7,16 +7,15 @@ import pathlib
import
subprocess
import
subprocess
from
functools
import
partial
from
functools
import
partial
from
unittest.mock
import
MagicMock
,
patch
from
unittest.mock
import
MagicMock
,
patch
from
typing
import
List
,
Tuple
,
Optional
import
openai
import
openai
import
pytest
import
pytest
import
torch
import
torch
from
huggingface_hub
import
snapshot_download
from
huggingface_hub
import
snapshot_download
from
typing
import
List
,
Tuple
,
Optional
from
vllm
import
EngineArgs
,
LLMEngine
,
RequestOutput
,
SamplingParams
from
vllm
import
EngineArgs
,
LLMEngine
,
RequestOutput
,
SamplingParams
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.lora.request
import
LoRARequest
# yapf conflicts with isort for this docstring
# yapf conflicts with isort for this docstring
# yapf: disable
# yapf: disable
from
vllm.model_executor.model_loader.tensorizer
import
(
TensorizerConfig
,
from
vllm.model_executor.model_loader.tensorizer
import
(
TensorizerConfig
,
...
@@ -26,6 +25,8 @@ from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
...
@@ -26,6 +25,8 @@ from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
open_stream
,
open_stream
,
serialize_vllm_model
,
serialize_vllm_model
,
tensorize_vllm_model
)
tensorize_vllm_model
)
from
vllm.lora.request
import
LoRARequest
# yapf: enable
# yapf: enable
from
vllm.utils
import
PlaceholderModule
,
import_from_path
from
vllm.utils
import
PlaceholderModule
,
import_from_path
...
@@ -245,7 +246,7 @@ def test_vllm_model_can_load_with_lora(vllm_runner, tmp_path):
...
@@ -245,7 +246,7 @@ def test_vllm_model_can_load_with_lora(vllm_runner, tmp_path):
EXAMPLES_PATH
/
"offline_inference/multilora_inference.py"
,
EXAMPLES_PATH
/
"offline_inference/multilora_inference.py"
,
)
)
model_ref
=
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-2-7b-hf"
)
model_ref
=
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-2-7b-hf"
)
# lora_path = snapshot_download(repo_id="yard1/llama-2-7b-sql-lora-test")
# lora_path = snapshot_download(repo_id="yard1/llama-2-7b-sql-lora-test")
lora_path
=
os
.
path
.
join
(
models_path_prefix
,
"yard1/llama-2-7b-sql-lora-test"
)
lora_path
=
os
.
path
.
join
(
models_path_prefix
,
"yard1/llama-2-7b-sql-lora-test"
)
test_prompts
=
multilora_inference
.
create_test_prompts
(
lora_path
)
test_prompts
=
multilora_inference
.
create_test_prompts
(
lora_path
)
...
...
tests/tokenization/test_detokenize.py
View file @
a68aef25
...
@@ -89,7 +89,7 @@ def tokenizer(tokenizer_name):
...
@@ -89,7 +89,7 @@ def tokenizer(tokenizer_name):
AutoTokenizer
.
from_pretrained
(
tokenizer_name
))
AutoTokenizer
.
from_pretrained
(
tokenizer_name
))
@
pytest
.
mark
.
parametrize
(
"tokenizer_name"
,
[
"mistralai/Pixtral-12B-2409"
])
@
pytest
.
mark
.
parametrize
(
"tokenizer_name"
,
[
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Pixtral-12B-2409"
)
])
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"truth"
,
"truth"
,
[
[
...
@@ -403,4 +403,4 @@ def test_decode_prompt_logprobs_chunked_prefill(
...
@@ -403,4 +403,4 @@ def test_decode_prompt_logprobs_chunked_prefill(
generated_string
+=
prompt_logprobs
[
prompt_token
].
decoded_token
generated_string
+=
prompt_logprobs
[
prompt_token
].
decoded_token
assert
generated_string
==
example_prompts
[
idx
],
(
assert
generated_string
==
example_prompts
[
idx
],
(
"Detokenized prompt logprobs do not match original prompt"
)
"Detokenized prompt logprobs do not match original prompt"
)
\ No newline at end of file
tests/tokenization/test_tokenizer_group.py
View file @
a68aef25
...
@@ -8,11 +8,13 @@ from ..utils import models_path_prefix
...
@@ -8,11 +8,13 @@ from ..utils import models_path_prefix
from
vllm.transformers_utils.tokenizer_group
import
TokenizerGroup
from
vllm.transformers_utils.tokenizer_group
import
TokenizerGroup
# export HF_ENDPOINT=https://hf-mirror.com
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_tokenizer_group
():
async
def
test_tokenizer_group
():
reference_tokenizer
=
AutoTokenizer
.
from_pretrained
(
os
.
path
.
join
(
models_path_prefix
,
"gpt2"
))
# reference_tokenizer = AutoTokenizer.from_pretrained(os.path.join(models_path_prefix, "gpt2"))
reference_tokenizer
=
AutoTokenizer
.
from_pretrained
(
"gpt2"
)
tokenizer_group
=
TokenizerGroup
(
tokenizer_group
=
TokenizerGroup
(
tokenizer_id
=
os
.
path
.
join
(
models_path_prefix
,
"gpt2"
),
#
tokenizer_id=os.path.join(models_path_prefix, "gpt2"),
enable_lora
=
False
,
enable_lora
=
False
,
max_num_seqs
=
1
,
max_num_seqs
=
1
,
max_input_length
=
None
,
max_input_length
=
None
,
...
...
tests/v1/core/test_scheduler.py
View file @
a68aef25
This diff is collapsed.
Click to expand it.
tests/v1/e2e/test_correctness_sliding_window.py
View file @
a68aef25
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
import
os
import
pytest
import
pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
...core.block.e2e.test_correctness_sliding_window
import
(
check_answers
,
from
...core.block.e2e.test_correctness_sliding_window
import
(
check_answers
,
prep_prompts
)
prep_prompts
)
from
...utils
import
models_path_prefix
@
dataclass
@
dataclass
...
@@ -16,16 +18,16 @@ class TestConfig:
...
@@ -16,16 +18,16 @@ class TestConfig:
model_config
=
{
model_config
=
{
"bigcode/starcoder2-3b"
:
TestConfig
(
4096
,
(
800
,
1100
)),
os
.
path
.
join
(
models_path_prefix
,
"bigcode/starcoder2-3b"
)
:
TestConfig
(
4096
,
(
800
,
1100
)),
"google/gemma-2-2b-it"
:
TestConfig
(
4096
,
(
400
,
800
)),
os
.
path
.
join
(
models_path_prefix
,
"google/gemma-2-2b-it"
)
:
TestConfig
(
4096
,
(
400
,
800
)),
}
}
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"model"
,
"model"
,
[
[
"bigcode/starcoder2-3b"
,
# sliding window only
os
.
path
.
join
(
models_path_prefix
,
"bigcode/starcoder2-3b"
)
,
# sliding window only
"google/gemma-2-2b-it"
,
# sliding window + full attention
os
.
path
.
join
(
models_path_prefix
,
"google/gemma-2-2b-it"
)
,
# sliding window + full attention
])
])
@
pytest
.
mark
.
parametrize
(
"batch_size"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"batch_size"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"seed"
,
[
1
])
@
pytest
.
mark
.
parametrize
(
"seed"
,
[
1
])
...
...
tests/v1/e2e/test_spec_decode.py
View file @
a68aef25
...
@@ -4,9 +4,11 @@ from __future__ import annotations
...
@@ -4,9 +4,11 @@ from __future__ import annotations
import
random
import
random
from
typing
import
Any
from
typing
import
Any
import
os
import
pytest
import
pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
...utils
import
models_path_prefix
@
pytest
.
fixture
@
pytest
.
fixture
...
@@ -49,14 +51,17 @@ def sampling_config():
...
@@ -49,14 +51,17 @@ def sampling_config():
@
pytest
.
fixture
@
pytest
.
fixture
def
model_name
():
def
model_name
():
# return os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct")
return
"meta-llama/Llama-3.1-8B-Instruct"
return
"meta-llama/Llama-3.1-8B-Instruct"
def
eagle_model_name
():
def
eagle_model_name
():
# return os.path.join(models_path_prefix, "yuhuili/EAGLE-LLaMA3.1-Instruct-8B")
return
"yuhuili/EAGLE-LLaMA3.1-Instruct-8B"
return
"yuhuili/EAGLE-LLaMA3.1-Instruct-8B"
def
eagle3_model_name
():
def
eagle3_model_name
():
# return os.path.join(models_path_prefix, "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B")
return
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
return
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
...
...
tests/v1/e2e/test_cascade_attention.py
→
tests/v1/e2e/
un
test_cascade_attention.py
View file @
a68aef25
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
os
import
pytest
import
pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
...utils
import
fork_new_process_for_each_test
from
...utils
import
fork_new_process_for_each_test
,
models_path_prefix
@
fork_new_process_for_each_test
@
fork_new_process_for_each_test
@
pytest
.
mark
.
parametrize
(
"attn_backend"
,
@
pytest
.
mark
.
parametrize
(
"attn_backend"
,
[
"FLASH_ATTN_VLLM_V1"
,
"FLASHINFER_VLLM_V1"
])
[
"FLASH_ATTN_VLLM_V1"
])
#
"FLASHINFER_VLLM_V1"
def
test_cascade_attention
(
example_system_message
,
monkeypatch
,
attn_backend
):
def
test_cascade_attention
(
example_system_message
,
monkeypatch
,
attn_backend
):
prompt
=
"
\n
<User>: Implement fibonacci sequence in Python.
\n
<Claude>:"
prompt
=
"
\n
<User>: Implement fibonacci sequence in Python.
\n
<Claude>:"
...
@@ -17,7 +18,7 @@ def test_cascade_attention(example_system_message, monkeypatch, attn_backend):
...
@@ -17,7 +18,7 @@ def test_cascade_attention(example_system_message, monkeypatch, attn_backend):
m
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
m
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
m
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
attn_backend
)
m
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
attn_backend
)
llm
=
LLM
(
model
=
"Qwen/Qwen2-1.5B-Instruct"
)
llm
=
LLM
(
model
=
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-1.5B-Instruct"
)
)
sampling_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
100
)
sampling_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
100
)
# No cascade attention.
# No cascade attention.
...
@@ -29,4 +30,4 @@ def test_cascade_attention(example_system_message, monkeypatch, attn_backend):
...
@@ -29,4 +30,4 @@ def test_cascade_attention(example_system_message, monkeypatch, attn_backend):
prompts
=
[
example_system_message
+
prompt
]
*
64
prompts
=
[
example_system_message
+
prompt
]
*
64
responses
=
llm
.
generate
(
prompts
,
sampling_params
)
responses
=
llm
.
generate
(
prompts
,
sampling_params
)
for
response
in
responses
:
for
response
in
responses
:
assert
response
.
outputs
[
0
].
text
==
ref_output
assert
response
.
outputs
[
0
].
text
==
ref_output
\ No newline at end of file
tests/v1/engine/test_llm_engine.py
View file @
a68aef25
...
@@ -3,11 +3,13 @@
...
@@ -3,11 +3,13 @@
import
random
import
random
from
typing
import
Optional
from
typing
import
Optional
import
os
import
pytest
import
pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
...utils
import
models_path_prefix
MODEL
=
"facebook/opt-125m"
MODEL
=
os
.
path
.
join
(
models_path_prefix
,
"facebook/opt-125m"
)
DTYPE
=
"half"
DTYPE
=
"half"
...
@@ -96,4 +98,4 @@ def test_parallel_sampling(vllm_model, example_prompts) -> None:
...
@@ -96,4 +98,4 @@ def test_parallel_sampling(vllm_model, example_prompts) -> None:
}
}
raise
AssertionError
(
raise
AssertionError
(
f
"
{
len
(
completion_counts
)
}
unique completions; expected"
f
"
{
len
(
completion_counts
)
}
unique completions; expected"
f
"
{
n
}
. Repeats:
{
repeats
}
"
)
f
"
{
n
}
. Repeats:
{
repeats
}
"
)
\ No newline at end of file
tests/v1/engine/test_output_processor.py
View file @
a68aef25
...
@@ -20,6 +20,7 @@ from vllm.v1.engine import EngineCoreRequest
...
@@ -20,6 +20,7 @@ from vllm.v1.engine import EngineCoreRequest
from
vllm.v1.engine.output_processor
import
(
OutputProcessor
,
from
vllm.v1.engine.output_processor
import
(
OutputProcessor
,
RequestOutputCollector
)
RequestOutputCollector
)
from
vllm.v1.metrics.stats
import
IterationStats
from
vllm.v1.metrics.stats
import
IterationStats
from
...utils
import
models_path_prefix
def
_ref_convert_id_to_token
(
def
_ref_convert_id_to_token
(
...
@@ -520,7 +521,7 @@ def test_stop_token(include_stop_str_in_output: bool,
...
@@ -520,7 +521,7 @@ def test_stop_token(include_stop_str_in_output: bool,
dummy_test_vectors: dummy engine core outputs and other data structures
dummy_test_vectors: dummy engine core outputs and other data structures
"""
"""
model_id
=
dummy_test_vectors
.
tokenizer
.
name_or_path
model_id
=
dummy_test_vectors
.
tokenizer
.
name_or_path
if
model_id
!=
'meta-llama/Llama-3.2-1B'
:
if
model_id
!=
os
.
path
.
join
(
models_path_prefix
,
'meta-llama/Llama-3.2-1B'
)
:
raise
AssertionError
(
"Test requires meta-llama/Llama-3.2-1B but "
raise
AssertionError
(
"Test requires meta-llama/Llama-3.2-1B but "
f
"
{
model_id
}
is in use."
)
f
"
{
model_id
}
is in use."
)
do_logprobs
=
num_sample_logprobs
is
not
None
do_logprobs
=
num_sample_logprobs
is
not
None
...
@@ -992,4 +993,4 @@ async def test_cumulative_output_collector_n():
...
@@ -992,4 +993,4 @@ async def test_cumulative_output_collector_n():
# Third is the one where index is 2
# Third is the one where index is 2
third
=
[
k
for
k
in
result
.
outputs
if
k
.
index
==
2
]
third
=
[
k
for
k
in
result
.
outputs
if
k
.
index
==
2
]
assert
len
(
third
)
==
1
assert
len
(
third
)
==
1
assert
third
[
0
].
text
==
"c"
assert
third
[
0
].
text
==
"c"
\ No newline at end of file
tests/v1/entrypoints/llm/test_struct_output_generate.py
View file @
a68aef25
...
@@ -7,6 +7,7 @@ import re
...
@@ -7,6 +7,7 @@ import re
from
enum
import
Enum
from
enum
import
Enum
from
typing
import
Any
from
typing
import
Any
import
os
import
jsonschema
import
jsonschema
import
pytest
import
pytest
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
...
@@ -15,22 +16,23 @@ from vllm.entrypoints.llm import LLM
...
@@ -15,22 +16,23 @@ from vllm.entrypoints.llm import LLM
from
vllm.outputs
import
RequestOutput
from
vllm.outputs
import
RequestOutput
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.sampling_params
import
GuidedDecodingParams
,
SamplingParams
from
vllm.sampling_params
import
GuidedDecodingParams
,
SamplingParams
from
....utils
import
models_path_prefix
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE
=
[
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE
=
[
(
"mistralai/Ministral-8B-Instruct-2410"
,
"xgrammar:disable-any-whitespace"
,
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Ministral-8B-Instruct-2410"
)
,
"xgrammar:disable-any-whitespace"
,
"auto"
),
"auto"
),
(
"mistralai/Ministral-8B-Instruct-2410"
,
"guidance:disable-any-whitespace"
,
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Ministral-8B-Instruct-2410"
)
,
"guidance:disable-any-whitespace"
,
"auto"
),
"auto"
),
(
"mistralai/Ministral-8B-Instruct-2410"
,
"xgrammar:disable-any-whitespace"
,
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Ministral-8B-Instruct-2410"
)
,
"xgrammar:disable-any-whitespace"
,
"mistral"
),
"mistral"
),
(
"Qwen/Qwen2.5-1.5B-Instruct"
,
"xgrammar:disable-any-whitespace"
,
"auto"
),
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2.5-1.5B-Instruct"
)
,
"xgrammar:disable-any-whitespace"
,
"auto"
),
#FIXME: This test is flaky on CI thus disabled
#FIXME: This test is flaky on CI thus disabled
#("Qwen/Qwen2.5-1.5B-Instruct", "guidance:disable-any-whitespace", "auto"),
#("Qwen/Qwen2.5-1.5B-Instruct", "guidance:disable-any-whitespace", "auto"),
]
]
PARAMS_MODELS_TOKENIZER_MODE
=
[
PARAMS_MODELS_TOKENIZER_MODE
=
[
(
"mistralai/Ministral-8B-Instruct-2410"
,
"auto"
),
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Ministral-8B-Instruct-2410"
)
,
"auto"
),
(
"Qwen/Qwen2.5-1.5B-Instruct"
,
"auto"
),
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2.5-1.5B-Instruct"
)
,
"auto"
),
]
]
...
@@ -572,4 +574,4 @@ def test_guidance_no_additional_properties(monkeypatch: pytest.MonkeyPatch):
...
@@ -572,4 +574,4 @@ def test_guidance_no_additional_properties(monkeypatch: pytest.MonkeyPatch):
assert
"a3"
in
generated
assert
"a3"
in
generated
assert
"a4"
not
in
generated
assert
"a4"
not
in
generated
assert
"a5"
not
in
generated
assert
"a5"
not
in
generated
assert
"a6"
not
in
generated
assert
"a6"
not
in
generated
\ No newline at end of file
tests/v1/sample/test_logprobs.py
View file @
a68aef25
...
@@ -3,6 +3,7 @@
...
@@ -3,6 +3,7 @@
import
itertools
import
itertools
from
collections.abc
import
Generator
from
collections.abc
import
Generator
import
os
import
pytest
import
pytest
import
torch
import
torch
...
@@ -13,8 +14,9 @@ from tests.v1.sample.utils import (
...
@@ -13,8 +14,9 @@ from tests.v1.sample.utils import (
from
vllm
import
SamplingParams
from
vllm
import
SamplingParams
from
...conftest
import
HfRunner
,
VllmRunner
from
...conftest
import
HfRunner
,
VllmRunner
from
...utils
import
models_path_prefix
MODEL
=
"meta-llama/Llama-3.2-1B-Instruct"
MODEL
=
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.2-1B-Instruct"
)
DTYPE
=
"half"
DTYPE
=
"half"
NONE
=
BatchLogprobsComposition
.
NONE
NONE
=
BatchLogprobsComposition
.
NONE
...
...
tests/v1/sample/test_logprobs_e2e.py
View file @
a68aef25
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
os
import
lm_eval
import
lm_eval
from
...utils
import
RemoteOpenAIServer
from
...utils
import
RemoteOpenAIServer
,
models_path_prefix
# arc-easy uses prompt_logprobs=1, logprobs=1
# arc-easy uses prompt_logprobs=1, logprobs=1
TASK
=
"arc_easy"
TASK
=
"arc_easy"
...
@@ -11,7 +12,7 @@ RTOL = 0.03
...
@@ -11,7 +12,7 @@ RTOL = 0.03
EXPECTED_VALUE
=
0.62
EXPECTED_VALUE
=
0.62
# FIXME(rob): enable prefix caching once supported.
# FIXME(rob): enable prefix caching once supported.
MODEL
=
"meta-llama/Llama-3.2-1B-Instruct"
MODEL
=
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.2-1B-Instruct"
)
MODEL_ARGS
=
f
"pretrained=
{
MODEL
}
,enforce_eager=True,enable_prefix_caching=False"
# noqa: E501
MODEL_ARGS
=
f
"pretrained=
{
MODEL
}
,enforce_eager=True,enable_prefix_caching=False"
# noqa: E501
SERVER_ARGS
=
[
SERVER_ARGS
=
[
"--enforce_eager"
,
"--no_enable_prefix_caching"
,
"--disable-log-requests"
"--enforce_eager"
,
"--no_enable_prefix_caching"
,
"--disable-log-requests"
...
@@ -49,4 +50,4 @@ def test_promt_logprobs_e2e_server():
...
@@ -49,4 +50,4 @@ def test_promt_logprobs_e2e_server():
measured_value
=
results
[
"results"
][
TASK
][
FILTER
]
measured_value
=
results
[
"results"
][
TASK
][
FILTER
]
assert
(
measured_value
-
RTOL
<
EXPECTED_VALUE
assert
(
measured_value
-
RTOL
<
EXPECTED_VALUE
and
measured_value
+
RTOL
>
EXPECTED_VALUE
and
measured_value
+
RTOL
>
EXPECTED_VALUE
),
f
"Expected:
{
EXPECTED_VALUE
}
| Measured:
{
measured_value
}
"
),
f
"Expected:
{
EXPECTED_VALUE
}
| Measured:
{
measured_value
}
"
\ No newline at end of file
tests/v1/sample/test_sampling_params_e2e.py
View file @
a68aef25
...
@@ -4,11 +4,12 @@ import os
...
@@ -4,11 +4,12 @@ import os
import
pytest
import
pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
...utils
import
models_path_prefix
if
os
.
getenv
(
"VLLM_USE_V1"
,
"0"
)
!=
"1"
:
if
os
.
getenv
(
"VLLM_USE_V1"
,
"0"
)
!=
"1"
:
pytest
.
skip
(
"Test package requires V1"
,
allow_module_level
=
True
)
pytest
.
skip
(
"Test package requires V1"
,
allow_module_level
=
True
)
MODEL
=
"meta-llama/Llama-3.2-1B"
MODEL
=
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.2-1B"
)
PROMPT
=
"Hello my name is Robert and I"
PROMPT
=
"Hello my name is Robert and I"
...
...
tests/v1/spec_decode/test_max_len.py
→
tests/v1/spec_decode/
un
test_max_len.py
View file @
a68aef25
File moved
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment