Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8e340b4f
Commit
8e340b4f
authored
Jun 05, 2025
by
yangql
Browse files
Merge remote-tracking branch 'origin/v0.8.5.post1-dev' into v0.8.5.post1-dev
parents
1cb37dab
a68aef25
Changes
16
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
345 additions
and
322 deletions
+345
-322
tests/runai_model_streamer_test/test_weight_utils.py
tests/runai_model_streamer_test/test_weight_utils.py
+5
-3
tests/samplers/test_no_bad_words.py
tests/samplers/test_no_bad_words.py
+3
-2
tests/tensorizer_loader/test_tensorizer.py
tests/tensorizer_loader/test_tensorizer.py
+4
-3
tests/tokenization/test_detokenize.py
tests/tokenization/test_detokenize.py
+2
-2
tests/tokenization/test_tokenizer_group.py
tests/tokenization/test_tokenizer_group.py
+4
-2
tests/v1/core/test_scheduler.py
tests/v1/core/test_scheduler.py
+286
-286
tests/v1/e2e/test_correctness_sliding_window.py
tests/v1/e2e/test_correctness_sliding_window.py
+6
-4
tests/v1/e2e/test_spec_decode.py
tests/v1/e2e/test_spec_decode.py
+5
-0
tests/v1/e2e/untest_cascade_attention.py
tests/v1/e2e/untest_cascade_attention.py
+5
-4
tests/v1/engine/test_llm_engine.py
tests/v1/engine/test_llm_engine.py
+4
-2
tests/v1/engine/test_output_processor.py
tests/v1/engine/test_output_processor.py
+3
-2
tests/v1/entrypoints/llm/test_struct_output_generate.py
tests/v1/entrypoints/llm/test_struct_output_generate.py
+9
-7
tests/v1/sample/test_logprobs.py
tests/v1/sample/test_logprobs.py
+3
-1
tests/v1/sample/test_logprobs_e2e.py
tests/v1/sample/test_logprobs_e2e.py
+4
-3
tests/v1/sample/test_sampling_params_e2e.py
tests/v1/sample/test_sampling_params_e2e.py
+2
-1
tests/v1/spec_decode/untest_max_len.py
tests/v1/spec_decode/untest_max_len.py
+0
-0
No files found.
tests/runai_model_streamer_test/test_weight_utils.py
View file @
8e340b4f
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
os
import
glob
import
glob
import
tempfile
import
tempfile
...
@@ -9,6 +10,7 @@ import torch
...
@@ -9,6 +10,7 @@ import torch
from
vllm.model_executor.model_loader.weight_utils
import
(
from
vllm.model_executor.model_loader.weight_utils
import
(
download_weights_from_hf
,
runai_safetensors_weights_iterator
,
download_weights_from_hf
,
runai_safetensors_weights_iterator
,
safetensors_weights_iterator
)
safetensors_weights_iterator
)
from
..utils
import
models_path_prefix
def
test_runai_model_loader
():
def
test_runai_model_loader
():
...
@@ -23,10 +25,10 @@ def test_runai_model_loader():
...
@@ -23,10 +25,10 @@ def test_runai_model_loader():
runai_model_streamer_tensors
=
{}
runai_model_streamer_tensors
=
{}
hf_safetensors_tensors
=
{}
hf_safetensors_tensors
=
{}
for
name
,
tensor
in
runai_safetensors_weights_iterator
(
safetensors
):
for
name
,
tensor
in
runai_safetensors_weights_iterator
(
safetensors
,
False
):
runai_model_streamer_tensors
[
name
]
=
tensor
runai_model_streamer_tensors
[
name
]
=
tensor
for
name
,
tensor
in
safetensors_weights_iterator
(
safetensors
):
for
name
,
tensor
in
safetensors_weights_iterator
(
safetensors
,
False
):
hf_safetensors_tensors
[
name
]
=
tensor
hf_safetensors_tensors
[
name
]
=
tensor
assert
len
(
runai_model_streamer_tensors
)
==
len
(
hf_safetensors_tensors
)
assert
len
(
runai_model_streamer_tensors
)
==
len
(
hf_safetensors_tensors
)
...
...
tests/samplers/test_no_bad_words.py
View file @
8e340b4f
...
@@ -43,7 +43,8 @@ def _generate(
...
@@ -43,7 +43,8 @@ def _generate(
class
TestOneTokenBadWord
:
class
TestOneTokenBadWord
:
MODEL
=
os
.
path
.
join
(
models_path_prefix
,
"TheBloke/Llama-2-7B-fp16"
)
# MODEL = os.path.join(models_path_prefix, "TheBloke/Llama-2-7B-fp16")
MODEL
=
"TheBloke/Llama-2-7B-fp16"
PROMPT
=
"Hi! How are"
PROMPT
=
"Hi! How are"
TARGET_TOKEN
=
"you"
TARGET_TOKEN
=
"you"
...
...
tests/tensorizer_loader/test_tensorizer.py
View file @
8e340b4f
...
@@ -7,16 +7,15 @@ import pathlib
...
@@ -7,16 +7,15 @@ import pathlib
import
subprocess
import
subprocess
from
functools
import
partial
from
functools
import
partial
from
unittest.mock
import
MagicMock
,
patch
from
unittest.mock
import
MagicMock
,
patch
from
typing
import
List
,
Tuple
,
Optional
import
openai
import
openai
import
pytest
import
pytest
import
torch
import
torch
from
huggingface_hub
import
snapshot_download
from
huggingface_hub
import
snapshot_download
from
typing
import
List
,
Tuple
,
Optional
from
vllm
import
EngineArgs
,
LLMEngine
,
RequestOutput
,
SamplingParams
from
vllm
import
EngineArgs
,
LLMEngine
,
RequestOutput
,
SamplingParams
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.lora.request
import
LoRARequest
# yapf conflicts with isort for this docstring
# yapf conflicts with isort for this docstring
# yapf: disable
# yapf: disable
from
vllm.model_executor.model_loader.tensorizer
import
(
TensorizerConfig
,
from
vllm.model_executor.model_loader.tensorizer
import
(
TensorizerConfig
,
...
@@ -26,6 +25,8 @@ from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
...
@@ -26,6 +25,8 @@ from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
open_stream
,
open_stream
,
serialize_vllm_model
,
serialize_vllm_model
,
tensorize_vllm_model
)
tensorize_vllm_model
)
from
vllm.lora.request
import
LoRARequest
# yapf: enable
# yapf: enable
from
vllm.utils
import
PlaceholderModule
,
import_from_path
from
vllm.utils
import
PlaceholderModule
,
import_from_path
...
...
tests/tokenization/test_detokenize.py
View file @
8e340b4f
...
@@ -89,7 +89,7 @@ def tokenizer(tokenizer_name):
...
@@ -89,7 +89,7 @@ def tokenizer(tokenizer_name):
AutoTokenizer
.
from_pretrained
(
tokenizer_name
))
AutoTokenizer
.
from_pretrained
(
tokenizer_name
))
@
pytest
.
mark
.
parametrize
(
"tokenizer_name"
,
[
"mistralai/Pixtral-12B-2409"
])
@
pytest
.
mark
.
parametrize
(
"tokenizer_name"
,
[
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Pixtral-12B-2409"
)
])
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"truth"
,
"truth"
,
[
[
...
...
tests/tokenization/test_tokenizer_group.py
View file @
8e340b4f
...
@@ -8,11 +8,13 @@ from ..utils import models_path_prefix
...
@@ -8,11 +8,13 @@ from ..utils import models_path_prefix
from
vllm.transformers_utils.tokenizer_group
import
TokenizerGroup
from
vllm.transformers_utils.tokenizer_group
import
TokenizerGroup
# export HF_ENDPOINT=https://hf-mirror.com
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_tokenizer_group
():
async
def
test_tokenizer_group
():
reference_tokenizer
=
AutoTokenizer
.
from_pretrained
(
os
.
path
.
join
(
models_path_prefix
,
"gpt2"
))
# reference_tokenizer = AutoTokenizer.from_pretrained(os.path.join(models_path_prefix, "gpt2"))
reference_tokenizer
=
AutoTokenizer
.
from_pretrained
(
"gpt2"
)
tokenizer_group
=
TokenizerGroup
(
tokenizer_group
=
TokenizerGroup
(
tokenizer_id
=
os
.
path
.
join
(
models_path_prefix
,
"gpt2"
),
#
tokenizer_id=os.path.join(models_path_prefix, "gpt2"),
enable_lora
=
False
,
enable_lora
=
False
,
max_num_seqs
=
1
,
max_num_seqs
=
1
,
max_input_length
=
None
,
max_input_length
=
None
,
...
...
tests/v1/core/test_scheduler.py
View file @
8e340b4f
This diff is collapsed.
Click to expand it.
tests/v1/e2e/test_correctness_sliding_window.py
View file @
8e340b4f
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
import
os
import
pytest
import
pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
...core.block.e2e.test_correctness_sliding_window
import
(
check_answers
,
from
...core.block.e2e.test_correctness_sliding_window
import
(
check_answers
,
prep_prompts
)
prep_prompts
)
from
...utils
import
models_path_prefix
@
dataclass
@
dataclass
...
@@ -16,16 +18,16 @@ class TestConfig:
...
@@ -16,16 +18,16 @@ class TestConfig:
model_config
=
{
model_config
=
{
"bigcode/starcoder2-3b"
:
TestConfig
(
4096
,
(
800
,
1100
)),
os
.
path
.
join
(
models_path_prefix
,
"bigcode/starcoder2-3b"
)
:
TestConfig
(
4096
,
(
800
,
1100
)),
"google/gemma-2-2b-it"
:
TestConfig
(
4096
,
(
400
,
800
)),
os
.
path
.
join
(
models_path_prefix
,
"google/gemma-2-2b-it"
)
:
TestConfig
(
4096
,
(
400
,
800
)),
}
}
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"model"
,
"model"
,
[
[
"bigcode/starcoder2-3b"
,
# sliding window only
os
.
path
.
join
(
models_path_prefix
,
"bigcode/starcoder2-3b"
)
,
# sliding window only
"google/gemma-2-2b-it"
,
# sliding window + full attention
os
.
path
.
join
(
models_path_prefix
,
"google/gemma-2-2b-it"
)
,
# sliding window + full attention
])
])
@
pytest
.
mark
.
parametrize
(
"batch_size"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"batch_size"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"seed"
,
[
1
])
@
pytest
.
mark
.
parametrize
(
"seed"
,
[
1
])
...
...
tests/v1/e2e/test_spec_decode.py
View file @
8e340b4f
...
@@ -4,9 +4,11 @@ from __future__ import annotations
...
@@ -4,9 +4,11 @@ from __future__ import annotations
import
random
import
random
from
typing
import
Any
from
typing
import
Any
import
os
import
pytest
import
pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
...utils
import
models_path_prefix
@
pytest
.
fixture
@
pytest
.
fixture
...
@@ -49,14 +51,17 @@ def sampling_config():
...
@@ -49,14 +51,17 @@ def sampling_config():
@
pytest
.
fixture
@
pytest
.
fixture
def
model_name
():
def
model_name
():
# return os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct")
return
"meta-llama/Llama-3.1-8B-Instruct"
return
"meta-llama/Llama-3.1-8B-Instruct"
def
eagle_model_name
():
def
eagle_model_name
():
# return os.path.join(models_path_prefix, "yuhuili/EAGLE-LLaMA3.1-Instruct-8B")
return
"yuhuili/EAGLE-LLaMA3.1-Instruct-8B"
return
"yuhuili/EAGLE-LLaMA3.1-Instruct-8B"
def
eagle3_model_name
():
def
eagle3_model_name
():
# return os.path.join(models_path_prefix, "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B")
return
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
return
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
...
...
tests/v1/e2e/test_cascade_attention.py
→
tests/v1/e2e/
un
test_cascade_attention.py
View file @
8e340b4f
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
os
import
pytest
import
pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
...utils
import
fork_new_process_for_each_test
from
...utils
import
fork_new_process_for_each_test
,
models_path_prefix
@
fork_new_process_for_each_test
@
fork_new_process_for_each_test
@
pytest
.
mark
.
parametrize
(
"attn_backend"
,
@
pytest
.
mark
.
parametrize
(
"attn_backend"
,
[
"FLASH_ATTN_VLLM_V1"
,
"FLASHINFER_VLLM_V1"
])
[
"FLASH_ATTN_VLLM_V1"
])
#
"FLASHINFER_VLLM_V1"
def
test_cascade_attention
(
example_system_message
,
monkeypatch
,
attn_backend
):
def
test_cascade_attention
(
example_system_message
,
monkeypatch
,
attn_backend
):
prompt
=
"
\n
<User>: Implement fibonacci sequence in Python.
\n
<Claude>:"
prompt
=
"
\n
<User>: Implement fibonacci sequence in Python.
\n
<Claude>:"
...
@@ -17,7 +18,7 @@ def test_cascade_attention(example_system_message, monkeypatch, attn_backend):
...
@@ -17,7 +18,7 @@ def test_cascade_attention(example_system_message, monkeypatch, attn_backend):
m
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
m
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
m
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
attn_backend
)
m
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
attn_backend
)
llm
=
LLM
(
model
=
"Qwen/Qwen2-1.5B-Instruct"
)
llm
=
LLM
(
model
=
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-1.5B-Instruct"
)
)
sampling_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
100
)
sampling_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
100
)
# No cascade attention.
# No cascade attention.
...
...
tests/v1/engine/test_llm_engine.py
View file @
8e340b4f
...
@@ -3,11 +3,13 @@
...
@@ -3,11 +3,13 @@
import
random
import
random
from
typing
import
Optional
from
typing
import
Optional
import
os
import
pytest
import
pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
...utils
import
models_path_prefix
MODEL
=
"facebook/opt-125m"
MODEL
=
os
.
path
.
join
(
models_path_prefix
,
"facebook/opt-125m"
)
DTYPE
=
"half"
DTYPE
=
"half"
...
...
tests/v1/engine/test_output_processor.py
View file @
8e340b4f
...
@@ -20,6 +20,7 @@ from vllm.v1.engine import EngineCoreRequest
...
@@ -20,6 +20,7 @@ from vllm.v1.engine import EngineCoreRequest
from
vllm.v1.engine.output_processor
import
(
OutputProcessor
,
from
vllm.v1.engine.output_processor
import
(
OutputProcessor
,
RequestOutputCollector
)
RequestOutputCollector
)
from
vllm.v1.metrics.stats
import
IterationStats
from
vllm.v1.metrics.stats
import
IterationStats
from
...utils
import
models_path_prefix
def
_ref_convert_id_to_token
(
def
_ref_convert_id_to_token
(
...
@@ -520,7 +521,7 @@ def test_stop_token(include_stop_str_in_output: bool,
...
@@ -520,7 +521,7 @@ def test_stop_token(include_stop_str_in_output: bool,
dummy_test_vectors: dummy engine core outputs and other data structures
dummy_test_vectors: dummy engine core outputs and other data structures
"""
"""
model_id
=
dummy_test_vectors
.
tokenizer
.
name_or_path
model_id
=
dummy_test_vectors
.
tokenizer
.
name_or_path
if
model_id
!=
'meta-llama/Llama-3.2-1B'
:
if
model_id
!=
os
.
path
.
join
(
models_path_prefix
,
'meta-llama/Llama-3.2-1B'
)
:
raise
AssertionError
(
"Test requires meta-llama/Llama-3.2-1B but "
raise
AssertionError
(
"Test requires meta-llama/Llama-3.2-1B but "
f
"
{
model_id
}
is in use."
)
f
"
{
model_id
}
is in use."
)
do_logprobs
=
num_sample_logprobs
is
not
None
do_logprobs
=
num_sample_logprobs
is
not
None
...
...
tests/v1/entrypoints/llm/test_struct_output_generate.py
View file @
8e340b4f
...
@@ -7,6 +7,7 @@ import re
...
@@ -7,6 +7,7 @@ import re
from
enum
import
Enum
from
enum
import
Enum
from
typing
import
Any
from
typing
import
Any
import
os
import
jsonschema
import
jsonschema
import
pytest
import
pytest
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
...
@@ -15,22 +16,23 @@ from vllm.entrypoints.llm import LLM
...
@@ -15,22 +16,23 @@ from vllm.entrypoints.llm import LLM
from
vllm.outputs
import
RequestOutput
from
vllm.outputs
import
RequestOutput
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.sampling_params
import
GuidedDecodingParams
,
SamplingParams
from
vllm.sampling_params
import
GuidedDecodingParams
,
SamplingParams
from
....utils
import
models_path_prefix
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE
=
[
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE
=
[
(
"mistralai/Ministral-8B-Instruct-2410"
,
"xgrammar:disable-any-whitespace"
,
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Ministral-8B-Instruct-2410"
)
,
"xgrammar:disable-any-whitespace"
,
"auto"
),
"auto"
),
(
"mistralai/Ministral-8B-Instruct-2410"
,
"guidance:disable-any-whitespace"
,
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Ministral-8B-Instruct-2410"
)
,
"guidance:disable-any-whitespace"
,
"auto"
),
"auto"
),
(
"mistralai/Ministral-8B-Instruct-2410"
,
"xgrammar:disable-any-whitespace"
,
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Ministral-8B-Instruct-2410"
)
,
"xgrammar:disable-any-whitespace"
,
"mistral"
),
"mistral"
),
(
"Qwen/Qwen2.5-1.5B-Instruct"
,
"xgrammar:disable-any-whitespace"
,
"auto"
),
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2.5-1.5B-Instruct"
)
,
"xgrammar:disable-any-whitespace"
,
"auto"
),
#FIXME: This test is flaky on CI thus disabled
#FIXME: This test is flaky on CI thus disabled
#("Qwen/Qwen2.5-1.5B-Instruct", "guidance:disable-any-whitespace", "auto"),
#("Qwen/Qwen2.5-1.5B-Instruct", "guidance:disable-any-whitespace", "auto"),
]
]
PARAMS_MODELS_TOKENIZER_MODE
=
[
PARAMS_MODELS_TOKENIZER_MODE
=
[
(
"mistralai/Ministral-8B-Instruct-2410"
,
"auto"
),
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Ministral-8B-Instruct-2410"
)
,
"auto"
),
(
"Qwen/Qwen2.5-1.5B-Instruct"
,
"auto"
),
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2.5-1.5B-Instruct"
)
,
"auto"
),
]
]
...
...
tests/v1/sample/test_logprobs.py
View file @
8e340b4f
...
@@ -3,6 +3,7 @@
...
@@ -3,6 +3,7 @@
import
itertools
import
itertools
from
collections.abc
import
Generator
from
collections.abc
import
Generator
import
os
import
pytest
import
pytest
import
torch
import
torch
...
@@ -13,8 +14,9 @@ from tests.v1.sample.utils import (
...
@@ -13,8 +14,9 @@ from tests.v1.sample.utils import (
from
vllm
import
SamplingParams
from
vllm
import
SamplingParams
from
...conftest
import
HfRunner
,
VllmRunner
from
...conftest
import
HfRunner
,
VllmRunner
from
...utils
import
models_path_prefix
MODEL
=
"meta-llama/Llama-3.2-1B-Instruct"
MODEL
=
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.2-1B-Instruct"
)
DTYPE
=
"half"
DTYPE
=
"half"
NONE
=
BatchLogprobsComposition
.
NONE
NONE
=
BatchLogprobsComposition
.
NONE
...
...
tests/v1/sample/test_logprobs_e2e.py
View file @
8e340b4f
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
os
import
lm_eval
import
lm_eval
from
...utils
import
RemoteOpenAIServer
from
...utils
import
RemoteOpenAIServer
,
models_path_prefix
# arc-easy uses prompt_logprobs=1, logprobs=1
# arc-easy uses prompt_logprobs=1, logprobs=1
TASK
=
"arc_easy"
TASK
=
"arc_easy"
...
@@ -11,7 +12,7 @@ RTOL = 0.03
...
@@ -11,7 +12,7 @@ RTOL = 0.03
EXPECTED_VALUE
=
0.62
EXPECTED_VALUE
=
0.62
# FIXME(rob): enable prefix caching once supported.
# FIXME(rob): enable prefix caching once supported.
MODEL
=
"meta-llama/Llama-3.2-1B-Instruct"
MODEL
=
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.2-1B-Instruct"
)
MODEL_ARGS
=
f
"pretrained=
{
MODEL
}
,enforce_eager=True,enable_prefix_caching=False"
# noqa: E501
MODEL_ARGS
=
f
"pretrained=
{
MODEL
}
,enforce_eager=True,enable_prefix_caching=False"
# noqa: E501
SERVER_ARGS
=
[
SERVER_ARGS
=
[
"--enforce_eager"
,
"--no_enable_prefix_caching"
,
"--disable-log-requests"
"--enforce_eager"
,
"--no_enable_prefix_caching"
,
"--disable-log-requests"
...
...
tests/v1/sample/test_sampling_params_e2e.py
View file @
8e340b4f
...
@@ -4,11 +4,12 @@ import os
...
@@ -4,11 +4,12 @@ import os
import
pytest
import
pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
...utils
import
models_path_prefix
if
os
.
getenv
(
"VLLM_USE_V1"
,
"0"
)
!=
"1"
:
if
os
.
getenv
(
"VLLM_USE_V1"
,
"0"
)
!=
"1"
:
pytest
.
skip
(
"Test package requires V1"
,
allow_module_level
=
True
)
pytest
.
skip
(
"Test package requires V1"
,
allow_module_level
=
True
)
MODEL
=
"meta-llama/Llama-3.2-1B"
MODEL
=
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.2-1B"
)
PROMPT
=
"Hello my name is Robert and I"
PROMPT
=
"Hello my name is Robert and I"
...
...
tests/v1/spec_decode/test_max_len.py
→
tests/v1/spec_decode/
un
test_max_len.py
View file @
8e340b4f
File moved
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment