Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a64a8443
Unverified
Commit
a64a8443
authored
Feb 20, 2025
by
Kevin H. Luu
Committed by
GitHub
Feb 20, 2025
Browse files
[2/n][ci] S3: Use full model path (#13564)
Signed-off-by: <>
parent
aa1e62d0
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
26 additions
and
19 deletions
+26
-19
tests/basic_correctness/test_cumem.py
tests/basic_correctness/test_cumem.py
+1
-1
tests/conftest.py
tests/conftest.py
+1
-2
tests/engine/test_computed_prefix_blocks.py
tests/engine/test_computed_prefix_blocks.py
+2
-1
tests/engine/test_detokenization.py
tests/engine/test_detokenization.py
+2
-1
tests/engine/test_executor.py
tests/engine/test_executor.py
+8
-4
tests/engine/test_skip_tokenizer_init.py
tests/engine/test_skip_tokenizer_init.py
+2
-1
tests/test_config.py
tests/test_config.py
+7
-6
tests/test_regression.py
tests/test_regression.py
+3
-3
No files found.
tests/basic_correctness/test_cumem.py
View file @
a64a8443
...
...
@@ -121,7 +121,7 @@ def test_cumem_with_cudagraph():
"model, use_v1"
,
[
# sleep mode with safetensors
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/Llama-3.2-1B"
,
True
),
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/
meta-llama/
Llama-3.2-1B"
,
True
),
# sleep mode with pytorch checkpoint
(
"facebook/opt-125m"
,
False
),
])
...
...
tests/conftest.py
View file @
a64a8443
...
...
@@ -746,8 +746,7 @@ class VllmRunner:
**
kwargs
,
)
->
None
:
if
model_name
in
MODELS_ON_S3
and
not
load_format
:
model_name
=
(
f
"s3://vllm-ci-model-weights/"
f
"
{
model_name
.
split
(
'/'
)[
-
1
]
}
"
)
model_name
=
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/
{
model_name
}
"
)
load_format
=
LoadFormat
.
RUNAI_STREAMER
if
not
load_format
:
load_format
=
LoadFormat
.
AUTO
...
...
tests/engine/test_computed_prefix_blocks.py
View file @
a64a8443
...
...
@@ -10,7 +10,8 @@ from vllm.sampling_params import SamplingParams
from
..conftest
import
MODEL_WEIGHTS_S3_BUCKET
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilgpt2"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
])
@
pytest
.
mark
.
parametrize
(
"block_size"
,
[
16
])
def
test_computed_prefix_blocks
(
model
:
str
,
block_size
:
int
):
# This test checks if we are able to run the engine to completion
...
...
tests/engine/test_detokenization.py
View file @
a64a8443
...
...
@@ -9,7 +9,8 @@ from vllm.sampling_params import SamplingParams
from
..conftest
import
MODEL_WEIGHTS_S3_BUCKET
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilgpt2"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
])
def
test_computed_prefix_blocks
(
model
:
str
):
# This test checks if the engine generates completions both with and
# without optional detokenization, that detokenization includes text
...
...
tests/engine/test_executor.py
View file @
a64a8443
...
...
@@ -38,7 +38,8 @@ class CustomUniExecutor(UniProcExecutor):
CustomUniExecutorAsync
=
CustomUniExecutor
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilgpt2"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
])
def
test_custom_executor_type_checking
(
model
):
with
pytest
.
raises
(
ValueError
):
engine_args
=
EngineArgs
(
model
=
model
,
...
...
@@ -51,7 +52,8 @@ def test_custom_executor_type_checking(model):
AsyncLLMEngine
.
from_engine_args
(
engine_args
)
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilgpt2"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
])
def
test_custom_executor
(
model
,
tmp_path
):
cwd
=
os
.
path
.
abspath
(
"."
)
os
.
chdir
(
tmp_path
)
...
...
@@ -75,7 +77,8 @@ def test_custom_executor(model, tmp_path):
os
.
chdir
(
cwd
)
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilgpt2"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
])
def
test_custom_executor_async
(
model
,
tmp_path
):
cwd
=
os
.
path
.
abspath
(
"."
)
os
.
chdir
(
tmp_path
)
...
...
@@ -103,7 +106,8 @@ def test_custom_executor_async(model, tmp_path):
os
.
chdir
(
cwd
)
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilgpt2"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
])
def
test_respect_ray
(
model
):
# even for TP=1 and PP=1,
# if users specify ray, we should use ray.
...
...
tests/engine/test_skip_tokenizer_init.py
View file @
a64a8443
...
...
@@ -9,7 +9,8 @@ from vllm.sampling_params import SamplingParams
from
..conftest
import
MODEL_WEIGHTS_S3_BUCKET
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilgpt2"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
[
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
])
def
test_skip_tokenizer_initialization
(
model
:
str
):
# This test checks if the flag skip_tokenizer_init skips the initialization
# of tokenizer and detokenizer. The generated output is expected to contain
...
...
tests/test_config.py
View file @
a64a8443
...
...
@@ -14,13 +14,14 @@ from .conftest import MODEL_WEIGHTS_S3_BUCKET
@
pytest
.
mark
.
parametrize
(
(
"model_id"
,
"expected_runner_type"
,
"expected_task"
),
[
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilgpt2"
,
"generate"
,
"generate"
),
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/e5-mistral-7b-instruct"
,
"pooling"
,
"embed"
),
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/Qwen2.5-1.5B-apeach"
,
"pooling"
,
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
,
"generate"
,
"generate"
),
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/intfloat/e5-mistral-7b-instruct"
,
"pooling"
,
"embed"
),
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/jason9693/Qwen2.5-1.5B-apeach"
,
"pooling"
,
"classify"
),
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/ms-marco-MiniLM-L-6-v2"
,
"pooling"
,
"score"
),
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/
cross-encoder/
ms-marco-MiniLM-L-6-v2"
,
"pooling"
,
"score"
),
(
"Qwen/Qwen2.5-Math-RM-72B"
,
"pooling"
,
"reward"
),
(
"openai/whisper-small"
,
"transcription"
,
"transcription"
),
],
...
...
tests/test_regression.py
View file @
a64a8443
...
...
@@ -21,7 +21,7 @@ def test_duplicated_ignored_sequence_group():
sampling_params
=
SamplingParams
(
temperature
=
0.01
,
top_p
=
0.1
,
max_tokens
=
256
)
llm
=
LLM
(
model
=
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilgpt2"
,
llm
=
LLM
(
model
=
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/
distilbert/
distilgpt2"
,
load_format
=
LoadFormat
.
RUNAI_STREAMER
,
max_num_batched_tokens
=
4096
,
tensor_parallel_size
=
1
)
...
...
@@ -35,7 +35,7 @@ def test_max_tokens_none():
sampling_params
=
SamplingParams
(
temperature
=
0.01
,
top_p
=
0.1
,
max_tokens
=
None
)
llm
=
LLM
(
model
=
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilgpt2"
,
llm
=
LLM
(
model
=
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/
distilbert/
distilgpt2"
,
load_format
=
LoadFormat
.
RUNAI_STREAMER
,
max_num_batched_tokens
=
4096
,
tensor_parallel_size
=
1
)
...
...
@@ -46,7 +46,7 @@ def test_max_tokens_none():
def
test_gc
():
llm
=
LLM
(
model
=
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilgpt2"
,
llm
=
LLM
(
model
=
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/
distilbert/
distilgpt2"
,
load_format
=
LoadFormat
.
RUNAI_STREAMER
,
enforce_eager
=
True
)
del
llm
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment