Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
696b01af
Unverified
Commit
696b01af
authored
Oct 21, 2024
by
Cyrus Leung
Committed by
GitHub
Oct 20, 2024
Browse files
[CI/Build] Split up decoder-only LM tests (#9488)
Co-authored-by:
Nick Hill
<
nickhill@us.ibm.com
>
parent
855e0e6f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
18 additions
and
57 deletions
+18
-57
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+11
-2
tests/models/decoder_only/language/test_big_models.py
tests/models/decoder_only/language/test_big_models.py
+7
-3
tests/models/decoder_only/language/test_danube3_4b.py
tests/models/decoder_only/language/test_danube3_4b.py
+0
-52
No files found.
.buildkite/test-pipeline.yaml
View file @
696b01af
...
@@ -310,13 +310,22 @@ steps:
...
@@ -310,13 +310,22 @@ steps:
-
pytest -v -s models/test_oot_registration.py
# it needs a clean process
-
pytest -v -s models/test_oot_registration.py
# it needs a clean process
-
pytest -v -s models/*.py --ignore=models/test_oot_registration.py
-
pytest -v -s models/*.py --ignore=models/test_oot_registration.py
-
label
:
Decoder-only Language Models Test
# 1h36
min
-
label
:
Decoder-only Language Models Test
(Standard)
# 35
min
#mirror_hardwares: [amd]
#mirror_hardwares: [amd]
source_file_dependencies
:
source_file_dependencies
:
-
vllm/
-
vllm/
-
tests/models/decoder_only/language
-
tests/models/decoder_only/language
commands
:
commands
:
-
pytest -v -s models/decoder_only/language
-
pytest -v -s models/decoder_only/language/test_models.py
-
pytest -v -s models/decoder_only/language/test_big_models.py
-
label
:
Decoder-only Language Models Test (Extended)
# 1h20min
nightly
:
true
source_file_dependencies
:
-
vllm/
-
tests/models/decoder_only/language
commands
:
-
pytest -v -s models/decoder_only/language --ignore=models/decoder_only/language/test_models.py --ignore=models/decoder_only/language/test_big_models.py
-
label
:
Decoder-only Multi-Modal Models Test
# 1h31min
-
label
:
Decoder-only Multi-Modal Models Test
# 1h31min
#mirror_hardwares: [amd]
#mirror_hardwares: [amd]
...
...
tests/models/decoder_only/language/test_big_models.py
View file @
696b01af
...
@@ -21,10 +21,14 @@ MODELS = [
...
@@ -21,10 +21,14 @@ MODELS = [
]
]
if
not
current_platform
.
is_cpu
():
if
not
current_platform
.
is_cpu
():
# MiniCPM requires fused_moe which is not supported by CPU
MODELS
+=
[
MODELS
.
append
(
"openbmb/MiniCPM3-4B"
)
# fused_moe is not supported on CPU
"openbmb/MiniCPM3-4B"
,
# Head size isn't supported on CPU
"h2oai/h2o-danube3-4b-base"
,
]
#TODO: remove this after CPU float16 support is ready
#
TODO: remove this after CPU float16 support is ready
target_dtype
=
"float"
if
current_platform
.
is_cpu
()
else
"half"
target_dtype
=
"float"
if
current_platform
.
is_cpu
()
else
"half"
...
...
tests/models/decoder_only/language/test_danube3_4b.py
deleted
100644 → 0
View file @
855e0e6f
"""Compare the outputs of HF and vLLM when using greedy sampling.
This tests danube3 separately because its head size isn't supported on CPU yet.
Run `pytest tests/models/decoder_only/language/test_danube3_4b.py`.
"""
import
pytest
from
...utils
import
check_outputs_equal
# The single checkpoint exercised by this module.
MODELS = ["h2oai/h2o-danube3-4b-base"]

# dtype passed to every parametrized test below.
target_dtype = "half"


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_tokens", [32])
def test_models(
    hf_runner,
    vllm_runner,
    example_prompts,
    model: str,
    dtype: str,
    max_tokens: int,
) -> None:
    """Compare greedy generations from the HF runner and the vLLM runner.

    Both runners are opened with the same ``model`` and ``dtype``, asked for
    ``max_tokens`` greedy tokens on ``example_prompts``, and the two result
    lists are handed to ``check_outputs_equal`` labelled "hf" and "vllm".
    """
    # Reference generations from the HF runner.
    with hf_runner(model, dtype=dtype) as reference:
        hf_outputs = reference.generate_greedy(example_prompts, max_tokens)

    # Generations from the vLLM runner on the same prompts.
    with vllm_runner(model, dtype=dtype) as engine:
        vllm_outputs = engine.generate_greedy(example_prompts, max_tokens)

    check_outputs_equal(
        outputs_0_lst=hf_outputs,
        outputs_1_lst=vllm_outputs,
        name_0="hf",
        name_1="vllm",
    )
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", [target_dtype])
def test_model_print(
    vllm_runner,
    model: str,
    dtype: str,
) -> None:
    """Print the model object held by the vLLM runner's driver worker.

    The test passes as long as building the runner and printing the model
    raise no exception; nothing is asserted about the printed text.
    """
    with vllm_runner(model, dtype=dtype) as vllm_model:
        # This test is for verifying whether the model's extra_repr
        # can be printed correctly.
        driver_worker = vllm_model.model.llm_engine.model_executor.driver_worker
        loaded_model = driver_worker.model_runner.model
        print(loaded_model)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment