Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cff6a1fe
Unverified
Commit
cff6a1fe
authored
Jun 30, 2024
by
Cyrus Leung
Committed by
GitHub
Jun 30, 2024
Browse files
[CI/Build] Reuse code for checking output consistency (#5988)
parent
bcc6a09b
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
125 additions
and
75 deletions
+125
-75
tests/basic_correctness/test_basic_correctness.py
tests/basic_correctness/test_basic_correctness.py
+8
-7
tests/basic_correctness/test_chunked_prefill.py
tests/basic_correctness/test_chunked_prefill.py
+8
-7
tests/basic_correctness/test_preemption.py
tests/basic_correctness/test_preemption.py
+9
-7
tests/distributed/test_basic_distributed_correctness.py
tests/distributed/test_basic_distributed_correctness.py
+8
-7
tests/distributed/test_chunked_prefill_distributed.py
tests/distributed/test_chunked_prefill_distributed.py
+8
-7
tests/models/test_big_models.py
tests/models/test_big_models.py
+8
-7
tests/models/test_llava.py
tests/models/test_llava.py
+10
-8
tests/models/test_llava_next.py
tests/models/test_llava_next.py
+10
-8
tests/models/test_models.py
tests/models/test_models.py
+8
-7
tests/models/test_phi3v.py
tests/models/test_phi3v.py
+10
-8
tests/models/utils.py
tests/models/utils.py
+38
-2
No files found.
tests/basic_correctness/test_basic_correctness.py
View file @
cff6a1fe
...
...
@@ -8,6 +8,8 @@ import pytest
from
vllm
import
LLM
from
..models.utils
import
check_outputs_equal
MODELS
=
[
"facebook/opt-125m"
,
"meta-llama/Llama-2-7b-hf"
,
...
...
@@ -46,10 +48,9 @@ def test_models(
gpu_memory_utilization
=
0.7
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
generate_greedy
(
example_prompts
,
max_tokens
)
for
i
in
range
(
len
(
example_prompts
)):
hf_output_ids
,
hf_output_str
=
hf_outputs
[
i
]
vllm_output_ids
,
vllm_output_str
=
vllm_outputs
[
i
]
assert
hf_output_str
==
vllm_output_str
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
check_outputs_equal
(
outputs_0_lst
=
hf_outputs
,
outputs_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
tests/basic_correctness/test_chunked_prefill.py
View file @
cff6a1fe
...
...
@@ -8,6 +8,8 @@ Run `pytest tests/models/test_chunked_prefill.py`.
"""
import
pytest
from
..models.utils
import
check_outputs_equal
MODELS
=
[
"facebook/opt-125m"
,
"meta-llama/Llama-2-7b-hf"
,
...
...
@@ -54,10 +56,9 @@ def test_models(
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
generate_greedy
(
example_prompts
,
max_tokens
)
for
i
in
range
(
len
(
example_prompts
)):
hf_output_ids
,
hf_output_str
=
hf_outputs
[
i
]
vllm_output_ids
,
vllm_output_str
=
vllm_outputs
[
i
]
assert
hf_output_str
==
vllm_output_str
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
check_outputs_equal
(
outputs_0_lst
=
hf_outputs
,
outputs_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
tests/basic_correctness/test_preemption.py
View file @
cff6a1fe
...
...
@@ -12,6 +12,8 @@ from vllm import SamplingParams
from
vllm.core.scheduler
import
(
ARTIFICIAL_PREEMPTION_MAX_CNT
,
ENABLE_ARTIFICIAL_PREEMPT
)
from
..models.utils
import
check_outputs_equal
MODELS
=
[
"facebook/opt-125m"
,
]
...
...
@@ -94,13 +96,13 @@ def test_preemption(
total_preemption
=
(
vllm_model
.
model
.
llm_engine
.
scheduler
.
num_cumulative_preemption
)
for
i
in
range
(
len
(
example_prompts
)):
hf_
output
_ids
,
hf_output_str
=
hf_outputs
[
i
]
vllm_
output
_ids
,
vllm_output_str
=
vllm_outputs
[
i
]
assert
hf_output_str
==
vllm_output_str
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
check_outputs_equal
(
output
s_0_lst
=
hf_outputs
,
output
s_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
assert
(
"is preempted by PreemptionMode.RECOMPUTE mode because there "
"is not enough KV cache space."
in
caplog_vllm
.
text
)
# Ensure the count bucket of request-level histogram metrics matches
...
...
tests/distributed/test_basic_distributed_correctness.py
View file @
cff6a1fe
...
...
@@ -17,6 +17,8 @@ import os
import
pytest
import
torch
from
..models.utils
import
check_outputs_equal
MODELS
=
[
os
.
environ
[
"TEST_DIST_MODEL"
],
]
...
...
@@ -48,10 +50,9 @@ def test_models(
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
generate_greedy
(
example_prompts
,
max_tokens
)
for
i
in
range
(
len
(
example_prompts
)):
hf_output_ids
,
hf_output_str
=
hf_outputs
[
i
]
vllm_output_ids
,
vllm_output_str
=
vllm_outputs
[
i
]
assert
hf_output_str
==
vllm_output_str
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
check_outputs_equal
(
outputs_0_lst
=
hf_outputs
,
outputs_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
tests/distributed/test_chunked_prefill_distributed.py
View file @
cff6a1fe
...
...
@@ -16,6 +16,8 @@ import os
import
pytest
import
torch
from
..models.utils
import
check_outputs_equal
MODELS
=
[
os
.
environ
[
"TEST_DIST_MODEL"
],
]
...
...
@@ -59,10 +61,9 @@ def test_models(
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
generate_greedy
(
example_prompts
,
max_tokens
)
for
i
in
range
(
len
(
example_prompts
)):
hf_output_ids
,
hf_output_str
=
hf_outputs
[
i
]
vllm_output_ids
,
vllm_output_str
=
vllm_outputs
[
i
]
assert
hf_output_str
==
vllm_output_str
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
check_outputs_equal
(
outputs_0_lst
=
hf_outputs
,
outputs_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
tests/models/test_big_models.py
View file @
cff6a1fe
...
...
@@ -7,6 +7,8 @@ Run `pytest tests/models/test_big_models.py`.
import
pytest
import
torch
from
.utils
import
check_outputs_equal
MODELS
=
[
"meta-llama/Llama-2-7b-hf"
,
# "mistralai/Mistral-7B-v0.1", # Tested by test_mistral.py
...
...
@@ -40,13 +42,12 @@ def test_models(
with
vllm_runner
(
model
,
dtype
=
dtype
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
generate_greedy
(
example_prompts
,
max_tokens
)
for
i
in
range
(
len
(
example_prompts
)):
hf_output_ids
,
hf_output_str
=
hf_outputs
[
i
]
vllm_output_ids
,
vllm_output_str
=
vllm_outputs
[
i
]
assert
hf_output_str
==
vllm_output_str
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
check_outputs_equal
(
outputs_0_lst
=
hf_outputs
,
outputs_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
...
...
tests/models/test_llava.py
View file @
cff6a1fe
...
...
@@ -6,6 +6,7 @@ from transformers import AutoTokenizer
from
vllm.config
import
VisionLanguageConfig
from
..conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
.utils
import
check_outputs_equal
pytestmark
=
pytest
.
mark
.
vlm
...
...
@@ -109,14 +110,15 @@ def run_test(
max_tokens
,
images
=
vllm_images
)
for
i
in
range
(
len
(
HF_IMAGE_PROMPTS
)):
hf_output_ids
,
hf_output_str
=
hf_outputs
[
i
]
vllm_output_ids
,
vllm_output_str
=
vllm_to_hf_output
(
vllm_outputs
[
i
],
vlm_config
,
model_id
)
assert
hf_output_str
==
vllm_output_str
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
check_outputs_equal
(
hf_outputs
,
[
vllm_to_hf_output
(
vllm_output
,
vlm_config
,
model_id
)
for
vllm_output
in
vllm_outputs
],
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
@
pytest
.
mark
.
parametrize
(
"model_and_config"
,
model_and_vl_config
)
...
...
tests/models/test_llava_next.py
View file @
cff6a1fe
...
...
@@ -6,6 +6,7 @@ from transformers import AutoTokenizer
from
vllm.config
import
VisionLanguageConfig
from
..conftest
import
IMAGE_ASSETS
from
.utils
import
check_outputs_equal
pytestmark
=
pytest
.
mark
.
vlm
...
...
@@ -115,11 +116,12 @@ def test_models(hf_runner, vllm_runner, image_assets, model_and_config,
max_tokens
,
images
=
vllm_images
)
for
i
in
range
(
len
(
HF_IMAGE_PROMPTS
)):
hf_output_ids
,
hf_output_str
=
hf_outputs
[
i
]
vllm_output_ids
,
vllm_output_str
=
vllm_to_hf_output
(
vllm_outputs
[
i
],
vlm_config
,
model_id
)
assert
hf_output_str
==
vllm_output_str
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
check_outputs_equal
(
hf_outputs
,
[
vllm_to_hf_output
(
vllm_output
,
vlm_config
,
model_id
)
for
vllm_output
in
vllm_outputs
],
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
tests/models/test_models.py
View file @
cff6a1fe
...
...
@@ -7,6 +7,8 @@ Run `pytest tests/models/test_models.py`.
"""
import
pytest
from
.utils
import
check_outputs_equal
MODELS
=
[
"facebook/opt-125m"
,
"gpt2"
,
...
...
@@ -41,13 +43,12 @@ def test_models(
with
vllm_runner
(
model
,
dtype
=
dtype
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
generate_greedy
(
example_prompts
,
max_tokens
)
for
i
in
range
(
len
(
example_prompts
)):
hf_output_ids
,
hf_output_str
=
hf_outputs
[
i
]
vllm_output_ids
,
vllm_output_str
=
vllm_outputs
[
i
]
assert
hf_output_str
==
vllm_output_str
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
check_outputs_equal
(
outputs_0_lst
=
hf_outputs
,
outputs_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
...
...
tests/models/test_phi3v.py
View file @
cff6a1fe
...
...
@@ -7,6 +7,7 @@ from vllm.config import VisionLanguageConfig
from
vllm.utils
import
is_cpu
from
..conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
.utils
import
check_outputs_equal
pytestmark
=
pytest
.
mark
.
vlm
...
...
@@ -124,14 +125,15 @@ def run_test(
max_tokens
,
images
=
vllm_images
)
for
i
in
range
(
len
(
HF_IMAGE_PROMPTS
)):
hf_output_ids
,
hf_output_str
=
hf_outputs
[
i
]
vllm_output_ids
,
vllm_output_str
=
vllm_to_hf_output
(
vllm_outputs
[
i
],
vlm_config
,
model_id
)
assert
hf_output_str
==
vllm_output_str
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
check_outputs_equal
(
hf_outputs
,
[
vllm_to_hf_output
(
vllm_output
,
vlm_config
,
model_id
)
for
vllm_output
in
vllm_outputs
],
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
# Since we use _attn_implementation="eager" for hf_runner, here is
...
...
tests/models/utils.py
View file @
cff6a1fe
def
check_logprobs_close
(
outputs_0_lst
,
outputs_1_lst
,
name_0
,
name_1
):
"""Compare the logprobs of two sequences generated by different models,
from
typing
import
Dict
,
List
,
Tuple
# A single generation result: (output token ids, decoded output text).
TokensText = Tuple[List[int], str]


def check_outputs_equal(outputs_0_lst: List[TokensText],
                        outputs_1_lst: List[TokensText], name_0: str,
                        name_1: str):
    """
    Compare the two sequences generated by different models,
    which should be equal.

    Args:
        outputs_0_lst: Per-prompt ``(token_ids, text)`` pairs from the first
            model.
        outputs_1_lst: Per-prompt ``(token_ids, text)`` pairs from the second
            model, in the same prompt order.
        name_0: Label for the first model in failure messages.
        name_1: Label for the second model in failure messages.

    Raises:
        AssertionError: If the lists differ in length, or if the text or the
            token ids of any prompt differ between the two models.
    """
    assert len(outputs_0_lst) == len(outputs_1_lst), (
        f"Number of outputs differs:"
        f"\n{name_0}:\t{len(outputs_0_lst)}"
        f"\n{name_1}:\t{len(outputs_1_lst)}")

    for prompt_idx, (outputs_0,
                     outputs_1) in enumerate(zip(outputs_0_lst,
                                                 outputs_1_lst)):
        output_ids_0, output_str_0 = outputs_0
        output_ids_1, output_str_1 = outputs_1

        # Text is checked first: it is the more readable of the two diffs.
        assert output_str_0 == output_str_1, (f"Test{prompt_idx}:"
                                              f"\n{name_0}:\t{output_str_0!r}"
                                              f"\n{name_1}:\t{output_str_1!r}")
        # Report the token ids here, not the text: this assertion is only
        # reached when the texts already match, so printing the strings again
        # would hide the actual difference.
        assert output_ids_0 == output_ids_1, (f"Test{prompt_idx}:"
                                              f"\n{name_0}:\t{output_ids_0}"
                                              f"\n{name_1}:\t{output_ids_1}")
TokensTextLogprobs
=
Tuple
[
List
[
int
],
str
,
List
[
Dict
[
int
,
float
]]]
def
check_logprobs_close
(
outputs_0_lst
:
List
[
TokensTextLogprobs
],
outputs_1_lst
:
List
[
TokensTextLogprobs
],
name_0
:
str
,
name_1
:
str
):
"""
Compare the logprobs of two sequences generated by different models,
which should be similar but not necessarily equal.
"""
assert
len
(
outputs_0_lst
)
==
len
(
outputs_1_lst
)
# Loop through responses to each prompt.
for
prompt_idx
,
(
outputs_0
,
outputs_1
)
in
enumerate
(
zip
(
outputs_0_lst
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment