Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ad58e9b3
Commit
ad58e9b3
authored
Sep 18, 2024
by
zhuwenwen
Browse files
Merge tag 'v0.6.1.post2' into v0.6.1.post2-dev
parents
408f663a
9ba0817f
Changes
118
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
111 additions
and
46 deletions
+111
-46
tests/models/decoder_only/language/test_fp8.py
tests/models/decoder_only/language/test_fp8.py
+1
-1
tests/models/decoder_only/language/test_gguf.py
tests/models/decoder_only/language/test_gguf.py
+1
-1
tests/models/decoder_only/language/test_gptq_marlin.py
tests/models/decoder_only/language/test_gptq_marlin.py
+1
-1
tests/models/decoder_only/language/test_gptq_marlin_24.py
tests/models/decoder_only/language/test_gptq_marlin_24.py
+2
-1
tests/models/decoder_only/language/test_granite.py
tests/models/decoder_only/language/test_granite.py
+1
-1
tests/models/decoder_only/language/test_jamba.py
tests/models/decoder_only/language/test_jamba.py
+2
-1
tests/models/decoder_only/language/test_marlin.py
tests/models/decoder_only/language/test_marlin.py
+1
-1
tests/models/decoder_only/language/test_mistral.py
tests/models/decoder_only/language/test_mistral.py
+1
-1
tests/models/decoder_only/language/test_modelopt.py
tests/models/decoder_only/language/test_modelopt.py
+0
-0
tests/models/decoder_only/language/test_models.py
tests/models/decoder_only/language/test_models.py
+1
-1
tests/models/decoder_only/language/test_phimoe.py
tests/models/decoder_only/language/test_phimoe.py
+1
-1
tests/models/decoder_only/vision_language/__init__.py
tests/models/decoder_only/vision_language/__init__.py
+0
-0
tests/models/decoder_only/vision_language/test_blip2.py
tests/models/decoder_only/vision_language/test_blip2.py
+3
-5
tests/models/decoder_only/vision_language/test_broadcast.py
tests/models/decoder_only/vision_language/test_broadcast.py
+42
-0
tests/models/decoder_only/vision_language/test_chameleon.py
tests/models/decoder_only/vision_language/test_chameleon.py
+3
-5
tests/models/decoder_only/vision_language/test_fuyu.py
tests/models/decoder_only/vision_language/test_fuyu.py
+3
-5
tests/models/decoder_only/vision_language/test_intern_vit.py
tests/models/decoder_only/vision_language/test_intern_vit.py
+1
-3
tests/models/decoder_only/vision_language/test_internvl.py
tests/models/decoder_only/vision_language/test_internvl.py
+39
-6
tests/models/decoder_only/vision_language/test_llava.py
tests/models/decoder_only/vision_language/test_llava.py
+5
-7
tests/models/decoder_only/vision_language/test_llava_image_embeds.py
...s/decoder_only/vision_language/test_llava_image_embeds.py
+3
-5
No files found.
tests/models/test_fp8.py
→
tests/models/
decoder_only/language/
test_fp8.py
View file @
ad58e9b3
...
...
@@ -10,7 +10,7 @@ import pytest
from
tests.kernels.utils
import
override_backend_env_variable
from
tests.quantization.utils
import
is_quant_method_supported
from
..
models
.utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
os
.
environ
[
"TOKENIZERS_PARALLELISM"
]
=
"true"
...
...
tests/models/test_gguf.py
→
tests/models/
decoder_only/language/
test_gguf.py
View file @
ad58e9b3
...
...
@@ -11,7 +11,7 @@ from transformers import AutoTokenizer
from
tests.quantization.utils
import
is_quant_method_supported
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
os
.
environ
[
"TOKENIZERS_PARALLELISM"
]
=
"true"
...
...
tests/models/test_gptq_marlin.py
→
tests/models/
decoder_only/language/
test_gptq_marlin.py
View file @
ad58e9b3
...
...
@@ -15,7 +15,7 @@ import pytest
from
tests.quantization.utils
import
is_quant_method_supported
from
vllm.model_executor.layers.rotary_embedding
import
_ROPE_DICT
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
os
.
environ
[
"TOKENIZERS_PARALLELISM"
]
=
"true"
...
...
tests/models/test_gptq_marlin_24.py
→
tests/models/
decoder_only/language/
test_gptq_marlin_24.py
View file @
ad58e9b3
...
...
@@ -10,9 +10,10 @@ from dataclasses import dataclass
import
pytest
from
tests.models.utils
import
check_logprobs_close
from
tests.quantization.utils
import
is_quant_method_supported
from
...utils
import
check_logprobs_close
@
dataclass
class
ModelPair
:
...
...
tests/models/test_granite.py
→
tests/models/
decoder_only/language/
test_granite.py
View file @
ad58e9b3
...
...
@@ -6,7 +6,7 @@ import importlib.metadata
import
pytest
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
TRANSFORMERS_VERSION
=
tuple
(
map
(
int
,
...
...
tests/models/test_jamba.py
→
tests/models/
decoder_only/language/
test_jamba.py
View file @
ad58e9b3
import
pytest
from
tests.models.utils
import
check_outputs_equal
from
vllm.worker.model_runner
import
_get_graph_batch_size
from
...utils
import
check_outputs_equal
MODELS
=
[
"ai21labs/Jamba-tiny-random"
]
...
...
tests/models/test_marlin.py
→
tests/models/
decoder_only/language/
test_marlin.py
View file @
ad58e9b3
...
...
@@ -16,7 +16,7 @@ import pytest
from
tests.quantization.utils
import
is_quant_method_supported
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
@
dataclass
...
...
tests/models/test_mistral.py
→
tests/models/
decoder_only/language/
test_mistral.py
View file @
ad58e9b3
...
...
@@ -4,7 +4,7 @@ Run `pytest tests/models/test_mistral.py`.
"""
import
pytest
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
MODELS
=
[
"mistralai/Mistral-7B-Instruct-v0.1"
,
...
...
tests/models/test_modelopt.py
→
tests/models/
decoder_only/language/
test_modelopt.py
View file @
ad58e9b3
File moved
tests/models/test_models.py
→
tests/models/
decoder_only/language/
test_models.py
View file @
ad58e9b3
...
...
@@ -7,7 +7,7 @@ Run `pytest tests/models/test_models.py`.
"""
import
pytest
from
.utils
import
check_outputs_equal
from
..
.utils
import
check_outputs_equal
MODELS
=
[
"facebook/opt-125m"
,
...
...
tests/models/test_phimoe.py
→
tests/models/
decoder_only/language/
test_phimoe.py
View file @
ad58e9b3
...
...
@@ -7,7 +7,7 @@ import torch
from
vllm.utils
import
is_cpu
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
MODELS
=
[
"microsoft/Phi-3.5-MoE-instruct"
,
...
...
tests/models/decoder_only/vision_language/__init__.py
0 → 100644
View file @
ad58e9b3
tests/models/test_blip2.py
→
tests/models/
decoder_only/vision_language/
test_blip2.py
View file @
ad58e9b3
...
...
@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, AutoTokenizer
from
vllm.multimodal.utils
import
rescale_image_size
from
vllm.sequence
import
SampleLogprobs
from
..conftest
import
IMAGE_ASSETS
from
.utils
import
check_logprobs_close
pytestmark
=
pytest
.
mark
.
vlm
from
....conftest
import
IMAGE_ASSETS
from
...utils
import
check_logprobs_close
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
...
...
@@ -56,7 +54,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalData objects and corresponding
MultiModalConfig as input.
...
...
tests/
distributed/test_multimodal
_broadcast.py
→
tests/
models/decoder_only/vision_language/test
_broadcast.py
View file @
ad58e9b3
"""Compare the outputs of HF and distributed vLLM when using greedy sampling.
Run:
```sh
pytest -s -v test_multimodal_broadcast.py
```
"""
import
pytest
from
vllm.utils
import
cuda_device_count_stateless
from
..utils
import
fork_new_process_for_each_test
from
....utils
import
multi_gpu_test
@
pytest
.
mark
.
skipif
(
cuda_device_count_stateless
()
<
2
,
reason
=
"Need at least 2 GPUs to run the test."
)
@
pytest
.
mark
.
parametrize
(
"model, distributed_executor_backend"
,
[
(
"llava-hf/llava-1.5-7b-hf"
,
"ray"
),
(
"llava-hf/llava-v1.6-mistral-7b-hf"
,
"ray"
),
(
"facebook/chameleon-7b"
,
"ray"
),
(
"llava-hf/llava-1.5-7b-hf"
,
"mp"
),
(
"llava-hf/llava-v1.6-mistral-7b-hf"
,
"mp"
),
(
"facebook/chameleon-7b"
,
"mp"
),
@
multi_gpu_test
(
num_gpus
=
2
)
@
pytest
.
mark
.
parametrize
(
"distributed_executor_backend"
,
[
"ray"
,
"mp"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"llava-hf/llava-1.5-7b-hf"
,
"llava-hf/llava-v1.6-mistral-7b-hf"
,
"facebook/chameleon-7b"
,
])
@
fork_new_process_for_each_test
def
test_models
(
hf_runner
,
vllm_runner
,
image_assets
,
model
:
str
,
distributed_executor_backend
:
str
)
->
None
:
def
test_models
(
hf_runner
,
vllm_runner
,
image_assets
,
distributed_executor_backend
,
model
)
->
None
:
dtype
=
"half"
max_tokens
=
5
...
...
@@ -33,13 +19,11 @@ def test_models(hf_runner, vllm_runner, image_assets, model: str,
tensor_parallel_size
=
2
if
model
.
startswith
(
"llava-hf/llava-1.5"
):
from
..models
.test_llava
import
models
,
run_test
from
.test_llava
import
models
,
run_test
elif
model
.
startswith
(
"llava-hf/llava-v1.6"
):
from
..models.test_llava_next
import
run_test
# type: ignore[no-redef]
from
..models.test_llava_next
import
models
from
.test_llava_next
import
models
,
run_test
# type: ignore[no-redef]
elif
model
.
startswith
(
"facebook/chameleon"
):
from
..models.test_chameleon
import
run_test
# type: ignore[no-redef]
from
..models.test_chameleon
import
models
from
.test_chameleon
import
models
,
run_test
# type: ignore[no-redef]
else
:
raise
NotImplementedError
(
f
"Unsupported model:
{
model
}
"
)
...
...
tests/models/test_chameleon.py
→
tests/models/
decoder_only/vision_language/
test_chameleon.py
View file @
ad58e9b3
...
...
@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, BatchEncoding
from
vllm.multimodal.utils
import
rescale_image_size
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
..conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
.utils
import
check_outputs_equal
pytestmark
=
pytest
.
mark
.
vlm
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
...utils
import
check_outputs_equal
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
...
...
@@ -36,7 +34,7 @@ def run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding vision language config as input.
...
...
tests/models/test_fuyu.py
→
tests/models/
decoder_only/vision_language/
test_fuyu.py
View file @
ad58e9b3
...
...
@@ -6,10 +6,8 @@ from vllm.multimodal.utils import rescale_image_size
from
vllm.sequence
import
SampleLogprobs
from
vllm.utils
import
is_cpu
from
..conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
.utils
import
check_logprobs_close
pytestmark
=
pytest
.
mark
.
vlm
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
...utils
import
check_logprobs_close
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
...
...
@@ -46,7 +44,7 @@ def run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
...
...
tests/models/test_intern_vit.py
→
tests/models/
decoder_only/vision_language/
test_intern_vit.py
View file @
ad58e9b3
...
...
@@ -6,9 +6,7 @@ import torch.nn as nn
from
huggingface_hub
import
snapshot_download
from
transformers
import
AutoConfig
,
AutoModel
,
CLIPImageProcessor
from
..conftest
import
_ImageAssets
,
cleanup
pytestmark
=
pytest
.
mark
.
vlm
from
....conftest
import
_ImageAssets
,
cleanup
# we use snapshot_download to prevent conflicts between
# dynamic_module and trust_remote_code for hf_runner
...
...
tests/models/test_internvl.py
→
tests/models/
decoder_only/vision_language/
test_internvl.py
View file @
ad58e9b3
...
...
@@ -9,11 +9,9 @@ from transformers import AutoConfig
from
vllm.multimodal.utils
import
rescale_image_size
from
vllm.utils
import
is_cpu
from
..conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
_ImageAssets
)
from
.utils
import
check_logprobs_close
pytestmark
=
pytest
.
mark
.
vlm
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
_ImageAssets
)
from
...utils
import
check_logprobs_close
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
...
...
@@ -78,7 +76,7 @@ def run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
...
...
@@ -331,6 +329,41 @@ def test_multi_images_models(hf_runner, vllm_runner, image_assets, model,
)
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"OpenGVLab/InternVL2-2B"
])
@
pytest
.
mark
.
parametrize
(
"size_factors"
,
[[
0.5
,
1.0
]])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
target_dtype
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
torch
.
inference_mode
()
def
test_different_num_patches
(
hf_runner
,
vllm_runner
,
image_assets
,
model
,
size_factors
,
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
images
=
[
asset
.
pil_image
.
resize
((
896
,
896
))
for
asset
in
image_assets
]
inputs_batching
=
[(
[
prompt
for
_
in
size_factors
],
[
rescale_image_size
(
image
,
factor
)
for
factor
in
size_factors
],
)
for
image
,
prompt
in
zip
(
images
,
HF_IMAGE_PROMPTS
)]
inputs_multi_images
=
[
([
HF_MULTIIMAGE_IMAGE_PROMPT
for
_
in
size_factors
],
[[
rescale_image_size
(
image
,
factor
)
for
image
in
images
]
for
factor
in
size_factors
])
]
for
inputs
in
[
inputs_batching
,
inputs_multi_images
]:
run_test
(
hf_runner
,
vllm_runner
,
inputs
,
model
,
dtype
=
dtype
,
max_tokens
=
max_tokens
,
num_logprobs
=
num_logprobs
,
mm_limit
=
2
,
tensor_parallel_size
=
1
,
)
@
pytest
.
mark
.
parametrize
(
"models"
,
[(
"OpenGVLab/InternVL2-2B"
,
"OpenGVLab/InternVL2-2B-AWQ"
)])
@
pytest
.
mark
.
parametrize
(
...
...
tests/models/test_llava.py
→
tests/models/
decoder_only/vision_language/
test_llava.py
View file @
ad58e9b3
...
...
@@ -8,11 +8,9 @@ from vllm.multimodal.utils import rescale_image_size
from
vllm.sequence
import
SampleLogprobs
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
..conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
_ImageAssets
)
from
.utils
import
check_logprobs_close
pytestmark
=
pytest
.
mark
.
vlm
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
_ImageAssets
)
from
...utils
import
check_logprobs_close
_LIMIT_IMAGE_PER_PROMPT
=
4
...
...
@@ -143,7 +141,7 @@ def _run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
...
...
@@ -239,7 +237,7 @@ def _run_test(
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
def
test_models
(
hf_runner
,
vllm_runner
,
image_assets
,
model
,
size_factors
,
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
dtype
,
max_tokens
,
num_logprobs
)
->
None
:
run_test
(
hf_runner
,
vllm_runner
,
...
...
tests/models/test_llava_image_embeds.py
→
tests/models/
decoder_only/vision_language/
test_llava_image_embeds.py
View file @
ad58e9b3
...
...
@@ -5,10 +5,8 @@ from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
from
vllm.sequence
import
SampleLogprobs
from
..conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
.utils
import
check_logprobs_close
pytestmark
=
pytest
.
mark
.
vlm
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
...utils
import
check_logprobs_close
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
...
...
@@ -62,7 +60,7 @@ def run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding vision language config as input.
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment