Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66b809cc
Commit
66b809cc
authored
Feb 08, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.7.2' into v0.7.2-dev
parents
37b63c24
0408efc6
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
199 additions
and
6 deletions
+199
-6
tests/models/multimodal/processing/test_phi3v.py
tests/models/multimodal/processing/test_phi3v.py
+5
-1
tests/models/multimodal/processing/test_qwen2_vl.py
tests/models/multimodal/processing/test_qwen2_vl.py
+6
-1
tests/models/registry.py
tests/models/registry.py
+11
-3
tests/models/test_initialization.py
tests/models/test_initialization.py
+2
-0
tests/models/test_oot_registration.py
tests/models/test_oot_registration.py
+5
-1
tests/models/test_registry.py
tests/models/test_registry.py
+2
-0
tests/models/test_transformers.py
tests/models/test_transformers.py
+76
-0
tests/models/utils.py
tests/models/utils.py
+2
-0
tests/mq_llm_engine/test_abort.py
tests/mq_llm_engine/test_abort.py
+1
-0
tests/mq_llm_engine/test_error_handling.py
tests/mq_llm_engine/test_error_handling.py
+1
-0
tests/mq_llm_engine/test_load.py
tests/mq_llm_engine/test_load.py
+1
-0
tests/mq_llm_engine/utils.py
tests/mq_llm_engine/utils.py
+2
-0
tests/multi_step/test_correctness_async_llm.py
tests/multi_step/test_correctness_async_llm.py
+2
-0
tests/multi_step/test_correctness_llm.py
tests/multi_step/test_correctness_llm.py
+2
-0
tests/multimodal/test_inputs.py
tests/multimodal/test_inputs.py
+2
-0
tests/multimodal/test_processing.py
tests/multimodal/test_processing.py
+71
-0
tests/multimodal/test_utils.py
tests/multimodal/test_utils.py
+2
-0
tests/multimodal/utils.py
tests/multimodal/utils.py
+2
-0
tests/neuron/test_prefix_prefill.py
tests/neuron/test_prefix_prefill.py
+2
-0
tests/plugins/vllm_add_dummy_model/setup.py
tests/plugins/vllm_add_dummy_model/setup.py
+2
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
tests/models/multimodal/processing/test_phi3v.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Tests for phi3v's multimodal preprocessing kwargs."""
"""Tests for phi3v's multimodal preprocessing kwargs."""
import
pytest
import
pytest
...
@@ -37,7 +38,10 @@ def test_processor_override(
...
@@ -37,7 +38,10 @@ def test_processor_override(
trust_remote_code
=
True
,
trust_remote_code
=
True
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
tokenizer
=
cached_get_tokenizer
(
ctx
.
model_config
.
tokenizer
)
tokenizer
=
cached_get_tokenizer
(
ctx
.
model_config
.
tokenizer
,
trust_remote_code
=
ctx
.
model_config
.
trust_remote_code
,
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model_config
,
ctx
.
model_config
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
...
...
tests/models/multimodal/processing/test_qwen2_vl.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pytest
import
pytest
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
...
@@ -31,7 +33,10 @@ def test_processor_override(
...
@@ -31,7 +33,10 @@ def test_processor_override(
mm_processor_kwargs
=
None
,
mm_processor_kwargs
=
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
tokenizer
=
cached_get_tokenizer
(
ctx
.
model_config
.
tokenizer
)
tokenizer
=
cached_get_tokenizer
(
ctx
.
model_config
.
tokenizer
,
trust_remote_code
=
ctx
.
model_config
.
trust_remote_code
,
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model_config
,
ctx
.
model_config
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
...
...
tests/models/registry.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
dataclasses
import
dataclass
,
field
from
dataclasses
import
dataclass
,
field
from
typing
import
AbstractSet
,
Any
,
Literal
,
Mapping
,
Optional
from
typing
import
AbstractSet
,
Any
,
Literal
,
Mapping
,
Optional
...
@@ -222,8 +224,7 @@ _CROSS_ENCODER_EXAMPLE_MODELS = {
...
@@ -222,8 +224,7 @@ _CROSS_ENCODER_EXAMPLE_MODELS = {
_MULTIMODAL_EXAMPLE_MODELS
=
{
_MULTIMODAL_EXAMPLE_MODELS
=
{
# [Decoder-only]
# [Decoder-only]
"AriaForConditionalGeneration"
:
_HfExamplesInfo
(
"rhymes-ai/Aria"
,
"AriaForConditionalGeneration"
:
_HfExamplesInfo
(
"rhymes-ai/Aria"
),
min_transformers_version
=
"4.48"
),
"Blip2ForConditionalGeneration"
:
_HfExamplesInfo
(
"Salesforce/blip2-opt-2.7b"
),
# noqa: E501
"Blip2ForConditionalGeneration"
:
_HfExamplesInfo
(
"Salesforce/blip2-opt-2.7b"
),
# noqa: E501
"ChameleonForConditionalGeneration"
:
_HfExamplesInfo
(
"facebook/chameleon-7b"
),
# noqa: E501
"ChameleonForConditionalGeneration"
:
_HfExamplesInfo
(
"facebook/chameleon-7b"
),
# noqa: E501
"ChatGLMModel"
:
_HfExamplesInfo
(
"THUDM/glm-4v-9b"
,
"ChatGLMModel"
:
_HfExamplesInfo
(
"THUDM/glm-4v-9b"
,
...
@@ -263,6 +264,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -263,6 +264,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
trust_remote_code
=
True
),
trust_remote_code
=
True
),
"Qwen2AudioForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-Audio-7B-Instruct"
),
# noqa: E501
"Qwen2AudioForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-Audio-7B-Instruct"
),
# noqa: E501
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-VL-2B-Instruct"
),
# noqa: E501
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-VL-2B-Instruct"
),
# noqa: E501
"Qwen2_5_VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-VL-3B-Instruct"
,
# noqa: E501
min_transformers_version
=
"4.49"
),
# noqa: E501
"UltravoxModel"
:
_HfExamplesInfo
(
"fixie-ai/ultravox-v0_3"
,
"UltravoxModel"
:
_HfExamplesInfo
(
"fixie-ai/ultravox-v0_3"
,
trust_remote_code
=
True
),
trust_remote_code
=
True
),
# [Encoder-decoder]
# [Encoder-decoder]
...
@@ -276,7 +279,11 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
...
@@ -276,7 +279,11 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
"MedusaModel"
:
_HfExamplesInfo
(
"JackFram/llama-68m"
,
"MedusaModel"
:
_HfExamplesInfo
(
"JackFram/llama-68m"
,
speculative_model
=
"abhigoyal/vllm-medusa-llama-68m-random"
),
# noqa: E501
speculative_model
=
"abhigoyal/vllm-medusa-llama-68m-random"
),
# noqa: E501
"MLPSpeculatorPreTrainedModel"
:
_HfExamplesInfo
(
"JackFram/llama-160m"
,
"MLPSpeculatorPreTrainedModel"
:
_HfExamplesInfo
(
"JackFram/llama-160m"
,
speculative_model
=
"ibm-fms/llama-160m-accelerator"
),
# noqa: E501
speculative_model
=
"ibm-ai-platform/llama-160m-accelerator"
),
# noqa: E501
}
_FALLBACK_MODEL
=
{
"TransformersModel"
:
_HfExamplesInfo
(
"ArthurZ/Ilama-3.2-1B"
,
trust_remote_code
=
True
),
# noqa: E501
}
}
_EXAMPLE_MODELS
=
{
_EXAMPLE_MODELS
=
{
...
@@ -285,6 +292,7 @@ _EXAMPLE_MODELS = {
...
@@ -285,6 +292,7 @@ _EXAMPLE_MODELS = {
**
_CROSS_ENCODER_EXAMPLE_MODELS
,
**
_CROSS_ENCODER_EXAMPLE_MODELS
,
**
_MULTIMODAL_EXAMPLE_MODELS
,
**
_MULTIMODAL_EXAMPLE_MODELS
,
**
_SPECULATIVE_DECODING_EXAMPLE_MODELS
,
**
_SPECULATIVE_DECODING_EXAMPLE_MODELS
,
**
_FALLBACK_MODEL
,
}
}
...
...
tests/models/test_initialization.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
unittest.mock
import
patch
from
unittest.mock
import
patch
import
pytest
import
pytest
...
...
tests/models/test_oot_registration.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
os
import
pytest
import
pytest
...
@@ -13,7 +15,9 @@ def test_plugin(dummy_opt_path):
...
@@ -13,7 +15,9 @@ def test_plugin(dummy_opt_path):
os
.
environ
[
"VLLM_PLUGINS"
]
=
""
os
.
environ
[
"VLLM_PLUGINS"
]
=
""
with
pytest
.
raises
(
Exception
)
as
excinfo
:
with
pytest
.
raises
(
Exception
)
as
excinfo
:
LLM
(
model
=
dummy_opt_path
,
load_format
=
"dummy"
)
LLM
(
model
=
dummy_opt_path
,
load_format
=
"dummy"
)
assert
"are not supported for now"
in
str
(
excinfo
.
value
)
error_msg
=
"has no vLLM implementation and "
\
"the Transformers implementation is not compatible with vLLM."
assert
(
error_msg
in
str
(
excinfo
.
value
))
@
fork_new_process_for_each_test
@
fork_new_process_for_each_test
...
...
tests/models/test_registry.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
warnings
import
warnings
import
pytest
import
pytest
...
...
tests/models/test_transformers.py
0 → 100644
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Test the functionality of the Transformers backend.
Run `pytest tests/models/test_transformers.py`.
"""
from
contextlib
import
nullcontext
from
typing
import
Type
import
pytest
from
..conftest
import
HfRunner
,
VllmRunner
from
..utils
import
multi_gpu_test
from
.utils
import
check_logprobs_close
def
check_implementation
(
hf_runner
:
Type
[
HfRunner
],
vllm_runner
:
Type
[
VllmRunner
],
example_prompts
:
list
[
str
],
model
:
str
,
**
kwargs
,
):
max_tokens
=
32
num_logprobs
=
5
with
vllm_runner
(
model
,
**
kwargs
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
generate_greedy_logprobs
(
example_prompts
,
max_tokens
,
num_logprobs
)
with
hf_runner
(
model
)
as
hf_model
:
hf_outputs
=
hf_model
.
generate_greedy_logprobs_limit
(
example_prompts
,
max_tokens
,
num_logprobs
)
check_logprobs_close
(
outputs_0_lst
=
hf_outputs
,
outputs_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
@
pytest
.
mark
.
parametrize
(
"model,model_impl"
,
[
(
"meta-llama/Llama-3.2-1B-Instruct"
,
"transformers"
),
(
"openai-community/gpt2"
,
"transformers"
),
(
"ArthurZ/Ilama-3.2-1B"
,
"auto"
),
# CUSTOM CODE
(
"meta-llama/Llama-3.2-1B-Instruct"
,
"auto"
),
])
# trust_remote_code=True by default
def
test_models
(
hf_runner
,
vllm_runner
,
example_prompts
,
model
,
model_impl
)
->
None
:
maybe_raises
=
nullcontext
()
if
model
==
"openai-community/gpt2"
and
model_impl
==
"transformers"
:
# Model is not backend compatible
maybe_raises
=
pytest
.
raises
(
ValueError
,
match
=
"The Transformers implementation.*not compatible with vLLM"
)
with
maybe_raises
:
check_implementation
(
hf_runner
,
vllm_runner
,
example_prompts
,
model
,
model_impl
=
model_impl
)
@
multi_gpu_test
(
num_gpus
=
2
)
def
test_distributed
(
hf_runner
,
vllm_runner
,
example_prompts
,
):
kwargs
=
{
"model_impl"
:
"transformers"
,
"tensor_parallel_size"
:
2
}
check_implementation
(
hf_runner
,
vllm_runner
,
example_prompts
,
"meta-llama/Llama-3.2-1B-Instruct"
,
**
kwargs
)
tests/models/utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
warnings
import
warnings
from
typing
import
Dict
,
List
,
Optional
,
Sequence
,
Tuple
,
Union
from
typing
import
Dict
,
List
,
Optional
,
Sequence
,
Tuple
,
Union
...
...
tests/mq_llm_engine/test_abort.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Test that aborting is handled properly."""
"""Test that aborting is handled properly."""
import
asyncio
import
asyncio
...
...
tests/mq_llm_engine/test_error_handling.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Test that various errors are handled properly."""
"""Test that various errors are handled properly."""
import
asyncio
import
asyncio
...
...
tests/mq_llm_engine/test_load.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Test that the MQLLMEngine is able to handle 10k concurrent requests."""
"""Test that the MQLLMEngine is able to handle 10k concurrent requests."""
import
asyncio
import
asyncio
...
...
tests/mq_llm_engine/utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
asyncio
import
asyncio
import
multiprocessing
import
multiprocessing
from
typing
import
Callable
,
Tuple
,
Union
from
typing
import
Callable
,
Tuple
,
Union
...
...
tests/multi_step/test_correctness_async_llm.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# Test the AsyncLLMEngine with multi-step-decoding
# Test the AsyncLLMEngine with multi-step-decoding
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
...
...
tests/multi_step/test_correctness_llm.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# Test the LLMEngine with multi-step-decoding
# Test the LLMEngine with multi-step-decoding
import
copy
import
copy
...
...
tests/multimodal/test_inputs.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
torch
import
torch
from
vllm.multimodal.inputs
import
MultiModalKwargs
,
NestedTensors
from
vllm.multimodal.inputs
import
MultiModalKwargs
,
NestedTensors
...
...
tests/multimodal/test_processing.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
contextlib
import
nullcontext
from
contextlib
import
nullcontext
from
types
import
MethodType
from
typing
import
cast
from
typing
import
cast
from
unittest.mock
import
MagicMock
from
unittest.mock
import
MagicMock
import
numpy
as
np
import
numpy
as
np
import
pytest
import
pytest
from
transformers
import
ProcessorMixin
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
...
@@ -634,3 +638,70 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
...
@@ -634,3 +638,70 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
mm_data
=
mm_data
,
mm_data
=
mm_data
,
hf_processor_mm_kwargs
=
{},
hf_processor_mm_kwargs
=
{},
)
)
class
_ProcessorProxy
:
def
__init__
(
self
,
processor
:
ProcessorMixin
)
->
None
:
super
().
__init__
()
self
.
__processor
=
processor
def
__getattr__
(
self
,
key
:
str
):
return
getattr
(
self
.
__processor
,
key
)
def
__call__
(
self
,
text
=
None
,
images
=
None
,
videos
=
None
,
exists
=
None
,
return_tensors
=
None
,
):
return
dict
(
exists
=
exists
)
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"Qwen/Qwen2-VL-7B-Instruct"
])
# Dummy
# yapf: disable
@
pytest
.
mark
.
parametrize
(
(
"call_kwargs"
,
"expected_kwargs"
),
[
# Should ignore invalid kwargs
({
"does_not_exist"
:
100
},
{
"exists"
:
None
}),
({
"exists"
:
1
},
{
"exists"
:
1
}),
({
"does_not_exist"
:
100
,
"exists"
:
1
},
{
"exists"
:
1
}),
],
)
# yapf: enable
def
test_hf_processor_kwargs
(
model_id
,
call_kwargs
,
expected_kwargs
):
model_config
=
ModelConfig
(
model
=
model_id
,
task
=
"auto"
,
tokenizer
=
model_id
,
tokenizer_mode
=
"auto"
,
trust_remote_code
=
False
,
seed
=
0
,
dtype
=
"half"
,
revision
=
None
,
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
model_config
,
tokenizer
=
cached_get_tokenizer
(
model_config
.
tokenizer
),
)
orig_get_hf_processor
=
processor
.
info
.
get_hf_processor
def
get_hf_processor
(
self
,
**
kwargs
):
assert
kwargs
==
call_kwargs
return
_ProcessorProxy
(
orig_get_hf_processor
())
processor
.
info
.
get_hf_processor
=
MethodType
(
get_hf_processor
,
processor
.
info
)
out_kwargs
=
processor
.
_call_hf_processor
(
prompt
=
""
,
mm_data
=
{},
mm_kwargs
=
call_kwargs
,
)
assert
out_kwargs
==
expected_kwargs
tests/multimodal/test_utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
base64
import
base64
import
mimetypes
import
mimetypes
import
os
import
os
...
...
tests/multimodal/utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
numpy
as
np
import
numpy
as
np
from
PIL
import
Image
from
PIL
import
Image
...
...
tests/neuron/test_prefix_prefill.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
random
import
random
from
typing
import
Optional
from
typing
import
Optional
...
...
tests/plugins/vllm_add_dummy_model/setup.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
setuptools
import
setup
from
setuptools
import
setup
setup
(
name
=
'vllm_add_dummy_model'
,
setup
(
name
=
'vllm_add_dummy_model'
,
...
...
Prev
1
…
15
16
17
18
19
20
21
22
23
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment