Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cf069aa8
Unverified
Commit
cf069aa8
authored
Mar 03, 2025
by
Harry Mellor
Committed by
GitHub
Mar 02, 2025
Browse files
Update deprecated Python 3.8 typing (#13971)
parent
bf33700e
Changes
300
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
112 additions
and
115 deletions
+112
-115
tests/models/decoder_only/vision_language/vlm_utils/runners.py
.../models/decoder_only/vision_language/vlm_utils/runners.py
+10
-11
tests/models/decoder_only/vision_language/vlm_utils/types.py
tests/models/decoder_only/vision_language/vlm_utils/types.py
+18
-18
tests/models/embedding/language/test_gritlm.py
tests/models/embedding/language/test_gritlm.py
+5
-6
tests/models/embedding/utils.py
tests/models/embedding/utils.py
+3
-3
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
+6
-6
tests/models/embedding/vision_language/test_llava_next.py
tests/models/embedding/vision_language/test_llava_next.py
+3
-5
tests/models/embedding/vision_language/test_phi3v.py
tests/models/embedding/vision_language/test_phi3v.py
+3
-5
tests/models/encoder_decoder/language/test_bart.py
tests/models/encoder_decoder/language/test_bart.py
+5
-5
tests/models/encoder_decoder/vision_language/test_florence2.py
.../models/encoder_decoder/vision_language/test_florence2.py
+4
-4
tests/models/encoder_decoder/vision_language/test_mllama.py
tests/models/encoder_decoder/vision_language/test_mllama.py
+18
-18
tests/models/multimodal/processing/test_h2ovl.py
tests/models/multimodal/processing/test_h2ovl.py
+2
-1
tests/models/multimodal/processing/test_internvl.py
tests/models/multimodal/processing/test_internvl.py
+2
-1
tests/models/registry.py
tests/models/registry.py
+3
-2
tests/models/test_transformers.py
tests/models/test_transformers.py
+7
-8
tests/models/utils.py
tests/models/utils.py
+11
-10
tests/mq_llm_engine/utils.py
tests/mq_llm_engine/utils.py
+2
-2
tests/multi_step/test_correctness_async_llm.py
tests/multi_step/test_correctness_async_llm.py
+2
-2
tests/multimodal/test_utils.py
tests/multimodal/test_utils.py
+4
-4
tests/neuron/test_logits_processor.py
tests/neuron/test_logits_processor.py
+1
-2
tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py
...dd_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py
+3
-2
No files found.
tests/models/decoder_only/vision_language/vlm_utils/runners.py
View file @
cf069aa8
...
...
@@ -3,7 +3,6 @@
types / modalities.
"""
from
pathlib
import
PosixPath
from
typing
import
Type
from
.....conftest
import
HfRunner
,
VllmRunner
,
_ImageAssets
,
_VideoAssets
from
.
import
builders
,
core
...
...
@@ -13,8 +12,8 @@ from .types import ExpandableVLMTestArgs, VLMTestInfo
####### Entrypoints for running different test types
def
run_single_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
inputs
=
builders
.
build_single_image_inputs_from_test_info
(
...
...
@@ -36,8 +35,8 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
def
run_multi_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
inputs
=
builders
.
build_multi_image_inputs_from_test_info
(
...
...
@@ -59,8 +58,8 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
def
run_embedding_test
(
*
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
inputs
,
vllm_embeddings
=
builders
.
build_embedding_inputs_from_test_info
(
...
...
@@ -85,8 +84,8 @@ def run_video_test(
*
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
video_assets
:
_VideoAssets
,
):
assert
test_case
.
size_wrapper
is
not
None
...
...
@@ -111,8 +110,8 @@ def run_video_test(
def
run_custom_inputs_test
(
*
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
]):
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
]):
# Custom test cases can provide inputs directly, but they need to
# explicitly provided a CustomTestConfig, which wraps the inputs and
# the limit_mm_per_prompt
...
...
tests/models/decoder_only/vision_language/vlm_utils/types.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
"""Types for writing multimodal model tests."""
from
collections.abc
import
Iterable
from
enum
import
Enum
from
pathlib
import
PosixPath
from
typing
import
(
Any
,
Callable
,
Dict
,
Iterable
,
List
,
NamedTuple
,
Optional
,
Tuple
,
Type
,
Union
)
from
typing
import
Any
,
Callable
,
NamedTuple
,
Optional
,
Union
import
torch
from
PIL.Image
import
Image
...
...
@@ -35,7 +35,7 @@ VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
IMAGE_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
),
(
0.25
,
0.5
,
1.0
)]
EMBEDDING_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
)]
RunnerOutput
=
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]
RunnerOutput
=
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]
# yapf: enable
...
...
@@ -53,8 +53,8 @@ class SizeType(Enum):
class
CustomTestOptions
(
NamedTuple
):
inputs
:
L
ist
[
T
uple
[
L
ist
[
str
],
L
ist
[
Union
[
L
ist
[
Image
],
Image
]]]]
limit_mm_per_prompt
:
D
ict
[
str
,
int
]
inputs
:
l
ist
[
t
uple
[
l
ist
[
str
],
l
ist
[
Union
[
l
ist
[
Image
],
Image
]]]]
limit_mm_per_prompt
:
d
ict
[
str
,
int
]
# kwarg to pass multimodal data in as to vllm/hf runner instances.
runner_mm_key
:
str
=
"images"
...
...
@@ -63,13 +63,13 @@ class ImageSizeWrapper(NamedTuple):
type
:
SizeType
# A size factor is a wrapper of 0+ floats,
# while a fixed size contains an iterable of integer pairs
data
:
Union
[
Iterable
[
float
],
Iterable
[
T
uple
[
int
,
int
]]]
data
:
Union
[
Iterable
[
float
],
Iterable
[
t
uple
[
int
,
int
]]]
class
VLMTestInfo
(
NamedTuple
):
"""Holds the configuration for 1+ tests for one model architecture."""
models
:
L
ist
[
str
]
models
:
l
ist
[
str
]
test_type
:
Union
[
VLMTestType
,
Iterable
[
VLMTestType
]]
# Should be None only if this is a CUSTOM_INPUTS test
...
...
@@ -97,19 +97,19 @@ class VLMTestInfo(NamedTuple):
max_num_seqs
:
int
=
256
task
:
TaskOption
=
"auto"
tensor_parallel_size
:
int
=
1
vllm_runner_kwargs
:
Optional
[
D
ict
[
str
,
Any
]]
=
None
vllm_runner_kwargs
:
Optional
[
d
ict
[
str
,
Any
]]
=
None
# Optional callable which gets a list of token IDs from the model tokenizer
get_stop_token_ids
:
Optional
[
Callable
[[
AnyTokenizer
],
list
[
int
]]]
=
None
# Optional list of strings to stop generation, useful when stop tokens are
# not special tokens in the tokenizer
stop_str
:
Optional
[
L
ist
[
str
]]
=
None
stop_str
:
Optional
[
l
ist
[
str
]]
=
None
# Exposed options for HF runner
hf_model_kwargs
:
Optional
[
D
ict
[
str
,
Any
]]
=
None
hf_model_kwargs
:
Optional
[
d
ict
[
str
,
Any
]]
=
None
# Indicates we should explicitly pass the EOS from the tokenizer
use_tokenizer_eos
:
bool
=
False
auto_cls
:
T
ype
[
_BaseAutoModelClass
]
=
AutoModelForCausalLM
auto_cls
:
t
ype
[
_BaseAutoModelClass
]
=
AutoModelForCausalLM
# Callable to pass to the HF runner to run on inputs; for now, we also pass
# the data type to input post processing, because almost all of the uses of
# postprocess_inputs are to fix the data types of BatchEncoding values.
...
...
@@ -128,12 +128,12 @@ class VLMTestInfo(NamedTuple):
# Default expandable params per test; these defaults can be overridden in
# instances of this object; the complete set of test cases for the model
# is all combinations of .models + all fields below
max_tokens
:
Union
[
int
,
T
uple
[
int
]]
=
128
num_logprobs
:
Union
[
int
,
T
uple
[
int
]]
=
5
max_tokens
:
Union
[
int
,
t
uple
[
int
]]
=
128
num_logprobs
:
Union
[
int
,
t
uple
[
int
]]
=
5
dtype
:
Union
[
str
,
Iterable
[
str
]]
=
"half"
distributed_executor_backend
:
Optional
[
Union
[
str
,
Iterable
[
str
]]]
=
None
# Only expanded in video tests
num_video_frames
:
Union
[
int
,
T
uple
[
int
]]
=
16
num_video_frames
:
Union
[
int
,
t
uple
[
int
]]
=
16
# Fixed image sizes / image size factors; most tests use image_size_factors
# The values provided for these two fields will be stacked and expanded
...
...
@@ -141,19 +141,19 @@ class VLMTestInfo(NamedTuple):
# once per tests (much like concatenating and wrapping in one parametrize
# call)
image_size_factors
:
Iterable
[
Iterable
[
float
]]
=
IMAGE_SIZE_FACTORS
image_sizes
:
Optional
[
Iterable
[
Iterable
[
T
uple
[
int
,
int
]]]]
=
None
image_sizes
:
Optional
[
Iterable
[
Iterable
[
t
uple
[
int
,
int
]]]]
=
None
# Hack for updating a prompt to take into a local path; currently only used
# for Qwen-VL, which requires encoding the image path / url into the prompt
# for HF runner
prompt_path_encoder
:
Optional
[
Callable
[[
PosixPath
,
str
,
Union
[
L
ist
[
ImageAsset
],
_ImageAssets
]],
Callable
[[
PosixPath
,
str
,
Union
[
l
ist
[
ImageAsset
],
_ImageAssets
]],
str
]]
=
None
# noqa: E501
# Allows configuring a test to run with custom inputs
custom_test_opts
:
Optional
[
L
ist
[
CustomTestOptions
]]
=
None
custom_test_opts
:
Optional
[
l
ist
[
CustomTestOptions
]]
=
None
marks
:
Optional
[
L
ist
[
MarkDecorator
]]
=
None
marks
:
Optional
[
l
ist
[
MarkDecorator
]]
=
None
def
get_non_parametrized_runner_kwargs
(
self
):
"""Returns a dictionary of expandable kwargs for items that are used
...
...
tests/models/embedding/language/test_gritlm.py
View file @
cf069aa8
...
...
@@ -3,7 +3,6 @@
import
importlib.util
import
math
from
array
import
array
from
typing
import
List
import
openai
import
pytest
...
...
@@ -81,14 +80,14 @@ async def client_generate(server_generate: RemoteOpenAIServer):
yield
async_client
def
run_llm_encode
(
llm
:
vllm
.
LLM
,
queries
:
L
ist
[
str
],
instruction
:
str
)
->
L
ist
[
float
]:
def
run_llm_encode
(
llm
:
vllm
.
LLM
,
queries
:
l
ist
[
str
],
instruction
:
str
)
->
l
ist
[
float
]:
outputs
=
llm
.
encode
([
instruction
+
q
for
q
in
queries
],
)
return
[
output
.
outputs
.
embedding
for
output
in
outputs
]
async
def
run_client_embeddings
(
client
:
vllm
.
LLM
,
queries
:
L
ist
[
str
],
instruction
:
str
)
->
L
ist
[
float
]:
async
def
run_client_embeddings
(
client
:
vllm
.
LLM
,
queries
:
l
ist
[
str
],
instruction
:
str
)
->
l
ist
[
float
]:
outputs
=
await
client
.
embeddings
.
create
(
model
=
MODEL_NAME
,
input
=
[
instruction
+
q
for
q
in
queries
],
...
...
@@ -123,7 +122,7 @@ def get_test_data():
return
queries
,
q_instruction
,
documents
,
d_instruction
def
validate_embed_output
(
q_rep
:
L
ist
[
float
],
d_rep
:
L
ist
[
float
]):
def
validate_embed_output
(
q_rep
:
l
ist
[
float
],
d_rep
:
l
ist
[
float
]):
cosine_sim_q0_d0
=
1
-
cosine
(
q_rep
[
0
],
d_rep
[
0
])
assert
math
.
isclose
(
cosine_sim_q0_d0
,
0.609
,
abs_tol
=
0.001
)
...
...
tests/models/embedding/utils.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Sequence
from
collections.abc
import
Sequence
import
torch
import
torch.nn.functional
as
F
...
...
@@ -8,8 +8,8 @@ import torch.nn.functional as F
def
check_embeddings_close
(
*
,
embeddings_0_lst
:
Sequence
[
L
ist
[
float
]],
embeddings_1_lst
:
Sequence
[
L
ist
[
float
]],
embeddings_0_lst
:
Sequence
[
l
ist
[
float
]],
embeddings_1_lst
:
Sequence
[
l
ist
[
float
]],
name_0
:
str
,
name_1
:
str
,
tol
:
float
=
1e-3
,
...
...
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
from
functools
import
partial
from
typing
import
Callable
,
Dict
,
List
,
Type
from
typing
import
Callable
import
pytest
import
torch
...
...
@@ -67,7 +67,7 @@ def get_messages(image: Image.Image, text: str, embed_text: bool):
def
apply_chat_template_and_add_eos
(
messages
:
L
ist
[
D
ict
],
messages
:
l
ist
[
d
ict
],
apply_chat_template_fn
:
Callable
,
):
prompt
=
apply_chat_template_fn
(
...
...
@@ -80,11 +80,11 @@ def postprocess_inputs(hf_model: HfRunner, inputs: BatchEncoding, **kwargs):
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
embed_texts
:
L
ist
[
bool
],
embed_texts
:
l
ist
[
bool
],
model
:
str
,
*
,
dtype
:
str
,
...
...
tests/models/embedding/vision_language/test_llava_next.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Type
import
pytest
import
torch.nn.functional
as
F
from
transformers
import
AutoModelForVision2Seq
...
...
@@ -35,9 +33,9 @@ MODELS = ["royokong/e5-v"]
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
model
:
str
,
*
,
...
...
tests/models/embedding/vision_language/test_phi3v.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Type
import
pytest
import
torch.nn.functional
as
F
...
...
@@ -29,9 +27,9 @@ MODELS = ["TIGER-Lab/VLM2Vec-Full"]
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
model
:
str
,
*
,
...
...
tests/models/encoder_decoder/language/test_bart.py
View file @
cf069aa8
...
...
@@ -3,7 +3,7 @@
Run `pytest tests/models/encoder_decoder/language/test_bart.py`.
"""
from
typing
import
List
,
Optional
,
Tuple
,
Type
from
typing
import
Optional
import
pytest
from
transformers
import
AutoModelForSeq2SeqLM
...
...
@@ -17,7 +17,7 @@ from ...utils import check_logprobs_close
def
vllm_to_hf_output
(
vllm_output
:
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
vllm_output
:
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
decoder_prompt_type
:
DecoderPromptType
,
):
"""Sanitize vllm output to be comparable with hf output."""
...
...
@@ -31,9 +31,9 @@ def vllm_to_hf_output(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
prompts
:
L
ist
[
ExplicitEncoderDecoderPrompt
[
str
,
str
]],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
prompts
:
l
ist
[
ExplicitEncoderDecoderPrompt
[
str
,
str
]],
decoder_prompt_type
:
DecoderPromptType
,
model
:
str
,
*
,
...
...
tests/models/encoder_decoder/vision_language/test_florence2.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Optional
,
Type
from
typing
import
Optional
import
pytest
from
PIL
import
Image
...
...
@@ -51,8 +51,8 @@ def hf_to_vllm_output(hf_output: tuple[list[int], str,
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
inputs
:
list
[
list
[
ExplicitEncoderDecoderPrompt
]],
model
:
str
,
*
,
...
...
@@ -114,7 +114,7 @@ def run_test(
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
64
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
def
test_models
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
def
test_models
(
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
model
:
str
,
size_factors
:
list
[
int
],
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
...
...
tests/models/encoder_decoder/vision_language/test_mllama.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
,
Tuple
,
Type
,
overload
from
typing
import
Optional
,
overload
import
pytest
import
torch
...
...
@@ -64,7 +64,7 @@ prompt_data = {
}
def
vllm_to_hf_output
(
vllm_output
:
T
uple
[
L
ist
[
int
],
str
,
def
vllm_to_hf_output
(
vllm_output
:
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
model
:
str
):
"""Sanitize vllm output to be comparable with hf output."""
...
...
@@ -91,9 +91,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
def
_get_inputs
(
image_assets
:
_ImageAssets
,
*
,
size_factors
:
Optional
[
L
ist
[
float
]]
=
None
,
sizes
:
Optional
[
L
ist
[
T
uple
[
int
,
int
]]]
=
None
,
)
->
L
ist
[
T
uple
[
L
ist
[
str
],
PromptImageInput
]]:
size_factors
:
Optional
[
l
ist
[
float
]]
=
None
,
sizes
:
Optional
[
l
ist
[
t
uple
[
int
,
int
]]]
=
None
,
)
->
l
ist
[
t
uple
[
l
ist
[
str
],
PromptImageInput
]]:
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
if
size_factors
is
not
None
:
...
...
@@ -123,12 +123,12 @@ def _get_inputs(
@
overload
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
model
:
str
,
*
,
size_factors
:
L
ist
[
float
],
size_factors
:
l
ist
[
float
],
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
,
...
...
@@ -140,12 +140,12 @@ def run_test(
@
overload
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
model
:
str
,
*
,
sizes
:
L
ist
[
T
uple
[
int
,
int
]],
sizes
:
l
ist
[
t
uple
[
int
,
int
]],
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
,
...
...
@@ -156,13 +156,13 @@ def run_test(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
model
:
str
,
*
,
size_factors
:
Optional
[
L
ist
[
float
]]
=
None
,
sizes
:
Optional
[
L
ist
[
T
uple
[
int
,
int
]]]
=
None
,
size_factors
:
Optional
[
l
ist
[
float
]]
=
None
,
sizes
:
Optional
[
l
ist
[
t
uple
[
int
,
int
]]]
=
None
,
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
,
...
...
@@ -183,9 +183,9 @@ def run_test(
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
inputs
:
L
ist
[
T
uple
[
L
ist
[
str
],
PromptImageInput
]],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
inputs
:
l
ist
[
t
uple
[
l
ist
[
str
],
PromptImageInput
]],
model
:
str
,
*
,
dtype
:
str
,
...
...
tests/models/multimodal/processing/test_h2ovl.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
"""Tests for H2OVL's multimodal preprocessing kwargs."""
from
typing
import
Mapping
,
Optional
from
collections.abc
import
Mapping
from
typing
import
Optional
import
pytest
from
PIL
import
Image
...
...
tests/models/multimodal/processing/test_internvl.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
"""Tests for InternVL's multimodal preprocessing kwargs."""
from
typing
import
Mapping
,
Optional
from
collections.abc
import
Mapping
from
typing
import
Optional
import
pytest
from
PIL
import
Image
...
...
tests/models/registry.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
from
collections.abc
import
Mapping
,
Set
from
dataclasses
import
dataclass
,
field
from
typing
import
AbstractSet
,
Any
,
Literal
,
Mapping
,
Optional
from
typing
import
Any
,
Literal
,
Optional
import
pytest
from
packaging.version
import
Version
...
...
@@ -324,7 +325,7 @@ class HfExampleModels:
self
.
hf_models
=
hf_models
def
get_supported_archs
(
self
)
->
Abstract
Set
[
str
]:
def
get_supported_archs
(
self
)
->
Set
[
str
]:
return
self
.
hf_models
.
keys
()
def
get_hf_info
(
self
,
model_arch
:
str
)
->
_HfExamplesInfo
:
...
...
tests/models/test_transformers.py
View file @
cf069aa8
...
...
@@ -4,7 +4,6 @@
Run `pytest tests/models/test_transformers.py`.
"""
from
contextlib
import
nullcontext
from
typing
import
Type
import
pytest
...
...
@@ -14,8 +13,8 @@ from .utils import check_logprobs_close
def
check_implementation
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
example_prompts
:
list
[
str
],
model
:
str
,
**
kwargs
,
...
...
@@ -47,8 +46,8 @@ def check_implementation(
(
"ArthurZ/Ilama-3.2-1B"
,
"auto"
),
# CUSTOM CODE
])
# trust_remote_code=True by default
def
test_models
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
example_prompts
:
list
[
str
],
model
:
str
,
model_impl
:
str
,
...
...
@@ -71,8 +70,8 @@ def test_models(
@
multi_gpu_test
(
num_gpus
=
2
)
def
test_distributed
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
example_prompts
,
):
kwargs
=
{
"model_impl"
:
"transformers"
,
"tensor_parallel_size"
:
2
}
...
...
@@ -92,7 +91,7 @@ def test_distributed(
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
32
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
def
test_quantization
(
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
example_prompts
:
list
[
str
],
model
:
str
,
quantization_kwargs
:
dict
[
str
,
str
],
...
...
tests/models/utils.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
import
warnings
from
typing
import
Dict
,
List
,
Optional
,
Sequence
,
Tuple
,
Union
from
collections.abc
import
Sequence
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -9,7 +10,7 @@ from vllm.config import ModelConfig, TaskOption
from
vllm.inputs
import
InputContext
from
vllm.sequence
import
Logprob
,
PromptLogprobs
,
SampleLogprobs
TokensText
=
T
uple
[
L
ist
[
int
],
str
]
TokensText
=
t
uple
[
l
ist
[
int
],
str
]
def
check_outputs_equal
(
...
...
@@ -46,7 +47,7 @@ def check_outputs_equal(
# * List of top sample logprobs for each sampled token
#
# Assumes prompt logprobs were not requested.
TokensTextLogprobs
=
T
uple
[
L
ist
[
int
],
str
,
Optional
[
Union
[
L
ist
[
D
ict
[
int
,
TokensTextLogprobs
=
t
uple
[
l
ist
[
int
],
str
,
Optional
[
Union
[
l
ist
[
d
ict
[
int
,
float
]],
SampleLogprobs
]]]
...
...
@@ -57,8 +58,8 @@ TokensTextLogprobs = Tuple[List[int], str, Optional[Union[List[Dict[int,
# * Optional list of top sample logprobs for each sampled token
#
# Assumes prompt logprobs were not requested.
TextTextLogprobs
=
T
uple
[
L
ist
[
str
],
str
,
Optional
[
Union
[
L
ist
[
D
ict
[
str
,
float
]],
L
ist
[
D
ict
[
str
,
TextTextLogprobs
=
t
uple
[
l
ist
[
str
],
str
,
Optional
[
Union
[
l
ist
[
d
ict
[
str
,
float
]],
l
ist
[
d
ict
[
str
,
Logprob
]]]]]
# Representation of generated sequence as a tuple of
...
...
@@ -68,9 +69,9 @@ TextTextLogprobs = Tuple[List[str], str, Optional[Union[List[Dict[str, float]],
# * Optional list of top prompt logprobs for each prompt token
#
# Allows prompt logprobs to be requested.
TokensTextLogprobsPromptLogprobs
=
T
uple
[
L
ist
[
int
],
str
,
Optional
[
Union
[
L
ist
[
D
ict
[
int
,
float
]],
SampleLogprobs
]],
Optional
[
Union
[
L
ist
[
Optional
[
D
ict
[
int
,
float
]]],
PromptLogprobs
]]]
TokensTextLogprobsPromptLogprobs
=
t
uple
[
l
ist
[
int
],
str
,
Optional
[
Union
[
l
ist
[
d
ict
[
int
,
float
]],
SampleLogprobs
]],
Optional
[
Union
[
l
ist
[
Optional
[
d
ict
[
int
,
float
]]],
PromptLogprobs
]]]
def
check_logprobs_close
(
...
...
@@ -254,8 +255,8 @@ def build_model_context(
tokenizer_name
:
Optional
[
str
]
=
None
,
trust_remote_code
:
bool
=
False
,
dtype
:
Optional
[
Union
[
str
,
torch
.
dtype
]]
=
None
,
mm_processor_kwargs
:
Optional
[
D
ict
]
=
None
,
limit_mm_per_prompt
:
Optional
[
D
ict
]
=
None
,
mm_processor_kwargs
:
Optional
[
d
ict
]
=
None
,
limit_mm_per_prompt
:
Optional
[
d
ict
]
=
None
,
disable_mm_preprocessor_cache
:
bool
=
True
,
):
"""Creates an InputContext for a given model.
...
...
tests/mq_llm_engine/utils.py
View file @
cf069aa8
...
...
@@ -2,7 +2,7 @@
import
asyncio
import
multiprocessing
from
typing
import
Callable
,
Tuple
,
Union
from
typing
import
Callable
,
Union
from
vllm
import
SamplingParams
from
vllm.engine.arg_utils
import
AsyncEngineArgs
...
...
@@ -16,7 +16,7 @@ async def generate(
client
:
MQLLMEngineClient
,
request_id
:
str
,
num_tokens
:
int
,
return_output
:
bool
=
False
)
->
Union
[
RequestOutput
,
T
uple
[
int
,
str
]]:
return_output
:
bool
=
False
)
->
Union
[
RequestOutput
,
t
uple
[
int
,
str
]]:
final_output
=
None
count
=
0
...
...
tests/multi_step/test_correctness_async_llm.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# Test the AsyncLLMEngine with multi-step-decoding
from
typing
import
List
,
Optional
from
typing
import
Optional
import
pytest
...
...
@@ -17,7 +17,7 @@ MODELS = [
NUM_SCHEDULER_STEPS
=
[
8
]
# Multi-step decoding steps
NUM_PROMPTS
=
[
10
]
DEFAULT_SERVER_ARGS
:
L
ist
[
str
]
=
[
DEFAULT_SERVER_ARGS
:
l
ist
[
str
]
=
[
"--distributed-executor-backend"
,
"ray"
,
"--gpu-memory-utilization"
,
...
...
tests/multimodal/test_utils.py
View file @
cf069aa8
...
...
@@ -4,7 +4,7 @@ import base64
import
mimetypes
import
os
from
tempfile
import
NamedTemporaryFile
,
TemporaryDirectory
from
typing
import
TYPE_CHECKING
,
Dict
,
NamedTuple
,
Optional
,
Tuple
from
typing
import
TYPE_CHECKING
,
NamedTuple
,
Optional
import
numpy
as
np
import
pytest
...
...
@@ -30,7 +30,7 @@ TEST_IMAGE_URLS = [
@
pytest
.
fixture
(
scope
=
"module"
)
def
url_images
()
->
D
ict
[
str
,
Image
.
Image
]:
def
url_images
()
->
d
ict
[
str
,
Image
.
Image
]:
connector
=
MediaConnector
()
return
{
...
...
@@ -39,7 +39,7 @@ def url_images() -> Dict[str, Image.Image]:
}
def
get_supported_suffixes
()
->
T
uple
[
str
,
...]:
def
get_supported_suffixes
()
->
t
uple
[
str
,
...]:
# We should at least test the file types mentioned in GPT-4 with Vision
OPENAI_SUPPORTED_SUFFIXES
=
(
'.png'
,
'.jpeg'
,
'.jpg'
,
'.webp'
,
'.gif'
)
...
...
@@ -66,7 +66,7 @@ async def test_fetch_image_http(image_url: str):
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_URLS
)
@
pytest
.
mark
.
parametrize
(
"suffix"
,
get_supported_suffixes
())
async
def
test_fetch_image_base64
(
url_images
:
D
ict
[
str
,
Image
.
Image
],
async
def
test_fetch_image_base64
(
url_images
:
d
ict
[
str
,
Image
.
Image
],
image_url
:
str
,
suffix
:
str
):
connector
=
MediaConnector
()
url_image
=
url_images
[
image_url
]
...
...
tests/neuron/test_logits_processor.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
import
random
from
typing
import
Tuple
from
unittest.mock
import
patch
import
pytest
...
...
@@ -33,7 +32,7 @@ class MockLogitsProcessor(LogitsProcessor):
def
_prepare_test
(
batch_size
:
int
)
->
T
uple
[
torch
.
Tensor
,
torch
.
Tensor
,
MockLogitsProcessor
]:
)
->
t
uple
[
torch
.
Tensor
,
torch
.
Tensor
,
MockLogitsProcessor
]:
vocab_size
=
32000
input_tensor
=
torch
.
rand
((
batch_size
,
1024
),
dtype
=
torch
.
float16
)
fake_logits
=
torch
.
full
((
batch_size
,
vocab_size
),
...
...
tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Iterable
,
Optional
,
Tuple
,
Union
from
collections.abc
import
Iterable
from
typing
import
Optional
,
Union
import
torch
import
torch.nn
as
nn
...
...
@@ -59,7 +60,7 @@ class MyGemma2Embedding(nn.Module):
)
->
Optional
[
PoolerOutput
]:
return
self
.
_pooler
(
hidden_states
,
pooling_metadata
)
def
load_weights
(
self
,
weights
:
Iterable
[
T
uple
[
str
,
torch
.
Tensor
]]):
def
load_weights
(
self
,
weights
:
Iterable
[
t
uple
[
str
,
torch
.
Tensor
]]):
weights
=
self
.
hf_to_vllm_mapper
.
apply
(
weights
)
weights
=
((
name
,
data
)
for
name
,
data
in
weights
...
...
Prev
1
…
4
5
6
7
8
9
10
11
12
…
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment