Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cf069aa8
Unverified
Commit
cf069aa8
authored
Mar 03, 2025
by
Harry Mellor
Committed by
GitHub
Mar 02, 2025
Browse files
Update deprecated Python 3.8 typing (#13971)
parent
bf33700e
Changes
300
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
112 additions
and
115 deletions
+112
-115
tests/models/decoder_only/vision_language/vlm_utils/runners.py
.../models/decoder_only/vision_language/vlm_utils/runners.py
+10
-11
tests/models/decoder_only/vision_language/vlm_utils/types.py
tests/models/decoder_only/vision_language/vlm_utils/types.py
+18
-18
tests/models/embedding/language/test_gritlm.py
tests/models/embedding/language/test_gritlm.py
+5
-6
tests/models/embedding/utils.py
tests/models/embedding/utils.py
+3
-3
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
+6
-6
tests/models/embedding/vision_language/test_llava_next.py
tests/models/embedding/vision_language/test_llava_next.py
+3
-5
tests/models/embedding/vision_language/test_phi3v.py
tests/models/embedding/vision_language/test_phi3v.py
+3
-5
tests/models/encoder_decoder/language/test_bart.py
tests/models/encoder_decoder/language/test_bart.py
+5
-5
tests/models/encoder_decoder/vision_language/test_florence2.py
.../models/encoder_decoder/vision_language/test_florence2.py
+4
-4
tests/models/encoder_decoder/vision_language/test_mllama.py
tests/models/encoder_decoder/vision_language/test_mllama.py
+18
-18
tests/models/multimodal/processing/test_h2ovl.py
tests/models/multimodal/processing/test_h2ovl.py
+2
-1
tests/models/multimodal/processing/test_internvl.py
tests/models/multimodal/processing/test_internvl.py
+2
-1
tests/models/registry.py
tests/models/registry.py
+3
-2
tests/models/test_transformers.py
tests/models/test_transformers.py
+7
-8
tests/models/utils.py
tests/models/utils.py
+11
-10
tests/mq_llm_engine/utils.py
tests/mq_llm_engine/utils.py
+2
-2
tests/multi_step/test_correctness_async_llm.py
tests/multi_step/test_correctness_async_llm.py
+2
-2
tests/multimodal/test_utils.py
tests/multimodal/test_utils.py
+4
-4
tests/neuron/test_logits_processor.py
tests/neuron/test_logits_processor.py
+1
-2
tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py
...dd_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py
+3
-2
No files found.
tests/models/decoder_only/vision_language/vlm_utils/runners.py
View file @
cf069aa8
...
@@ -3,7 +3,6 @@
...
@@ -3,7 +3,6 @@
types / modalities.
types / modalities.
"""
"""
from
pathlib
import
PosixPath
from
pathlib
import
PosixPath
from
typing
import
Type
from
.....conftest
import
HfRunner
,
VllmRunner
,
_ImageAssets
,
_VideoAssets
from
.....conftest
import
HfRunner
,
VllmRunner
,
_ImageAssets
,
_VideoAssets
from
.
import
builders
,
core
from
.
import
builders
,
core
...
@@ -13,8 +12,8 @@ from .types import ExpandableVLMTestArgs, VLMTestInfo
...
@@ -13,8 +12,8 @@ from .types import ExpandableVLMTestArgs, VLMTestInfo
####### Entrypoints for running different test types
####### Entrypoints for running different test types
def
run_single_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
def
run_single_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
inputs
=
builders
.
build_single_image_inputs_from_test_info
(
inputs
=
builders
.
build_single_image_inputs_from_test_info
(
...
@@ -36,8 +35,8 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
...
@@ -36,8 +35,8 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
def
run_multi_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
def
run_multi_image_test
(
*
,
tmp_path
:
PosixPath
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
inputs
=
builders
.
build_multi_image_inputs_from_test_info
(
inputs
=
builders
.
build_multi_image_inputs_from_test_info
(
...
@@ -59,8 +58,8 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
...
@@ -59,8 +58,8 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
def
run_embedding_test
(
*
,
model_test_info
:
VLMTestInfo
,
def
run_embedding_test
(
*
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
inputs
,
vllm_embeddings
=
builders
.
build_embedding_inputs_from_test_info
(
inputs
,
vllm_embeddings
=
builders
.
build_embedding_inputs_from_test_info
(
...
@@ -85,8 +84,8 @@ def run_video_test(
...
@@ -85,8 +84,8 @@ def run_video_test(
*
,
*
,
model_test_info
:
VLMTestInfo
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
video_assets
:
_VideoAssets
,
video_assets
:
_VideoAssets
,
):
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
...
@@ -111,8 +110,8 @@ def run_video_test(
...
@@ -111,8 +110,8 @@ def run_video_test(
def
run_custom_inputs_test
(
*
,
model_test_info
:
VLMTestInfo
,
def
run_custom_inputs_test
(
*
,
model_test_info
:
VLMTestInfo
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
]):
vllm_runner
:
t
ype
[
VllmRunner
]):
# Custom test cases can provide inputs directly, but they need to
# Custom test cases can provide inputs directly, but they need to
# explicitly provided a CustomTestConfig, which wraps the inputs and
# explicitly provided a CustomTestConfig, which wraps the inputs and
# the limit_mm_per_prompt
# the limit_mm_per_prompt
...
...
tests/models/decoder_only/vision_language/vlm_utils/types.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""Types for writing multimodal model tests."""
"""Types for writing multimodal model tests."""
from
collections.abc
import
Iterable
from
enum
import
Enum
from
enum
import
Enum
from
pathlib
import
PosixPath
from
pathlib
import
PosixPath
from
typing
import
(
Any
,
Callable
,
Dict
,
Iterable
,
List
,
NamedTuple
,
Optional
,
from
typing
import
Any
,
Callable
,
NamedTuple
,
Optional
,
Union
Tuple
,
Type
,
Union
)
import
torch
import
torch
from
PIL.Image
import
Image
from
PIL.Image
import
Image
...
@@ -35,7 +35,7 @@ VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
...
@@ -35,7 +35,7 @@ VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
IMAGE_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
),
(
0.25
,
0.5
,
1.0
)]
IMAGE_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
),
(
0.25
,
0.5
,
1.0
)]
EMBEDDING_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
)]
EMBEDDING_SIZE_FACTORS
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
)]
RunnerOutput
=
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]
RunnerOutput
=
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]
# yapf: enable
# yapf: enable
...
@@ -53,8 +53,8 @@ class SizeType(Enum):
...
@@ -53,8 +53,8 @@ class SizeType(Enum):
class
CustomTestOptions
(
NamedTuple
):
class
CustomTestOptions
(
NamedTuple
):
inputs
:
L
ist
[
T
uple
[
L
ist
[
str
],
L
ist
[
Union
[
L
ist
[
Image
],
Image
]]]]
inputs
:
l
ist
[
t
uple
[
l
ist
[
str
],
l
ist
[
Union
[
l
ist
[
Image
],
Image
]]]]
limit_mm_per_prompt
:
D
ict
[
str
,
int
]
limit_mm_per_prompt
:
d
ict
[
str
,
int
]
# kwarg to pass multimodal data in as to vllm/hf runner instances.
# kwarg to pass multimodal data in as to vllm/hf runner instances.
runner_mm_key
:
str
=
"images"
runner_mm_key
:
str
=
"images"
...
@@ -63,13 +63,13 @@ class ImageSizeWrapper(NamedTuple):
...
@@ -63,13 +63,13 @@ class ImageSizeWrapper(NamedTuple):
type
:
SizeType
type
:
SizeType
# A size factor is a wrapper of 0+ floats,
# A size factor is a wrapper of 0+ floats,
# while a fixed size contains an iterable of integer pairs
# while a fixed size contains an iterable of integer pairs
data
:
Union
[
Iterable
[
float
],
Iterable
[
T
uple
[
int
,
int
]]]
data
:
Union
[
Iterable
[
float
],
Iterable
[
t
uple
[
int
,
int
]]]
class
VLMTestInfo
(
NamedTuple
):
class
VLMTestInfo
(
NamedTuple
):
"""Holds the configuration for 1+ tests for one model architecture."""
"""Holds the configuration for 1+ tests for one model architecture."""
models
:
L
ist
[
str
]
models
:
l
ist
[
str
]
test_type
:
Union
[
VLMTestType
,
Iterable
[
VLMTestType
]]
test_type
:
Union
[
VLMTestType
,
Iterable
[
VLMTestType
]]
# Should be None only if this is a CUSTOM_INPUTS test
# Should be None only if this is a CUSTOM_INPUTS test
...
@@ -97,19 +97,19 @@ class VLMTestInfo(NamedTuple):
...
@@ -97,19 +97,19 @@ class VLMTestInfo(NamedTuple):
max_num_seqs
:
int
=
256
max_num_seqs
:
int
=
256
task
:
TaskOption
=
"auto"
task
:
TaskOption
=
"auto"
tensor_parallel_size
:
int
=
1
tensor_parallel_size
:
int
=
1
vllm_runner_kwargs
:
Optional
[
D
ict
[
str
,
Any
]]
=
None
vllm_runner_kwargs
:
Optional
[
d
ict
[
str
,
Any
]]
=
None
# Optional callable which gets a list of token IDs from the model tokenizer
# Optional callable which gets a list of token IDs from the model tokenizer
get_stop_token_ids
:
Optional
[
Callable
[[
AnyTokenizer
],
list
[
int
]]]
=
None
get_stop_token_ids
:
Optional
[
Callable
[[
AnyTokenizer
],
list
[
int
]]]
=
None
# Optional list of strings to stop generation, useful when stop tokens are
# Optional list of strings to stop generation, useful when stop tokens are
# not special tokens in the tokenizer
# not special tokens in the tokenizer
stop_str
:
Optional
[
L
ist
[
str
]]
=
None
stop_str
:
Optional
[
l
ist
[
str
]]
=
None
# Exposed options for HF runner
# Exposed options for HF runner
hf_model_kwargs
:
Optional
[
D
ict
[
str
,
Any
]]
=
None
hf_model_kwargs
:
Optional
[
d
ict
[
str
,
Any
]]
=
None
# Indicates we should explicitly pass the EOS from the tokenizer
# Indicates we should explicitly pass the EOS from the tokenizer
use_tokenizer_eos
:
bool
=
False
use_tokenizer_eos
:
bool
=
False
auto_cls
:
T
ype
[
_BaseAutoModelClass
]
=
AutoModelForCausalLM
auto_cls
:
t
ype
[
_BaseAutoModelClass
]
=
AutoModelForCausalLM
# Callable to pass to the HF runner to run on inputs; for now, we also pass
# Callable to pass to the HF runner to run on inputs; for now, we also pass
# the data type to input post processing, because almost all of the uses of
# the data type to input post processing, because almost all of the uses of
# postprocess_inputs are to fix the data types of BatchEncoding values.
# postprocess_inputs are to fix the data types of BatchEncoding values.
...
@@ -128,12 +128,12 @@ class VLMTestInfo(NamedTuple):
...
@@ -128,12 +128,12 @@ class VLMTestInfo(NamedTuple):
# Default expandable params per test; these defaults can be overridden in
# Default expandable params per test; these defaults can be overridden in
# instances of this object; the complete set of test cases for the model
# instances of this object; the complete set of test cases for the model
# is all combinations of .models + all fields below
# is all combinations of .models + all fields below
max_tokens
:
Union
[
int
,
T
uple
[
int
]]
=
128
max_tokens
:
Union
[
int
,
t
uple
[
int
]]
=
128
num_logprobs
:
Union
[
int
,
T
uple
[
int
]]
=
5
num_logprobs
:
Union
[
int
,
t
uple
[
int
]]
=
5
dtype
:
Union
[
str
,
Iterable
[
str
]]
=
"half"
dtype
:
Union
[
str
,
Iterable
[
str
]]
=
"half"
distributed_executor_backend
:
Optional
[
Union
[
str
,
Iterable
[
str
]]]
=
None
distributed_executor_backend
:
Optional
[
Union
[
str
,
Iterable
[
str
]]]
=
None
# Only expanded in video tests
# Only expanded in video tests
num_video_frames
:
Union
[
int
,
T
uple
[
int
]]
=
16
num_video_frames
:
Union
[
int
,
t
uple
[
int
]]
=
16
# Fixed image sizes / image size factors; most tests use image_size_factors
# Fixed image sizes / image size factors; most tests use image_size_factors
# The values provided for these two fields will be stacked and expanded
# The values provided for these two fields will be stacked and expanded
...
@@ -141,19 +141,19 @@ class VLMTestInfo(NamedTuple):
...
@@ -141,19 +141,19 @@ class VLMTestInfo(NamedTuple):
# once per tests (much like concatenating and wrapping in one parametrize
# once per tests (much like concatenating and wrapping in one parametrize
# call)
# call)
image_size_factors
:
Iterable
[
Iterable
[
float
]]
=
IMAGE_SIZE_FACTORS
image_size_factors
:
Iterable
[
Iterable
[
float
]]
=
IMAGE_SIZE_FACTORS
image_sizes
:
Optional
[
Iterable
[
Iterable
[
T
uple
[
int
,
int
]]]]
=
None
image_sizes
:
Optional
[
Iterable
[
Iterable
[
t
uple
[
int
,
int
]]]]
=
None
# Hack for updating a prompt to take into a local path; currently only used
# Hack for updating a prompt to take into a local path; currently only used
# for Qwen-VL, which requires encoding the image path / url into the prompt
# for Qwen-VL, which requires encoding the image path / url into the prompt
# for HF runner
# for HF runner
prompt_path_encoder
:
Optional
[
prompt_path_encoder
:
Optional
[
Callable
[[
PosixPath
,
str
,
Union
[
L
ist
[
ImageAsset
],
_ImageAssets
]],
Callable
[[
PosixPath
,
str
,
Union
[
l
ist
[
ImageAsset
],
_ImageAssets
]],
str
]]
=
None
# noqa: E501
str
]]
=
None
# noqa: E501
# Allows configuring a test to run with custom inputs
# Allows configuring a test to run with custom inputs
custom_test_opts
:
Optional
[
L
ist
[
CustomTestOptions
]]
=
None
custom_test_opts
:
Optional
[
l
ist
[
CustomTestOptions
]]
=
None
marks
:
Optional
[
L
ist
[
MarkDecorator
]]
=
None
marks
:
Optional
[
l
ist
[
MarkDecorator
]]
=
None
def
get_non_parametrized_runner_kwargs
(
self
):
def
get_non_parametrized_runner_kwargs
(
self
):
"""Returns a dictionary of expandable kwargs for items that are used
"""Returns a dictionary of expandable kwargs for items that are used
...
...
tests/models/embedding/language/test_gritlm.py
View file @
cf069aa8
...
@@ -3,7 +3,6 @@
...
@@ -3,7 +3,6 @@
import
importlib.util
import
importlib.util
import
math
import
math
from
array
import
array
from
array
import
array
from
typing
import
List
import
openai
import
openai
import
pytest
import
pytest
...
@@ -81,14 +80,14 @@ async def client_generate(server_generate: RemoteOpenAIServer):
...
@@ -81,14 +80,14 @@ async def client_generate(server_generate: RemoteOpenAIServer):
yield
async_client
yield
async_client
def
run_llm_encode
(
llm
:
vllm
.
LLM
,
queries
:
L
ist
[
str
],
def
run_llm_encode
(
llm
:
vllm
.
LLM
,
queries
:
l
ist
[
str
],
instruction
:
str
)
->
L
ist
[
float
]:
instruction
:
str
)
->
l
ist
[
float
]:
outputs
=
llm
.
encode
([
instruction
+
q
for
q
in
queries
],
)
outputs
=
llm
.
encode
([
instruction
+
q
for
q
in
queries
],
)
return
[
output
.
outputs
.
embedding
for
output
in
outputs
]
return
[
output
.
outputs
.
embedding
for
output
in
outputs
]
async
def
run_client_embeddings
(
client
:
vllm
.
LLM
,
queries
:
L
ist
[
str
],
async
def
run_client_embeddings
(
client
:
vllm
.
LLM
,
queries
:
l
ist
[
str
],
instruction
:
str
)
->
L
ist
[
float
]:
instruction
:
str
)
->
l
ist
[
float
]:
outputs
=
await
client
.
embeddings
.
create
(
outputs
=
await
client
.
embeddings
.
create
(
model
=
MODEL_NAME
,
model
=
MODEL_NAME
,
input
=
[
instruction
+
q
for
q
in
queries
],
input
=
[
instruction
+
q
for
q
in
queries
],
...
@@ -123,7 +122,7 @@ def get_test_data():
...
@@ -123,7 +122,7 @@ def get_test_data():
return
queries
,
q_instruction
,
documents
,
d_instruction
return
queries
,
q_instruction
,
documents
,
d_instruction
def
validate_embed_output
(
q_rep
:
L
ist
[
float
],
d_rep
:
L
ist
[
float
]):
def
validate_embed_output
(
q_rep
:
l
ist
[
float
],
d_rep
:
l
ist
[
float
]):
cosine_sim_q0_d0
=
1
-
cosine
(
q_rep
[
0
],
d_rep
[
0
])
cosine_sim_q0_d0
=
1
-
cosine
(
q_rep
[
0
],
d_rep
[
0
])
assert
math
.
isclose
(
cosine_sim_q0_d0
,
0.609
,
abs_tol
=
0.001
)
assert
math
.
isclose
(
cosine_sim_q0_d0
,
0.609
,
abs_tol
=
0.001
)
...
...
tests/models/embedding/utils.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Sequence
from
collections.abc
import
Sequence
import
torch
import
torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
...
@@ -8,8 +8,8 @@ import torch.nn.functional as F
...
@@ -8,8 +8,8 @@ import torch.nn.functional as F
def
check_embeddings_close
(
def
check_embeddings_close
(
*
,
*
,
embeddings_0_lst
:
Sequence
[
L
ist
[
float
]],
embeddings_0_lst
:
Sequence
[
l
ist
[
float
]],
embeddings_1_lst
:
Sequence
[
L
ist
[
float
]],
embeddings_1_lst
:
Sequence
[
l
ist
[
float
]],
name_0
:
str
,
name_0
:
str
,
name_1
:
str
,
name_1
:
str
,
tol
:
float
=
1e-3
,
tol
:
float
=
1e-3
,
...
...
tests/models/embedding/vision_language/test_dse_qwen2_vl.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
functools
import
partial
from
functools
import
partial
from
typing
import
Callable
,
Dict
,
List
,
Type
from
typing
import
Callable
import
pytest
import
pytest
import
torch
import
torch
...
@@ -67,7 +67,7 @@ def get_messages(image: Image.Image, text: str, embed_text: bool):
...
@@ -67,7 +67,7 @@ def get_messages(image: Image.Image, text: str, embed_text: bool):
def
apply_chat_template_and_add_eos
(
def
apply_chat_template_and_add_eos
(
messages
:
L
ist
[
D
ict
],
messages
:
l
ist
[
d
ict
],
apply_chat_template_fn
:
Callable
,
apply_chat_template_fn
:
Callable
,
):
):
prompt
=
apply_chat_template_fn
(
prompt
=
apply_chat_template_fn
(
...
@@ -80,11 +80,11 @@ def postprocess_inputs(hf_model: HfRunner, inputs: BatchEncoding, **kwargs):
...
@@ -80,11 +80,11 @@ def postprocess_inputs(hf_model: HfRunner, inputs: BatchEncoding, **kwargs):
def
_run_test
(
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
input_images
:
PromptImageInput
,
embed_texts
:
L
ist
[
bool
],
embed_texts
:
l
ist
[
bool
],
model
:
str
,
model
:
str
,
*
,
*
,
dtype
:
str
,
dtype
:
str
,
...
...
tests/models/embedding/vision_language/test_llava_next.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Type
import
pytest
import
pytest
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
transformers
import
AutoModelForVision2Seq
from
transformers
import
AutoModelForVision2Seq
...
@@ -35,9 +33,9 @@ MODELS = ["royokong/e5-v"]
...
@@ -35,9 +33,9 @@ MODELS = ["royokong/e5-v"]
def
_run_test
(
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
input_images
:
PromptImageInput
,
model
:
str
,
model
:
str
,
*
,
*
,
...
...
tests/models/embedding/vision_language/test_phi3v.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Type
import
pytest
import
pytest
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
...
@@ -29,9 +27,9 @@ MODELS = ["TIGER-Lab/VLM2Vec-Full"]
...
@@ -29,9 +27,9 @@ MODELS = ["TIGER-Lab/VLM2Vec-Full"]
def
_run_test
(
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
input_texts
:
L
ist
[
str
],
input_texts
:
l
ist
[
str
],
input_images
:
PromptImageInput
,
input_images
:
PromptImageInput
,
model
:
str
,
model
:
str
,
*
,
*
,
...
...
tests/models/encoder_decoder/language/test_bart.py
View file @
cf069aa8
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
Run `pytest tests/models/encoder_decoder/language/test_bart.py`.
Run `pytest tests/models/encoder_decoder/language/test_bart.py`.
"""
"""
from
typing
import
List
,
Optional
,
Tuple
,
Type
from
typing
import
Optional
import
pytest
import
pytest
from
transformers
import
AutoModelForSeq2SeqLM
from
transformers
import
AutoModelForSeq2SeqLM
...
@@ -17,7 +17,7 @@ from ...utils import check_logprobs_close
...
@@ -17,7 +17,7 @@ from ...utils import check_logprobs_close
def
vllm_to_hf_output
(
def
vllm_to_hf_output
(
vllm_output
:
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
vllm_output
:
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
decoder_prompt_type
:
DecoderPromptType
,
decoder_prompt_type
:
DecoderPromptType
,
):
):
"""Sanitize vllm output to be comparable with hf output."""
"""Sanitize vllm output to be comparable with hf output."""
...
@@ -31,9 +31,9 @@ def vllm_to_hf_output(
...
@@ -31,9 +31,9 @@ def vllm_to_hf_output(
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
prompts
:
L
ist
[
ExplicitEncoderDecoderPrompt
[
str
,
str
]],
prompts
:
l
ist
[
ExplicitEncoderDecoderPrompt
[
str
,
str
]],
decoder_prompt_type
:
DecoderPromptType
,
decoder_prompt_type
:
DecoderPromptType
,
model
:
str
,
model
:
str
,
*
,
*
,
...
...
tests/models/encoder_decoder/vision_language/test_florence2.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Optional
,
Type
from
typing
import
Optional
import
pytest
import
pytest
from
PIL
import
Image
from
PIL
import
Image
...
@@ -51,8 +51,8 @@ def hf_to_vllm_output(hf_output: tuple[list[int], str,
...
@@ -51,8 +51,8 @@ def hf_to_vllm_output(hf_output: tuple[list[int], str,
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
inputs
:
list
[
list
[
ExplicitEncoderDecoderPrompt
]],
inputs
:
list
[
list
[
ExplicitEncoderDecoderPrompt
]],
model
:
str
,
model
:
str
,
*
,
*
,
...
@@ -114,7 +114,7 @@ def run_test(
...
@@ -114,7 +114,7 @@ def run_test(
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
64
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
64
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
def
test_models
(
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
def
test_models
(
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
model
:
str
,
image_assets
:
_ImageAssets
,
model
:
str
,
size_factors
:
list
[
int
],
dtype
:
str
,
max_tokens
:
int
,
size_factors
:
list
[
int
],
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
num_logprobs
:
int
)
->
None
:
...
...
tests/models/encoder_decoder/vision_language/test_mllama.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
,
Tuple
,
Type
,
overload
from
typing
import
Optional
,
overload
import
pytest
import
pytest
import
torch
import
torch
...
@@ -64,7 +64,7 @@ prompt_data = {
...
@@ -64,7 +64,7 @@ prompt_data = {
}
}
def
vllm_to_hf_output
(
vllm_output
:
T
uple
[
L
ist
[
int
],
str
,
def
vllm_to_hf_output
(
vllm_output
:
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
Optional
[
SampleLogprobs
]],
model
:
str
):
model
:
str
):
"""Sanitize vllm output to be comparable with hf output."""
"""Sanitize vllm output to be comparable with hf output."""
...
@@ -91,9 +91,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
...
@@ -91,9 +91,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
def
_get_inputs
(
def
_get_inputs
(
image_assets
:
_ImageAssets
,
image_assets
:
_ImageAssets
,
*
,
*
,
size_factors
:
Optional
[
L
ist
[
float
]]
=
None
,
size_factors
:
Optional
[
l
ist
[
float
]]
=
None
,
sizes
:
Optional
[
L
ist
[
T
uple
[
int
,
int
]]]
=
None
,
sizes
:
Optional
[
l
ist
[
t
uple
[
int
,
int
]]]
=
None
,
)
->
L
ist
[
T
uple
[
L
ist
[
str
],
PromptImageInput
]]:
)
->
l
ist
[
t
uple
[
l
ist
[
str
],
PromptImageInput
]]:
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
if
size_factors
is
not
None
:
if
size_factors
is
not
None
:
...
@@ -123,12 +123,12 @@ def _get_inputs(
...
@@ -123,12 +123,12 @@ def _get_inputs(
@
overload
@
overload
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
image_assets
:
_ImageAssets
,
model
:
str
,
model
:
str
,
*
,
*
,
size_factors
:
L
ist
[
float
],
size_factors
:
l
ist
[
float
],
dtype
:
str
,
dtype
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
num_logprobs
:
int
,
num_logprobs
:
int
,
...
@@ -140,12 +140,12 @@ def run_test(
...
@@ -140,12 +140,12 @@ def run_test(
@
overload
@
overload
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
image_assets
:
_ImageAssets
,
model
:
str
,
model
:
str
,
*
,
*
,
sizes
:
L
ist
[
T
uple
[
int
,
int
]],
sizes
:
l
ist
[
t
uple
[
int
,
int
]],
dtype
:
str
,
dtype
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
num_logprobs
:
int
,
num_logprobs
:
int
,
...
@@ -156,13 +156,13 @@ def run_test(
...
@@ -156,13 +156,13 @@ def run_test(
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
image_assets
:
_ImageAssets
,
model
:
str
,
model
:
str
,
*
,
*
,
size_factors
:
Optional
[
L
ist
[
float
]]
=
None
,
size_factors
:
Optional
[
l
ist
[
float
]]
=
None
,
sizes
:
Optional
[
L
ist
[
T
uple
[
int
,
int
]]]
=
None
,
sizes
:
Optional
[
l
ist
[
t
uple
[
int
,
int
]]]
=
None
,
dtype
:
str
,
dtype
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
num_logprobs
:
int
,
num_logprobs
:
int
,
...
@@ -183,9 +183,9 @@ def run_test(
...
@@ -183,9 +183,9 @@ def run_test(
def
_run_test
(
def
_run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
inputs
:
L
ist
[
T
uple
[
L
ist
[
str
],
PromptImageInput
]],
inputs
:
l
ist
[
t
uple
[
l
ist
[
str
],
PromptImageInput
]],
model
:
str
,
model
:
str
,
*
,
*
,
dtype
:
str
,
dtype
:
str
,
...
...
tests/models/multimodal/processing/test_h2ovl.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""Tests for H2OVL's multimodal preprocessing kwargs."""
"""Tests for H2OVL's multimodal preprocessing kwargs."""
from
typing
import
Mapping
,
Optional
from
collections.abc
import
Mapping
from
typing
import
Optional
import
pytest
import
pytest
from
PIL
import
Image
from
PIL
import
Image
...
...
tests/models/multimodal/processing/test_internvl.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""Tests for InternVL's multimodal preprocessing kwargs."""
"""Tests for InternVL's multimodal preprocessing kwargs."""
from
typing
import
Mapping
,
Optional
from
collections.abc
import
Mapping
from
typing
import
Optional
import
pytest
import
pytest
from
PIL
import
Image
from
PIL
import
Image
...
...
tests/models/registry.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
collections.abc
import
Mapping
,
Set
from
dataclasses
import
dataclass
,
field
from
dataclasses
import
dataclass
,
field
from
typing
import
AbstractSet
,
Any
,
Literal
,
Mapping
,
Optional
from
typing
import
Any
,
Literal
,
Optional
import
pytest
import
pytest
from
packaging.version
import
Version
from
packaging.version
import
Version
...
@@ -324,7 +325,7 @@ class HfExampleModels:
...
@@ -324,7 +325,7 @@ class HfExampleModels:
self
.
hf_models
=
hf_models
self
.
hf_models
=
hf_models
def
get_supported_archs
(
self
)
->
Abstract
Set
[
str
]:
def
get_supported_archs
(
self
)
->
Set
[
str
]:
return
self
.
hf_models
.
keys
()
return
self
.
hf_models
.
keys
()
def
get_hf_info
(
self
,
model_arch
:
str
)
->
_HfExamplesInfo
:
def
get_hf_info
(
self
,
model_arch
:
str
)
->
_HfExamplesInfo
:
...
...
tests/models/test_transformers.py
View file @
cf069aa8
...
@@ -4,7 +4,6 @@
...
@@ -4,7 +4,6 @@
Run `pytest tests/models/test_transformers.py`.
Run `pytest tests/models/test_transformers.py`.
"""
"""
from
contextlib
import
nullcontext
from
contextlib
import
nullcontext
from
typing
import
Type
import
pytest
import
pytest
...
@@ -14,8 +13,8 @@ from .utils import check_logprobs_close
...
@@ -14,8 +13,8 @@ from .utils import check_logprobs_close
def
check_implementation
(
def
check_implementation
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
example_prompts
:
list
[
str
],
example_prompts
:
list
[
str
],
model
:
str
,
model
:
str
,
**
kwargs
,
**
kwargs
,
...
@@ -47,8 +46,8 @@ def check_implementation(
...
@@ -47,8 +46,8 @@ def check_implementation(
(
"ArthurZ/Ilama-3.2-1B"
,
"auto"
),
# CUSTOM CODE
(
"ArthurZ/Ilama-3.2-1B"
,
"auto"
),
# CUSTOM CODE
])
# trust_remote_code=True by default
])
# trust_remote_code=True by default
def
test_models
(
def
test_models
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
example_prompts
:
list
[
str
],
example_prompts
:
list
[
str
],
model
:
str
,
model
:
str
,
model_impl
:
str
,
model_impl
:
str
,
...
@@ -71,8 +70,8 @@ def test_models(
...
@@ -71,8 +70,8 @@ def test_models(
@
multi_gpu_test
(
num_gpus
=
2
)
@
multi_gpu_test
(
num_gpus
=
2
)
def
test_distributed
(
def
test_distributed
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
example_prompts
,
example_prompts
,
):
):
kwargs
=
{
"model_impl"
:
"transformers"
,
"tensor_parallel_size"
:
2
}
kwargs
=
{
"model_impl"
:
"transformers"
,
"tensor_parallel_size"
:
2
}
...
@@ -92,7 +91,7 @@ def test_distributed(
...
@@ -92,7 +91,7 @@ def test_distributed(
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
32
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
32
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
def
test_quantization
(
def
test_quantization
(
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
example_prompts
:
list
[
str
],
example_prompts
:
list
[
str
],
model
:
str
,
model
:
str
,
quantization_kwargs
:
dict
[
str
,
str
],
quantization_kwargs
:
dict
[
str
,
str
],
...
...
tests/models/utils.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
warnings
import
warnings
from
typing
import
Dict
,
List
,
Optional
,
Sequence
,
Tuple
,
Union
from
collections.abc
import
Sequence
from
typing
import
Optional
,
Union
import
torch
import
torch
...
@@ -9,7 +10,7 @@ from vllm.config import ModelConfig, TaskOption
...
@@ -9,7 +10,7 @@ from vllm.config import ModelConfig, TaskOption
from
vllm.inputs
import
InputContext
from
vllm.inputs
import
InputContext
from
vllm.sequence
import
Logprob
,
PromptLogprobs
,
SampleLogprobs
from
vllm.sequence
import
Logprob
,
PromptLogprobs
,
SampleLogprobs
TokensText
=
T
uple
[
L
ist
[
int
],
str
]
TokensText
=
t
uple
[
l
ist
[
int
],
str
]
def
check_outputs_equal
(
def
check_outputs_equal
(
...
@@ -46,7 +47,7 @@ def check_outputs_equal(
...
@@ -46,7 +47,7 @@ def check_outputs_equal(
# * List of top sample logprobs for each sampled token
# * List of top sample logprobs for each sampled token
#
#
# Assumes prompt logprobs were not requested.
# Assumes prompt logprobs were not requested.
TokensTextLogprobs
=
T
uple
[
L
ist
[
int
],
str
,
Optional
[
Union
[
L
ist
[
D
ict
[
int
,
TokensTextLogprobs
=
t
uple
[
l
ist
[
int
],
str
,
Optional
[
Union
[
l
ist
[
d
ict
[
int
,
float
]],
float
]],
SampleLogprobs
]]]
SampleLogprobs
]]]
...
@@ -57,8 +58,8 @@ TokensTextLogprobs = Tuple[List[int], str, Optional[Union[List[Dict[int,
...
@@ -57,8 +58,8 @@ TokensTextLogprobs = Tuple[List[int], str, Optional[Union[List[Dict[int,
# * Optional list of top sample logprobs for each sampled token
# * Optional list of top sample logprobs for each sampled token
#
#
# Assumes prompt logprobs were not requested.
# Assumes prompt logprobs were not requested.
TextTextLogprobs
=
T
uple
[
L
ist
[
str
],
str
,
Optional
[
Union
[
L
ist
[
D
ict
[
str
,
float
]],
TextTextLogprobs
=
t
uple
[
l
ist
[
str
],
str
,
Optional
[
Union
[
l
ist
[
d
ict
[
str
,
float
]],
L
ist
[
D
ict
[
str
,
l
ist
[
d
ict
[
str
,
Logprob
]]]]]
Logprob
]]]]]
# Representation of generated sequence as a tuple of
# Representation of generated sequence as a tuple of
...
@@ -68,9 +69,9 @@ TextTextLogprobs = Tuple[List[str], str, Optional[Union[List[Dict[str, float]],
...
@@ -68,9 +69,9 @@ TextTextLogprobs = Tuple[List[str], str, Optional[Union[List[Dict[str, float]],
# * Optional list of top prompt logprobs for each prompt token
# * Optional list of top prompt logprobs for each prompt token
#
#
# Allows prompt logprobs to be requested.
# Allows prompt logprobs to be requested.
TokensTextLogprobsPromptLogprobs
=
T
uple
[
TokensTextLogprobsPromptLogprobs
=
t
uple
[
L
ist
[
int
],
str
,
Optional
[
Union
[
L
ist
[
D
ict
[
int
,
float
]],
SampleLogprobs
]],
l
ist
[
int
],
str
,
Optional
[
Union
[
l
ist
[
d
ict
[
int
,
float
]],
SampleLogprobs
]],
Optional
[
Union
[
L
ist
[
Optional
[
D
ict
[
int
,
float
]]],
PromptLogprobs
]]]
Optional
[
Union
[
l
ist
[
Optional
[
d
ict
[
int
,
float
]]],
PromptLogprobs
]]]
def
check_logprobs_close
(
def
check_logprobs_close
(
...
@@ -254,8 +255,8 @@ def build_model_context(
...
@@ -254,8 +255,8 @@ def build_model_context(
tokenizer_name
:
Optional
[
str
]
=
None
,
tokenizer_name
:
Optional
[
str
]
=
None
,
trust_remote_code
:
bool
=
False
,
trust_remote_code
:
bool
=
False
,
dtype
:
Optional
[
Union
[
str
,
torch
.
dtype
]]
=
None
,
dtype
:
Optional
[
Union
[
str
,
torch
.
dtype
]]
=
None
,
mm_processor_kwargs
:
Optional
[
D
ict
]
=
None
,
mm_processor_kwargs
:
Optional
[
d
ict
]
=
None
,
limit_mm_per_prompt
:
Optional
[
D
ict
]
=
None
,
limit_mm_per_prompt
:
Optional
[
d
ict
]
=
None
,
disable_mm_preprocessor_cache
:
bool
=
True
,
disable_mm_preprocessor_cache
:
bool
=
True
,
):
):
"""Creates an InputContext for a given model.
"""Creates an InputContext for a given model.
...
...
tests/mq_llm_engine/utils.py
View file @
cf069aa8
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
import
asyncio
import
asyncio
import
multiprocessing
import
multiprocessing
from
typing
import
Callable
,
Tuple
,
Union
from
typing
import
Callable
,
Union
from
vllm
import
SamplingParams
from
vllm
import
SamplingParams
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
...
@@ -16,7 +16,7 @@ async def generate(
...
@@ -16,7 +16,7 @@ async def generate(
client
:
MQLLMEngineClient
,
client
:
MQLLMEngineClient
,
request_id
:
str
,
request_id
:
str
,
num_tokens
:
int
,
num_tokens
:
int
,
return_output
:
bool
=
False
)
->
Union
[
RequestOutput
,
T
uple
[
int
,
str
]]:
return_output
:
bool
=
False
)
->
Union
[
RequestOutput
,
t
uple
[
int
,
str
]]:
final_output
=
None
final_output
=
None
count
=
0
count
=
0
...
...
tests/multi_step/test_correctness_async_llm.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# Test the AsyncLLMEngine with multi-step-decoding
# Test the AsyncLLMEngine with multi-step-decoding
from
typing
import
List
,
Optional
from
typing
import
Optional
import
pytest
import
pytest
...
@@ -17,7 +17,7 @@ MODELS = [
...
@@ -17,7 +17,7 @@ MODELS = [
NUM_SCHEDULER_STEPS
=
[
8
]
# Multi-step decoding steps
NUM_SCHEDULER_STEPS
=
[
8
]
# Multi-step decoding steps
NUM_PROMPTS
=
[
10
]
NUM_PROMPTS
=
[
10
]
DEFAULT_SERVER_ARGS
:
L
ist
[
str
]
=
[
DEFAULT_SERVER_ARGS
:
l
ist
[
str
]
=
[
"--distributed-executor-backend"
,
"--distributed-executor-backend"
,
"ray"
,
"ray"
,
"--gpu-memory-utilization"
,
"--gpu-memory-utilization"
,
...
...
tests/multimodal/test_utils.py
View file @
cf069aa8
...
@@ -4,7 +4,7 @@ import base64
...
@@ -4,7 +4,7 @@ import base64
import
mimetypes
import
mimetypes
import
os
import
os
from
tempfile
import
NamedTemporaryFile
,
TemporaryDirectory
from
tempfile
import
NamedTemporaryFile
,
TemporaryDirectory
from
typing
import
TYPE_CHECKING
,
Dict
,
NamedTuple
,
Optional
,
Tuple
from
typing
import
TYPE_CHECKING
,
NamedTuple
,
Optional
import
numpy
as
np
import
numpy
as
np
import
pytest
import
pytest
...
@@ -30,7 +30,7 @@ TEST_IMAGE_URLS = [
...
@@ -30,7 +30,7 @@ TEST_IMAGE_URLS = [
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest
.
fixture
(
scope
=
"module"
)
def
url_images
()
->
D
ict
[
str
,
Image
.
Image
]:
def
url_images
()
->
d
ict
[
str
,
Image
.
Image
]:
connector
=
MediaConnector
()
connector
=
MediaConnector
()
return
{
return
{
...
@@ -39,7 +39,7 @@ def url_images() -> Dict[str, Image.Image]:
...
@@ -39,7 +39,7 @@ def url_images() -> Dict[str, Image.Image]:
}
}
def
get_supported_suffixes
()
->
T
uple
[
str
,
...]:
def
get_supported_suffixes
()
->
t
uple
[
str
,
...]:
# We should at least test the file types mentioned in GPT-4 with Vision
# We should at least test the file types mentioned in GPT-4 with Vision
OPENAI_SUPPORTED_SUFFIXES
=
(
'.png'
,
'.jpeg'
,
'.jpg'
,
'.webp'
,
'.gif'
)
OPENAI_SUPPORTED_SUFFIXES
=
(
'.png'
,
'.jpeg'
,
'.jpg'
,
'.webp'
,
'.gif'
)
...
@@ -66,7 +66,7 @@ async def test_fetch_image_http(image_url: str):
...
@@ -66,7 +66,7 @@ async def test_fetch_image_http(image_url: str):
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_URLS
)
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_URLS
)
@
pytest
.
mark
.
parametrize
(
"suffix"
,
get_supported_suffixes
())
@
pytest
.
mark
.
parametrize
(
"suffix"
,
get_supported_suffixes
())
async
def
test_fetch_image_base64
(
url_images
:
D
ict
[
str
,
Image
.
Image
],
async
def
test_fetch_image_base64
(
url_images
:
d
ict
[
str
,
Image
.
Image
],
image_url
:
str
,
suffix
:
str
):
image_url
:
str
,
suffix
:
str
):
connector
=
MediaConnector
()
connector
=
MediaConnector
()
url_image
=
url_images
[
image_url
]
url_image
=
url_images
[
image_url
]
...
...
tests/neuron/test_logits_processor.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
random
import
random
from
typing
import
Tuple
from
unittest.mock
import
patch
from
unittest.mock
import
patch
import
pytest
import
pytest
...
@@ -33,7 +32,7 @@ class MockLogitsProcessor(LogitsProcessor):
...
@@ -33,7 +32,7 @@ class MockLogitsProcessor(LogitsProcessor):
def
_prepare_test
(
def
_prepare_test
(
batch_size
:
int
batch_size
:
int
)
->
T
uple
[
torch
.
Tensor
,
torch
.
Tensor
,
MockLogitsProcessor
]:
)
->
t
uple
[
torch
.
Tensor
,
torch
.
Tensor
,
MockLogitsProcessor
]:
vocab_size
=
32000
vocab_size
=
32000
input_tensor
=
torch
.
rand
((
batch_size
,
1024
),
dtype
=
torch
.
float16
)
input_tensor
=
torch
.
rand
((
batch_size
,
1024
),
dtype
=
torch
.
float16
)
fake_logits
=
torch
.
full
((
batch_size
,
vocab_size
),
fake_logits
=
torch
.
full
((
batch_size
,
vocab_size
),
...
...
tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Iterable
,
Optional
,
Tuple
,
Union
from
collections.abc
import
Iterable
from
typing
import
Optional
,
Union
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
...
@@ -59,7 +60,7 @@ class MyGemma2Embedding(nn.Module):
...
@@ -59,7 +60,7 @@ class MyGemma2Embedding(nn.Module):
)
->
Optional
[
PoolerOutput
]:
)
->
Optional
[
PoolerOutput
]:
return
self
.
_pooler
(
hidden_states
,
pooling_metadata
)
return
self
.
_pooler
(
hidden_states
,
pooling_metadata
)
def
load_weights
(
self
,
weights
:
Iterable
[
T
uple
[
str
,
torch
.
Tensor
]]):
def
load_weights
(
self
,
weights
:
Iterable
[
t
uple
[
str
,
torch
.
Tensor
]]):
weights
=
self
.
hf_to_vllm_mapper
.
apply
(
weights
)
weights
=
self
.
hf_to_vllm_mapper
.
apply
(
weights
)
weights
=
((
name
,
data
)
for
name
,
data
in
weights
weights
=
((
name
,
data
)
for
name
,
data
in
weights
...
...
Prev
1
…
4
5
6
7
8
9
10
11
12
…
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment