Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
27f4c2fd
Unverified
Commit
27f4c2fd
authored
Dec 07, 2025
by
Cyrus Leung
Committed by
GitHub
Dec 06, 2025
Browse files
[Renderer] Separate out `RendererConfig` from `ModelConfig` (#30145)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
a49d813f
Changes
105
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
194 additions
and
200 deletions
+194
-200
docs/contributing/model/transcription.md
docs/contributing/model/transcription.md
+6
-6
tests/compile/distributed/test_sequence_parallelism.py
tests/compile/distributed/test_sequence_parallelism.py
+2
-0
tests/compile/test_functionalization.py
tests/compile/test_functionalization.py
+5
-1
tests/compile/test_fusion.py
tests/compile/test_fusion.py
+5
-1
tests/compile/test_fusion_attn.py
tests/compile/test_fusion_attn.py
+2
-0
tests/compile/test_pass_manager.py
tests/compile/test_pass_manager.py
+6
-2
tests/compile/test_qk_norm_rope_fusion.py
tests/compile/test_qk_norm_rope_fusion.py
+4
-1
tests/distributed/test_kvlayout.py
tests/distributed/test_kvlayout.py
+3
-0
tests/entrypoints/openai/test_chat_template.py
tests/entrypoints/openai/test_chat_template.py
+4
-18
tests/entrypoints/openai/test_lora_resolvers.py
tests/entrypoints/openai/test_lora_resolvers.py
+15
-6
tests/entrypoints/openai/test_serving_chat.py
tests/entrypoints/openai/test_serving_chat.py
+21
-7
tests/entrypoints/openai/test_serving_engine.py
tests/entrypoints/openai/test_serving_engine.py
+7
-1
tests/entrypoints/openai/test_serving_models.py
tests/entrypoints/openai/test_serving_models.py
+7
-1
tests/entrypoints/test_chat_utils.py
tests/entrypoints/test_chat_utils.py
+71
-123
tests/lora/test_lora_manager.py
tests/lora/test_lora_manager.py
+11
-3
tests/lora/test_worker.py
tests/lora/test_worker.py
+2
-0
tests/model_executor/test_model_load_with_params.py
tests/model_executor/test_model_load_with_params.py
+13
-9
tests/models/language/pooling/test_gritlm.py
tests/models/language/pooling/test_gritlm.py
+3
-2
tests/models/multimodal/processing/test_common.py
tests/models/multimodal/processing/test_common.py
+5
-17
tests/models/multimodal/processing/test_glm4_1v.py
tests/models/multimodal/processing/test_glm4_1v.py
+2
-2
No files found.
docs/contributing/model/transcription.md
View file @
27f4c2fd
...
@@ -22,7 +22,7 @@ Declare supported languages and capabilities:
...
@@ -22,7 +22,7 @@ Declare supported languages and capabilities:
import torch
import torch
from torch import nn
from torch import nn
from vllm.config import
Model
Config, SpeechToTextConfig
from vllm.config import
Renderer
Config, SpeechToTextConfig
from vllm.inputs.data import PromptType
from vllm.inputs.data import PromptType
from vllm.model_executor.models.interfaces import SupportsTranscription
from vllm.model_executor.models.interfaces import SupportsTranscription
...
@@ -52,7 +52,7 @@ This is for controlling general behavior of the API when serving your model:
...
@@ -52,7 +52,7 @@ This is for controlling general behavior of the API when serving your model:
@classmethod
@classmethod
def get_speech_to_text_config(
def get_speech_to_text_config(
cls,
cls,
model
_config:
Model
Config,
renderer
_config:
Renderer
Config,
task_type: Literal["transcribe", "translate"],
task_type: Literal["transcribe", "translate"],
) -> SpeechToTextConfig:
) -> SpeechToTextConfig:
return SpeechToTextConfig(
return SpeechToTextConfig(
...
@@ -83,7 +83,7 @@ Return a dict containing `multi_modal_data` with the audio, and either a `prompt
...
@@ -83,7 +83,7 @@ Return a dict containing `multi_modal_data` with the audio, and either a `prompt
cls,
cls,
audio: np.ndarray,
audio: np.ndarray,
stt_config: SpeechToTextConfig,
stt_config: SpeechToTextConfig,
model
_config:
Model
Config,
renderer
_config:
Renderer
Config,
language: str | None,
language: str | None,
task_type: Literal["transcribe", "translate"],
task_type: Literal["transcribe", "translate"],
request_prompt: str,
request_prompt: str,
...
@@ -120,7 +120,7 @@ Return a dict with separate `encoder_prompt` and `decoder_prompt` entries:
...
@@ -120,7 +120,7 @@ Return a dict with separate `encoder_prompt` and `decoder_prompt` entries:
cls,
cls,
audio: np.ndarray,
audio: np.ndarray,
stt_config: SpeechToTextConfig,
stt_config: SpeechToTextConfig,
model
_config:
Model
Config,
renderer
_config:
Renderer
Config,
language: str | None,
language: str | None,
task_type: Literal["transcribe", "translate"],
task_type: Literal["transcribe", "translate"],
request_prompt: str,
request_prompt: str,
...
@@ -183,7 +183,7 @@ Provide a fast duration→token estimate to improve streaming usage statistics:
...
@@ -183,7 +183,7 @@ Provide a fast duration→token estimate to improve streaming usage statistics:
cls,
cls,
audio_duration_s: float,
audio_duration_s: float,
stt_config: SpeechToTextConfig,
stt_config: SpeechToTextConfig,
model
_config:
Model
Config,
renderer
_config:
Renderer
Config,
) -> int | None:
) -> int | None:
# Return None if unknown; otherwise return an estimate.
# Return None if unknown; otherwise return an estimate.
return int(audio_duration_s * stt_config.sample_rate // 320) # example
return int(audio_duration_s * stt_config.sample_rate // 320) # example
...
@@ -216,7 +216,7 @@ Relevant server logic:
...
@@ -216,7 +216,7 @@ Relevant server logic:
prompt = self.model_cls.get_generation_prompt(
prompt = self.model_cls.get_generation_prompt(
audio=chunk,
audio=chunk,
stt_config=self.asr_config,
stt_config=self.asr_config,
model
_config=self.
model
_config,
renderer
_config=self.
renderer
_config,
language=language,
language=language,
task_type=self.task_type,
task_type=self.task_type,
request_prompt=request.prompt,
request_prompt=request.prompt,
...
...
tests/compile/distributed/test_sequence_parallelism.py
View file @
27f4c2fd
...
@@ -17,6 +17,7 @@ from vllm.config import (
...
@@ -17,6 +17,7 @@ from vllm.config import (
DeviceConfig
,
DeviceConfig
,
ModelConfig
,
ModelConfig
,
PassConfig
,
PassConfig
,
RendererConfig
,
VllmConfig
,
VllmConfig
,
get_current_vllm_config
,
get_current_vllm_config
,
set_current_vllm_config
,
set_current_vllm_config
,
...
@@ -276,6 +277,7 @@ def sequence_parallelism_pass_on_test_model(
...
@@ -276,6 +277,7 @@ def sequence_parallelism_pass_on_test_model(
vllm_config
=
VllmConfig
(
vllm_config
=
VllmConfig
(
model_config
=
model_config
,
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
device_config
=
device_config
,
device_config
=
device_config
,
compilation_config
=
compilation_config
,
compilation_config
=
compilation_config
,
)
)
...
...
tests/compile/test_functionalization.py
View file @
27f4c2fd
...
@@ -15,6 +15,7 @@ from vllm.config import (
...
@@ -15,6 +15,7 @@ from vllm.config import (
CompilationConfig
,
CompilationConfig
,
ModelConfig
,
ModelConfig
,
PassConfig
,
PassConfig
,
RendererConfig
,
VllmConfig
,
VllmConfig
,
set_current_vllm_config
,
set_current_vllm_config
,
)
)
...
@@ -219,8 +220,11 @@ def test_fix_functionalization(
...
@@ -219,8 +220,11 @@ def test_fix_functionalization(
torch
.
set_default_device
(
"cuda"
)
torch
.
set_default_device
(
"cuda"
)
torch
.
set_default_dtype
(
dtype
)
torch
.
set_default_dtype
(
dtype
)
model_config
=
ModelConfig
(
dtype
=
dtype
)
vllm_config
=
VllmConfig
(
vllm_config
=
VllmConfig
(
model_config
=
ModelConfig
(
dtype
=
dtype
),
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
compilation_config
=
CompilationConfig
(
compilation_config
=
CompilationConfig
(
custom_ops
=
[
"all"
],
custom_ops
=
[
"all"
],
pass_config
=
PassConfig
(
pass_config
=
PassConfig
(
...
...
tests/compile/test_fusion.py
View file @
27f4c2fd
...
@@ -15,6 +15,7 @@ from vllm.config import (
...
@@ -15,6 +15,7 @@ from vllm.config import (
CompilationMode
,
CompilationMode
,
ModelConfig
,
ModelConfig
,
PassConfig
,
PassConfig
,
RendererConfig
,
VllmConfig
,
VllmConfig
,
)
)
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
@@ -154,8 +155,11 @@ def test_fusion_rmsnorm_quant(
...
@@ -154,8 +155,11 @@ def test_fusion_rmsnorm_quant(
custom_ops
.
append
(
"+rms_norm"
)
custom_ops
.
append
(
"+rms_norm"
)
if
enable_quant_fp8_custom_op
:
if
enable_quant_fp8_custom_op
:
custom_ops
.
append
(
"+quant_fp8"
)
custom_ops
.
append
(
"+quant_fp8"
)
model_config
=
ModelConfig
(
dtype
=
dtype
)
vllm_config
=
VllmConfig
(
vllm_config
=
VllmConfig
(
model_config
=
ModelConfig
(
dtype
=
dtype
),
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
compilation_config
=
CompilationConfig
(
compilation_config
=
CompilationConfig
(
mode
=
CompilationMode
.
VLLM_COMPILE
,
mode
=
CompilationMode
.
VLLM_COMPILE
,
custom_ops
=
custom_ops
,
custom_ops
=
custom_ops
,
...
...
tests/compile/test_fusion_attn.py
View file @
27f4c2fd
...
@@ -24,6 +24,7 @@ from vllm.config import (
...
@@ -24,6 +24,7 @@ from vllm.config import (
CompilationMode
,
CompilationMode
,
ModelConfig
,
ModelConfig
,
PassConfig
,
PassConfig
,
RendererConfig
,
SchedulerConfig
,
SchedulerConfig
,
VllmConfig
,
VllmConfig
,
set_current_vllm_config
,
set_current_vllm_config
,
...
@@ -325,6 +326,7 @@ def test_attention_quant_pattern(
...
@@ -325,6 +326,7 @@ def test_attention_quant_pattern(
)
)
vllm_config
=
VllmConfig
(
vllm_config
=
VllmConfig
(
model_config
=
model_config
,
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
scheduler_config
=
SchedulerConfig
(
scheduler_config
=
SchedulerConfig
(
max_num_seqs
=
1024
,
max_num_seqs
=
1024
,
max_model_len
=
model_config
.
max_model_len
,
max_model_len
=
model_config
.
max_model_len
,
...
...
tests/compile/test_pass_manager.py
View file @
27f4c2fd
...
@@ -7,7 +7,7 @@ import torch
...
@@ -7,7 +7,7 @@ import torch
from
vllm.compilation.inductor_pass
import
CallableInductorPass
,
InductorPass
from
vllm.compilation.inductor_pass
import
CallableInductorPass
,
InductorPass
from
vllm.compilation.pass_manager
import
PostGradPassManager
from
vllm.compilation.pass_manager
import
PostGradPassManager
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
RendererConfig
,
VllmConfig
# dummy custom pass that doesn't inherit
# dummy custom pass that doesn't inherit
...
@@ -43,7 +43,11 @@ class ProperPass(InductorPass):
...
@@ -43,7 +43,11 @@ class ProperPass(InductorPass):
)
)
def
test_pass_manager_uuid
(
callable
):
def
test_pass_manager_uuid
(
callable
):
# Some passes need dtype to be set
# Some passes need dtype to be set
config
=
VllmConfig
(
model_config
=
ModelConfig
(
dtype
=
torch
.
bfloat16
))
model_config
=
ModelConfig
(
dtype
=
torch
.
bfloat16
)
config
=
VllmConfig
(
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
)
pass_manager
=
PostGradPassManager
()
pass_manager
=
PostGradPassManager
()
pass_manager
.
configure
(
config
)
pass_manager
.
configure
(
config
)
...
...
tests/compile/test_qk_norm_rope_fusion.py
View file @
27f4c2fd
...
@@ -19,6 +19,7 @@ from vllm.config import (
...
@@ -19,6 +19,7 @@ from vllm.config import (
CompilationMode
,
CompilationMode
,
ModelConfig
,
ModelConfig
,
PassConfig
,
PassConfig
,
RendererConfig
,
VllmConfig
,
VllmConfig
,
set_current_vllm_config
,
set_current_vllm_config
,
)
)
...
@@ -133,8 +134,10 @@ def test_qk_norm_rope_fusion(
...
@@ -133,8 +134,10 @@ def test_qk_norm_rope_fusion(
if
enable_rope_custom_op
:
if
enable_rope_custom_op
:
custom_ops
.
append
(
"+rotary_embedding"
)
custom_ops
.
append
(
"+rotary_embedding"
)
model_config
=
ModelConfig
(
dtype
=
dtype
)
vllm_config
=
VllmConfig
(
vllm_config
=
VllmConfig
(
model_config
=
ModelConfig
(
dtype
=
dtype
),
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
compilation_config
=
CompilationConfig
(
compilation_config
=
CompilationConfig
(
mode
=
CompilationMode
.
VLLM_COMPILE
,
mode
=
CompilationMode
.
VLLM_COMPILE
,
custom_ops
=
custom_ops
,
custom_ops
=
custom_ops
,
...
...
tests/distributed/test_kvlayout.py
View file @
27f4c2fd
...
@@ -5,6 +5,7 @@ from vllm.config import (
...
@@ -5,6 +5,7 @@ from vllm.config import (
DeviceConfig
,
DeviceConfig
,
KVTransferConfig
,
KVTransferConfig
,
ModelConfig
,
ModelConfig
,
RendererConfig
,
VllmConfig
,
VllmConfig
,
set_current_vllm_config
,
set_current_vllm_config
,
)
)
...
@@ -47,6 +48,7 @@ def test_get_kv_connector_cache_layout_with_nixl_connector():
...
@@ -47,6 +48,7 @@ def test_get_kv_connector_cache_layout_with_nixl_connector():
vllm_config
=
VllmConfig
(
vllm_config
=
VllmConfig
(
device_config
=
DeviceConfig
(
"cpu"
),
device_config
=
DeviceConfig
(
"cpu"
),
model_config
=
model_config
,
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
kv_transfer_config
=
kv_transfer_config
,
kv_transfer_config
=
kv_transfer_config
,
)
)
with
set_current_vllm_config
(
vllm_config
):
with
set_current_vllm_config
(
vllm_config
):
...
@@ -70,6 +72,7 @@ def test_get_kv_connector_cache_layout_with_multi_connector():
...
@@ -70,6 +72,7 @@ def test_get_kv_connector_cache_layout_with_multi_connector():
vllm_config
=
VllmConfig
(
vllm_config
=
VllmConfig
(
device_config
=
DeviceConfig
(
"cpu"
),
device_config
=
DeviceConfig
(
"cpu"
),
model_config
=
model_config
,
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
kv_transfer_config
=
kv_transfer_config
,
kv_transfer_config
=
kv_transfer_config
,
)
)
with
set_current_vllm_config
(
vllm_config
):
with
set_current_vllm_config
(
vllm_config
):
...
...
tests/entrypoints/openai/test_chat_template.py
View file @
27f4c2fd
...
@@ -3,7 +3,6 @@
...
@@ -3,7 +3,6 @@
import
pytest
import
pytest
from
vllm.config
import
ModelConfig
from
vllm.entrypoints.chat_utils
import
apply_hf_chat_template
,
load_chat_template
from
vllm.entrypoints.chat_utils
import
apply_hf_chat_template
,
load_chat_template
from
vllm.entrypoints.openai.protocol
import
ChatCompletionRequest
from
vllm.entrypoints.openai.protocol
import
ChatCompletionRequest
from
vllm.tokenizers
import
get_tokenizer
from
vllm.tokenizers
import
get_tokenizer
...
@@ -107,24 +106,11 @@ def test_get_gen_prompt(
...
@@ -107,24 +106,11 @@ def test_get_gen_prompt(
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_config
=
ModelConfig
(
renderer_config
=
model_info
.
build_renderer_config
(
model
)
model
,
tokenizer
=
model_info
.
tokenizer
or
model
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
trust_remote_code
=
model_info
.
trust_remote_code
,
revision
=
model_info
.
revision
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
)
# Initialize the tokenizer
tokenizer
=
get_tokenizer
(
tokenizer
=
get_tokenizer
(
tokenizer_name
=
model
_config
.
tokenizer
,
renderer
_config
.
tokenizer
,
trust_remote_code
=
model
_config
.
trust_remote_code
,
trust_remote_code
=
renderer
_config
.
trust_remote_code
,
)
)
template_content
=
load_chat_template
(
chat_template
=
template
)
template_content
=
load_chat_template
(
chat_template
=
template
)
...
@@ -143,7 +129,7 @@ def test_get_gen_prompt(
...
@@ -143,7 +129,7 @@ def test_get_gen_prompt(
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
conversation
=
mock_request
.
messages
,
conversation
=
mock_request
.
messages
,
chat_template
=
mock_request
.
chat_template
or
template_content
,
chat_template
=
mock_request
.
chat_template
or
template_content
,
model_config
=
model
_config
,
renderer_config
=
renderer
_config
,
tools
=
None
,
tools
=
None
,
add_generation_prompt
=
mock_request
.
add_generation_prompt
,
add_generation_prompt
=
mock_request
.
add_generation_prompt
,
continue_final_message
=
mock_request
.
continue_final_message
,
continue_final_message
=
mock_request
.
continue_final_message
,
...
...
tests/entrypoints/openai/test_lora_resolvers.py
View file @
27f4c2fd
...
@@ -33,26 +33,34 @@ class MockModelConfig:
...
@@ -33,26 +33,34 @@ class MockModelConfig:
"""Minimal mock ModelConfig for testing."""
"""Minimal mock ModelConfig for testing."""
model
:
str
=
MODEL_NAME
model
:
str
=
MODEL_NAME
tokenizer
:
str
=
MODEL_NAME
trust_remote_code
:
bool
=
False
trust_remote_code
:
bool
=
False
tokenizer_mode
:
str
=
"auto"
max_model_len
:
int
=
100
max_model_len
:
int
=
100
tokenizer_revision
:
str
|
None
=
None
multimodal_config
:
MultiModalConfig
=
field
(
default_factory
=
MultiModalConfig
)
multimodal_config
:
MultiModalConfig
=
field
(
default_factory
=
MultiModalConfig
)
hf_config
:
MockHFConfig
=
field
(
default_factory
=
MockHFConfig
)
hf_config
:
MockHFConfig
=
field
(
default_factory
=
MockHFConfig
)
logits_processors
:
list
[
str
]
|
None
=
None
logits_processors
:
list
[
str
]
|
None
=
None
logits_processor_pattern
:
str
|
None
=
None
logits_processor_pattern
:
str
|
None
=
None
diff_sampling_param
:
dict
|
None
=
None
diff_sampling_param
:
dict
|
None
=
None
allowed_local_media_path
:
str
=
""
allowed_media_domains
:
list
[
str
]
|
None
=
None
encoder_config
=
None
encoder_config
=
None
generation_config
:
str
=
"auto"
generation_config
:
str
=
"auto"
skip_tokenizer_init
:
bool
=
False
def
get_diff_sampling_param
(
self
):
def
get_diff_sampling_param
(
self
):
return
self
.
diff_sampling_param
or
{}
return
self
.
diff_sampling_param
or
{}
@
dataclass
class
MockRendererConfig
:
"""Minimal mock RendererConfig for testing."""
model_config
:
MockModelConfig
tokenizer
:
str
=
MODEL_NAME
tokenizer_mode
:
str
=
"auto"
tokenizer_revision
:
str
|
None
=
None
skip_tokenizer_init
:
bool
=
False
allowed_local_media_path
:
str
=
""
allowed_media_domains
:
list
[
str
]
|
None
=
None
class
MockLoRAResolver
(
LoRAResolver
):
class
MockLoRAResolver
(
LoRAResolver
):
async
def
resolve_lora
(
async
def
resolve_lora
(
self
,
base_model_name
:
str
,
lora_name
:
str
self
,
base_model_name
:
str
,
lora_name
:
str
...
@@ -114,6 +122,7 @@ def mock_serving_setup():
...
@@ -114,6 +122,7 @@ def mock_serving_setup():
mock_engine
.
add_lora
.
reset_mock
()
mock_engine
.
add_lora
.
reset_mock
()
mock_engine
.
model_config
=
MockModelConfig
()
mock_engine
.
model_config
=
MockModelConfig
()
mock_engine
.
renderer_config
=
MockRendererConfig
(
mock_engine
.
model_config
)
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
...
...
tests/entrypoints/openai/test_serving_chat.py
View file @
27f4c2fd
...
@@ -346,27 +346,33 @@ class MockHFConfig:
...
@@ -346,27 +346,33 @@ class MockHFConfig:
class
MockModelConfig
:
class
MockModelConfig
:
task
=
"generate"
task
=
"generate"
runner_type
=
"generate"
runner_type
=
"generate"
tokenizer
=
MODEL_NAME
trust_remote_code
=
False
trust_remote_code
=
False
tokenizer_mode
=
"auto"
max_model_len
=
100
max_model_len
=
100
tokenizer_revision
=
None
multimodal_config
=
MultiModalConfig
()
multimodal_config
=
MultiModalConfig
()
hf_config
=
MockHFConfig
()
hf_config
=
MockHFConfig
()
logits_processors
:
list
[
str
]
|
None
=
None
logits_processors
:
list
[
str
]
|
None
=
None
logits_processor_pattern
=
None
logits_processor_pattern
=
None
diff_sampling_param
:
dict
|
None
=
None
diff_sampling_param
:
dict
|
None
=
None
allowed_local_media_path
:
str
=
""
allowed_media_domains
:
list
[
str
]
|
None
=
None
encoder_config
=
None
encoder_config
=
None
generation_config
:
str
=
"auto"
generation_config
:
str
=
"auto"
media_io_kwargs
:
dict
[
str
,
dict
[
str
,
Any
]]
=
field
(
default_factory
=
dict
)
skip_tokenizer_init
=
False
def
get_diff_sampling_param
(
self
):
def
get_diff_sampling_param
(
self
):
return
self
.
diff_sampling_param
or
{}
return
self
.
diff_sampling_param
or
{}
@
dataclass
class
MockRendererConfig
:
model_config
:
MockModelConfig
=
field
(
default_factory
=
MockModelConfig
)
tokenizer
=
MODEL_NAME
tokenizer_mode
=
"auto"
tokenizer_revision
=
None
skip_tokenizer_init
=
False
media_io_kwargs
:
dict
[
str
,
dict
[
str
,
Any
]]
=
field
(
default_factory
=
dict
)
allowed_local_media_path
:
str
=
""
allowed_media_domains
:
list
[
str
]
|
None
=
None
def
_build_serving_chat
(
engine
:
AsyncLLM
)
->
OpenAIServingChat
:
def
_build_serving_chat
(
engine
:
AsyncLLM
)
->
OpenAIServingChat
:
models
=
OpenAIServingModels
(
models
=
OpenAIServingModels
(
engine_client
=
engine
,
engine_client
=
engine
,
...
@@ -399,6 +405,7 @@ def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
...
@@ -399,6 +405,7 @@ def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
@
dataclass
@
dataclass
class
MockEngine
:
class
MockEngine
:
model_config
:
MockModelConfig
=
field
(
default_factory
=
MockModelConfig
)
model_config
:
MockModelConfig
=
field
(
default_factory
=
MockModelConfig
)
renderer_config
:
MockRendererConfig
=
field
(
default_factory
=
MockRendererConfig
)
input_processor
:
MagicMock
=
field
(
default_factory
=
MagicMock
)
input_processor
:
MagicMock
=
field
(
default_factory
=
MagicMock
)
io_processor
:
MagicMock
=
field
(
default_factory
=
MagicMock
)
io_processor
:
MagicMock
=
field
(
default_factory
=
MagicMock
)
...
@@ -429,6 +436,7 @@ async def test_serving_chat_returns_correct_model_name():
...
@@ -429,6 +436,7 @@ async def test_serving_chat_returns_correct_model_name():
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
errored
=
False
mock_engine
.
errored
=
False
mock_engine
.
model_config
=
MockModelConfig
()
mock_engine
.
model_config
=
MockModelConfig
()
mock_engine
.
renderer_config
=
MockRendererConfig
(
mock_engine
.
model_config
)
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
...
@@ -459,6 +467,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
...
@@ -459,6 +467,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
errored
=
False
mock_engine
.
errored
=
False
mock_engine
.
model_config
=
MockModelConfig
()
mock_engine
.
model_config
=
MockModelConfig
()
mock_engine
.
renderer_config
=
MockRendererConfig
(
mock_engine
.
model_config
)
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
...
@@ -492,6 +501,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
...
@@ -492,6 +501,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
errored
=
False
mock_engine
.
errored
=
False
mock_engine
.
model_config
=
mock_model_config
mock_engine
.
model_config
=
mock_model_config
mock_engine
.
renderer_config
=
MockRendererConfig
(
mock_model_config
)
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
...
@@ -537,6 +547,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
...
@@ -537,6 +547,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
errored
=
False
mock_engine
.
errored
=
False
mock_engine
.
model_config
=
mock_model_config
mock_engine
.
model_config
=
mock_model_config
mock_engine
.
renderer_config
=
MockRendererConfig
(
mock_model_config
)
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
...
@@ -583,6 +594,7 @@ async def test_serving_chat_could_load_correct_generation_config():
...
@@ -583,6 +594,7 @@ async def test_serving_chat_could_load_correct_generation_config():
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
errored
=
False
mock_engine
.
errored
=
False
mock_engine
.
model_config
=
mock_model_config
mock_engine
.
model_config
=
mock_model_config
mock_engine
.
renderer_config
=
MockRendererConfig
(
mock_model_config
)
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
...
@@ -629,6 +641,7 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
...
@@ -629,6 +641,7 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
errored
=
False
mock_engine
.
errored
=
False
mock_engine
.
model_config
=
mock_model_config
mock_engine
.
model_config
=
mock_model_config
mock_engine
.
renderer_config
=
MockRendererConfig
(
mock_model_config
)
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
...
@@ -662,6 +675,7 @@ async def test_serving_chat_data_parallel_rank_extraction():
...
@@ -662,6 +675,7 @@ async def test_serving_chat_data_parallel_rank_extraction():
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
get_tokenizer
.
return_value
=
get_tokenizer
(
MODEL_NAME
)
mock_engine
.
errored
=
False
mock_engine
.
errored
=
False
mock_engine
.
model_config
=
MockModelConfig
()
mock_engine
.
model_config
=
MockModelConfig
()
mock_engine
.
renderer_config
=
MockRendererConfig
(
mock_engine
.
model_config
)
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
input_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
mock_engine
.
io_processor
=
MagicMock
()
...
...
tests/entrypoints/openai/test_serving_engine.py
View file @
27f4c2fd
...
@@ -7,7 +7,7 @@ from unittest.mock import Mock
...
@@ -7,7 +7,7 @@ from unittest.mock import Mock
import
pytest
import
pytest
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
,
RendererConfig
from
vllm.entrypoints.openai.serving_engine
import
OpenAIServing
from
vllm.entrypoints.openai.serving_engine
import
OpenAIServing
from
vllm.entrypoints.openai.serving_models
import
OpenAIServingModels
from
vllm.entrypoints.openai.serving_models
import
OpenAIServingModels
from
vllm.tokenizers
import
MistralTokenizer
from
vllm.tokenizers
import
MistralTokenizer
...
@@ -19,10 +19,16 @@ def serving() -> OpenAIServing:
...
@@ -19,10 +19,16 @@ def serving() -> OpenAIServing:
# Create minimal mocks
# Create minimal mocks
engine_client
=
Mock
()
engine_client
=
Mock
()
model_config
=
Mock
(
spec
=
ModelConfig
)
model_config
=
Mock
(
spec
=
ModelConfig
)
model_config
.
max_model_len
=
32768
model_config
.
max_model_len
=
32768
renderer_config
=
Mock
(
spec
=
RendererConfig
)
renderer_config
.
model_config
=
model_config
models
=
Mock
(
spec
=
OpenAIServingModels
)
models
=
Mock
(
spec
=
OpenAIServingModels
)
models
.
model_config
=
model_config
models
.
model_config
=
model_config
models
.
renderer_config
=
renderer_config
models
.
input_processor
=
Mock
()
models
.
input_processor
=
Mock
()
models
.
io_processor
=
Mock
()
models
.
io_processor
=
Mock
()
...
...
tests/entrypoints/openai/test_serving_models.py
View file @
27f4c2fd
...
@@ -6,7 +6,7 @@ from unittest.mock import MagicMock
...
@@ -6,7 +6,7 @@ from unittest.mock import MagicMock
import
pytest
import
pytest
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
,
RendererConfig
from
vllm.engine.protocol
import
EngineClient
from
vllm.engine.protocol
import
EngineClient
from
vllm.entrypoints.openai.protocol
import
(
from
vllm.entrypoints.openai.protocol
import
(
ErrorResponse
,
ErrorResponse
,
...
@@ -27,9 +27,15 @@ LORA_UNLOADING_SUCCESS_MESSAGE = (
...
@@ -27,9 +27,15 @@ LORA_UNLOADING_SUCCESS_MESSAGE = (
async
def
_async_serving_models_init
()
->
OpenAIServingModels
:
async
def
_async_serving_models_init
()
->
OpenAIServingModels
:
mock_engine_client
=
MagicMock
(
spec
=
EngineClient
)
mock_engine_client
=
MagicMock
(
spec
=
EngineClient
)
# Set the max_model_len attribute to avoid missing attribute
# Set the max_model_len attribute to avoid missing attribute
mock_model_config
=
MagicMock
(
spec
=
ModelConfig
)
mock_model_config
=
MagicMock
(
spec
=
ModelConfig
)
mock_model_config
.
max_model_len
=
2048
mock_model_config
.
max_model_len
=
2048
mock_renderer_config
=
MagicMock
(
spec
=
RendererConfig
)
mock_renderer_config
.
model_config
=
mock_model_config
mock_engine_client
.
model_config
=
mock_model_config
mock_engine_client
.
model_config
=
mock_model_config
mock_engine_client
.
renderer_config
=
mock_renderer_config
mock_engine_client
.
input_processor
=
MagicMock
()
mock_engine_client
.
input_processor
=
MagicMock
()
mock_engine_client
.
io_processor
=
MagicMock
()
mock_engine_client
.
io_processor
=
MagicMock
()
...
...
tests/entrypoints/test_chat_utils.py
View file @
27f4c2fd
...
@@ -12,7 +12,7 @@ from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy
...
@@ -12,7 +12,7 @@ from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy
from
vllm.assets.audio
import
AudioAsset
from
vllm.assets.audio
import
AudioAsset
from
vllm.assets.image
import
ImageAsset
from
vllm.assets.image
import
ImageAsset
from
vllm.assets.video
import
VideoAsset
from
vllm.assets.video
import
VideoAsset
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
,
RendererConfig
from
vllm.entrypoints.chat_utils
import
(
from
vllm.entrypoints.chat_utils
import
(
_try_extract_ast
,
_try_extract_ast
,
apply_mistral_chat_template
,
apply_mistral_chat_template
,
...
@@ -233,7 +233,7 @@ def test_parse_chat_messages_single_image(
...
@@ -233,7 +233,7 @@ def test_parse_chat_messages_single_image(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -265,7 +265,7 @@ def test_parse_chat_messages_single_image_with_uuid(
...
@@ -265,7 +265,7 @@ def test_parse_chat_messages_single_image_with_uuid(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -295,7 +295,7 @@ def test_parse_chat_messages_single_empty_image_with_uuid(
...
@@ -295,7 +295,7 @@ def test_parse_chat_messages_single_empty_image_with_uuid(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -328,7 +328,7 @@ def test_parse_chat_messages_single_image_with_bad_uuid_format(
...
@@ -328,7 +328,7 @@ def test_parse_chat_messages_single_image_with_bad_uuid_format(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -369,7 +369,7 @@ def test_parse_chat_messages_multiple_images_with_uuids(
...
@@ -369,7 +369,7 @@ def test_parse_chat_messages_multiple_images_with_uuids(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -409,7 +409,7 @@ def test_parse_chat_messages_multiple_empty_images_with_uuids(
...
@@ -409,7 +409,7 @@ def test_parse_chat_messages_multiple_empty_images_with_uuids(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -451,7 +451,7 @@ def test_parse_chat_messages_mixed_empty_images_with_uuids(
...
@@ -451,7 +451,7 @@ def test_parse_chat_messages_mixed_empty_images_with_uuids(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -485,7 +485,7 @@ async def test_parse_chat_messages_single_image_with_uuid_async(
...
@@ -485,7 +485,7 @@ async def test_parse_chat_messages_single_image_with_uuid_async(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -516,7 +516,7 @@ async def test_parse_chat_messages_empty_image_with_uuid_async(
...
@@ -516,7 +516,7 @@ async def test_parse_chat_messages_empty_image_with_uuid_async(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -554,7 +554,7 @@ async def test_parse_chat_messages_multiple_images_with_uuids_async(
...
@@ -554,7 +554,7 @@ async def test_parse_chat_messages_multiple_images_with_uuids_async(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -595,7 +595,7 @@ async def test_parse_chat_messages_multiple_empty_images_with_uuids_async(
...
@@ -595,7 +595,7 @@ async def test_parse_chat_messages_multiple_empty_images_with_uuids_async(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -634,7 +634,7 @@ async def test_parse_chat_messages_multiple_images_with_partial_uuids_async(
...
@@ -634,7 +634,7 @@ async def test_parse_chat_messages_multiple_images_with_partial_uuids_async(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -660,7 +660,7 @@ def test_parse_chat_messages_empty_system(
...
@@ -660,7 +660,7 @@ def test_parse_chat_messages_empty_system(
"content"
:
[{
"type"
:
"text"
,
"text"
:
"Who are you?"
}],
"content"
:
[{
"type"
:
"text"
,
"text"
:
"Who are you?"
}],
},
},
],
],
mistral_model_config
,
RendererConfig
(
model_config
=
mistral_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
assert
conversation
==
[
assert
conversation
==
[
...
@@ -677,7 +677,7 @@ def test_parse_chat_messages_empty_system(
...
@@ -677,7 +677,7 @@ def test_parse_chat_messages_empty_system(
"content"
:
[{
"type"
:
"text"
,
"text"
:
"Who are you?"
}],
"content"
:
[{
"type"
:
"text"
,
"text"
:
"Who are you?"
}],
},
},
],
],
mistral_model_config
,
RendererConfig
(
model_config
=
mistral_model_config
)
,
content_format
=
"openai"
,
content_format
=
"openai"
,
)
)
assert
conversation
==
[
assert
conversation
==
[
...
@@ -701,7 +701,7 @@ async def test_parse_chat_messages_single_image_async(
...
@@ -701,7 +701,7 @@ async def test_parse_chat_messages_single_image_async(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -730,7 +730,7 @@ def test_parse_chat_messages_multiple_images(
...
@@ -730,7 +730,7 @@ def test_parse_chat_messages_multiple_images(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -758,7 +758,7 @@ def test_parse_chat_messages_empty_pil_image_with_uuid(
...
@@ -758,7 +758,7 @@ def test_parse_chat_messages_empty_pil_image_with_uuid(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -786,7 +786,7 @@ def test_parse_chat_messages_empty_image_embeds_with_uuid(
...
@@ -786,7 +786,7 @@ def test_parse_chat_messages_empty_image_embeds_with_uuid(
],
],
}
}
],
],
phi3v_model_config_image_embeds
,
RendererConfig
(
model_config
=
phi3v_model_config_image_embeds
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -818,7 +818,7 @@ def test_parse_chat_messages_empty_audio_embeds_with_uuid(
...
@@ -818,7 +818,7 @@ def test_parse_chat_messages_empty_audio_embeds_with_uuid(
],
],
}
}
],
],
audio_embeds_model_config
,
RendererConfig
(
model_config
=
audio_embeds_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -858,7 +858,7 @@ def test_parse_chat_messages_audio_embeds_with_string(
...
@@ -858,7 +858,7 @@ def test_parse_chat_messages_audio_embeds_with_string(
],
],
}
}
],
],
audio_embeds_model_config
,
RendererConfig
(
model_config
=
audio_embeds_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -900,7 +900,7 @@ async def test_parse_chat_messages_audio_embeds_async(
...
@@ -900,7 +900,7 @@ async def test_parse_chat_messages_audio_embeds_async(
],
],
}
}
],
],
audio_embeds_model_config
,
RendererConfig
(
model_config
=
audio_embeds_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1108,7 +1108,7 @@ async def test_parse_chat_messages_empty_image_embeds_with_uuid_async(
...
@@ -1108,7 +1108,7 @@ async def test_parse_chat_messages_empty_image_embeds_with_uuid_async(
],
],
}
}
],
],
phi3v_model_config_image_embeds
,
RendererConfig
(
model_config
=
phi3v_model_config_image_embeds
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1144,7 +1144,7 @@ async def test_parse_chat_messages_multiple_images_async(
...
@@ -1144,7 +1144,7 @@ async def test_parse_chat_messages_multiple_images_async(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1176,7 +1176,7 @@ def test_parse_chat_messages_placeholder_already_in_prompt(
...
@@ -1176,7 +1176,7 @@ def test_parse_chat_messages_placeholder_already_in_prompt(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
assert
conversation
==
[
assert
conversation
==
[
...
@@ -1208,7 +1208,7 @@ def test_parse_chat_messages_placeholder_one_already_in_prompt(
...
@@ -1208,7 +1208,7 @@ def test_parse_chat_messages_placeholder_one_already_in_prompt(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1245,7 +1245,7 @@ def test_parse_chat_messages_multiple_images_across_messages(
...
@@ -1245,7 +1245,7 @@ def test_parse_chat_messages_multiple_images_across_messages(
],
],
},
},
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1289,7 +1289,7 @@ def test_parse_chat_messages_multiple_images_with_uuids_across_messages(
...
@@ -1289,7 +1289,7 @@ def test_parse_chat_messages_multiple_images_with_uuids_across_messages(
],
],
},
},
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1314,7 +1314,7 @@ def test_parse_chat_messages_context_text_format(
...
@@ -1314,7 +1314,7 @@ def test_parse_chat_messages_context_text_format(
{
"role"
:
"assistant"
,
"content"
:
"Some stuff."
},
{
"role"
:
"assistant"
,
"content"
:
"Some stuff."
},
{
"role"
:
"user"
,
"content"
:
"What about this one?"
},
{
"role"
:
"user"
,
"content"
:
"What about this one?"
},
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"openai"
,
content_format
=
"openai"
,
)
)
...
@@ -1367,7 +1367,7 @@ def test_parse_chat_messages_rejects_too_many_images_in_one_message(
...
@@ -1367,7 +1367,7 @@ def test_parse_chat_messages_rejects_too_many_images_in_one_message(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1410,7 +1410,7 @@ def test_parse_chat_messages_rejects_too_many_images_across_messages(
...
@@ -1410,7 +1410,7 @@ def test_parse_chat_messages_rejects_too_many_images_across_messages(
],
],
},
},
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1430,7 +1430,7 @@ def test_parse_chat_messages_multiple_images_uncommon_input(
...
@@ -1430,7 +1430,7 @@ def test_parse_chat_messages_multiple_images_uncommon_input(
],
],
}
}
],
],
phi3v_model_config
,
RendererConfig
(
model_config
=
phi3v_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1464,7 +1464,7 @@ def test_parse_chat_messages_multiple_images_interleave(
...
@@ -1464,7 +1464,7 @@ def test_parse_chat_messages_multiple_images_interleave(
],
],
}
}
],
],
phi3v_model_config_mm_interleaved
,
RendererConfig
(
model_config
=
phi3v_model_config_mm_interleaved
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1500,7 +1500,7 @@ async def test_parse_chat_messages_multiple_images_interleave_async(
...
@@ -1500,7 +1500,7 @@ async def test_parse_chat_messages_multiple_images_interleave_async(
],
],
}
}
],
],
phi3v_model_config_mm_interleaved
,
RendererConfig
(
model_config
=
phi3v_model_config_mm_interleaved
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1545,7 +1545,7 @@ async def test_parse_chat_messages_multiple_images_with_uuids_interleave_async(
...
@@ -1545,7 +1545,7 @@ async def test_parse_chat_messages_multiple_images_with_uuids_interleave_async(
],
],
}
}
],
],
phi3v_model_config_mm_interleaved
,
RendererConfig
(
model_config
=
phi3v_model_config_mm_interleaved
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1583,7 +1583,7 @@ def test_parse_chat_messages_multiple_images_multiple_messages_interleave(
...
@@ -1583,7 +1583,7 @@ def test_parse_chat_messages_multiple_images_multiple_messages_interleave(
],
],
},
},
],
],
phi3v_model_config_mm_interleaved
,
RendererConfig
(
model_config
=
phi3v_model_config_mm_interleaved
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1631,7 +1631,7 @@ def test_parse_chat_messages_multiple_images_with_uuids_multiple_messages_interl
...
@@ -1631,7 +1631,7 @@ def test_parse_chat_messages_multiple_images_with_uuids_multiple_messages_interl
],
],
},
},
],
],
phi3v_model_config_mm_interleaved
,
RendererConfig
(
model_config
=
phi3v_model_config_mm_interleaved
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1675,7 +1675,7 @@ def test_parse_chat_messages_multiple_modals_multiple_messages_interleave(
...
@@ -1675,7 +1675,7 @@ def test_parse_chat_messages_multiple_modals_multiple_messages_interleave(
],
],
},
},
],
],
qwen25omni_model_config_mm_interleaved
,
RendererConfig
(
model_config
=
qwen25omni_model_config_mm_interleaved
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1743,7 +1743,7 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interl
...
@@ -1743,7 +1743,7 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interl
],
],
},
},
],
],
qwen25omni_model_config_mm_interleaved
,
RendererConfig
(
model_config
=
qwen25omni_model_config_mm_interleaved
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1813,7 +1813,7 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_empty_media_mes
...
@@ -1813,7 +1813,7 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_empty_media_mes
],
],
},
},
],
],
qwen25omni_model_config_mm_interleaved
,
RendererConfig
(
model_config
=
qwen25omni_model_config_mm_interleaved
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1879,7 +1879,7 @@ def test_parse_chat_messages_multiple_modals_with_partial_uuids_multiple_message
...
@@ -1879,7 +1879,7 @@ def test_parse_chat_messages_multiple_modals_with_partial_uuids_multiple_message
],
],
},
},
],
],
qwen25omni_model_config_mm_interleaved
,
RendererConfig
(
model_config
=
qwen25omni_model_config_mm_interleaved
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1927,7 +1927,7 @@ def test_parse_chat_messages_multiple_images_interleave_with_placeholders(
...
@@ -1927,7 +1927,7 @@ def test_parse_chat_messages_multiple_images_interleave_with_placeholders(
],
],
}
}
],
],
phi3v_model_config_mm_interleaved
,
RendererConfig
(
model_config
=
phi3v_model_config_mm_interleaved
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -1945,24 +1945,11 @@ def test_resolve_hf_chat_template(sample_json_schema, model, use_tools):
...
@@ -1945,24 +1945,11 @@ def test_resolve_hf_chat_template(sample_json_schema, model, use_tools):
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_config
=
ModelConfig
(
renderer_config
=
model_info
.
build_renderer_config
(
model
)
model
,
tokenizer
=
model_info
.
tokenizer
or
model
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
)
# Build the tokenizer
tokenizer
=
get_tokenizer
(
tokenizer
=
get_tokenizer
(
model
,
renderer_config
.
tokenizer
,
trust_remote_code
=
model
_config
.
trust_remote_code
,
trust_remote_code
=
renderer
_config
.
trust_remote_code
,
)
)
tools
=
(
tools
=
(
...
@@ -1985,7 +1972,7 @@ def test_resolve_hf_chat_template(sample_json_schema, model, use_tools):
...
@@ -1985,7 +1972,7 @@ def test_resolve_hf_chat_template(sample_json_schema, model, use_tools):
tokenizer
,
tokenizer
,
chat_template
=
None
,
chat_template
=
None
,
tools
=
tools
,
tools
=
tools
,
model_config
=
model_config
,
model_config
=
renderer_config
.
model_config
,
)
)
assert
isinstance
(
chat_template
,
str
)
assert
isinstance
(
chat_template
,
str
)
...
@@ -2047,24 +2034,11 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
...
@@ -2047,24 +2034,11 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
"enable_thinking"
:
True
,
"enable_thinking"
:
True
,
}
}
model_config
=
ModelConfig
(
renderer_config
=
model_info
.
build_renderer_config
(
model
)
model
,
tokenizer
=
model_info
.
tokenizer
or
model
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
)
# Build the tokenizer
tokenizer
=
get_tokenizer
(
tokenizer
=
get_tokenizer
(
model
,
renderer_config
.
tokenizer
,
trust_remote_code
=
model
_config
.
trust_remote_code
,
trust_remote_code
=
renderer
_config
.
trust_remote_code
,
)
)
# Test detecting the tokenizer's chat_template
# Test detecting the tokenizer's chat_template
...
@@ -2072,7 +2046,7 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
...
@@ -2072,7 +2046,7 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
tokenizer
,
tokenizer
,
chat_template
=
None
,
chat_template
=
None
,
tools
=
tools
,
tools
=
tools
,
model_config
=
model_config
,
model_config
=
renderer_config
.
model_config
,
)
)
with
pytest
.
raises
(
with
pytest
.
raises
(
ValueError
,
match
=
"Found unexpected chat template kwargs from request"
ValueError
,
match
=
"Found unexpected chat template kwargs from request"
...
@@ -2143,23 +2117,11 @@ def test_resolve_content_format_hf_defined(model, expected_format):
...
@@ -2143,23 +2117,11 @@ def test_resolve_content_format_hf_defined(model, expected_format):
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_config
=
ModelConfig
(
renderer_config
=
model_info
.
build_renderer_config
(
model
)
model
,
tokenizer
=
model_info
.
tokenizer
or
model
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
)
tokenizer
=
get_tokenizer
(
tokenizer
=
get_tokenizer
(
model
,
renderer_config
.
tokenizer
,
trust_remote_code
=
model
_config
.
trust_remote_code
,
trust_remote_code
=
renderer
_config
.
trust_remote_code
,
)
)
# Test detecting the tokenizer's chat_template
# Test detecting the tokenizer's chat_template
...
@@ -2167,7 +2129,7 @@ def test_resolve_content_format_hf_defined(model, expected_format):
...
@@ -2167,7 +2129,7 @@ def test_resolve_content_format_hf_defined(model, expected_format):
tokenizer
,
tokenizer
,
chat_template
=
None
,
chat_template
=
None
,
tools
=
None
,
tools
=
None
,
model_config
=
model_config
,
model_config
=
renderer_config
.
model_config
,
)
)
assert
isinstance
(
chat_template
,
str
)
assert
isinstance
(
chat_template
,
str
)
...
@@ -2181,7 +2143,7 @@ def test_resolve_content_format_hf_defined(model, expected_format):
...
@@ -2181,7 +2143,7 @@ def test_resolve_content_format_hf_defined(model, expected_format):
None
,
None
,
"auto"
,
"auto"
,
tokenizer
,
tokenizer
,
model_config
=
model
_config
,
renderer_config
=
renderer
_config
,
)
)
assert
resolved_format
==
expected_format
assert
resolved_format
==
expected_format
...
@@ -2203,23 +2165,11 @@ def test_resolve_content_format_fallbacks(model, expected_format):
...
@@ -2203,23 +2165,11 @@ def test_resolve_content_format_fallbacks(model, expected_format):
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_config
=
ModelConfig
(
renderer_config
=
model_info
.
build_renderer_config
(
model
)
model
,
tokenizer
=
model_info
.
tokenizer
or
model
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
)
tokenizer
=
get_tokenizer
(
tokenizer
=
get_tokenizer
(
model
_config
.
tokenizer
,
renderer
_config
.
tokenizer
,
trust_remote_code
=
model
_config
.
trust_remote_code
,
trust_remote_code
=
renderer
_config
.
trust_remote_code
,
)
)
# Test detecting the tokenizer's chat_template
# Test detecting the tokenizer's chat_template
...
@@ -2227,7 +2177,7 @@ def test_resolve_content_format_fallbacks(model, expected_format):
...
@@ -2227,7 +2177,7 @@ def test_resolve_content_format_fallbacks(model, expected_format):
tokenizer
,
tokenizer
,
chat_template
=
None
,
chat_template
=
None
,
tools
=
None
,
tools
=
None
,
model_config
=
model_config
,
model_config
=
renderer_config
.
model_config
,
)
)
assert
isinstance
(
chat_template
,
str
)
assert
isinstance
(
chat_template
,
str
)
...
@@ -2241,7 +2191,7 @@ def test_resolve_content_format_fallbacks(model, expected_format):
...
@@ -2241,7 +2191,7 @@ def test_resolve_content_format_fallbacks(model, expected_format):
None
,
None
,
"auto"
,
"auto"
,
tokenizer
,
tokenizer
,
model_config
=
model
_config
,
renderer_config
=
renderer
_config
,
)
)
assert
resolved_format
==
expected_format
assert
resolved_format
==
expected_format
...
@@ -2272,15 +2222,13 @@ def test_resolve_content_format_fallbacks(model, expected_format):
...
@@ -2272,15 +2222,13 @@ def test_resolve_content_format_fallbacks(model, expected_format):
],
],
)
)
def
test_resolve_content_format_examples
(
template_path
,
expected_format
):
def
test_resolve_content_format_examples
(
template_path
,
expected_format
):
model_config
=
ModelConfig
(
model
=
PHI3V_MODEL_ID
# Dummy
PHI3V_MODEL_ID
,
# Dummy
model_config
=
ModelConfig
(
model
,
trust_remote_code
=
True
)
tokenizer
=
PHI3V_MODEL_ID
,
# Dummy
renderer_config
=
RendererConfig
(
model_config
=
model_config
,
tokenizer
=
model
)
trust_remote_code
=
True
,
)
dummy_tokenizer
=
get_tokenizer
(
dummy_tokenizer
=
get_tokenizer
(
PHI3V_MODEL_ID
,
# Dummy
renderer_config
.
tokenizer
,
trust_remote_code
=
model
_config
.
trust_remote_code
,
trust_remote_code
=
renderer
_config
.
trust_remote_code
,
)
)
dummy_tokenizer
.
chat_template
=
None
dummy_tokenizer
.
chat_template
=
None
...
@@ -2297,7 +2245,7 @@ def test_resolve_content_format_examples(template_path, expected_format):
...
@@ -2297,7 +2245,7 @@ def test_resolve_content_format_examples(template_path, expected_format):
None
,
None
,
"auto"
,
"auto"
,
dummy_tokenizer
,
dummy_tokenizer
,
model_config
=
model
_config
,
renderer_config
=
renderer
_config
,
)
)
assert
resolved_format
==
expected_format
assert
resolved_format
==
expected_format
...
@@ -2332,7 +2280,7 @@ def test_parse_chat_messages_include_thinking_chunk(mistral_model_config):
...
@@ -2332,7 +2280,7 @@ def test_parse_chat_messages_include_thinking_chunk(mistral_model_config):
conversation_with_thinking
,
_
,
_
=
parse_chat_messages
(
conversation_with_thinking
,
_
,
_
=
parse_chat_messages
(
messages
,
messages
,
mistral_model_config
,
RendererConfig
(
model_config
=
mistral_model_config
)
,
content_format
=
"openai"
,
content_format
=
"openai"
,
)
)
...
@@ -2432,7 +2380,7 @@ def test_parse_chat_messages_single_empty_audio_with_uuid(
...
@@ -2432,7 +2380,7 @@ def test_parse_chat_messages_single_empty_audio_with_uuid(
],
],
}
}
],
],
qwen2_audio_model_config
,
RendererConfig
(
model_config
=
qwen2_audio_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
@@ -2466,7 +2414,7 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async(
...
@@ -2466,7 +2414,7 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async(
],
],
}
}
],
],
qwen2_audio_model_config
,
RendererConfig
(
model_config
=
qwen2_audio_model_config
)
,
content_format
=
"string"
,
content_format
=
"string"
,
)
)
...
...
tests/lora/test_lora_manager.py
View file @
27f4c2fd
...
@@ -8,7 +8,7 @@ import torch
...
@@ -8,7 +8,7 @@ import torch
from
safetensors.torch
import
load_file
from
safetensors.torch
import
load_file
from
torch
import
nn
from
torch
import
nn
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
RendererConfig
,
VllmConfig
from
vllm.config.lora
import
LoRAConfig
from
vllm.config.lora
import
LoRAConfig
from
vllm.lora.layers
import
(
from
vllm.lora.layers
import
(
ColumnParallelLinearWithLoRA
,
ColumnParallelLinearWithLoRA
,
...
@@ -422,7 +422,11 @@ def test_lru_cache_worker_adapter_manager(dist_init, dummy_model, device, tmp_pa
...
@@ -422,7 +422,11 @@ def test_lru_cache_worker_adapter_manager(dist_init, dummy_model, device, tmp_pa
)
)
model_config
=
ModelConfig
(
max_model_len
=
16
)
model_config
=
ModelConfig
(
max_model_len
=
16
)
vllm_config
=
VllmConfig
(
model_config
=
model_config
,
lora_config
=
lora_config
)
vllm_config
=
VllmConfig
(
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
lora_config
=
lora_config
,
)
vllm_config
.
scheduler_config
.
max_num_seqs
=
4
vllm_config
.
scheduler_config
.
max_num_seqs
=
4
vllm_config
.
scheduler_config
.
max_num_batched_tokens
=
2
vllm_config
.
scheduler_config
.
max_num_batched_tokens
=
2
...
@@ -525,7 +529,11 @@ def test_worker_adapter_manager(dist_init, dummy_model_gate_up, device, tmp_path
...
@@ -525,7 +529,11 @@ def test_worker_adapter_manager(dist_init, dummy_model_gate_up, device, tmp_path
)
)
model_config
=
ModelConfig
(
max_model_len
=
16
)
model_config
=
ModelConfig
(
max_model_len
=
16
)
vllm_config
=
VllmConfig
(
model_config
=
model_config
,
lora_config
=
lora_config
)
vllm_config
=
VllmConfig
(
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
lora_config
=
lora_config
,
)
vllm_config
.
scheduler_config
.
max_num_seqs
=
4
vllm_config
.
scheduler_config
.
max_num_seqs
=
4
vllm_config
.
scheduler_config
.
max_num_batched_tokens
=
2
vllm_config
.
scheduler_config
.
max_num_batched_tokens
=
2
...
...
tests/lora/test_worker.py
View file @
27f4c2fd
...
@@ -11,6 +11,7 @@ from vllm.config import (
...
@@ -11,6 +11,7 @@ from vllm.config import (
DeviceConfig
,
DeviceConfig
,
ModelConfig
,
ModelConfig
,
ParallelConfig
,
ParallelConfig
,
RendererConfig
,
SchedulerConfig
,
SchedulerConfig
,
VllmConfig
,
VllmConfig
,
)
)
...
@@ -43,6 +44,7 @@ def test_worker_apply_lora(qwen3_lora_files):
...
@@ -43,6 +44,7 @@ def test_worker_apply_lora(qwen3_lora_files):
vllm_config
=
VllmConfig
(
vllm_config
=
VllmConfig
(
model_config
=
model_config
,
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
load_config
=
LoadConfig
(
load_config
=
LoadConfig
(
download_dir
=
None
,
download_dir
=
None
,
load_format
=
"dummy"
,
load_format
=
"dummy"
,
...
...
tests/model_executor/test_model_load_with_params.py
View file @
27f4c2fd
...
@@ -42,8 +42,10 @@ def test_model_loading_with_params(vllm_runner, monkeypatch):
...
@@ -42,8 +42,10 @@ def test_model_loading_with_params(vllm_runner, monkeypatch):
"Write a short story about a robot that dreams for the first time.
\n
"
"Write a short story about a robot that dreams for the first time.
\n
"
)
)
model_config
=
vllm_model
.
llm
.
llm_engine
.
model_config
llm_engine
=
vllm_model
.
llm
.
llm_engine
model_tokenizer
=
vllm_model
.
llm
.
llm_engine
.
tokenizer
model_config
=
llm_engine
.
model_config
renderer_config
=
llm_engine
.
renderer_config
tokenizer
=
llm_engine
.
tokenizer
# asserts on the bert model config file
# asserts on the bert model config file
assert
model_config
.
encoder_config
[
"max_seq_length"
]
==
512
assert
model_config
.
encoder_config
[
"max_seq_length"
]
==
512
...
@@ -54,8 +56,8 @@ def test_model_loading_with_params(vllm_runner, monkeypatch):
...
@@ -54,8 +56,8 @@ def test_model_loading_with_params(vllm_runner, monkeypatch):
assert
model_config
.
pooler_config
.
normalize
assert
model_config
.
pooler_config
.
normalize
# asserts on the tokenizer loaded
# asserts on the tokenizer loaded
assert
model
_config
.
tokenizer
==
"BAAI/bge-base-en-v1.5"
assert
renderer
_config
.
tokenizer
==
"BAAI/bge-base-en-v1.5"
assert
model_
tokenizer
.
model_max_length
==
512
assert
tokenizer
.
model_max_length
==
512
def
check_model
(
model
):
def
check_model
(
model
):
assert
isinstance
(
model
,
BertEmbeddingModel
)
assert
isinstance
(
model
,
BertEmbeddingModel
)
...
@@ -86,8 +88,10 @@ def test_roberta_model_loading_with_params(vllm_runner, monkeypatch):
...
@@ -86,8 +88,10 @@ def test_roberta_model_loading_with_params(vllm_runner, monkeypatch):
"Write a short story about a robot that dreams for the first time.
\n
"
"Write a short story about a robot that dreams for the first time.
\n
"
)
)
model_config
=
vllm_model
.
llm
.
llm_engine
.
model_config
llm_engine
=
vllm_model
.
llm
.
llm_engine
model_tokenizer
=
vllm_model
.
llm
.
llm_engine
.
tokenizer
model_config
=
llm_engine
.
model_config
renderer_config
=
llm_engine
.
renderer_config
tokenizer
=
llm_engine
.
tokenizer
# asserts on the bert model config file
# asserts on the bert model config file
assert
model_config
.
encoder_config
[
"max_seq_length"
]
==
512
assert
model_config
.
encoder_config
[
"max_seq_length"
]
==
512
...
@@ -98,8 +102,8 @@ def test_roberta_model_loading_with_params(vllm_runner, monkeypatch):
...
@@ -98,8 +102,8 @@ def test_roberta_model_loading_with_params(vllm_runner, monkeypatch):
assert
model_config
.
pooler_config
.
normalize
assert
model_config
.
pooler_config
.
normalize
# asserts on the tokenizer loaded
# asserts on the tokenizer loaded
assert
model
_config
.
tokenizer
==
"intfloat/multilingual-e5-base"
assert
renderer
_config
.
tokenizer
==
"intfloat/multilingual-e5-base"
assert
model_
tokenizer
.
model_max_length
==
512
assert
tokenizer
.
model_max_length
==
512
def
check_model
(
model
):
def
check_model
(
model
):
assert
isinstance
(
model
,
RobertaEmbeddingModel
)
assert
isinstance
(
model
,
RobertaEmbeddingModel
)
...
@@ -128,7 +132,7 @@ def test_facebook_roberta_model_loading_with_params(vllm_runner, monkeypatch):
...
@@ -128,7 +132,7 @@ def test_facebook_roberta_model_loading_with_params(vllm_runner, monkeypatch):
"Write a short story about a robot that dreams for the first time.
\n
"
"Write a short story about a robot that dreams for the first time.
\n
"
)
)
assert
vllm_model
.
llm
.
llm_engine
.
model
_config
.
tokenizer
==
model_name
assert
vllm_model
.
llm
.
llm_engine
.
renderer
_config
.
tokenizer
==
model_name
def
check_model
(
model
):
def
check_model
(
model
):
assert
isinstance
(
model
,
RobertaEmbeddingModel
)
assert
isinstance
(
model
,
RobertaEmbeddingModel
)
...
...
tests/models/language/pooling/test_gritlm.py
View file @
27f4c2fd
...
@@ -6,7 +6,7 @@ import pytest
...
@@ -6,7 +6,7 @@ import pytest
from
scipy.spatial.distance
import
cosine
from
scipy.spatial.distance
import
cosine
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
,
RendererConfig
from
....utils
import
RemoteOpenAIServer
from
....utils
import
RemoteOpenAIServer
...
@@ -31,7 +31,8 @@ def test_find_array():
...
@@ -31,7 +31,8 @@ def test_find_array():
dtype
=
"bfloat16"
,
dtype
=
"bfloat16"
,
seed
=
0
,
seed
=
0
,
)
)
pooling
=
GritLMMeanPool
(
model_config
=
model_config
)
renderer_config
=
RendererConfig
(
model_config
=
model_config
)
pooling
=
GritLMMeanPool
(
renderer_config
=
renderer_config
)
arr
=
_arr
([
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
])
arr
=
_arr
([
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
])
...
...
tests/models/multimodal/processing/test_common.py
View file @
27f4c2fd
...
@@ -25,7 +25,6 @@ from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingC
...
@@ -25,7 +25,6 @@ from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingC
from
vllm.tokenizers
import
(
from
vllm.tokenizers
import
(
MistralTokenizer
,
MistralTokenizer
,
TokenizerLike
,
TokenizerLike
,
cached_tokenizer_from_config
,
)
)
from
....multimodal.utils
import
random_audio
,
random_image
,
random_video
from
....multimodal.utils
import
random_audio
,
random_image
,
random_video
...
@@ -212,31 +211,20 @@ def _test_processing_correctness(
...
@@ -212,31 +211,20 @@ def _test_processing_correctness(
else
:
else
:
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model_id_or_arch
)
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model_id_or_arch
)
model_id
=
model_id_or_arch
model_id
=
model_id_or_arch
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_transformers_version
(
on_fail
=
"skip"
)
model_info
.
check_transformers_version
(
on_fail
=
"skip"
)
model_config
=
ModelConfig
(
renderer_config
=
model_info
.
build_renderer_config
(
model_id
,
model
=
model_id
,
tokenizer
=
model_info
.
tokenizer
or
model_id
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
model_info
.
hf_overrides
,
# Ensure that the cache can fit all of the data
# Ensure that the cache can fit all of the data
mm_processor_cache_gb
=
2048
,
mm_processor_cache_gb
=
2048
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
)
)
model_config
=
renderer_config
.
model_config
model_cls
=
MULTIMODAL_REGISTRY
.
_get_model_cls
(
model_config
)
model_cls
=
MULTIMODAL_REGISTRY
.
_get_model_cls
(
model_config
)
factories
=
model_cls
.
_processor_factory
factories
=
model_cls
.
_processor_factory
ctx
=
InputProcessingContext
(
ctx
=
InputProcessingContext
.
from_config
(
renderer_config
)
model_config
,
tokenizer
=
cached_tokenizer_from_config
(
model_config
),
)
cache
=
MultiModalProcessorOnlyCache
(
model_config
)
cache
=
MultiModalProcessorOnlyCache
(
model_config
)
processing_info
=
factories
.
info
(
ctx
)
processing_info
=
factories
.
info
(
ctx
)
...
...
tests/models/multimodal/processing/test_glm4_1v.py
View file @
27f4c2fd
...
@@ -40,7 +40,7 @@ def test_processor_override(
...
@@ -40,7 +40,7 @@ def test_processor_override(
mm_processor_kwargs
=
None
,
mm_processor_kwargs
=
None
,
limit_mm_per_prompt
=
{
"video"
:
1
},
limit_mm_per_prompt
=
{
"video"
:
1
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
tokenizer
=
processor
.
info
.
get_tokenizer
()
tokenizer
=
processor
.
info
.
get_tokenizer
()
hf_processor_mm_kwargs
=
{
"fps"
:
fps
}
hf_processor_mm_kwargs
=
{
"fps"
:
fps
}
...
@@ -79,7 +79,7 @@ def test_video_loader_consistency(
...
@@ -79,7 +79,7 @@ def test_video_loader_consistency(
mm_processor_kwargs
=
None
,
mm_processor_kwargs
=
None
,
limit_mm_per_prompt
=
{
"video"
:
1
},
limit_mm_per_prompt
=
{
"video"
:
1
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
hf_processor_mm_kwargs
=
{
"fps"
:
fps
}
hf_processor_mm_kwargs
=
{
"fps"
:
fps
}
# Build the image str / prompt based on the number of images we pass
# Build the image str / prompt based on the number of images we pass
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment