Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2f308214
Unverified
Commit
2f308214
authored
Feb 13, 2026
by
Cyrus Leung
Committed by
GitHub
Feb 12, 2026
Browse files
[Refactor] Pass full VllmConfig to Renderer (#34485)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
1b4e8e53
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
137 additions
and
86 deletions
+137
-86
tests/entrypoints/openai/test_chat_error.py
tests/entrypoints/openai/test_chat_error.py
+6
-1
tests/entrypoints/openai/test_completion_error.py
tests/entrypoints/openai/test_completion_error.py
+6
-1
tests/entrypoints/openai/test_lora_resolvers.py
tests/entrypoints/openai/test_lora_resolvers.py
+6
-1
tests/entrypoints/openai/test_serving_chat.py
tests/entrypoints/openai/test_serving_chat.py
+14
-3
tests/renderers/test_completions.py
tests/renderers/test_completions.py
+32
-23
tests/renderers/test_mistral.py
tests/renderers/test_mistral.py
+9
-1
tests/test_inputs.py
tests/test_inputs.py
+3
-2
vllm/inputs/preprocess.py
vllm/inputs/preprocess.py
+5
-6
vllm/renderers/base.py
vllm/renderers/base.py
+7
-7
vllm/renderers/deepseek_v32.py
vllm/renderers/deepseek_v32.py
+7
-6
vllm/renderers/grok2.py
vllm/renderers/grok2.py
+7
-6
vllm/renderers/hf.py
vllm/renderers/hf.py
+8
-7
vllm/renderers/mistral.py
vllm/renderers/mistral.py
+7
-6
vllm/renderers/registry.py
vllm/renderers/registry.py
+9
-5
vllm/renderers/terratorch.py
vllm/renderers/terratorch.py
+7
-6
vllm/v1/engine/async_llm.py
vllm/v1/engine/async_llm.py
+1
-1
vllm/v1/engine/input_processor.py
vllm/v1/engine/input_processor.py
+2
-3
vllm/v1/engine/llm_engine.py
vllm/v1/engine/llm_engine.py
+1
-1
No files found.
tests/entrypoints/openai/test_chat_error.py
View file @
2f308214
...
...
@@ -59,11 +59,16 @@ class MockModelConfig:
return
self
.
diff_sampling_param
or
{}
@
dataclass
class
MockVllmConfig
:
model_config
:
MockModelConfig
def
_build_renderer
(
model_config
:
MockModelConfig
):
_
,
tokenizer_name
,
_
,
kwargs
=
tokenizer_args_from_config
(
model_config
)
return
HfRenderer
(
model_config
,
MockVllmConfig
(
model_config
)
,
tokenizer_kwargs
=
{
**
kwargs
,
"tokenizer_name"
:
tokenizer_name
},
)
...
...
tests/entrypoints/openai/test_completion_error.py
View file @
2f308214
...
...
@@ -58,6 +58,11 @@ class MockModelConfig:
return
self
.
diff_sampling_param
or
{}
@
dataclass
class
MockVllmConfig
:
model_config
:
MockModelConfig
def
_build_serving_completion
(
engine
:
AsyncLLM
)
->
OpenAIServingCompletion
:
models
=
OpenAIServingModels
(
engine_client
=
engine
,
...
...
@@ -74,7 +79,7 @@ def _build_renderer(model_config: MockModelConfig):
_
,
tokenizer_name
,
_
,
kwargs
=
tokenizer_args_from_config
(
model_config
)
return
HfRenderer
(
model_config
,
MockVllmConfig
(
model_config
)
,
tokenizer_kwargs
=
{
**
kwargs
,
"tokenizer_name"
:
tokenizer_name
},
)
...
...
tests/entrypoints/openai/test_lora_resolvers.py
View file @
2f308214
...
...
@@ -57,6 +57,11 @@ class MockModelConfig:
return
self
.
diff_sampling_param
or
{}
@
dataclass
class
MockVllmConfig
:
model_config
:
MockModelConfig
class
MockLoRAResolver
(
LoRAResolver
):
async
def
resolve_lora
(
self
,
base_model_name
:
str
,
lora_name
:
str
...
...
@@ -91,7 +96,7 @@ def _build_renderer(model_config: MockModelConfig):
_
,
tokenizer_name
,
_
,
kwargs
=
tokenizer_args_from_config
(
model_config
)
return
HfRenderer
(
model_config
,
MockVllmConfig
(
model_config
)
,
tokenizer_kwargs
=
{
**
kwargs
,
"tokenizer_name"
:
tokenizer_name
},
)
...
...
tests/entrypoints/openai/test_serving_chat.py
View file @
2f308214
...
...
@@ -534,11 +534,16 @@ class MockModelConfig:
return
self
.
diff_sampling_param
or
{}
@
dataclass
class
MockVllmConfig
:
model_config
:
MockModelConfig
def
_build_renderer
(
model_config
:
MockModelConfig
):
_
,
tokenizer_name
,
_
,
kwargs
=
tokenizer_args_from_config
(
model_config
)
return
HfRenderer
(
model_config
,
MockVllmConfig
(
model_config
)
,
tokenizer_kwargs
=
{
**
kwargs
,
"tokenizer_name"
:
tokenizer_name
},
)
...
...
@@ -749,7 +754,10 @@ async def test_serving_chat_mistral_token_ids_prompt_is_validated():
mock_engine
.
io_processor
=
MagicMock
()
mock_tokenizer
=
MagicMock
(
spec
=
MistralTokenizer
)
mock_renderer
=
MistralRenderer
(
mock_engine
.
model_config
,
tokenizer_kwargs
=
{})
mock_renderer
=
MistralRenderer
(
MockVllmConfig
(
mock_engine
.
model_config
),
tokenizer_kwargs
=
{},
)
mock_renderer
.
_tokenizer
=
mock_tokenizer
# Force the Mistral chat template renderer to return token IDs.
# Choose a prompt length that is < max_model_len, but large enough that
...
...
@@ -788,7 +796,10 @@ async def test_serving_chat_mistral_token_ids_prompt_too_long_is_rejected():
mock_engine
.
io_processor
=
MagicMock
()
mock_tokenizer
=
MagicMock
(
spec
=
MistralTokenizer
)
mock_renderer
=
MistralRenderer
(
mock_engine
.
model_config
,
tokenizer_kwargs
=
{})
mock_renderer
=
MistralRenderer
(
MockVllmConfig
(
mock_engine
.
model_config
),
tokenizer_kwargs
=
{},
)
mock_renderer
.
_tokenizer
=
mock_tokenizer
# prompt_token_ids length == max_model_len should be rejected for
# completion-like requests (ChatCompletionRequest).
...
...
tests/renderers/test_completions.py
View file @
2f308214
...
...
@@ -40,6 +40,11 @@ class MockModelConfig:
is_encoder_decoder
:
bool
=
False
@
dataclass
class
MockVllmConfig
:
model_config
:
MockModelConfig
@
dataclass
class
DummyTokenizer
:
truncation_side
:
str
=
"left"
...
...
@@ -72,7 +77,7 @@ def _build_renderer(
_
,
tokenizer_name
,
_
,
kwargs
=
tokenizer_args_from_config
(
model_config
)
renderer
=
HfRenderer
(
model_config
,
MockVllmConfig
(
model_config
)
,
tokenizer_kwargs
=
{
**
kwargs
,
"tokenizer_name"
:
tokenizer_name
},
)
...
...
@@ -104,14 +109,14 @@ class TestValidatePrompt:
renderer
=
_build_renderer
(
MockModelConfig
())
with
pytest
.
raises
(
ValueError
,
match
=
"at least one prompt"
):
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
[]))
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
model_
config
,
[]))
def
test_invalid_type
(
self
):
renderer
=
_build_renderer
(
MockModelConfig
())
with
pytest
.
raises
(
TypeError
,
match
=
"should be a list of integers"
):
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
[[
1
,
2
],
[
"foo"
,
"bar"
]])
# type: ignore[arg-type]
_preprocess_prompt
(
renderer
.
model_
config
,
[[
1
,
2
],
[
"foo"
,
"bar"
]])
# type: ignore[arg-type]
)
...
...
@@ -120,7 +125,9 @@ class TestRenderPrompt:
renderer
=
_build_renderer
(
MockModelConfig
())
tokens
=
[
101
,
7592
,
2088
]
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
tokens
))
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
model_config
,
tokens
)
)
results
=
renderer
.
tokenize_prompts
(
prompts
,
TokenizeParams
(
max_total_tokens
=
100
),
...
...
@@ -134,7 +141,7 @@ class TestRenderPrompt:
token_lists
=
[[
101
,
7592
,
2088
],
[
102
,
1234
,
5678
,
9012
],
[
103
,
4567
]]
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
token_lists
)
_preprocess_prompt
(
renderer
.
model_
config
,
token_lists
)
)
results
=
renderer
.
tokenize_prompts
(
prompts
,
...
...
@@ -151,7 +158,7 @@ class TestRenderPrompt:
text_input
=
"x"
*
10
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
text_input
)
_preprocess_prompt
(
renderer
.
model_
config
,
text_input
)
)
results
=
renderer
.
tokenize_prompts
(
prompts
,
...
...
@@ -166,7 +173,7 @@ class TestRenderPrompt:
text_list_input
=
[
"x"
*
10
,
"x"
*
12
,
"x"
*
14
]
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
text_list_input
)
_preprocess_prompt
(
renderer
.
model_
config
,
text_list_input
)
)
results
=
renderer
.
tokenize_prompts
(
prompts
,
...
...
@@ -181,7 +188,7 @@ class TestRenderPrompt:
renderer
=
_build_renderer
(
MockModelConfig
())
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
"x"
*
200
)
_preprocess_prompt
(
renderer
.
model_
config
,
"x"
*
200
)
)
results
=
renderer
.
tokenize_prompts
(
prompts
,
...
...
@@ -195,7 +202,7 @@ class TestRenderPrompt:
renderer
=
_build_renderer
(
MockModelConfig
())
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
"x"
*
200
)
_preprocess_prompt
(
renderer
.
model_
config
,
"x"
*
200
)
)
results
=
renderer
.
tokenize_prompts
(
prompts
,
...
...
@@ -209,7 +216,7 @@ class TestRenderPrompt:
renderer
=
_build_renderer
(
MockModelConfig
())
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
"x"
*
200
)
_preprocess_prompt
(
renderer
.
model_
config
,
"x"
*
200
)
)
results
=
renderer
.
tokenize_prompts
(
prompts
,
...
...
@@ -224,7 +231,7 @@ class TestRenderPrompt:
long_tokens
=
[
100
,
101
,
102
,
103
,
104
,
105
,
106
,
107
,
108
,
109
]
# 10 tokens
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
long_tokens
)
_preprocess_prompt
(
renderer
.
model_
config
,
long_tokens
)
)
results
=
renderer
.
tokenize_prompts
(
prompts
,
...
...
@@ -240,7 +247,7 @@ class TestRenderPrompt:
long_tokens
=
[
100
,
101
,
102
,
103
,
104
,
105
,
106
,
107
,
108
,
109
]
# 10 tokens
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
long_tokens
)
_preprocess_prompt
(
renderer
.
model_
config
,
long_tokens
)
)
results
=
renderer
.
tokenize_prompts
(
prompts
,
...
...
@@ -257,7 +264,7 @@ class TestRenderPrompt:
# Exceeds max_total_tokens and max_total_tokens * VLLM_MAX_CHARS_PER_TOKEN
long_tokens
=
"x"
*
150
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
long_tokens
)
_preprocess_prompt
(
renderer
.
model_
config
,
long_tokens
)
)
with
pytest
.
raises
(
...
...
@@ -278,7 +285,7 @@ class TestRenderPrompt:
# Exceeds max_total_tokens but not max_total_tokens * VLLM_MAX_CHARS_PER_TOKEN
long_tokens
=
"x"
*
150
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
long_tokens
)
_preprocess_prompt
(
renderer
.
model_
config
,
long_tokens
)
)
with
pytest
.
raises
(
...
...
@@ -299,7 +306,7 @@ class TestRenderPrompt:
long_tokens
=
list
(
range
(
150
))
# Exceeds max_total_tokens=100
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
long_tokens
)
_preprocess_prompt
(
renderer
.
model_
config
,
long_tokens
)
)
with
pytest
.
raises
(
...
...
@@ -315,7 +322,7 @@ class TestRenderPrompt:
renderer
=
_build_renderer
(
MockModelConfig
(
skip_tokenizer_init
=
True
))
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
"Hello world"
)
_preprocess_prompt
(
renderer
.
model_
config
,
"Hello world"
)
)
with
pytest
.
raises
(
ValueError
,
match
=
"`skip_tokenizer_init=True`"
):
...
...
@@ -328,7 +335,9 @@ class TestRenderPrompt:
renderer
=
_build_renderer
(
MockModelConfig
())
tokens
=
[
1
,
2
,
3
,
4
]
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
tokens
))
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
model_config
,
tokens
)
)
results
=
renderer
.
tokenize_prompts
(
prompts
,
TokenizeParams
(
...
...
@@ -358,7 +367,7 @@ class TestRenderEmbedPrompt:
embed_bytes
=
self
.
_create_test_embed_bytes
(
tensor_input
)
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
embed_bytes
)
_preprocess_prompt
(
renderer
.
model_
config
,
embed_bytes
)
)
results
=
renderer
.
tokenize_prompts
(
prompts
,
...
...
@@ -379,7 +388,7 @@ class TestRenderEmbedPrompt:
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
renderer
.
model_
config
,
[
self
.
_create_test_embed_bytes
(
t
)
for
t
in
tensor_inputs
],
)
)
...
...
@@ -400,7 +409,7 @@ class TestRenderEmbedPrompt:
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
self
.
_create_test_embed_bytes
(
tensor_input
)
renderer
.
model_
config
,
self
.
_create_test_embed_bytes
(
tensor_input
)
)
)
results
=
renderer
.
tokenize_prompts
(
...
...
@@ -427,7 +436,7 @@ class TestRenderEmbedPrompt:
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
self
.
_create_test_embed_bytes
(
tensor_input
)
renderer
.
model_
config
,
self
.
_create_test_embed_bytes
(
tensor_input
)
)
)
results
=
renderer
.
tokenize_prompts
(
...
...
@@ -446,7 +455,7 @@ class TestRenderEmbedPrompt:
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
self
.
_create_test_embed_bytes
(
tensor_input
)
renderer
.
model_
config
,
self
.
_create_test_embed_bytes
(
tensor_input
)
)
)
results
=
renderer
.
tokenize_prompts
(
...
...
@@ -466,7 +475,7 @@ class TestRenderEmbedPrompt:
prompts
=
renderer
.
render_prompts
(
_preprocess_prompt
(
renderer
.
config
,
renderer
.
model_
config
,
[
text_input
,
self
.
_create_test_embed_bytes
(
tensor_input
)],
)
)
...
...
tests/renderers/test_mistral.py
View file @
2f308214
...
...
@@ -38,6 +38,11 @@ class MockModelConfig:
is_encoder_decoder
:
bool
=
False
@
dataclass
class
MockVllmConfig
:
model_config
:
MockModelConfig
@
pytest
.
mark
.
asyncio
async
def
test_async_mistral_tokenizer_does_not_block_event_loop
():
expected_tokens
=
[
1
,
2
,
3
]
...
...
@@ -50,7 +55,10 @@ async def test_async_mistral_tokenizer_does_not_block_event_loop():
mock_model_config
=
MockModelConfig
(
skip_tokenizer_init
=
True
)
mock_tokenizer
=
Mock
(
spec
=
MistralTokenizer
)
mock_tokenizer
.
apply_chat_template
=
mocked_apply_chat_template
mock_renderer
=
MistralRenderer
(
mock_model_config
,
tokenizer_kwargs
=
{})
mock_renderer
=
MistralRenderer
(
MockVllmConfig
(
mock_model_config
),
tokenizer_kwargs
=
{},
)
mock_renderer
.
_tokenizer
=
mock_tokenizer
task
=
mock_renderer
.
render_messages_async
([],
ChatParams
())
...
...
tests/test_inputs.py
View file @
2f308214
...
...
@@ -3,7 +3,7 @@
import
pytest
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.inputs.preprocess
import
InputPreprocessor
pytestmark
=
pytest
.
mark
.
cpu_test
...
...
@@ -20,7 +20,8 @@ pytestmark = pytest.mark.cpu_test
)
def
test_preprocessor_always_mm_code_path
(
model_id
,
prompt
):
model_config
=
ModelConfig
(
model
=
model_id
)
input_preprocessor
=
InputPreprocessor
(
model_config
)
vllm_config
=
VllmConfig
(
model_config
=
model_config
)
input_preprocessor
=
InputPreprocessor
(
vllm_config
)
# HF processor adds sep token
tokenizer
=
input_preprocessor
.
get_tokenizer
()
...
...
vllm/inputs/preprocess.py
View file @
2f308214
...
...
@@ -6,7 +6,7 @@ from typing import Any, overload
from
typing_extensions
import
assert_never
from
vllm.config
import
ModelConfig
,
Observability
Config
from
vllm.config
import
Vllm
Config
from
vllm.logger
import
init_logger
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalRegistry
from
vllm.multimodal.cache
import
BaseMultiModalProcessorCache
...
...
@@ -54,17 +54,16 @@ logger = init_logger(__name__)
class
InputPreprocessor
:
def
__init__
(
self
,
model_config
:
ModelConfig
,
observability_config
:
ObservabilityConfig
|
None
=
None
,
vllm_config
:
VllmConfig
,
renderer
:
BaseRenderer
|
None
=
None
,
mm_registry
:
MultiModalRegistry
=
MULTIMODAL_REGISTRY
,
mm_processor_cache
:
BaseMultiModalProcessorCache
|
None
=
None
,
)
->
None
:
super
().
__init__
()
self
.
model_config
=
model_config
self
.
observability_config
=
observability_config
self
.
renderer
=
renderer
or
renderer_from_config
(
model
_config
)
self
.
model_config
=
vllm_config
.
model_config
self
.
observability_config
=
vllm_config
.
observability_config
self
.
renderer
=
renderer
or
renderer_from_config
(
vllm
_config
)
self
.
mm_registry
=
mm_registry
self
.
mm_processor_cache
=
mm_processor_cache
...
...
vllm/renderers/base.py
View file @
2f308214
...
...
@@ -21,7 +21,7 @@ from .inputs.preprocess import extract_target_prompt
from
.params
import
ChatParams
,
TokenizeParams
if
TYPE_CHECKING
:
from
vllm.config
import
Model
Config
from
vllm.config
import
Vllm
Config
from
vllm.entrypoints.chat_utils
import
(
ChatCompletionMessageParam
,
ConversationMessage
,
...
...
@@ -35,15 +35,15 @@ class BaseRenderer(ABC):
@
abstractmethod
def
from_config
(
cls
,
config
:
"
Model
Config"
,
config
:
"
Vllm
Config"
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"BaseRenderer"
:
raise
NotImplementedError
def
__init__
(
self
,
config
:
"
Model
Config"
)
->
None
:
def
__init__
(
self
,
config
:
"
Vllm
Config"
)
->
None
:
super
().
__init__
()
self
.
config
=
config
self
.
model_
config
=
config
.
model_config
# Lazy initialization since offline LLM doesn't use async
self
.
_async_tokenizer
:
AsyncMicrobatchTokenizer
|
None
=
None
...
...
@@ -90,7 +90,7 @@ class BaseRenderer(ABC):
prompt
:
DictPrompt
|
bytes
,
)
->
DictPrompt
:
if
isinstance
(
prompt
,
bytes
):
embeds
=
safe_load_prompt_embeds
(
self
.
config
,
prompt
)
embeds
=
safe_load_prompt_embeds
(
self
.
model_
config
,
prompt
)
prompt
=
EmbedsPrompt
(
prompt_embeds
=
embeds
)
return
prompt
...
...
@@ -310,7 +310,7 @@ class BaseRenderer(ABC):
return
for
prompt
in
prompts
:
target_prompt
=
extract_target_prompt
(
self
.
config
,
prompt
)
target_prompt
=
extract_target_prompt
(
self
.
model_
config
,
prompt
)
target_prompt
.
update
(
prompt_extras
)
# type: ignore[arg-type]
# Top-level methods
...
...
@@ -325,7 +325,7 @@ class BaseRenderer(ABC):
# NOTE: Some MM models have non-default `add_special_tokens`
# so we handle tokenization in multi-modal processor
if
self
.
config
.
is_multimodal_model
:
if
self
.
model_
config
.
is_multimodal_model
:
self
.
_apply_prompt_extras
(
dict_prompts
,
prompt_extras
)
return
dict_prompts
...
...
vllm/renderers/deepseek_v32.py
View file @
2f308214
...
...
@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
Any
from
vllm.config
import
Model
Config
from
vllm.config
import
Vllm
Config
from
vllm.entrypoints.chat_utils
import
(
ChatCompletionMessageParam
,
ConversationMessage
,
...
...
@@ -26,19 +26,20 @@ class DeepseekV32Renderer(BaseRenderer):
@
classmethod
def
from_config
(
cls
,
config
:
Model
Config
,
config
:
Vllm
Config
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"BaseRenderer"
:
return
cls
(
config
,
tokenizer_kwargs
)
def
__init__
(
self
,
config
:
Model
Config
,
config
:
Vllm
Config
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
None
:
super
().
__init__
(
config
)
if
config
.
skip_tokenizer_init
:
model_config
=
self
.
model_config
if
model_config
.
skip_tokenizer_init
:
tokenizer
=
None
else
:
tokenizer
=
cached_get_tokenizer
(
...
...
@@ -67,7 +68,7 @@ class DeepseekV32Renderer(BaseRenderer):
tokenizer
=
self
.
get_tokenizer
()
conversation
,
mm_data
,
mm_uuids
=
parse_chat_messages
(
messages
,
self
.
config
,
self
.
model_
config
,
content_format
=
"string"
,
)
...
...
@@ -93,7 +94,7 @@ class DeepseekV32Renderer(BaseRenderer):
tokenizer
=
self
.
get_tokenizer
()
conversation
,
mm_data
,
mm_uuids
=
await
parse_chat_messages_async
(
messages
,
self
.
config
,
self
.
model_
config
,
content_format
=
"string"
,
)
...
...
vllm/renderers/grok2.py
View file @
2f308214
...
...
@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
Any
from
vllm.config
import
Model
Config
from
vllm.config
import
Vllm
Config
from
vllm.entrypoints.chat_utils
import
(
ChatCompletionMessageParam
,
ConversationMessage
,
...
...
@@ -25,19 +25,20 @@ class Grok2Renderer(BaseRenderer):
@
classmethod
def
from_config
(
cls
,
config
:
Model
Config
,
config
:
Vllm
Config
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"BaseRenderer"
:
return
cls
(
config
,
tokenizer_kwargs
)
def
__init__
(
self
,
config
:
Model
Config
,
config
:
Vllm
Config
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
None
:
super
().
__init__
(
config
)
if
config
.
skip_tokenizer_init
:
model_config
=
self
.
model_config
if
model_config
.
skip_tokenizer_init
:
tokenizer
=
None
else
:
tokenizer
=
cached_get_tokenizer
(
...
...
@@ -66,7 +67,7 @@ class Grok2Renderer(BaseRenderer):
tokenizer
=
self
.
get_tokenizer
()
conversation
,
mm_data
,
mm_uuids
=
parse_chat_messages
(
messages
,
self
.
config
,
self
.
model_
config
,
content_format
=
"string"
,
)
...
...
@@ -92,7 +93,7 @@ class Grok2Renderer(BaseRenderer):
tokenizer
=
self
.
get_tokenizer
()
conversation
,
mm_data
,
mm_uuids
=
await
parse_chat_messages_async
(
messages
,
self
.
config
,
self
.
model_
config
,
content_format
=
"string"
,
)
...
...
vllm/renderers/hf.py
View file @
2f308214
...
...
@@ -14,7 +14,7 @@ import jinja2.nodes
import
jinja2.parser
import
jinja2.sandbox
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.entrypoints.chat_utils
import
(
ChatCompletionMessageParam
,
ChatTemplateContentFormat
,
...
...
@@ -589,23 +589,24 @@ class HfRenderer(BaseRenderer):
@
classmethod
def
from_config
(
cls
,
config
:
Model
Config
,
config
:
Vllm
Config
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"BaseRenderer"
:
return
cls
(
config
,
tokenizer_kwargs
)
def
__init__
(
self
,
config
:
Model
Config
,
config
:
Vllm
Config
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
None
:
super
().
__init__
(
config
)
model_config
=
self
.
model_config
self
.
use_unified_vision_chunk
=
getattr
(
config
.
hf_config
,
"use_unified_vision_chunk"
,
False
model_
config
.
hf_config
,
"use_unified_vision_chunk"
,
False
)
if
config
.
skip_tokenizer_init
:
if
model_
config
.
skip_tokenizer_init
:
tokenizer
=
None
else
:
tokenizer
=
cast
(
...
...
@@ -634,7 +635,7 @@ class HfRenderer(BaseRenderer):
messages
:
list
[
ChatCompletionMessageParam
],
params
:
ChatParams
,
)
->
tuple
[
list
[
ConversationMessage
],
DictPrompt
]:
model_config
=
self
.
config
model_config
=
self
.
model_
config
tokenizer
=
self
.
get_tokenizer
()
conversation
,
mm_data
,
mm_uuids
=
parse_chat_messages
(
...
...
@@ -688,7 +689,7 @@ class HfRenderer(BaseRenderer):
messages
:
list
[
ChatCompletionMessageParam
],
params
:
ChatParams
,
)
->
tuple
[
list
[
ConversationMessage
],
DictPrompt
]:
model_config
=
self
.
config
model_config
=
self
.
model_
config
tokenizer
=
self
.
get_tokenizer
()
conversation
,
mm_data
,
mm_uuids
=
await
parse_chat_messages_async
(
...
...
vllm/renderers/mistral.py
View file @
2f308214
...
...
@@ -3,7 +3,7 @@
from
concurrent.futures
import
ThreadPoolExecutor
from
typing
import
Any
from
vllm.config
import
Model
Config
from
vllm.config
import
Vllm
Config
from
vllm.entrypoints.chat_utils
import
(
ChatCompletionMessageParam
,
ConversationMessage
,
...
...
@@ -54,19 +54,20 @@ class MistralRenderer(BaseRenderer):
@
classmethod
def
from_config
(
cls
,
config
:
Model
Config
,
config
:
Vllm
Config
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"BaseRenderer"
:
return
cls
(
config
,
tokenizer_kwargs
)
def
__init__
(
self
,
config
:
Model
Config
,
config
:
Vllm
Config
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
None
:
super
().
__init__
(
config
)
if
config
.
skip_tokenizer_init
:
model_config
=
self
.
model_config
if
model_config
.
skip_tokenizer_init
:
tokenizer
=
None
else
:
tokenizer
=
cached_get_tokenizer
(
...
...
@@ -100,7 +101,7 @@ class MistralRenderer(BaseRenderer):
tokenizer
=
self
.
get_tokenizer
()
conversation
,
mm_data
,
mm_uuids
=
parse_chat_messages
(
messages
,
self
.
config
,
self
.
model_
config
,
content_format
=
"string"
,
)
...
...
@@ -126,7 +127,7 @@ class MistralRenderer(BaseRenderer):
tokenizer
=
self
.
get_tokenizer
()
conversation
,
mm_data
,
mm_uuids
=
await
parse_chat_messages_async
(
messages
,
self
.
config
,
self
.
model_
config
,
content_format
=
"string"
,
)
...
...
vllm/renderers/registry.py
View file @
2f308214
...
...
@@ -10,7 +10,7 @@ from vllm.utils.import_utils import resolve_obj_by_qualname
from
.base
import
BaseRenderer
if
TYPE_CHECKING
:
from
vllm.config
import
Model
Config
from
vllm.config
import
Vllm
Config
logger
=
init_logger
(
__name__
)
...
...
@@ -55,7 +55,7 @@ class RendererRegistry:
def
load_renderer
(
self
,
renderer_mode
:
str
,
config
:
"
Model
Config"
,
config
:
"
Vllm
Config"
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
BaseRenderer
:
renderer_cls
=
self
.
load_renderer_cls
(
renderer_mode
)
...
...
@@ -71,12 +71,16 @@ RENDERER_REGISTRY = RendererRegistry(
"""The global `RendererRegistry` instance."""
def
renderer_from_config
(
config
:
"ModelConfig"
,
**
kwargs
):
def
renderer_from_config
(
config
:
"VllmConfig"
,
**
kwargs
):
model_config
=
config
.
model_config
tokenizer_mode
,
tokenizer_name
,
args
,
kwargs
=
tokenizer_args_from_config
(
config
,
**
kwargs
model_
config
,
**
kwargs
)
if
config
.
tokenizer_mode
==
"auto"
and
config
.
model_impl
==
"terratorch"
:
if
(
model_config
.
tokenizer_mode
==
"auto"
and
model_config
.
model_impl
==
"terratorch"
):
renderer_mode
=
"terratorch"
else
:
renderer_mode
=
tokenizer_mode
...
...
vllm/renderers/terratorch.py
View file @
2f308214
...
...
@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
Any
from
vllm.config
import
Model
Config
from
vllm.config
import
Vllm
Config
from
vllm.entrypoints.chat_utils
import
(
ChatCompletionMessageParam
,
ConversationMessage
,
...
...
@@ -24,15 +24,16 @@ class TerratorchRenderer(BaseRenderer):
@
classmethod
def
from_config
(
cls
,
config
:
"Model
Config
"
,
config
:
Vllm
Config
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"BaseRenderer"
:
return
cls
(
config
)
def
__init__
(
self
,
config
:
Model
Config
)
->
None
:
def
__init__
(
self
,
config
:
Vllm
Config
)
->
None
:
super
().
__init__
(
config
)
if
not
config
.
skip_tokenizer_init
:
model_config
=
self
.
model_config
if
not
model_config
.
skip_tokenizer_init
:
raise
ValueError
(
"Terratorch renderer requires `skip_tokenizer_init=True`"
)
@
property
...
...
@@ -47,7 +48,7 @@ class TerratorchRenderer(BaseRenderer):
messages
:
list
[
ChatCompletionMessageParam
],
params
:
ChatParams
,
)
->
tuple
[
list
[
ConversationMessage
],
DictPrompt
]:
model_config
=
self
.
config
model_config
=
self
.
model_
config
conversation
,
mm_data
,
mm_uuids
=
parse_chat_messages
(
messages
,
...
...
@@ -68,7 +69,7 @@ class TerratorchRenderer(BaseRenderer):
messages
:
list
[
ChatCompletionMessageParam
],
params
:
ChatParams
,
)
->
tuple
[
list
[
ConversationMessage
],
DictPrompt
]:
model_config
=
self
.
config
model_config
=
self
.
model_
config
conversation
,
mm_data
,
mm_uuids
=
await
parse_chat_messages_async
(
messages
,
...
...
vllm/v1/engine/async_llm.py
View file @
2f308214
...
...
@@ -132,7 +132,7 @@ class AsyncLLM(EngineClient):
"enabling logging without default stat loggers."
)
self
.
renderer
=
renderer
=
renderer_from_config
(
self
.
model
_config
)
self
.
renderer
=
renderer
=
renderer_from_config
(
self
.
vllm
_config
)
self
.
io_processor
=
get_io_processor
(
self
.
vllm_config
,
self
.
model_config
.
io_processor_plugin
,
...
...
vllm/v1/engine/input_processor.py
View file @
2f308214
...
...
@@ -59,7 +59,7 @@ class InputProcessor:
self
.
generation_config_fields
=
model_config
.
try_get_generation_config
()
self
.
renderer
=
renderer
or
renderer_from_config
(
model
_config
)
self
.
renderer
=
renderer
or
renderer_from_config
(
vllm
_config
)
self
.
mm_registry
=
mm_registry
self
.
mm_processor_cache
=
mm_registry
.
processor_cache_from_config
(
vllm_config
)
...
...
@@ -75,8 +75,7 @@ class InputProcessor:
mm_budget
.
reset_cache
()
# Not used anymore
self
.
input_preprocessor
=
InputPreprocessor
(
model_config
,
self
.
observability_config
,
vllm_config
,
renderer
=
renderer
,
mm_registry
=
mm_registry
,
mm_processor_cache
=
self
.
mm_processor_cache
,
...
...
vllm/v1/engine/llm_engine.py
View file @
2f308214
...
...
@@ -90,7 +90,7 @@ class LLMEngine:
self
.
dp_group
=
None
self
.
should_execute_dummy_batch
=
False
self
.
renderer
=
renderer
=
renderer_from_config
(
self
.
model
_config
)
self
.
renderer
=
renderer
=
renderer_from_config
(
self
.
vllm
_config
)
self
.
io_processor
=
get_io_processor
(
self
.
vllm_config
,
self
.
model_config
.
io_processor_plugin
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment