Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2e225f7b
Unverified
Commit
2e225f7b
authored
Mar 26, 2026
by
Cyrus Leung
Committed by
GitHub
Mar 26, 2026
Browse files
[Renderer] Consolidate factory methods (#38218)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
757eafcf
Changes
16
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
33 additions
and
233 deletions
+33
-233
tests/entrypoints/openai/chat_completion/test_chat_error.py
tests/entrypoints/openai/chat_completion/test_chat_error.py
+3
-5
tests/entrypoints/openai/chat_completion/test_serving_chat.py
...s/entrypoints/openai/chat_completion/test_serving_chat.py
+3
-5
tests/entrypoints/openai/completion/test_completion_error.py
tests/entrypoints/openai/completion/test_completion_error.py
+3
-5
tests/entrypoints/openai/completion/test_lora_resolvers.py
tests/entrypoints/openai/completion/test_lora_resolvers.py
+3
-5
tests/renderers/test_completions.py
tests/renderers/test_completions.py
+0
-3
tests/renderers/test_process_multi_modal_uuids.py
tests/renderers/test_process_multi_modal_uuids.py
+3
-5
vllm/config/model.py
vllm/config/model.py
+3
-1
vllm/renderers/base.py
vllm/renderers/base.py
+0
-9
vllm/renderers/deepseek_v32.py
vllm/renderers/deepseek_v32.py
+0
-20
vllm/renderers/grok2.py
vllm/renderers/grok2.py
+0
-20
vllm/renderers/hf.py
vllm/renderers/hf.py
+1
-22
vllm/renderers/kimi_audio.py
vllm/renderers/kimi_audio.py
+0
-49
vllm/renderers/mistral.py
vllm/renderers/mistral.py
+0
-19
vllm/renderers/qwen_vl.py
vllm/renderers/qwen_vl.py
+0
-28
vllm/renderers/registry.py
vllm/renderers/registry.py
+14
-23
vllm/renderers/terratorch.py
vllm/renderers/terratorch.py
+0
-14
No files found.
tests/entrypoints/openai/chat_completion/test_chat_error.py
View file @
2e225f7b
...
@@ -16,7 +16,7 @@ from vllm.entrypoints.openai.models.serving import OpenAIServingModels
...
@@ -16,7 +16,7 @@ from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from
vllm.entrypoints.serve.render.serving
import
OpenAIServingRender
from
vllm.entrypoints.serve.render.serving
import
OpenAIServingRender
from
vllm.outputs
import
CompletionOutput
,
RequestOutput
from
vllm.outputs
import
CompletionOutput
,
RequestOutput
from
vllm.renderers.hf
import
HfRenderer
from
vllm.renderers.hf
import
HfRenderer
from
vllm.tokenizers.registry
import
tokenizer_
args_
from_config
from
vllm.tokenizers.registry
import
cached_
tokenizer_from_config
from
vllm.v1.engine.async_llm
import
AsyncLLM
from
vllm.v1.engine.async_llm
import
AsyncLLM
MODEL_NAME
=
"openai-community/gpt2"
MODEL_NAME
=
"openai-community/gpt2"
...
@@ -72,11 +72,9 @@ class MockVllmConfig:
...
@@ -72,11 +72,9 @@ class MockVllmConfig:
def
_build_renderer
(
model_config
:
MockModelConfig
):
def
_build_renderer
(
model_config
:
MockModelConfig
):
_
,
tokenizer_name
,
_
,
kwargs
=
tokenizer_args_from_config
(
model_config
)
return
HfRenderer
(
return
HfRenderer
.
from_config
(
MockVllmConfig
(
model_config
,
parallel_config
=
MockParallelConfig
()),
MockVllmConfig
(
model_config
,
parallel_config
=
MockParallelConfig
()),
tokenizer_
kwargs
=
{
**
kwargs
,
"tokenizer_name"
:
tokenizer_name
}
,
cached_
tokenizer_
from_config
(
model_config
)
,
)
)
...
...
tests/entrypoints/openai/chat_completion/test_serving_chat.py
View file @
2e225f7b
...
@@ -41,7 +41,7 @@ from vllm.renderers.hf import HfRenderer
...
@@ -41,7 +41,7 @@ from vllm.renderers.hf import HfRenderer
from
vllm.renderers.mistral
import
MistralRenderer
from
vllm.renderers.mistral
import
MistralRenderer
from
vllm.tokenizers
import
get_tokenizer
from
vllm.tokenizers
import
get_tokenizer
from
vllm.tokenizers.mistral
import
MistralTokenizer
from
vllm.tokenizers.mistral
import
MistralTokenizer
from
vllm.tokenizers.registry
import
tokenizer_
args_
from_config
from
vllm.tokenizers.registry
import
cached_
tokenizer_from_config
from
vllm.tool_parsers
import
ToolParserManager
from
vllm.tool_parsers
import
ToolParserManager
from
vllm.v1.engine.async_llm
import
AsyncLLM
from
vllm.v1.engine.async_llm
import
AsyncLLM
...
@@ -553,11 +553,9 @@ class MockVllmConfig:
...
@@ -553,11 +553,9 @@ class MockVllmConfig:
def
_build_renderer
(
model_config
:
MockModelConfig
):
def
_build_renderer
(
model_config
:
MockModelConfig
):
_
,
tokenizer_name
,
_
,
kwargs
=
tokenizer_args_from_config
(
model_config
)
return
HfRenderer
(
return
HfRenderer
.
from_config
(
MockVllmConfig
(
model_config
,
parallel_config
=
MockParallelConfig
()),
MockVllmConfig
(
model_config
,
parallel_config
=
MockParallelConfig
()),
tokenizer_
kwargs
=
{
**
kwargs
,
"tokenizer_name"
:
tokenizer_name
}
,
cached_
tokenizer_
from_config
(
model_config
)
,
)
)
...
...
tests/entrypoints/openai/completion/test_completion_error.py
View file @
2e225f7b
...
@@ -16,7 +16,7 @@ from vllm.entrypoints.openai.models.serving import OpenAIServingModels
...
@@ -16,7 +16,7 @@ from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from
vllm.entrypoints.serve.render.serving
import
OpenAIServingRender
from
vllm.entrypoints.serve.render.serving
import
OpenAIServingRender
from
vllm.outputs
import
CompletionOutput
,
RequestOutput
from
vllm.outputs
import
CompletionOutput
,
RequestOutput
from
vllm.renderers.hf
import
HfRenderer
from
vllm.renderers.hf
import
HfRenderer
from
vllm.tokenizers.registry
import
tokenizer_
args_
from_config
from
vllm.tokenizers.registry
import
cached_
tokenizer_from_config
from
vllm.v1.engine.async_llm
import
AsyncLLM
from
vllm.v1.engine.async_llm
import
AsyncLLM
MODEL_NAME
=
"openai-community/gpt2"
MODEL_NAME
=
"openai-community/gpt2"
...
@@ -93,11 +93,9 @@ def _build_serving_completion(engine: AsyncLLM) -> OpenAIServingCompletion:
...
@@ -93,11 +93,9 @@ def _build_serving_completion(engine: AsyncLLM) -> OpenAIServingCompletion:
def
_build_renderer
(
model_config
:
MockModelConfig
):
def
_build_renderer
(
model_config
:
MockModelConfig
):
_
,
tokenizer_name
,
_
,
kwargs
=
tokenizer_args_from_config
(
model_config
)
return
HfRenderer
(
return
HfRenderer
.
from_config
(
MockVllmConfig
(
model_config
,
parallel_config
=
MockParallelConfig
()),
MockVllmConfig
(
model_config
,
parallel_config
=
MockParallelConfig
()),
tokenizer_
kwargs
=
{
**
kwargs
,
"tokenizer_name"
:
tokenizer_name
}
,
cached_
tokenizer_
from_config
(
model_config
)
,
)
)
...
...
tests/entrypoints/openai/completion/test_lora_resolvers.py
View file @
2e225f7b
...
@@ -18,7 +18,7 @@ from vllm.entrypoints.serve.render.serving import OpenAIServingRender
...
@@ -18,7 +18,7 @@ from vllm.entrypoints.serve.render.serving import OpenAIServingRender
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.resolver
import
LoRAResolver
,
LoRAResolverRegistry
from
vllm.lora.resolver
import
LoRAResolver
,
LoRAResolverRegistry
from
vllm.renderers.hf
import
HfRenderer
from
vllm.renderers.hf
import
HfRenderer
from
vllm.tokenizers.registry
import
tokenizer_
args_
from_config
from
vllm.tokenizers.registry
import
cached_
tokenizer_from_config
from
vllm.v1.engine.async_llm
import
AsyncLLM
from
vllm.v1.engine.async_llm
import
AsyncLLM
MODEL_NAME
=
"openai-community/gpt2"
MODEL_NAME
=
"openai-community/gpt2"
...
@@ -101,11 +101,9 @@ def register_mock_resolver():
...
@@ -101,11 +101,9 @@ def register_mock_resolver():
def
_build_renderer
(
model_config
:
MockModelConfig
):
def
_build_renderer
(
model_config
:
MockModelConfig
):
_
,
tokenizer_name
,
_
,
kwargs
=
tokenizer_args_from_config
(
model_config
)
return
HfRenderer
(
return
HfRenderer
.
from_config
(
MockVllmConfig
(
model_config
,
parallel_config
=
MockParallelConfig
()),
MockVllmConfig
(
model_config
,
parallel_config
=
MockParallelConfig
()),
tokenizer_
kwargs
=
{
**
kwargs
,
"tokenizer_name"
:
tokenizer_name
}
,
cached_
tokenizer_
from_config
(
model_config
)
,
)
)
...
...
tests/renderers/test_completions.py
View file @
2e225f7b
...
@@ -15,7 +15,6 @@ from vllm.inputs import SingletonPrompt
...
@@ -15,7 +15,6 @@ from vllm.inputs import SingletonPrompt
from
vllm.renderers
import
TokenizeParams
from
vllm.renderers
import
TokenizeParams
from
vllm.renderers.hf
import
HfRenderer
from
vllm.renderers.hf
import
HfRenderer
from
vllm.renderers.inputs.preprocess
import
parse_model_prompt
,
prompt_to_seq
from
vllm.renderers.inputs.preprocess
import
parse_model_prompt
,
prompt_to_seq
from
vllm.tokenizers.registry
import
tokenizer_args_from_config
MODEL_NAME
=
"openai-community/gpt2"
MODEL_NAME
=
"openai-community/gpt2"
...
@@ -81,8 +80,6 @@ def _build_renderer(
...
@@ -81,8 +80,6 @@ def _build_renderer(
truncation_side
:
str
=
"left"
,
truncation_side
:
str
=
"left"
,
max_chars_per_token
:
int
=
1
,
max_chars_per_token
:
int
=
1
,
):
):
_
,
tokenizer_name
,
_
,
kwargs
=
tokenizer_args_from_config
(
model_config
)
renderer
=
HfRenderer
(
renderer
=
HfRenderer
(
MockVllmConfig
(
model_config
,
parallel_config
=
MockParallelConfig
()),
MockVllmConfig
(
model_config
,
parallel_config
=
MockParallelConfig
()),
tokenizer
=
(
tokenizer
=
(
...
...
tests/renderers/test_process_multi_modal_uuids.py
View file @
2e225f7b
...
@@ -8,7 +8,7 @@ from vllm.assets.video import VideoAsset
...
@@ -8,7 +8,7 @@ from vllm.assets.video import VideoAsset
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.multimodal.parse
import
parse_mm_uuids
from
vllm.multimodal.parse
import
parse_mm_uuids
from
vllm.renderers.hf
import
HfRenderer
from
vllm.renderers.hf
import
HfRenderer
from
vllm.tokenizers.registry
import
tokenizer_
args_
from_config
from
vllm.tokenizers.registry
import
cached_
tokenizer_from_config
cherry_pil_image
=
ImageAsset
(
"cherry_blossom"
).
pil_image
cherry_pil_image
=
ImageAsset
(
"cherry_blossom"
).
pil_image
stop_pil_image
=
ImageAsset
(
"stop_sign"
).
pil_image
stop_pil_image
=
ImageAsset
(
"stop_sign"
).
pil_image
...
@@ -29,11 +29,9 @@ def _build_renderer(
...
@@ -29,11 +29,9 @@ def _build_renderer(
cache_config
=
CacheConfig
(
enable_prefix_caching
=
enable_prefix_caching
),
cache_config
=
CacheConfig
(
enable_prefix_caching
=
enable_prefix_caching
),
)
)
_
,
tokenizer_name
,
_
,
kwargs
=
tokenizer_args_from_config
(
model_config
)
return
HfRenderer
(
return
HfRenderer
.
from_config
(
vllm_config
,
vllm_config
,
tokenizer_
kwargs
=
{
**
kwargs
,
"tokenizer_name"
:
tokenizer_name
}
,
cached_
tokenizer_
from_config
(
model_config
)
,
)
)
...
...
vllm/config/model.py
View file @
2e225f7b
...
@@ -542,7 +542,9 @@ class ModelConfig:
...
@@ -542,7 +542,9 @@ class ModelConfig:
# Set default tokenizer modes based on model architecture
# Set default tokenizer modes based on model architecture
if
self
.
tokenizer_mode
==
"auto"
:
if
self
.
tokenizer_mode
==
"auto"
:
if
arch
==
"Grok1ForCausalLM"
:
if
self
.
model_impl
==
"terratorch"
:
self
.
tokenizer_mode
=
"terratorch"
elif
arch
==
"Grok1ForCausalLM"
:
self
.
tokenizer_mode
=
"grok2"
self
.
tokenizer_mode
=
"grok2"
elif
arch
==
"MoonshotKimiaForCausalLM"
:
elif
arch
==
"MoonshotKimiaForCausalLM"
:
self
.
tokenizer_mode
=
"kimi_audio"
self
.
tokenizer_mode
=
"kimi_audio"
...
...
vllm/renderers/base.py
View file @
2e225f7b
...
@@ -69,15 +69,6 @@ _T = TypeVar("_T", bound=TokenizerLike, default=TokenizerLike)
...
@@ -69,15 +69,6 @@ _T = TypeVar("_T", bound=TokenizerLike, default=TokenizerLike)
class
BaseRenderer
(
ABC
,
Generic
[
_T
]):
class
BaseRenderer
(
ABC
,
Generic
[
_T
]):
@
classmethod
@
abstractmethod
def
from_config
(
cls
,
config
:
"VllmConfig"
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"BaseRenderer"
:
raise
NotImplementedError
def
__init__
(
self
,
config
:
"VllmConfig"
,
tokenizer
:
_T
|
None
)
->
None
:
def
__init__
(
self
,
config
:
"VllmConfig"
,
tokenizer
:
_T
|
None
)
->
None
:
super
().
__init__
()
super
().
__init__
()
...
...
vllm/renderers/deepseek_v32.py
View file @
2e225f7b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
Any
from
vllm.config
import
VllmConfig
from
vllm.entrypoints.chat_utils
import
(
from
vllm.entrypoints.chat_utils
import
(
ChatCompletionMessageParam
,
ChatCompletionMessageParam
,
ConversationMessage
,
ConversationMessage
,
...
@@ -10,7 +8,6 @@ from vllm.entrypoints.chat_utils import (
...
@@ -10,7 +8,6 @@ from vllm.entrypoints.chat_utils import (
parse_chat_messages_async
,
parse_chat_messages_async
,
)
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
cached_get_tokenizer
from
vllm.tokenizers.deepseek_v32
import
DeepseekV32Tokenizer
from
vllm.tokenizers.deepseek_v32
import
DeepseekV32Tokenizer
from
.base
import
BaseRenderer
from
.base
import
BaseRenderer
...
@@ -22,23 +19,6 @@ logger = init_logger(__name__)
...
@@ -22,23 +19,6 @@ logger = init_logger(__name__)
class
DeepseekV32Renderer
(
BaseRenderer
[
DeepseekV32Tokenizer
]):
class
DeepseekV32Renderer
(
BaseRenderer
[
DeepseekV32Tokenizer
]):
@
classmethod
def
from_config
(
# type: ignore[override]
cls
,
config
:
VllmConfig
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"DeepseekV32Renderer"
:
model_config
=
config
.
model_config
if
model_config
.
skip_tokenizer_init
:
tokenizer
=
None
else
:
tokenizer
=
cached_get_tokenizer
(
tokenizer_cls
=
DeepseekV32Tokenizer
,
**
tokenizer_kwargs
,
)
return
cls
(
config
,
tokenizer
)
def
render_messages
(
def
render_messages
(
self
,
self
,
messages
:
list
[
ChatCompletionMessageParam
],
messages
:
list
[
ChatCompletionMessageParam
],
...
...
vllm/renderers/grok2.py
View file @
2e225f7b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
Any
from
vllm.config
import
VllmConfig
from
vllm.entrypoints.chat_utils
import
(
from
vllm.entrypoints.chat_utils
import
(
ChatCompletionMessageParam
,
ChatCompletionMessageParam
,
ConversationMessage
,
ConversationMessage
,
...
@@ -10,7 +8,6 @@ from vllm.entrypoints.chat_utils import (
...
@@ -10,7 +8,6 @@ from vllm.entrypoints.chat_utils import (
parse_chat_messages_async
,
parse_chat_messages_async
,
)
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
cached_get_tokenizer
from
vllm.tokenizers.grok2
import
Grok2Tokenizer
from
vllm.tokenizers.grok2
import
Grok2Tokenizer
from
.base
import
BaseRenderer
from
.base
import
BaseRenderer
...
@@ -22,23 +19,6 @@ logger = init_logger(__name__)
...
@@ -22,23 +19,6 @@ logger = init_logger(__name__)
class
Grok2Renderer
(
BaseRenderer
[
Grok2Tokenizer
]):
class
Grok2Renderer
(
BaseRenderer
[
Grok2Tokenizer
]):
@
classmethod
def
from_config
(
# type: ignore[override]
cls
,
config
:
VllmConfig
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"Grok2Renderer"
:
model_config
=
config
.
model_config
if
model_config
.
skip_tokenizer_init
:
tokenizer
=
None
else
:
tokenizer
=
cached_get_tokenizer
(
tokenizer_cls
=
Grok2Tokenizer
,
**
tokenizer_kwargs
,
)
return
cls
(
config
,
tokenizer
)
def
render_messages
(
def
render_messages
(
self
,
self
,
messages
:
list
[
ChatCompletionMessageParam
],
messages
:
list
[
ChatCompletionMessageParam
],
...
...
vllm/renderers/hf.py
View file @
2e225f7b
...
@@ -27,8 +27,7 @@ from vllm.entrypoints.chat_utils import (
...
@@ -27,8 +27,7 @@ from vllm.entrypoints.chat_utils import (
)
)
from
vllm.inputs
import
MultiModalDataDict
,
MultiModalUUIDDict
from
vllm.inputs
import
MultiModalDataDict
,
MultiModalUUIDDict
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
cached_get_tokenizer
from
vllm.tokenizers.hf
import
HfTokenizer
from
vllm.tokenizers.hf
import
CachedHfTokenizer
,
HfTokenizer
from
vllm.transformers_utils.chat_templates
import
get_chat_template_fallback_path
from
vllm.transformers_utils.chat_templates
import
get_chat_template_fallback_path
from
vllm.transformers_utils.processor
import
cached_get_processor
from
vllm.transformers_utils.processor
import
cached_get_processor
from
vllm.utils.func_utils
import
supports_kw
from
vllm.utils.func_utils
import
supports_kw
...
@@ -604,26 +603,6 @@ def replace_vision_chunk_video_placeholder(
...
@@ -604,26 +603,6 @@ def replace_vision_chunk_video_placeholder(
class
HfRenderer
(
BaseRenderer
[
HfTokenizer
]):
class
HfRenderer
(
BaseRenderer
[
HfTokenizer
]):
@
classmethod
def
from_config
(
# type: ignore[override]
cls
,
config
:
VllmConfig
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"HfRenderer"
:
model_config
=
config
.
model_config
if
model_config
.
skip_tokenizer_init
:
tokenizer
=
None
else
:
tokenizer
=
cast
(
HfTokenizer
,
cached_get_tokenizer
(
tokenizer_cls
=
CachedHfTokenizer
,
# type: ignore[type-abstract]
**
tokenizer_kwargs
,
),
)
return
cls
(
config
,
tokenizer
)
def
__init__
(
def
__init__
(
self
,
self
,
config
:
VllmConfig
,
config
:
VllmConfig
,
...
...
vllm/renderers/kimi_audio.py
deleted
100644 → 0
View file @
757eafcf
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
Any
,
cast
from
vllm.config
import
VllmConfig
from
vllm.tokenizers.kimi_audio
import
KimiAudioTokenizer
from
vllm.tokenizers.registry
import
get_tokenizer
from
.hf
import
HfRenderer
,
HfTokenizer
class
KimiAudioRenderer
(
HfRenderer
):
"""Renderer for Kimi-Audio models.
This renderer uses HfRenderer internally with a custom TikToken tokenizer.
"""
@
classmethod
def
from_config
(
# type: ignore[override]
cls
,
config
:
VllmConfig
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"HfRenderer"
:
"""Create an HfRenderer instance for Kimi-Audio models."""
model_config
=
config
.
model_config
if
model_config
.
skip_tokenizer_init
:
tokenizer
=
None
else
:
# Extract tokenizer_name from kwargs (already processed by
# tokenizer_args_from_config for ModelScope/GGUF/etc)
tokenizer_name
=
tokenizer_kwargs
.
pop
(
"tokenizer_name"
,
model_config
.
tokenizer
)
# Remove tokenizer_cls from kwargs to avoid duplicate argument
tokenizer_kwargs
=
{
k
:
v
for
k
,
v
in
tokenizer_kwargs
.
items
()
if
k
!=
"tokenizer_cls"
}
# Use get_tokenizer directly instead of cached_get_tokenizer
# (KimiAudioTokenizer doesn't work with get_cached_tokenizer)
tokenizer
=
cast
(
HfTokenizer
,
get_tokenizer
(
tokenizer_name
,
tokenizer_cls
=
KimiAudioTokenizer
,
# type: ignore[arg-type]
**
tokenizer_kwargs
,
),
)
return
HfRenderer
(
config
,
tokenizer
)
vllm/renderers/mistral.py
View file @
2e225f7b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
concurrent.futures
import
ThreadPoolExecutor
from
concurrent.futures
import
ThreadPoolExecutor
from
typing
import
Any
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.entrypoints.chat_utils
import
(
from
vllm.entrypoints.chat_utils
import
(
...
@@ -11,7 +10,6 @@ from vllm.entrypoints.chat_utils import (
...
@@ -11,7 +10,6 @@ from vllm.entrypoints.chat_utils import (
parse_chat_messages_async
,
parse_chat_messages_async
,
)
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
cached_get_tokenizer
from
vllm.tokenizers.mistral
import
MistralTokenizer
from
vllm.tokenizers.mistral
import
MistralTokenizer
from
vllm.utils.async_utils
import
make_async
from
vllm.utils.async_utils
import
make_async
...
@@ -51,23 +49,6 @@ def safe_apply_chat_template(
...
@@ -51,23 +49,6 @@ def safe_apply_chat_template(
class
MistralRenderer
(
BaseRenderer
[
MistralTokenizer
]):
class
MistralRenderer
(
BaseRenderer
[
MistralTokenizer
]):
@
classmethod
def
from_config
(
# type: ignore[override]
cls
,
config
:
VllmConfig
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"MistralRenderer"
:
model_config
=
config
.
model_config
if
model_config
.
skip_tokenizer_init
:
tokenizer
=
None
else
:
tokenizer
=
cached_get_tokenizer
(
tokenizer_cls
=
MistralTokenizer
,
**
tokenizer_kwargs
,
)
return
cls
(
config
,
tokenizer
)
def
__init__
(
def
__init__
(
self
,
self
,
config
:
VllmConfig
,
config
:
VllmConfig
,
...
...
vllm/renderers/qwen_vl.py
deleted
100644 → 0
View file @
757eafcf
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
Any
from
vllm.config
import
VllmConfig
from
vllm.tokenizers
import
cached_get_tokenizer
from
vllm.tokenizers.qwen_vl
import
QwenVLTokenizer
from
.hf
import
HfRenderer
class
QwenVLRenderer
(
HfRenderer
):
@
classmethod
def
from_config
(
# type: ignore[override]
cls
,
config
:
VllmConfig
,
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"HfRenderer"
:
model_config
=
config
.
model_config
if
model_config
.
skip_tokenizer_init
:
tokenizer
=
None
else
:
tokenizer
=
cached_get_tokenizer
(
tokenizer_cls
=
QwenVLTokenizer
,
**
tokenizer_kwargs
,
)
return
HfRenderer
(
config
,
tokenizer
)
vllm/renderers/registry.py
View file @
2e225f7b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
dataclasses
import
dataclass
,
field
from
dataclasses
import
dataclass
,
field
from
typing
import
TYPE_CHECKING
,
Any
from
typing
import
TYPE_CHECKING
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers.registry
import
tokenizer_args_from_config
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers.registry
import
(
cached_tokenizer_from_config
,
tokenizer_args_from_config
,
)
from
vllm.utils.import_utils
import
resolve_obj_by_qualname
from
vllm.utils.import_utils
import
resolve_obj_by_qualname
from
.base
import
BaseRenderer
from
.base
import
BaseRenderer
...
@@ -19,9 +23,9 @@ _VLLM_RENDERERS = {
...
@@ -19,9 +23,9 @@ _VLLM_RENDERERS = {
"deepseek_v32"
:
(
"deepseek_v32"
,
"DeepseekV32Renderer"
),
"deepseek_v32"
:
(
"deepseek_v32"
,
"DeepseekV32Renderer"
),
"hf"
:
(
"hf"
,
"HfRenderer"
),
"hf"
:
(
"hf"
,
"HfRenderer"
),
"grok2"
:
(
"grok2"
,
"Grok2Renderer"
),
"grok2"
:
(
"grok2"
,
"Grok2Renderer"
),
"kimi_audio"
:
(
"
kimi_audio"
,
"KimiAudio
Renderer"
),
"kimi_audio"
:
(
"
hf"
,
"Hf
Renderer"
),
"mistral"
:
(
"mistral"
,
"MistralRenderer"
),
"mistral"
:
(
"mistral"
,
"MistralRenderer"
),
"qwen_vl"
:
(
"
qwen_vl"
,
"QwenVL
Renderer"
),
"qwen_vl"
:
(
"
hf"
,
"Hf
Renderer"
),
"terratorch"
:
(
"terratorch"
,
"TerratorchRenderer"
),
"terratorch"
:
(
"terratorch"
,
"TerratorchRenderer"
),
}
}
...
@@ -58,10 +62,10 @@ class RendererRegistry:
...
@@ -58,10 +62,10 @@ class RendererRegistry:
self
,
self
,
renderer_mode
:
str
,
renderer_mode
:
str
,
config
:
"VllmConfig"
,
config
:
"VllmConfig"
,
tokenizer
_kwargs
:
dict
[
str
,
Any
]
,
tokenizer
:
TokenizerLike
|
None
,
)
->
BaseRenderer
:
)
->
BaseRenderer
:
renderer_cls
=
self
.
load_renderer_cls
(
renderer_mode
)
renderer_cls
=
self
.
load_renderer_cls
(
renderer_mode
)
return
renderer_cls
.
from_config
(
config
,
tokenizer
_kwargs
)
return
renderer_cls
(
config
,
tokenizer
)
RENDERER_REGISTRY
=
RendererRegistry
(
RENDERER_REGISTRY
=
RendererRegistry
(
...
@@ -76,20 +80,7 @@ RENDERER_REGISTRY = RendererRegistry(
...
@@ -76,20 +80,7 @@ RENDERER_REGISTRY = RendererRegistry(
def
renderer_from_config
(
config
:
"VllmConfig"
,
**
kwargs
):
def
renderer_from_config
(
config
:
"VllmConfig"
,
**
kwargs
):
model_config
=
config
.
model_config
model_config
=
config
.
model_config
tokenizer_mode
,
tokenizer_name
,
args
,
kwargs
=
tokenizer_args_from_config
(
tokenizer
=
cached_tokenizer_from_config
(
model_config
,
**
kwargs
)
model_config
,
**
kwargs
renderer_mode
,
*
_
=
tokenizer_args_from_config
(
model_config
,
**
kwargs
)
)
if
(
model_config
.
tokenizer_mode
==
"auto"
and
model_config
.
model_impl
==
"terratorch"
):
renderer_mode
=
"terratorch"
else
:
renderer_mode
=
tokenizer_mode
return
RENDERER_REGISTRY
.
load_renderer
(
return
RENDERER_REGISTRY
.
load_renderer
(
renderer_mode
,
config
,
tokenizer
)
renderer_mode
,
config
,
tokenizer_kwargs
=
{
**
kwargs
,
"tokenizer_name"
:
tokenizer_name
},
)
vllm/renderers/terratorch.py
View file @
2e225f7b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
Any
from
vllm.config
import
VllmConfig
from
vllm.entrypoints.chat_utils
import
(
from
vllm.entrypoints.chat_utils
import
(
ChatCompletionMessageParam
,
ChatCompletionMessageParam
,
ConversationMessage
,
ConversationMessage
,
...
@@ -20,18 +18,6 @@ logger = init_logger(__name__)
...
@@ -20,18 +18,6 @@ logger = init_logger(__name__)
class
TerratorchRenderer
(
BaseRenderer
):
class
TerratorchRenderer
(
BaseRenderer
):
@
classmethod
def
from_config
(
cls
,
config
:
VllmConfig
,
# type: ignore[override]
tokenizer_kwargs
:
dict
[
str
,
Any
],
)
->
"TerratorchRenderer"
:
model_config
=
config
.
model_config
if
not
model_config
.
skip_tokenizer_init
:
raise
ValueError
(
"Terratorch renderer requires `skip_tokenizer_init=True`"
)
return
cls
(
config
,
None
)
def
render_messages
(
def
render_messages
(
self
,
self
,
messages
:
list
[
ChatCompletionMessageParam
],
messages
:
list
[
ChatCompletionMessageParam
],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment