Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a3f8d5dd
Commit
a3f8d5dd
authored
Dec 17, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.13.0rc2' into v0.13.0rc2-ori
parents
8d75f22e
f34eca5f
Changes
499
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
220 additions
and
62 deletions
+220
-62
vllm/tool_parsers/mistral_tool_parser.py
vllm/tool_parsers/mistral_tool_parser.py
+6
-3
vllm/tool_parsers/olmo3_tool_parser.py
vllm/tool_parsers/olmo3_tool_parser.py
+2
-2
vllm/tool_parsers/openai_tool_parser.py
vllm/tool_parsers/openai_tool_parser.py
+8
-3
vllm/tool_parsers/phi4mini_tool_parser.py
vllm/tool_parsers/phi4mini_tool_parser.py
+2
-2
vllm/tool_parsers/pythonic_tool_parser.py
vllm/tool_parsers/pythonic_tool_parser.py
+2
-2
vllm/tool_parsers/qwen3coder_tool_parser.py
vllm/tool_parsers/qwen3coder_tool_parser.py
+3
-3
vllm/tool_parsers/qwen3xml_tool_parser.py
vllm/tool_parsers/qwen3xml_tool_parser.py
+3
-3
vllm/tool_parsers/seed_oss_tool_parser.py
vllm/tool_parsers/seed_oss_tool_parser.py
+3
-3
vllm/tool_parsers/step3_tool_parser.py
vllm/tool_parsers/step3_tool_parser.py
+3
-3
vllm/tool_parsers/utils.py
vllm/tool_parsers/utils.py
+0
-0
vllm/tool_parsers/xlam_tool_parser.py
vllm/tool_parsers/xlam_tool_parser.py
+1
-1
vllm/transformers_utils/config.py
vllm/transformers_utils/config.py
+36
-4
vllm/transformers_utils/configs/__init__.py
vllm/transformers_utils/configs/__init__.py
+2
-0
vllm/transformers_utils/configs/bagel.py
vllm/transformers_utils/configs/bagel.py
+53
-0
vllm/transformers_utils/processors/__init__.py
vllm/transformers_utils/processors/__init__.py
+2
-0
vllm/transformers_utils/processors/bagel.py
vllm/transformers_utils/processors/bagel.py
+73
-0
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/tokenizer.py
+10
-10
vllm/transformers_utils/tokenizer_base.py
vllm/transformers_utils/tokenizer_base.py
+2
-2
vllm/utils/deep_gemm.py
vllm/utils/deep_gemm.py
+2
-19
vllm/utils/flashinfer.py
vllm/utils/flashinfer.py
+7
-2
No files found.
vllm/
entrypoints/openai/
tool_parsers/mistral_tool_parser.py
→
vllm/tool_parsers/mistral_tool_parser.py
View file @
a3f8d5dd
...
@@ -6,6 +6,7 @@ from collections.abc import Sequence
...
@@ -6,6 +6,7 @@ from collections.abc import Sequence
from
enum
import
Enum
,
auto
from
enum
import
Enum
,
auto
from
random
import
choices
from
random
import
choices
from
string
import
ascii_letters
,
digits
from
string
import
ascii_letters
,
digits
from
typing
import
Any
import
ijson
import
ijson
import
regex
as
re
import
regex
as
re
...
@@ -20,11 +21,12 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -20,11 +21,12 @@ from vllm.entrypoints.openai.protocol import (
FunctionCall
,
FunctionCall
,
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.abstract_tool_parser
import
(
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers.mistral
import
MistralTokenizer
from
vllm.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
ToolParser
,
)
)
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
MistralTokenizer
,
TokenizerLike
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
@@ -84,6 +86,7 @@ class MistralToolParser(ToolParser):
...
@@ -84,6 +86,7 @@ class MistralToolParser(ToolParser):
# initialize properties used for state when parsing tool calls in
# initialize properties used for state when parsing tool calls in
# streaming mode
# streaming mode
self
.
prev_tool_call_arr
:
list
[
dict
[
str
,
Any
]]
=
[]
self
.
current_tool_id
:
int
=
-
1
self
.
current_tool_id
:
int
=
-
1
self
.
streaming_state
:
StreamingState
=
StreamingState
.
WAITING_FOR_TOOL_START
self
.
streaming_state
:
StreamingState
=
StreamingState
.
WAITING_FOR_TOOL_START
...
...
vllm/
entrypoints/openai/
tool_parsers/olmo3_tool_parser.py
→
vllm/tool_parsers/olmo3_tool_parser.py
View file @
a3f8d5dd
...
@@ -18,10 +18,10 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -18,10 +18,10 @@ from vllm.entrypoints.openai.protocol import (
FunctionCall
,
FunctionCall
,
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.abstract_tool_parser
import
(
from
vllm.logger
import
init_logger
from
vllm.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
ToolParser
,
)
)
from
vllm.logger
import
init_logger
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/
entrypoints/openai/
tool_parsers/openai_tool_parser.py
→
vllm/tool_parsers/openai_tool_parser.py
View file @
a3f8d5dd
...
@@ -12,10 +12,10 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -12,10 +12,10 @@ from vllm.entrypoints.openai.protocol import (
FunctionCall
,
FunctionCall
,
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.abstract_tool_parser
import
(
from
vllm.logger
import
init_logger
from
vllm.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
ToolParser
,
)
)
from
vllm.logger
import
init_logger
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
...
@@ -43,6 +43,7 @@ class OpenAIToolParser(ToolParser):
...
@@ -43,6 +43,7 @@ class OpenAIToolParser(ToolParser):
parser
=
parse_output_into_messages
(
token_ids
)
parser
=
parse_output_into_messages
(
token_ids
)
tool_calls
=
[]
tool_calls
=
[]
final_content
=
None
final_content
=
None
commentary_content
=
None
if
len
(
parser
.
messages
)
>
0
:
if
len
(
parser
.
messages
)
>
0
:
for
msg
in
parser
.
messages
:
for
msg
in
parser
.
messages
:
...
@@ -75,11 +76,15 @@ class OpenAIToolParser(ToolParser):
...
@@ -75,11 +76,15 @@ class OpenAIToolParser(ToolParser):
)
)
elif
msg
.
channel
==
"final"
:
elif
msg
.
channel
==
"final"
:
final_content
=
msg_text
final_content
=
msg_text
elif
msg
.
channel
==
"commentary"
and
not
msg
.
recipient
:
commentary_content
=
msg_text
return
ExtractedToolCallInformation
(
return
ExtractedToolCallInformation
(
tools_called
=
len
(
tool_calls
)
>
0
,
tools_called
=
len
(
tool_calls
)
>
0
,
tool_calls
=
tool_calls
,
tool_calls
=
tool_calls
,
content
=
final_content
,
# prefer final content over commentary content if both are present
# commentary content is tool call preambles meant to be shown to the user
content
=
final_content
or
commentary_content
,
)
)
def
extract_tool_calls_streaming
(
def
extract_tool_calls_streaming
(
...
...
vllm/
entrypoints/openai/
tool_parsers/phi4mini_tool_parser.py
→
vllm/tool_parsers/phi4mini_tool_parser.py
View file @
a3f8d5dd
...
@@ -16,10 +16,10 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -16,10 +16,10 @@ from vllm.entrypoints.openai.protocol import (
FunctionCall
,
FunctionCall
,
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.abstract_tool_parser
import
(
from
vllm.logger
import
init_logger
from
vllm.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
ToolParser
,
)
)
from
vllm.logger
import
init_logger
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/
entrypoints/openai/
tool_parsers/pythonic_tool_parser.py
→
vllm/tool_parsers/pythonic_tool_parser.py
View file @
a3f8d5dd
...
@@ -19,10 +19,10 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -19,10 +19,10 @@ from vllm.entrypoints.openai.protocol import (
FunctionCall
,
FunctionCall
,
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.abstract_tool_parser
import
(
from
vllm.logger
import
init_logger
from
vllm.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
ToolParser
,
)
)
from
vllm.logger
import
init_logger
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/
entrypoints/openai/
tool_parsers/qwen3coder_tool_parser.py
→
vllm/tool_parsers/qwen3coder_tool_parser.py
View file @
a3f8d5dd
...
@@ -18,11 +18,11 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -18,11 +18,11 @@ from vllm.entrypoints.openai.protocol import (
FunctionCall
,
FunctionCall
,
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
)
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/
entrypoints/openai/
tool_parsers/qwen3xml_tool_parser.py
→
vllm/tool_parsers/qwen3xml_tool_parser.py
View file @
a3f8d5dd
...
@@ -19,11 +19,11 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -19,11 +19,11 @@ from vllm.entrypoints.openai.protocol import (
FunctionCall
,
FunctionCall
,
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
)
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/
entrypoints/openai/
tool_parsers/seed_oss_tool_parser.py
→
vllm/tool_parsers/seed_oss_tool_parser.py
View file @
a3f8d5dd
...
@@ -21,11 +21,11 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -21,11 +21,11 @@ from vllm.entrypoints.openai.protocol import (
FunctionCall
,
FunctionCall
,
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
)
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/
entrypoints/openai/
tool_parsers/step3_tool_parser.py
→
vllm/tool_parsers/step3_tool_parser.py
View file @
a3f8d5dd
...
@@ -17,11 +17,11 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -17,11 +17,11 @@ from vllm.entrypoints.openai.protocol import (
FunctionCall
,
FunctionCall
,
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
)
from
vllm.utils
import
random_uuid
from
vllm.utils
import
random_uuid
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/
entrypoints/openai/
tool_parsers/utils.py
→
vllm/tool_parsers/utils.py
View file @
a3f8d5dd
File moved
vllm/
entrypoints/openai/
tool_parsers/xlam_tool_parser.py
→
vllm/tool_parsers/xlam_tool_parser.py
View file @
a3f8d5dd
...
@@ -17,7 +17,7 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -17,7 +17,7 @@ from vllm.entrypoints.openai.protocol import (
FunctionCall
,
FunctionCall
,
ToolCall
,
ToolCall
,
)
)
from
vllm.
entrypoints.openai.
tool_parsers.abstract_tool_parser
import
(
from
vllm.tool_parsers.abstract_tool_parser
import
(
ToolParser
,
ToolParser
,
)
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
...
...
vllm/transformers_utils/config.py
View file @
a3f8d5dd
...
@@ -66,6 +66,7 @@ class LazyConfigDict(dict):
...
@@ -66,6 +66,7 @@ class LazyConfigDict(dict):
_CONFIG_REGISTRY
:
dict
[
str
,
type
[
PretrainedConfig
]]
=
LazyConfigDict
(
_CONFIG_REGISTRY
:
dict
[
str
,
type
[
PretrainedConfig
]]
=
LazyConfigDict
(
afmoe
=
"AfmoeConfig"
,
afmoe
=
"AfmoeConfig"
,
bagel
=
"BagelConfig"
,
chatglm
=
"ChatGLMConfig"
,
chatglm
=
"ChatGLMConfig"
,
deepseek_vl_v2
=
"DeepseekVLV2Config"
,
deepseek_vl_v2
=
"DeepseekVLV2Config"
,
deepseek_v32
=
"DeepseekV3Config"
,
deepseek_v32
=
"DeepseekV3Config"
,
...
@@ -306,8 +307,13 @@ def patch_rope_parameters(config: PretrainedConfig) -> None:
...
@@ -306,8 +307,13 @@ def patch_rope_parameters(config: PretrainedConfig) -> None:
"""Provide backwards compatibility for RoPE."""
"""Provide backwards compatibility for RoPE."""
from
vllm.config.utils
import
getattr_iter
from
vllm.config.utils
import
getattr_iter
rope_theta_names
=
(
"rope_theta"
,
"rotary_emb_base"
)
# Older custom models may use non-standard field names
rope_theta
=
getattr_iter
(
config
,
rope_theta_names
,
None
)
# which need patching for both Transformers v4 and v5.
names
=
[
"rope_theta"
,
"rotary_emb_base"
]
rope_theta
=
getattr_iter
(
config
,
names
,
None
,
warn
=
True
)
names
=
[
"partial_rotary_factor"
,
"rotary_pct"
,
"rotary_emb_fraction"
]
partial_rotary_factor
=
getattr_iter
(
config
,
names
,
None
,
warn
=
True
)
if
Version
(
version
(
"transformers"
))
<
Version
(
"5.0.0.dev0"
):
if
Version
(
version
(
"transformers"
))
<
Version
(
"5.0.0.dev0"
):
# Transformers v4 installed, legacy config fields may be present
# Transformers v4 installed, legacy config fields may be present
if
(
rope_scaling
:
=
getattr
(
config
,
"rope_scaling"
,
None
))
is
not
None
:
if
(
rope_scaling
:
=
getattr
(
config
,
"rope_scaling"
,
None
))
is
not
None
:
...
@@ -316,14 +322,18 @@ def patch_rope_parameters(config: PretrainedConfig) -> None:
...
@@ -316,14 +322,18 @@ def patch_rope_parameters(config: PretrainedConfig) -> None:
if
not
hasattr
(
config
,
"rope_parameters"
):
if
not
hasattr
(
config
,
"rope_parameters"
):
config
.
rope_parameters
=
{
"rope_type"
:
"default"
}
config
.
rope_parameters
=
{
"rope_type"
:
"default"
}
config
.
rope_parameters
[
"rope_theta"
]
=
rope_theta
config
.
rope_parameters
[
"rope_theta"
]
=
rope_theta
partial_rotary_factor_names
=
(
"partial_rotary_factor"
,
"rotary_pct"
)
partial_rotary_factor
=
getattr_iter
(
config
,
partial_rotary_factor_names
,
None
)
if
partial_rotary_factor
is
not
None
:
if
partial_rotary_factor
is
not
None
:
if
not
hasattr
(
config
,
"rope_parameters"
):
if
not
hasattr
(
config
,
"rope_parameters"
):
config
.
rope_parameters
=
{
"rope_type"
:
"default"
}
config
.
rope_parameters
=
{
"rope_type"
:
"default"
}
config
.
rope_parameters
[
"partial_rotary_factor"
]
=
partial_rotary_factor
config
.
rope_parameters
[
"partial_rotary_factor"
]
=
partial_rotary_factor
elif
rope_theta
is
not
None
or
hasattr
(
config
,
"rope_parameters"
):
elif
rope_theta
is
not
None
or
hasattr
(
config
,
"rope_parameters"
):
# Transformers v5 installed
# Transformers v5 installed
# Patch these fields in case they used non-standard names
if
rope_theta
is
not
None
:
config
.
rope_theta
=
rope_theta
if
partial_rotary_factor
is
not
None
:
config
.
partial_rotary_factor
=
partial_rotary_factor
# Standardize and validate RoPE parameters
config
.
standardize_rope_params
()
config
.
standardize_rope_params
()
config
.
validate_rope
()
config
.
validate_rope
()
...
@@ -608,6 +618,28 @@ def get_config(
...
@@ -608,6 +618,28 @@ def get_config(
hf_overrides
=
hf_overrides_kw
,
hf_overrides
=
hf_overrides_kw
,
**
kwargs
,
**
kwargs
,
)
)
# Patching defaults for GGUF models
if
_is_gguf
:
# Some models have different default values between GGUF and HF.
def
apply_gguf_default
(
key
:
str
,
gguf_default
:
Any
):
"""
Apply GGUF defaults unless explicitly configured.
This function reads/writes external `config` and `config_dict`.
If the specified `key` is not in `config_dict` (i.e. not explicitly
configured and the default HF value is used), it updates the
corresponding `config` value to `gguf_default`.
"""
if
key
not
in
config_dict
:
config
.
update
({
key
:
gguf_default
})
# Apply architecture-specific GGUF defaults.
if
config
.
model_type
in
{
"qwen3_moe"
}:
# Qwen3 MoE: norm_topk_prob is always true.
# Note that, this parameter is always false (HF default) on Qwen2 MoE.
apply_gguf_default
(
"norm_topk_prob"
,
True
)
# Special architecture mapping check for GGUF models
# Special architecture mapping check for GGUF models
if
_is_gguf
:
if
_is_gguf
:
if
config
.
model_type
not
in
MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
:
if
config
.
model_type
not
in
MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
:
...
...
vllm/transformers_utils/configs/__init__.py
View file @
a3f8d5dd
...
@@ -16,6 +16,7 @@ import importlib
...
@@ -16,6 +16,7 @@ import importlib
_CLASS_TO_MODULE
:
dict
[
str
,
str
]
=
{
_CLASS_TO_MODULE
:
dict
[
str
,
str
]
=
{
"AfmoeConfig"
:
"vllm.transformers_utils.configs.afmoe"
,
"AfmoeConfig"
:
"vllm.transformers_utils.configs.afmoe"
,
"BagelConfig"
:
"vllm.transformers_utils.configs.bagel"
,
"ChatGLMConfig"
:
"vllm.transformers_utils.configs.chatglm"
,
"ChatGLMConfig"
:
"vllm.transformers_utils.configs.chatglm"
,
"DeepseekVLV2Config"
:
"vllm.transformers_utils.configs.deepseek_vl2"
,
"DeepseekVLV2Config"
:
"vllm.transformers_utils.configs.deepseek_vl2"
,
"DotsOCRConfig"
:
"vllm.transformers_utils.configs.dotsocr"
,
"DotsOCRConfig"
:
"vllm.transformers_utils.configs.dotsocr"
,
...
@@ -54,6 +55,7 @@ _CLASS_TO_MODULE: dict[str, str] = {
...
@@ -54,6 +55,7 @@ _CLASS_TO_MODULE: dict[str, str] = {
__all__
=
[
__all__
=
[
"AfmoeConfig"
,
"AfmoeConfig"
,
"BagelConfig"
,
"ChatGLMConfig"
,
"ChatGLMConfig"
,
"DeepseekVLV2Config"
,
"DeepseekVLV2Config"
,
"DeepseekV3Config"
,
"DeepseekV3Config"
,
...
...
vllm/transformers_utils/configs/bagel.py
0 → 100644
View file @
a3f8d5dd
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
transformers
import
PretrainedConfig
,
SiglipVisionConfig
from
transformers.models.qwen2
import
Qwen2Config
class BagelConfig(PretrainedConfig):
    """Configuration for the BAGEL model.

    Bundles a language-model config (``Qwen2Config``), a vision-tower config
    (``SiglipVisionConfig``), a VAE settings dict and several scalar options
    into one ``PretrainedConfig`` subclass.
    """

    model_type = "bagel"

    def __init__(
        self,
        visual_gen: bool = True,
        visual_und: bool = True,
        llm_config: dict | Qwen2Config | None = None,
        vit_config: dict | SiglipVisionConfig | None = None,
        vae_config: dict | None = None,
        latent_patch_size: int = 2,
        max_latent_size: int = 32,
        vit_max_num_patch_per_side: int = 70,
        connector_act: str = "gelu_pytorch_tanh",
        interpolate_pos: bool = False,
        timestep_shift: float = 1.0,
        **kwargs,
    ):
        """Store the BAGEL options and normalise the nested sub-configs.

        Args:
            visual_gen: Stored unchanged on the instance.
            visual_und: Stored unchanged on the instance.
            llm_config: Language-model config. A dict is expanded into
                ``Qwen2Config(**llm_config)``; a falsy value becomes a
                default ``Qwen2Config()``; anything else is kept as given.
            vit_config: Vision config, normalised the same way using
                ``SiglipVisionConfig``.
            vae_config: VAE settings dict. A falsy value is replaced by
                ``{"z_channels": 16, "downsample": 8}``.
            latent_patch_size: Stored unchanged on the instance.
            max_latent_size: Stored unchanged on the instance.
            vit_max_num_patch_per_side: Stored unchanged on the instance.
            connector_act: Stored unchanged on the instance.
            interpolate_pos: Stored unchanged on the instance.
            timestep_shift: Stored unchanged on the instance.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``.
        """
        super().__init__(**kwargs)

        self.visual_gen = visual_gen
        self.visual_und = visual_und

        # Sub-configs may be passed as plain dicts; turn them into config
        # objects, and fall back to library defaults when nothing is given.
        if isinstance(llm_config, dict):
            llm_config = Qwen2Config(**llm_config)
        elif not llm_config:
            llm_config = Qwen2Config()
        self.llm_config = llm_config

        if isinstance(vit_config, dict):
            vit_config = SiglipVisionConfig(**vit_config)
        elif not vit_config:
            vit_config = SiglipVisionConfig()
        self.vit_config = vit_config

        if not vae_config:
            vae_config = {"z_channels": 16, "downsample": 8}
        self.vae_config = vae_config

        self.latent_patch_size = latent_patch_size
        self.max_latent_size = max_latent_size
        self.vit_max_num_patch_per_side = vit_max_num_patch_per_side
        self.connector_act = connector_act
        self.interpolate_pos = interpolate_pos
        self.timestep_shift = timestep_shift

    @property
    def hidden_size(self) -> int:
        """Hidden size of the wrapped language-model config."""
        llm = self.llm_config
        return llm.hidden_size
vllm/transformers_utils/processors/__init__.py
View file @
a3f8d5dd
...
@@ -8,6 +8,7 @@ reasons:
...
@@ -8,6 +8,7 @@ reasons:
- There is a need to override the existing processor to support vLLM.
- There is a need to override the existing processor to support vLLM.
"""
"""
from
vllm.transformers_utils.processors.bagel
import
BagelProcessor
from
vllm.transformers_utils.processors.deepseek_vl2
import
DeepseekVLV2Processor
from
vllm.transformers_utils.processors.deepseek_vl2
import
DeepseekVLV2Processor
from
vllm.transformers_utils.processors.hunyuan_vl
import
HunYuanVLProcessor
from
vllm.transformers_utils.processors.hunyuan_vl
import
HunYuanVLProcessor
from
vllm.transformers_utils.processors.hunyuan_vl_image
import
HunYuanVLImageProcessor
from
vllm.transformers_utils.processors.hunyuan_vl_image
import
HunYuanVLImageProcessor
...
@@ -15,6 +16,7 @@ from vllm.transformers_utils.processors.ovis import OvisProcessor
...
@@ -15,6 +16,7 @@ from vllm.transformers_utils.processors.ovis import OvisProcessor
from
vllm.transformers_utils.processors.ovis2_5
import
Ovis2_5Processor
from
vllm.transformers_utils.processors.ovis2_5
import
Ovis2_5Processor
__all__
=
[
__all__
=
[
"BagelProcessor"
,
"DeepseekVLV2Processor"
,
"DeepseekVLV2Processor"
,
"HunYuanVLProcessor"
,
"HunYuanVLProcessor"
,
"HunYuanVLImageProcessor"
,
"HunYuanVLImageProcessor"
,
...
...
vllm/transformers_utils/processors/bagel.py
0 → 100644
View file @
a3f8d5dd
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# Copyright 2025 Bytedance Ltd. and/or its affiliates.
"""BAGEL processor for image and text inputs."""
from
transformers
import
AutoProcessor
from
transformers.image_utils
import
ImageInput
from
transformers.processing_utils
import
ProcessorMixin
from
transformers.tokenization_utils_base
import
PreTokenizedInput
,
TextInput
class BagelProcessor(ProcessorMixin):
    """
    BAGEL processor that wraps a SigLIP image processor and a Qwen2
    tokenizer behind a single callable.
    """

    attributes = ["image_processor", "tokenizer"]
    image_processor_class = "SiglipImageProcessor"
    tokenizer_class = "AutoTokenizer"

    def __call__(
        self,
        text: TextInput
        | PreTokenizedInput
        | list[TextInput]
        | list[PreTokenizedInput] = None,
        images: ImageInput = None,
        **kwargs,
    ):
        """
        Prepare one or several text sequence(s) and/or image(s) for the model.

        Args:
            text: Text passed to the tokenizer; skipped when ``None``.
            images: Images passed to the image processor; skipped when ``None``.
            **kwargs: Forwarded to both the tokenizer and the image processor.
                For the image processor, ``return_tensors`` defaults to
                ``"pt"`` when the caller did not supply it.

        Returns:
            The tokenizer output with ``"pixel_values"`` added when both text
            and images are given; the image-processor output when only images
            are given; otherwise the tokenizer output (``None`` when neither
            input is given).
        """
        image_outputs = None
        if images is not None:
            # Work on a copy so the tokenizer still sees the caller's kwargs;
            # PyTorch tensors are the default for images only.
            image_kwargs = dict(kwargs)
            image_kwargs.setdefault("return_tensors", "pt")
            image_outputs = self.image_processor(images, **image_kwargs)

        encoded_text = None
        if text is not None:
            encoded_text = self.tokenizer(text, **kwargs)

        if image_outputs is None:
            return encoded_text
        if encoded_text is None:
            return image_outputs

        # Both modalities present: attach the pixels to the text encoding.
        encoded_text["pixel_values"] = image_outputs["pixel_values"]
        return encoded_text

    def batch_decode(self, *args, **kwargs):
        """
        Forward all arguments to the wrapped tokenizer's ``batch_decode``.
        """
        tokenizer = self.tokenizer
        return tokenizer.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        """
        Forward all arguments to the wrapped tokenizer's ``decode``.
        """
        tokenizer = self.tokenizer
        return tokenizer.decode(*args, **kwargs)

    @property
    def model_input_names(self):
        """Tokenizer input names followed by image-processor input names,
        de-duplicated with first-seen order preserved."""
        combined = (
            self.tokenizer.model_input_names
            + self.image_processor.model_input_names
        )
        return list(dict.fromkeys(combined))
# Register BagelProcessor with transformers' AutoProcessor under the key
# "BagelProcessor". NOTE(review): presumably this lets the processor be
# resolved by name at load time — confirm against the loading code.
AutoProcessor.register("BagelProcessor", BagelProcessor)
vllm/transformers_utils/tokenizer.py
View file @
a3f8d5dd
...
@@ -17,7 +17,7 @@ def __getattr__(name: str):
...
@@ -17,7 +17,7 @@ def __getattr__(name: str):
warnings
.
warn
(
warnings
.
warn
(
"`vllm.transformers_utils.tokenizer.AnyTokenizer` has been moved to "
"`vllm.transformers_utils.tokenizer.AnyTokenizer` has been moved to "
"`vllm.tokenizers.TokenizerLike`. "
"`vllm.tokenizers.TokenizerLike`. "
"The old name will be removed in v0.1
3
."
,
"The old name will be removed in v0.1
4
."
,
DeprecationWarning
,
DeprecationWarning
,
stacklevel
=
2
,
stacklevel
=
2
,
)
)
...
@@ -29,7 +29,7 @@ def __getattr__(name: str):
...
@@ -29,7 +29,7 @@ def __getattr__(name: str):
warnings
.
warn
(
warnings
.
warn
(
"`vllm.transformers_utils.tokenizer.get_tokenizer` "
"`vllm.transformers_utils.tokenizer.get_tokenizer` "
"has been moved to `vllm.tokenizers.get_tokenizer`. "
"has been moved to `vllm.tokenizers.get_tokenizer`. "
"The old name will be removed in v0.1
3
."
,
"The old name will be removed in v0.1
4
."
,
DeprecationWarning
,
DeprecationWarning
,
stacklevel
=
2
,
stacklevel
=
2
,
)
)
...
@@ -41,7 +41,7 @@ def __getattr__(name: str):
...
@@ -41,7 +41,7 @@ def __getattr__(name: str):
warnings
.
warn
(
warnings
.
warn
(
"`vllm.transformers_utils.tokenizer.cached_get_tokenizer` "
"`vllm.transformers_utils.tokenizer.cached_get_tokenizer` "
"has been moved to `vllm.tokenizers.cached_get_tokenizer`. "
"has been moved to `vllm.tokenizers.cached_get_tokenizer`. "
"The old name will be removed in v0.1
3
."
,
"The old name will be removed in v0.1
4
."
,
DeprecationWarning
,
DeprecationWarning
,
stacklevel
=
2
,
stacklevel
=
2
,
)
)
...
@@ -53,29 +53,29 @@ def __getattr__(name: str):
...
@@ -53,29 +53,29 @@ def __getattr__(name: str):
warnings
.
warn
(
warnings
.
warn
(
"`vllm.transformers_utils.tokenizer.cached_tokenizer_from_config` "
"`vllm.transformers_utils.tokenizer.cached_tokenizer_from_config` "
"has been moved to `vllm.tokenizers.cached_tokenizer_from_config`. "
"has been moved to `vllm.tokenizers.cached_tokenizer_from_config`. "
"The old name will be removed in v0.1
3
."
,
"The old name will be removed in v0.1
4
."
,
DeprecationWarning
,
DeprecationWarning
,
stacklevel
=
2
,
stacklevel
=
2
,
)
)
return
cached_tokenizer_from_config
return
cached_tokenizer_from_config
if
name
==
"init_tokenizer_from_configs"
:
if
name
==
"init_tokenizer_from_configs"
:
from
vllm.tokenizers
import
init
_tokenizer_from_config
from
vllm.tokenizers
import
cached
_tokenizer_from_config
warnings
.
warn
(
warnings
.
warn
(
"`vllm.transformers_utils.tokenizer.init_tokenizer_from_configs` "
"`vllm.transformers_utils.tokenizer.init_tokenizer_from_configs` "
"has been moved to `vllm.tokenizers.
init
_tokenizer_from_config`. "
"has been moved to `vllm.tokenizers.
cached
_tokenizer_from_config`. "
"The old name will be removed in v0.1
3
."
,
"The old name will be removed in v0.1
4
."
,
DeprecationWarning
,
DeprecationWarning
,
stacklevel
=
2
,
stacklevel
=
2
,
)
)
return
init
_tokenizer_from_config
return
cached
_tokenizer_from_config
raise
AttributeError
(
f
"module
{
__name__
!
r
}
has no attribute
{
name
!
r
}
"
)
raise
AttributeError
(
f
"module
{
__name__
!
r
}
has no attribute
{
name
!
r
}
"
)
@
deprecated
(
"Will be removed in v0.1
3
. Please use `tokenizer.decode()` instead."
)
@
deprecated
(
"Will be removed in v0.1
4
. Please use `tokenizer.decode()` instead."
)
def
decode_tokens
(
def
decode_tokens
(
tokenizer
:
TokenizerLike
,
tokenizer
:
TokenizerLike
,
token_ids
:
list
[
int
],
token_ids
:
list
[
int
],
...
@@ -97,7 +97,7 @@ def decode_tokens(
...
@@ -97,7 +97,7 @@ def decode_tokens(
return
tokenizer
.
decode
(
token_ids
,
**
kw_args
)
return
tokenizer
.
decode
(
token_ids
,
**
kw_args
)
@
deprecated
(
"Will be removed in v0.1
3
. Please use `tokenizer.encode()` instead."
)
@
deprecated
(
"Will be removed in v0.1
4
. Please use `tokenizer.encode()` instead."
)
def
encode_tokens
(
def
encode_tokens
(
tokenizer
:
TokenizerLike
,
tokenizer
:
TokenizerLike
,
text
:
str
,
text
:
str
,
...
...
vllm/transformers_utils/tokenizer_base.py
View file @
a3f8d5dd
...
@@ -11,7 +11,7 @@ def __getattr__(name: str):
...
@@ -11,7 +11,7 @@ def __getattr__(name: str):
warnings
.
warn
(
warnings
.
warn
(
"`vllm.transformers_utils.tokenizer_base.TokenizerBase` has been "
"`vllm.transformers_utils.tokenizer_base.TokenizerBase` has been "
"moved to `vllm.tokenizers.TokenizerLike`. "
"moved to `vllm.tokenizers.TokenizerLike`. "
"The old name will be removed in v0.1
3
."
,
"The old name will be removed in v0.1
4
."
,
DeprecationWarning
,
DeprecationWarning
,
stacklevel
=
2
,
stacklevel
=
2
,
)
)
...
@@ -23,7 +23,7 @@ def __getattr__(name: str):
...
@@ -23,7 +23,7 @@ def __getattr__(name: str):
warnings
.
warn
(
warnings
.
warn
(
"`vllm.transformers_utils.tokenizer_base.TokenizerRegistry` has been "
"`vllm.transformers_utils.tokenizer_base.TokenizerRegistry` has been "
"moved to `vllm.tokenizers.TokenizerRegistry`. "
"moved to `vllm.tokenizers.TokenizerRegistry`. "
"The old name will be removed in v0.1
3
."
,
"The old name will be removed in v0.1
4
."
,
DeprecationWarning
,
DeprecationWarning
,
stacklevel
=
2
,
stacklevel
=
2
,
)
)
...
...
vllm/utils/deep_gemm.py
View file @
a3f8d5dd
...
@@ -38,7 +38,7 @@ class DeepGemmQuantScaleFMT(Enum):
...
@@ -38,7 +38,7 @@ class DeepGemmQuantScaleFMT(Enum):
return
DeepGemmQuantScaleFMT
.
FLOAT32
return
DeepGemmQuantScaleFMT
.
FLOAT32
return
(
return
(
DeepGemmQuantScaleFMT
.
UE8M0
DeepGemmQuantScaleFMT
.
UE8M0
if
current_platform
.
is_device_capability
(
100
)
if
current_platform
.
is_device_capability
_family
(
100
)
else
DeepGemmQuantScaleFMT
.
FLOAT32_CEIL_UE8M0
else
DeepGemmQuantScaleFMT
.
FLOAT32_CEIL_UE8M0
)
)
...
@@ -50,7 +50,7 @@ def is_deep_gemm_supported() -> bool:
...
@@ -50,7 +50,7 @@ def is_deep_gemm_supported() -> bool:
"""
"""
is_supported_arch
=
current_platform
.
is_cuda
()
and
(
is_supported_arch
=
current_platform
.
is_cuda
()
and
(
current_platform
.
is_device_capability
(
90
)
current_platform
.
is_device_capability
(
90
)
or
current_platform
.
is_device_capability
(
100
)
or
current_platform
.
is_device_capability
_family
(
100
)
)
)
return
envs
.
VLLM_USE_DEEP_GEMM
and
has_deep_gemm
()
and
is_supported_arch
return
envs
.
VLLM_USE_DEEP_GEMM
and
has_deep_gemm
()
and
is_supported_arch
...
@@ -481,22 +481,6 @@ def should_use_deepgemm_for_fp8_linear(
...
@@ -481,22 +481,6 @@ def should_use_deepgemm_for_fp8_linear(
)
)
def
should_use_deepgemm_for_fp8_linear_for_nk
(
output_dtype
:
torch
.
dtype
,
shape0
:
int
,
shape1
:
int
,
supports_deep_gemm
:
bool
|
None
=
None
,
):
if
supports_deep_gemm
is
None
:
supports_deep_gemm
=
is_deep_gemm_supported
()
return
(
supports_deep_gemm
and
output_dtype
==
torch
.
bfloat16
and
shape0
%
128
==
0
and
shape1
%
128
==
0
)
__all__
=
[
__all__
=
[
"calc_diff"
,
"calc_diff"
,
"DeepGemmQuantScaleFMT"
,
"DeepGemmQuantScaleFMT"
,
...
@@ -511,7 +495,6 @@ __all__ = [
...
@@ -511,7 +495,6 @@ __all__ = [
"is_deep_gemm_supported"
,
"is_deep_gemm_supported"
,
"get_num_sms"
,
"get_num_sms"
,
"should_use_deepgemm_for_fp8_linear"
,
"should_use_deepgemm_for_fp8_linear"
,
"should_use_deepgemm_for_fp8_linear_for_nk"
,
"get_col_major_tma_aligned_tensor"
,
"get_col_major_tma_aligned_tensor"
,
"get_mk_alignment_for_contiguous_layout"
,
"get_mk_alignment_for_contiguous_layout"
,
]
]
vllm/utils/flashinfer.py
View file @
a3f8d5dd
...
@@ -264,11 +264,15 @@ def supports_trtllm_attention() -> bool:
...
@@ -264,11 +264,15 @@ def supports_trtllm_attention() -> bool:
return
False
return
False
# Requires SM100 and NVIDIA artifactory to be accessible to download cubins
# Requires SM100 and NVIDIA artifactory to be accessible to download cubins
return
current_platform
.
is_device_capability
(
100
)
and
has_nvidia_artifactory
()
return
(
current_platform
.
is_device_capability_family
(
100
)
and
has_nvidia_artifactory
()
)
def
force_use_trtllm_attention
()
->
bool
|
None
:
def
force_use_trtllm_attention
()
->
bool
|
None
:
"""
"""
This function should only be called during initialization stage when vllm config
is set.
Return `None` if --attention-config.use_trtllm_attention is not set,
Return `None` if --attention-config.use_trtllm_attention is not set,
return `True` if TRTLLM attention is forced to be used,
return `True` if TRTLLM attention is forced to be used,
return `False` if TRTLLM attention is forced to be not used.
return `False` if TRTLLM attention is forced to be not used.
...
@@ -296,11 +300,12 @@ def use_trtllm_attention(
...
@@ -296,11 +300,12 @@ def use_trtllm_attention(
kv_cache_dtype
:
str
,
kv_cache_dtype
:
str
,
q_dtype
:
torch
.
dtype
,
q_dtype
:
torch
.
dtype
,
is_prefill
:
bool
,
is_prefill
:
bool
,
# None means auto-detection, True means force on, False means force off
force_use_trtllm
:
bool
|
None
=
None
,
has_sinks
:
bool
=
False
,
has_sinks
:
bool
=
False
,
has_spec
:
bool
=
False
,
has_spec
:
bool
=
False
,
)
->
bool
:
)
->
bool
:
"""Return `True` if TRTLLM attention is used."""
"""Return `True` if TRTLLM attention is used."""
force_use_trtllm
=
force_use_trtllm_attention
()
# CLI argument is set to 0 - respect it
# CLI argument is set to 0 - respect it
if
force_use_trtllm
is
not
None
and
not
force_use_trtllm
:
if
force_use_trtllm
is
not
None
and
not
force_use_trtllm
:
...
...
Prev
1
…
19
20
21
22
23
24
25
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment