Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8d75f22e
Commit
8d75f22e
authored
Dec 13, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.13.0rc1' into v0.13.0rc1-ori
parents
ce888aa4
7d80c73d
Changes
679
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
57 additions
and
51 deletions
+57
-51
vllm/model_executor/models/gpt_neox.py
vllm/model_executor/models/gpt_neox.py
+2
-4
vllm/model_executor/models/granite_speech.py
vllm/model_executor/models/granite_speech.py
+0
-1
vllm/model_executor/models/gritlm.py
vllm/model_executor/models/gritlm.py
+2
-4
vllm/model_executor/models/hunyuan_vision.py
vllm/model_executor/models/hunyuan_vision.py
+3
-4
vllm/model_executor/models/hyperclovax_vision.py
vllm/model_executor/models/hyperclovax_vision.py
+0
-2
vllm/model_executor/models/idefics3.py
vllm/model_executor/models/idefics3.py
+0
-2
vllm/model_executor/models/interfaces.py
vllm/model_executor/models/interfaces.py
+33
-5
vllm/model_executor/models/interns1.py
vllm/model_executor/models/interns1.py
+0
-2
vllm/model_executor/models/internvl.py
vllm/model_executor/models/internvl.py
+0
-2
vllm/model_executor/models/keye.py
vllm/model_executor/models/keye.py
+2
-4
vllm/model_executor/models/keye_vl1_5.py
vllm/model_executor/models/keye_vl1_5.py
+2
-2
vllm/model_executor/models/kimi_vl.py
vllm/model_executor/models/kimi_vl.py
+0
-2
vllm/model_executor/models/lightonocr.py
vllm/model_executor/models/lightonocr.py
+2
-2
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama.py
+0
-3
vllm/model_executor/models/llama4_eagle.py
vllm/model_executor/models/llama4_eagle.py
+11
-2
vllm/model_executor/models/llava.py
vllm/model_executor/models/llava.py
+0
-2
vllm/model_executor/models/llava_next.py
vllm/model_executor/models/llava_next.py
+0
-2
vllm/model_executor/models/llava_next_video.py
vllm/model_executor/models/llava_next_video.py
+0
-2
vllm/model_executor/models/llava_onevision.py
vllm/model_executor/models/llava_onevision.py
+0
-2
vllm/model_executor/models/midashenglm.py
vllm/model_executor/models/midashenglm.py
+0
-2
No files found.
Too many changes to show.
To preserve performance only
679 of 679+
files are displayed.
Plain diff
Email patch
vllm/model_executor/models/gpt_neox.py
View file @
8d75f22e
...
...
@@ -89,16 +89,14 @@ class GPTNeoXAttention(nn.Module):
quant_config
=
quant_config
,
prefix
=
f
"
{
prefix
}
.dense"
,
)
scaling
=
self
.
head_size
**-
0.5
rotary_dim
=
int
(
self
.
head_size
*
config
.
rotary_pct
)
assert
rotary_dim
%
2
==
0
max_position_embeddings
=
getattr
(
config
,
"max_position_embeddings"
,
8192
)
self
.
rotary_emb
=
get_rope
(
self
.
head_size
,
rotary_dim
=
rotary_dim
,
rotary_dim
=
self
.
head_size
,
max_position
=
max_position_embeddings
,
rope_parameters
=
config
.
rope_parameters
,
)
scaling
=
self
.
head_size
**-
0.5
self
.
attn
=
Attention
(
self
.
num_heads
,
self
.
head_size
,
...
...
vllm/model_executor/models/granite_speech.py
View file @
8d75f22e
...
...
@@ -564,7 +564,6 @@ class GraniteSpeechForConditionalGeneration(
SupportsLoRA
,
SupportsTranscription
,
):
merge_by_field_config
=
True
supported_languages
=
ISO639_1_SUPPORTED_LANGS
packed_modules_mapping
=
{
...
...
vllm/model_executor/models/gritlm.py
View file @
8d75f22e
...
...
@@ -14,8 +14,6 @@ from vllm.model_executor.layers.pooler import (
PoolerHead
,
PoolerNormalize
,
PoolingParamsUpdate
,
get_prompt_lens
,
get_prompt_token_ids
,
)
from
vllm.model_executor.models.llama
import
LlamaForCausalLM
from
vllm.tasks
import
PoolingTask
...
...
@@ -153,11 +151,11 @@ class GritLMMeanPool(nn.Module):
hidden_states
:
torch
.
Tensor
|
list
[
torch
.
Tensor
],
pooling_metadata
:
PoolingMetadata
,
)
->
list
[
torch
.
Tensor
]
|
torch
.
Tensor
:
prompt_lens
=
get_prompt_lens
(
hidden_states
,
pooling_metadata
)
prompt_lens
=
pooling_metadata
.
prompt_lens
instr_lens
=
torch
.
tensor
(
[
self
.
_get_instruction_len
(
token_ids
.
cpu
().
numpy
())
for
token_ids
in
get_prompt_token_ids
(
pooling_metadata
)
for
token_ids
in
pooling_metadata
.
get_prompt_token_ids
()
],
device
=
"cpu"
,
)
...
...
vllm/model_executor/models/hunyuan_vision.py
View file @
8d75f22e
...
...
@@ -62,6 +62,7 @@ from vllm.multimodal.inputs import (
from
vllm.multimodal.parse
import
(
DictEmbeddingItems
,
ImageSize
,
ModalityDataItems
,
MultiModalDataItems
,
MultiModalDataParser
,
)
...
...
@@ -562,7 +563,7 @@ def _hunyuan_vl_field_config(hf_inputs: Mapping[str, torch.Tensor]):
return
dict
(
pixel_values
=
MultiModalFieldConfig
.
flat_from_sizes
(
"image"
,
image_grid_sizes
),
image_embeds
=
MultiModalFieldConfig
.
flat_from_sizes
(
"image"
,
image_grid_sizes
),
image_grid_thw
=
MultiModalFieldConfig
.
batched
(
"image"
),
image_grid_thw
=
MultiModalFieldConfig
.
batched
(
"image"
,
keep_on_cpu
=
True
),
)
...
...
@@ -570,7 +571,7 @@ class HunYuanVLMultiModalDataParser(MultiModalDataParser):
def
_parse_image_data
(
self
,
data
:
dict
[
str
,
torch
.
Tensor
]
|
ModalityData
[
ImageItem
],
):
)
->
ModalityDataItems
[
Any
,
Any
]
|
None
:
if
isinstance
(
data
,
dict
):
return
DictEmbeddingItems
(
data
,
...
...
@@ -785,8 +786,6 @@ class HunYuanVLForConditionalGeneration(
SupportsQuant
,
SupportsXDRoPE
,
):
multimodal_cpu_fields
=
{
"image_grid_thw"
}
# To ensure correct weight loading and mapping.
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
...
...
vllm/model_executor/models/hyperclovax_vision.py
View file @
8d75f22e
...
...
@@ -592,8 +592,6 @@ class HCXVisionCAbstractor(nn.Module):
dummy_inputs
=
HCXVisionDummyInputsBuilder
,
)
class
HCXVisionForCausalLM
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
):
merge_by_field_config
=
True
packed_modules_mapping
=
{
"qkv_proj"
:
[
"q_proj"
,
"k_proj"
,
"v_proj"
],
"gate_up_proj"
:
[
"gate_proj"
,
"up_proj"
],
...
...
vllm/model_executor/models/idefics3.py
View file @
8d75f22e
...
...
@@ -576,8 +576,6 @@ class Idefics3Model(nn.Module):
dummy_inputs
=
Idefics3DummyInputsBuilder
,
)
class
Idefics3ForConditionalGeneration
(
nn
.
Module
,
SupportsMultiModal
,
SupportsLoRA
):
merge_by_field_config
=
True
packed_modules_mapping
=
{
"qkv_proj"
:
[
"q_proj"
,
...
...
vllm/model_executor/models/interfaces.py
View file @
8d75f22e
...
...
@@ -78,15 +78,15 @@ class SupportsMultiModal(Protocol):
`multimodal_config.mm_encoder_tp_mode="data"`.
"""
merge_by_field_config
:
ClassVar
[
bool
]
=
Fals
e
merge_by_field_config
:
ClassVar
[
bool
|
None
]
=
Non
e
"""
A flag that indicates which implementation of
[DEPRECATED]
A flag that indicates which implementation of
`vllm.multimodal.utils.group_mm_kwargs_by_modality` to use.
"""
multimodal_cpu_fields
:
ClassVar
[
Set
[
str
]
]
=
frozenset
()
multimodal_cpu_fields
:
ClassVar
[
Set
[
str
]
|
None
]
=
None
"""
A set indicating CPU-only multimodal fields.
[DEPRECATED]
A set indicating CPU-only multimodal fields.
"""
_processor_factory
:
ClassVar
[
_ProcessorFactories
]
...
...
@@ -260,7 +260,35 @@ def supports_multimodal(model: object) -> TypeIs[SupportsMultiModal]: ...
def
supports_multimodal
(
model
:
type
[
object
]
|
object
,
)
->
TypeIs
[
type
[
SupportsMultiModal
]]
|
TypeIs
[
SupportsMultiModal
]:
return
getattr
(
model
,
"supports_multimodal"
,
False
)
res
=
getattr
(
model
,
"supports_multimodal"
,
False
)
if
res
:
# We can remove this starting from v0.14
merge_by_field_config
=
getattr
(
model
,
"merge_by_field_config"
,
None
)
if
merge_by_field_config
is
False
:
raise
ValueError
(
"`merge_by_field_config=False` is no longer effective, "
"please update your model to consider the new batching logic "
"in `group_mm_kwargs_by_modality` (refer to "
"https://github.com/vllm-project/vllm/issues/26149), "
"and then remove the override from your model."
)
if
merge_by_field_config
is
True
:
logger
.
warning_once
(
"`merge_by_field_config=True` is redundant, "
"please remove the override from your model."
)
multimodal_cpu_fields
=
getattr
(
model
,
"multimodal_cpu_fields"
,
None
)
if
multimodal_cpu_fields
is
not
None
:
raise
ValueError
(
"`multimodal_cpu_fields` is no longer effective, "
"please set `keep_on_cpu=True` in `MultiModalFieldConfig` "
"(refer to https://github.com/vllm-project/vllm/pull/30181), "
"and then remove the override from your model."
)
return
res
def
supports_multimodal_raw_input_only
(
model
:
type
[
object
]
|
object
)
->
bool
:
...
...
vllm/model_executor/models/interns1.py
View file @
8d75f22e
...
...
@@ -509,8 +509,6 @@ class InternS1MultiModalProcessor(BaseMultiModalProcessor[InternS1ProcessingInfo
class
InternS1ForConditionalGeneration
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
,
SupportsLoRA
):
merge_by_field_config
=
True
# To ensure correct weight loading and mapping.
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
...
...
vllm/model_executor/models/internvl.py
View file @
8d75f22e
...
...
@@ -1074,8 +1074,6 @@ class InternVLMultiModalProcessor(
dummy_inputs
=
InternVLDummyInputsBuilder
,
)
class
InternVLChatModel
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
,
SupportsLoRA
):
merge_by_field_config
=
True
supports_encoder_tp_data
=
True
@
classmethod
...
...
vllm/model_executor/models/keye.py
View file @
8d75f22e
...
...
@@ -1000,7 +1000,7 @@ class KeyeMultiModalDataParser(MultiModalDataParser):
def
_parse_image_data
(
self
,
data
:
dict
[
str
,
torch
.
Tensor
]
|
ModalityData
[
ImageItem
],
)
->
ModalityDataItems
[
Any
,
Any
]:
)
->
ModalityDataItems
[
Any
,
Any
]
|
None
:
if
isinstance
(
data
,
dict
):
return
DictEmbeddingItems
(
data
,
...
...
@@ -1017,7 +1017,7 @@ class KeyeMultiModalDataParser(MultiModalDataParser):
def
_parse_video_data
(
self
,
data
:
dict
[
str
,
torch
.
Tensor
]
|
ModalityData
[
VideoItem
],
)
->
ModalityDataItems
[
Any
,
Any
]:
)
->
ModalityDataItems
[
Any
,
Any
]
|
None
:
if
isinstance
(
data
,
dict
):
return
DictEmbeddingItems
(
data
,
...
...
@@ -1292,8 +1292,6 @@ class KeyeMultiModalProcessor(BaseMultiModalProcessor[KeyeProcessingInfo]):
class
BaseKeyeModule
(
nn
.
Module
):
merge_by_field_config
=
True
packed_modules_mapping
=
{
"qkv_proj"
:
[
"q_proj"
,
...
...
vllm/model_executor/models/keye_vl1_5.py
View file @
8d75f22e
...
...
@@ -333,7 +333,7 @@ class KeyeVL1_5MultiModalDataParser(MultiModalDataParser):
def
_parse_image_data
(
self
,
data
:
dict
[
str
,
torch
.
Tensor
]
|
ModalityData
[
ImageItem
],
)
->
ModalityDataItems
[
Any
,
Any
]:
)
->
ModalityDataItems
[
Any
,
Any
]
|
None
:
if
isinstance
(
data
,
dict
):
return
DictEmbeddingItems
(
data
,
...
...
@@ -350,7 +350,7 @@ class KeyeVL1_5MultiModalDataParser(MultiModalDataParser):
def
_parse_video_data
(
self
,
data
:
dict
[
str
,
torch
.
Tensor
]
|
ModalityData
[
VideoItem
],
)
->
ModalityDataItems
[
Any
,
Any
]:
)
->
ModalityDataItems
[
Any
,
Any
]
|
None
:
if
isinstance
(
data
,
dict
):
return
DictEmbeddingItems
(
data
,
...
...
vllm/model_executor/models/kimi_vl.py
View file @
8d75f22e
...
...
@@ -298,8 +298,6 @@ class KimiVLMultiModalProcessor(BaseMultiModalProcessor[KimiVLProcessingInfo]):
dummy_inputs
=
KimiVLDummyInputsBuilder
,
)
class
KimiVLForConditionalGeneration
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
):
merge_by_field_config
=
True
supports_encoder_tp_data
=
True
@
classmethod
...
...
vllm/model_executor/models/lightonocr.py
View file @
8d75f22e
...
...
@@ -28,7 +28,7 @@ from vllm.model_executor.models.utils import (
)
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.cache
import
BaseMultiModalProcessorCache
from
vllm.multimodal.inputs
import
MultiModalFieldConfig
,
MultiModalKwargs
from
vllm.multimodal.inputs
import
MultiModalFieldConfig
,
MultiModalKwargs
Items
from
vllm.multimodal.parse
import
ImageProcessorItems
,
MultiModalDataItems
from
vllm.multimodal.processing
import
(
BaseMultiModalProcessor
,
...
...
@@ -103,7 +103,7 @@ class LightOnOCRMultiModalProcessor(BaseMultiModalProcessor[Mistral3ProcessingIn
self
,
mm_items
:
MultiModalDataItems
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
out_mm_kwargs
:
MultiModalKwargs
,
out_mm_kwargs
:
MultiModalKwargs
Items
,
)
->
Sequence
[
PromptUpdate
]:
hf_config
=
self
.
info
.
get_hf_config
()
image_token_id
=
hf_config
.
image_token_index
...
...
vllm/model_executor/models/llama.py
View file @
8d75f22e
...
...
@@ -149,8 +149,6 @@ class LlamaAttention(nn.Module):
if
head_dim
is
None
:
head_dim
=
self
.
hidden_size
//
self
.
total_num_heads
self
.
head_dim
=
head_dim
# Phi models introduced a partial_rotary_factor parameter in the config
self
.
partial_rotary_factor
=
getattr
(
config
,
"partial_rotary_factor"
,
1
)
self
.
q_size
=
self
.
num_heads
*
self
.
head_dim
self
.
kv_size
=
self
.
num_kv_heads
*
self
.
head_dim
self
.
scaling
=
self
.
head_dim
**-
0.5
...
...
@@ -265,7 +263,6 @@ class LlamaAttention(nn.Module):
max_position
=
self
.
max_position_embeddings
,
rope_parameters
=
getattr
(
config
,
"rope_parameters"
,
None
),
is_neox_style
=
is_neox_style
,
partial_rotary_factor
=
self
.
partial_rotary_factor
,
)
...
...
vllm/model_executor/models/llama4_eagle.py
View file @
8d75f22e
...
...
@@ -28,7 +28,10 @@ from vllm.model_executor.layers.layernorm import RMSNorm
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization.torchao
import
TorchAOConfig
from
vllm.model_executor.layers.vocab_parallel_embedding
import
VocabParallelEmbedding
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
ParallelLMHead
,
VocabParallelEmbedding
,
)
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.models.llama4
import
Llama4DecoderLayer
,
Llama4ForCausalLM
from
vllm.model_executor.models.utils
import
extract_layer_index
...
...
@@ -182,6 +185,12 @@ class EagleLlama4ForCausalLM(Llama4ForCausalLM):
self
.
config
.
vocab_size
,
scale
=
logit_scale
)
self
.
lm_head
=
ParallelLMHead
(
self
.
config
.
draft_vocab_size
,
self
.
config
.
hidden_size
,
prefix
=
maybe_prefix
(
prefix
,
"lm_head"
),
)
# Set MoE hyperparameters
self
.
set_moe_parameters
()
...
...
@@ -211,6 +220,6 @@ class EagleLlama4ForCausalLM(Llama4ForCausalLM):
loader
=
AutoWeightsLoader
(
self
,
# lm_head is tied with target model (Llama4ForCausalLM)
skip_prefixes
=
([
"lm_head."
]),
skip_prefixes
=
([]),
)
loader
.
load_weights
(
map
(
transform
,
weights
))
vllm/model_executor/models/llava.py
View file @
8d75f22e
...
...
@@ -506,8 +506,6 @@ def init_vision_tower_for_llava(
dummy_inputs
=
LlavaDummyInputsBuilder
,
)
class
LlavaForConditionalGeneration
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
):
merge_by_field_config
=
True
packed_modules_mapping
=
{
"qkv_proj"
:
[
"q_proj"
,
"k_proj"
,
"v_proj"
],
"gate_up_proj"
:
[
"gate_proj"
,
"up_proj"
],
...
...
vllm/model_executor/models/llava_next.py
View file @
8d75f22e
...
...
@@ -223,8 +223,6 @@ class LlavaNextMultiModalProcessor(
dummy_inputs
=
LlavaDummyInputsBuilder
,
)
class
LlavaNextForConditionalGeneration
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
):
merge_by_field_config
=
True
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
# mapping for new names in checkpoint saved after transformers v4.52
...
...
vllm/model_executor/models/llava_next_video.py
View file @
8d75f22e
...
...
@@ -299,8 +299,6 @@ class LlavaNextMultiModalProjector(nn.Module):
dummy_inputs
=
LlavaNextVideoDummyInputsBuilder
,
)
class
LlavaNextVideoForConditionalGeneration
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
):
merge_by_field_config
=
True
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
# mapping for new names in checkpoint saved after transformers v4.52
...
...
vllm/model_executor/models/llava_onevision.py
View file @
8d75f22e
...
...
@@ -479,8 +479,6 @@ class LlavaOnevisionMultiModalProjector(nn.Module):
dummy_inputs
=
LlavaOnevisionDummyInputsBuilder
,
)
class
LlavaOnevisionForConditionalGeneration
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
):
merge_by_field_config
=
True
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
# mapping for new names in checkpoint saved after transformers v4.52
...
...
vllm/model_executor/models/midashenglm.py
View file @
8d75f22e
...
...
@@ -683,8 +683,6 @@ class MiDashengLMMultiModalProcessor(
dummy_inputs
=
MiDashengLMDummyInputsBuilder
,
)
class
MiDashengLMModel
(
nn
.
Module
,
SupportsMultiModal
,
SupportsPP
):
merge_by_field_config
=
True
packed_modules_mapping
=
{
"qkv_proj"
:
[
"q_proj"
,
...
...
Prev
1
…
24
25
26
27
28
29
30
31
32
…
34
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment