Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
df704163
Commit
df704163
authored
Feb 06, 2026
by
zhuwenwen
Browse files
sync v0.15.1 (models)
parent
d7db129a
Changes
169
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
54 additions
and
47 deletions
+54
-47
vllm/model_executor/models/mimo_v2_flash.py
vllm/model_executor/models/mimo_v2_flash.py
+3
-3
vllm/model_executor/models/minicpm.py
vllm/model_executor/models/minicpm.py
+3
-3
vllm/model_executor/models/minicpmv.py
vllm/model_executor/models/minicpmv.py
+2
-2
vllm/model_executor/models/minimax_m2.py
vllm/model_executor/models/minimax_m2.py
+3
-3
vllm/model_executor/models/minimax_text_01.py
vllm/model_executor/models/minimax_text_01.py
+2
-2
vllm/model_executor/models/minimax_vl_01.py
vllm/model_executor/models/minimax_vl_01.py
+2
-2
vllm/model_executor/models/mistral.py
vllm/model_executor/models/mistral.py
+9
-1
vllm/model_executor/models/mistral3.py
vllm/model_executor/models/mistral3.py
+2
-2
vllm/model_executor/models/mixtral.py
vllm/model_executor/models/mixtral.py
+2
-2
vllm/model_executor/models/mllama4.py
vllm/model_executor/models/mllama4.py
+2
-2
vllm/model_executor/models/mlp_speculator.py
vllm/model_executor/models/mlp_speculator.py
+0
-2
vllm/model_executor/models/modernbert.py
vllm/model_executor/models/modernbert.py
+6
-5
vllm/model_executor/models/molmo.py
vllm/model_executor/models/molmo.py
+2
-2
vllm/model_executor/models/molmo2.py
vllm/model_executor/models/molmo2.py
+2
-2
vllm/model_executor/models/mpt.py
vllm/model_executor/models/mpt.py
+3
-3
vllm/model_executor/models/nano_nemotron_vl.py
vllm/model_executor/models/nano_nemotron_vl.py
+1
-1
vllm/model_executor/models/nemotron.py
vllm/model_executor/models/nemotron.py
+2
-2
vllm/model_executor/models/nemotron_h.py
vllm/model_executor/models/nemotron_h.py
+3
-3
vllm/model_executor/models/nemotron_nas.py
vllm/model_executor/models/nemotron_nas.py
+2
-2
vllm/model_executor/models/nemotron_parse.py
vllm/model_executor/models/nemotron_parse.py
+3
-3
No files found.
vllm/model_executor/models/mimo_v2_flash.py
View file @
df704163
...
...
@@ -478,7 +478,7 @@ class MiMoV2Model(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -693,7 +693,7 @@ class MiMoV2FlashForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -715,4 +715,4 @@ class MiMoV2FlashForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/minicpm.py
View file @
df704163
...
...
@@ -440,7 +440,7 @@ class MiniCPMModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -620,7 +620,7 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle3):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -654,4 +654,4 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle3):
self
,
skip_prefixes
=
([
"lm_head."
]
if
self
.
config
.
tie_word_embeddings
else
None
),
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/minicpmv.py
View file @
df704163
...
...
@@ -1147,7 +1147,7 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -1740,4 +1740,4 @@ class MiniCPMV(MiniCPMVBaseModel, SupportsMultiModal, SupportsLoRA):
# so update values before init is called
cls
.
packed_modules_mapping
.
update
(
instance_cls
.
packed_modules_mapping
)
cls
.
embedding_modules
.
update
(
instance_cls
.
embedding_modules
)
return
instance_cls
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
return
instance_cls
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
\ No newline at end of file
vllm/model_executor/models/minimax_m2.py
View file @
df704163
...
...
@@ -362,7 +362,7 @@ class MiniMaxM2Model(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -521,7 +521,7 @@ class MiniMaxM2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -555,4 +555,4 @@ def get_spec_layer_idx_from_weight_name(
for
i
in
range
(
config
.
num_mtp_modules
):
if
weight_name
.
startswith
(
f
"model.layers.
{
layer_idx
+
i
}
."
):
return
layer_idx
+
i
return
None
return
None
\ No newline at end of file
vllm/model_executor/models/minimax_text_01.py
View file @
df704163
...
...
@@ -712,7 +712,7 @@ class MiniMaxText01ForCausalLM(nn.Module, HasInnerState, IsHybrid):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -1011,4 +1011,4 @@ class MiniMaxText01ForCausalLM(nn.Module, HasInnerState, IsHybrid):
@
classmethod
def
get_mamba_state_copy_func
(
cls
)
->
tuple
[
MambaStateCopyFunc
]:
return
MambaStateCopyFuncCalculator
.
linear_attention_state_copy_func
()
return
MambaStateCopyFuncCalculator
.
linear_attention_state_copy_func
()
\ No newline at end of file
vllm/model_executor/models/minimax_vl_01.py
View file @
df704163
...
...
@@ -359,7 +359,7 @@ class MiniMaxVL01ForConditionalGeneration(nn.Module, SupportsMultiModal, Support
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -382,4 +382,4 @@ class MiniMaxVL01ForConditionalGeneration(nn.Module, SupportsMultiModal, Support
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/mistral.py
View file @
df704163
...
...
@@ -156,8 +156,16 @@ class MistralDecoderLayer(LlamaDecoderLayer):
)
self
.
layer_idx
=
int
(
prefix
.
split
(
sep
=
"."
)[
-
1
])
quant_config
=
self
.
get_quant_config
(
vllm_config
)
config
=
config
or
vllm_config
.
model_config
.
hf_config
do_fusion
=
getattr
(
quant_config
,
"enable_quantization_scaling_fusion"
,
False
)
and
vllm_config
.
cache_config
.
cache_dtype
.
startswith
(
"fp8"
)
if
do_fusion
:
self
.
input_layernorm
.
quant_scaling_from
=
self
.
self_attn
.
qkv_proj
self
.
post_attention_layernorm
.
quant_scaling_from
=
self
.
mlp
.
gate_up_proj
if
getattr
(
config
,
"ada_rms_norm_t_cond"
,
False
):
self
.
ada_rms_norm_t_cond
=
nn
.
Sequential
(
ColumnParallelLinear
(
...
...
@@ -339,4 +347,4 @@ class MistralForCausalLM(LlamaForCausalLM):
elif
item
in
mapping
and
mapping
[
item
]
not
in
name
:
name
=
name
.
replace
(
item
,
mapping
[
item
])
return
name
,
loaded_weight
return
name
,
loaded_weight
\ No newline at end of file
vllm/model_executor/models/mistral3.py
View file @
df704163
...
...
@@ -539,7 +539,7 @@ class Mistral3ForConditionalGeneration(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -609,4 +609,4 @@ class Mistral3ForConditionalGeneration(
language_model
=
"language_model"
,
connector
=
"multi_modal_projector"
,
tower_model
=
"vision_tower"
,
)
)
\ No newline at end of file
vllm/model_executor/models/mixtral.py
View file @
df704163
...
...
@@ -347,7 +347,7 @@ class MixtralModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -608,7 +608,7 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/mllama4.py
View file @
df704163
...
...
@@ -901,7 +901,7 @@ class Llama4ForConditionalGeneration(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -1161,4 +1161,4 @@ class Llama4ForConditionalGeneration(
language_model
=
"language_model"
,
connector
=
"multi_modal_projector."
,
tower_model
=
"vision_model."
,
)
)
\ No newline at end of file
vllm/model_executor/models/mlp_speculator.py
View file @
df704163
...
...
@@ -3,7 +3,6 @@
import
os
import
math
from
typing
import
Iterable
,
List
,
Set
,
Tuple
,
Optional
from
collections.abc
import
Iterable
import
torch
...
...
@@ -17,7 +16,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
)
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm
import
_custom_ops
as
ops
from
vllm.distributed
import
tensor_model_parallel_all_gather
,
tensor_model_parallel_gather
from
vllm
import
envs
from
.utils
import
maybe_prefix
...
...
vllm/model_executor/models/modernbert.py
View file @
df704163
...
...
@@ -54,11 +54,12 @@ class ModernBertEmbeddings(nn.Module):
input_ids
:
torch
.
Tensor
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
)
->
torch
.
Tensor
:
if
inputs_embeds
is
None
:
if
inputs_embeds
is
not
None
:
return
self
.
norm
(
inputs_embeds
)
else
:
inputs_embeds
=
self
.
tok_embeddings
(
input_ids
)
embeddings
=
self
.
norm
(
inputs_embeds
)
return
embeddings
embeddings
=
self
.
norm
(
inputs_embeds
)
return
embeddings
class
ModernBertAttention
(
nn
.
Module
):
...
...
@@ -454,4 +455,4 @@ class ModernBertForTokenClassification(nn.Module):
)
hidden_states
=
self
.
head
(
hidden_states
)
hidden_states
=
hidden_states
.
to
(
self
.
head_dtype
)
return
self
.
classifier
(
hidden_states
)
return
self
.
classifier
(
hidden_states
)
\ No newline at end of file
vllm/model_executor/models/molmo.py
View file @
df704163
...
...
@@ -871,7 +871,7 @@ class MolmoModel(nn.Module, SupportsQuant):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -1591,4 +1591,4 @@ def _get_weights_with_merged_embedding(
[
embedding_weights
[
"embedding"
],
embedding_weights
[
"new_embedding"
]],
dim
=
0
,
)
yield
(
"model.embed_tokens.weight"
,
embedding_weights
)
yield
(
"model.embed_tokens.weight"
,
embedding_weights
)
\ No newline at end of file
vllm/model_executor/models/molmo2.py
View file @
df704163
...
...
@@ -1217,7 +1217,7 @@ class Molmo2TextModel(nn.Module, SupportsQuant):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -2805,4 +2805,4 @@ def _get_weights_with_merged_embedding(
[
embedding_weights
[
"embedding"
],
embedding_weights
[
"new_embedding"
]],
dim
=
0
,
)
yield
(
"model.embed_tokens.weight"
,
embedding_weights
)
yield
(
"model.embed_tokens.weight"
,
embedding_weights
)
\ No newline at end of file
vllm/model_executor/models/mpt.py
View file @
df704163
...
...
@@ -253,7 +253,7 @@ class MPTModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
position_ids
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -313,7 +313,7 @@ class MPTForCausalLM(nn.Module, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -332,4 +332,4 @@ class MPTForCausalLM(nn.Module, SupportsPP):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/nano_nemotron_vl.py
View file @
df704163
...
...
@@ -1917,7 +1917,7 @@ class NemotronH_Nano_VL_V2(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/nemotron.py
View file @
df704163
...
...
@@ -477,7 +477,7 @@ class NemotronForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -496,4 +496,4 @@ class NemotronForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/nemotron_h.py
View file @
df704163
...
...
@@ -601,7 +601,7 @@ class NemotronHModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -887,7 +887,7 @@ class NemotronHForCausalLM(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -908,4 +908,4 @@ class NemotronHForCausalLM(
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
,
skip_prefixes
=
[
"mtp"
])
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
\ No newline at end of file
vllm/model_executor/models/nemotron_nas.py
View file @
df704163
...
...
@@ -449,7 +449,7 @@ class DeciLMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, HasNoOps):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -471,4 +471,4 @@ class DeciLMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, HasNoOps):
self
,
skip_prefixes
=
([
"lm_head."
]
if
self
.
config
.
tie_word_embeddings
else
None
),
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/nemotron_parse.py
View file @
df704163
...
...
@@ -289,7 +289,7 @@ class MBartDecoderNoPos(nn.Module):
def
forward
(
self
,
decoder_input_ids
:
torch
.
Tensor
|
None
,
decoder_input_ids
:
torch
.
Tensor
,
*
,
encoder_hidden_states
:
torch
.
Tensor
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -897,7 +897,7 @@ class NemotronParseForConditionalGeneration(nn.Module, SupportsMultiModal):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
encoder_outputs
:
list
[
torch
.
Tensor
]
|
None
=
None
,
**
kwargs
,
...
...
@@ -957,4 +957,4 @@ class NemotronParseForConditionalGeneration(nn.Module, SupportsMultiModal):
# Load encoder weights
self
.
encoder
.
load_weights
(
encoder_weights
)
# Load decoder weights
self
.
decoder
.
load_weights
(
decoder_weights
)
self
.
decoder
.
load_weights
(
decoder_weights
)
\ No newline at end of file
Prev
1
2
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment