Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
df704163
Commit
df704163
authored
Feb 06, 2026
by
zhuwenwen
Browse files
sync v0.15.1 (models)
parent
d7db129a
Changes
169
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
42 additions
and
46 deletions
+42
-46
vllm/model_executor/models/kanana_v.py
vllm/model_executor/models/kanana_v.py
+2
-2
vllm/model_executor/models/keye.py
vllm/model_executor/models/keye.py
+2
-2
vllm/model_executor/models/kimi_linear.py
vllm/model_executor/models/kimi_linear.py
+2
-2
vllm/model_executor/models/kimi_vl.py
vllm/model_executor/models/kimi_vl.py
+2
-2
vllm/model_executor/models/lfm2.py
vllm/model_executor/models/lfm2.py
+3
-3
vllm/model_executor/models/lfm2_moe.py
vllm/model_executor/models/lfm2_moe.py
+3
-3
vllm/model_executor/models/lfm2_vl.py
vllm/model_executor/models/lfm2_vl.py
+2
-2
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama.py
+1
-1
vllm/model_executor/models/llava.py
vllm/model_executor/models/llava.py
+2
-2
vllm/model_executor/models/llava_next.py
vllm/model_executor/models/llava_next.py
+2
-2
vllm/model_executor/models/llava_next_video.py
vllm/model_executor/models/llava_next_video.py
+2
-2
vllm/model_executor/models/llava_onevision.py
vllm/model_executor/models/llava_onevision.py
+2
-2
vllm/model_executor/models/longcat_flash.py
vllm/model_executor/models/longcat_flash.py
+3
-3
vllm/model_executor/models/longcat_flash_mtp.py
vllm/model_executor/models/longcat_flash_mtp.py
+2
-2
vllm/model_executor/models/mamba.py
vllm/model_executor/models/mamba.py
+3
-3
vllm/model_executor/models/mamba2.py
vllm/model_executor/models/mamba2.py
+3
-3
vllm/model_executor/models/medusa.py
vllm/model_executor/models/medusa.py
+0
-4
vllm/model_executor/models/midashenglm.py
vllm/model_executor/models/midashenglm.py
+2
-2
vllm/model_executor/models/mimo.py
vllm/model_executor/models/mimo.py
+2
-2
vllm/model_executor/models/mimo_mtp.py
vllm/model_executor/models/mimo_mtp.py
+2
-2
No files found.
vllm/model_executor/models/kanana_v.py
View file @
df704163
...
...
@@ -732,7 +732,7 @@ class KananaVForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP)
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -755,4 +755,4 @@ class KananaVForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP)
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/keye.py
View file @
df704163
...
...
@@ -1438,7 +1438,7 @@ class BaseKeyeModule(nn.Module, SupportsMultiModal):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -1692,4 +1692,4 @@ class KeyeForConditionalGeneration(
llm_positions
=
torch
.
cat
(
llm_pos_ids_list
,
dim
=
1
).
reshape
(
3
,
-
1
)
mrope_position_delta
=
(
llm_positions
.
max
()
+
1
-
len
(
input_tokens
)).
item
()
return
llm_positions
,
mrope_position_delta
return
llm_positions
,
mrope_position_delta
\ No newline at end of file
vllm/model_executor/models/kimi_linear.py
View file @
df704163
...
...
@@ -506,7 +506,7 @@ class KimiLinearForCausalLM(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -666,4 +666,4 @@ def get_spec_layer_idx_from_weight_name(
for
i
in
range
(
config
.
num_nextn_predict_layers
):
if
weight_name
.
startswith
(
f
"model.layers.
{
layer_idx
+
i
}
."
):
return
layer_idx
+
i
return
None
return
None
\ No newline at end of file
vllm/model_executor/models/kimi_vl.py
View file @
df704163
...
...
@@ -389,7 +389,7 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -412,4 +412,4 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]]):
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/lfm2.py
View file @
df704163
...
...
@@ -342,7 +342,7 @@ class Lfm2Model(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -503,7 +503,7 @@ class Lfm2ForCausalLM(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -523,4 +523,4 @@ class Lfm2ForCausalLM(
self
,
skip_prefixes
=
([
"lm_head."
]
if
self
.
config
.
tie_word_embeddings
else
None
),
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/lfm2_moe.py
View file @
df704163
...
...
@@ -457,7 +457,7 @@ class Lfm2MoeModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -730,7 +730,7 @@ class Lfm2MoeForCausalLM(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -753,4 +753,4 @@ class Lfm2MoeForCausalLM(
return
loader
.
load_weights
(
weights
)
def
get_expert_mapping
(
self
)
->
list
[
tuple
[
str
,
str
,
int
,
str
]]:
return
self
.
model
.
get_expert_mapping
()
return
self
.
model
.
get_expert_mapping
()
\ No newline at end of file
vllm/model_executor/models/lfm2_vl.py
View file @
df704163
...
...
@@ -769,7 +769,7 @@ class Lfm2VLForConditionalGeneration(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -804,4 +804,4 @@ class Lfm2VLForConditionalGeneration(
language_model
=
"language_model"
,
connector
=
"multi_modal_projector"
,
tower_model
=
"vision_tower"
,
)
)
\ No newline at end of file
vllm/model_executor/models/llama.py
View file @
df704163
...
...
@@ -651,7 +651,7 @@ class LlamaForCausalLM(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/llava.py
View file @
df704163
...
...
@@ -662,7 +662,7 @@ class LlavaForConditionalGeneration(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -853,4 +853,4 @@ class MantisMultiModalProcessor(LlavaMultiModalProcessor):
dummy_inputs
=
LlavaDummyInputsBuilder
,
)
class
MantisForConditionalGeneration
(
LlavaForConditionalGeneration
):
pass
pass
\ No newline at end of file
vllm/model_executor/models/llava_next.py
View file @
df704163
...
...
@@ -509,7 +509,7 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsP
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -577,4 +577,4 @@ model_executor.models.llava_next.LlavaNextProcessingInfo.get_num_image_tokens].
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
\ No newline at end of file
vllm/model_executor/models/llava_next_video.py
View file @
df704163
...
...
@@ -426,7 +426,7 @@ class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal, Supp
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -459,4 +459,4 @@ class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal, Supp
# This model doesn't support images for now
ignore_unexpected_prefixes
=
[
"image_newline"
],
)
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
\ No newline at end of file
vllm/model_executor/models/llava_onevision.py
View file @
df704163
...
...
@@ -887,7 +887,7 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, Supp
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -916,4 +916,4 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, Supp
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
\ No newline at end of file
vllm/model_executor/models/longcat_flash.py
View file @
df704163
...
...
@@ -520,7 +520,7 @@ class FlashModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -605,7 +605,7 @@ class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -764,4 +764,4 @@ class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
self_attn
.
kv_a_layernorm
.
weight
.
data
*=
(
self
.
config
.
hidden_size
/
self
.
config
.
kv_lora_rank
)
**
0.5
return
loaded_params
return
loaded_params
\ No newline at end of file
vllm/model_executor/models/longcat_flash_mtp.py
View file @
df704163
...
...
@@ -150,7 +150,7 @@ class LongCatFlashMTP(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
hidden_states
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
...
...
@@ -345,4 +345,4 @@ class LongCatFlashMTP(nn.Module):
)
->
int
|
None
:
if
"model.mtp"
in
weight_name
:
return
config
.
num_hidden_layers
*
2
return
None
return
None
\ No newline at end of file
vllm/model_executor/models/mamba.py
View file @
df704163
...
...
@@ -142,7 +142,7 @@ class MambaModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -225,7 +225,7 @@ class MambaForCausalLM(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -279,4 +279,4 @@ class MambaForCausalLM(
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/mamba2.py
View file @
df704163
...
...
@@ -137,7 +137,7 @@ class Mamba2Model(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -267,7 +267,7 @@ class Mamba2ForCausalLM(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -291,4 +291,4 @@ class Mamba2ForCausalLM(
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/medusa.py
View file @
df704163
...
...
@@ -2,8 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
os
from
typing
import
Iterable
,
List
,
Optional
,
Set
,
Tuple
,
Any
,
Dict
from
collections.abc
import
Iterable
import
torch
...
...
@@ -20,8 +18,6 @@ from .utils import maybe_prefix
from
vllm
import
_custom_ops
as
ops
TOPK
=
10
# topk for sparse tree (10 is a placeholder and it is sufficient)
class
ResidualBlock
(
nn
.
Module
):
def
__init__
(
self
,
config
:
VllmConfig
,
hidden_size
:
int
,
num_layers
:
int
)
->
None
:
...
...
vllm/model_executor/models/midashenglm.py
View file @
df704163
...
...
@@ -796,7 +796,7 @@ class MiDashengLMModel(nn.Module, SupportsMultiModal, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -820,4 +820,4 @@ class MiDashengLMModel(nn.Module, SupportsMultiModal, SupportsPP):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/mimo.py
View file @
df704163
...
...
@@ -61,7 +61,7 @@ logger = init_logger(__name__)
class
MiMoModel
(
Qwen2Model
):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -185,4 +185,4 @@ class MiMoForCausalLM(Qwen2ForCausalLM, nn.Module):
)
->
torch
.
Tensor
|
None
:
hidden_states
=
self
.
model
.
norm
(
hidden_states
)
logits
=
self
.
logits_processor
(
self
.
lm_head
,
hidden_states
)
return
logits
return
logits
\ No newline at end of file
vllm/model_executor/models/mimo_mtp.py
View file @
df704163
...
...
@@ -169,7 +169,7 @@ class MiMoMTP(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
hidden_states
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
...
...
@@ -291,4 +291,4 @@ class MiMoMTP(nn.Module):
name
=
name
.
replace
(
f
"model.layers.
{
spec_layer
}
."
,
f
"model.layers.
{
spec_layer
}
.mtp_block."
)
return
name
return
name
\ No newline at end of file
Prev
1
2
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment