Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
df704163
Commit
df704163
authored
Feb 06, 2026
by
zhuwenwen
Browse files
sync v0.15.1 (models)
parent
d7db129a
Changes
169
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
44 additions
and
551 deletions
+44
-551
vllm/model_executor/models/afmoe.py
vllm/model_executor/models/afmoe.py
+3
-3
vllm/model_executor/models/apertus.py
vllm/model_executor/models/apertus.py
+2
-2
vllm/model_executor/models/arcee.py
vllm/model_executor/models/arcee.py
+2
-2
vllm/model_executor/models/arctic.py
vllm/model_executor/models/arctic.py
+3
-3
vllm/model_executor/models/aria.py
vllm/model_executor/models/aria.py
+2
-2
vllm/model_executor/models/audioflamingo3.py
vllm/model_executor/models/audioflamingo3.py
+2
-2
vllm/model_executor/models/aya_vision.py
vllm/model_executor/models/aya_vision.py
+2
-2
vllm/model_executor/models/bagel.py
vllm/model_executor/models/bagel.py
+2
-2
vllm/model_executor/models/baichuan.py
vllm/model_executor/models/baichuan.py
+2
-2
vllm/model_executor/models/bailing_moe.py
vllm/model_executor/models/bailing_moe.py
+3
-3
vllm/model_executor/models/bamba.py
vllm/model_executor/models/bamba.py
+3
-3
vllm/model_executor/models/bert_with_rope.py
vllm/model_executor/models/bert_with_rope.py
+2
-2
vllm/model_executor/models/blip2.py
vllm/model_executor/models/blip2.py
+2
-2
vllm/model_executor/models/bloom.py
vllm/model_executor/models/bloom.py
+2
-2
vllm/model_executor/models/chameleon.py
vllm/model_executor/models/chameleon.py
+2
-2
vllm/model_executor/models/chatglm.py
vllm/model_executor/models/chatglm.py
+2
-2
vllm/model_executor/models/cohere2_vision.py
vllm/model_executor/models/cohere2_vision.py
+2
-2
vllm/model_executor/models/commandr.py
vllm/model_executor/models/commandr.py
+3
-3
vllm/model_executor/models/dbrx.py
vllm/model_executor/models/dbrx.py
+3
-3
vllm/model_executor/models/deepseek.py
vllm/model_executor/models/deepseek.py
+0
-507
No files found.
vllm/model_executor/models/afmoe.py
View file @
df704163
...
...
@@ -425,7 +425,7 @@ class AfmoeModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -675,7 +675,7 @@ class AfmoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -694,4 +694,4 @@ class AfmoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
def
get_expert_mapping
(
self
)
->
list
[
tuple
[
str
,
str
,
int
,
str
]]:
return
self
.
model
.
get_expert_mapping
()
return
self
.
model
.
get_expert_mapping
()
\ No newline at end of file
vllm/model_executor/models/apertus.py
View file @
df704163
...
...
@@ -542,7 +542,7 @@ class ApertusForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -564,4 +564,4 @@ class ApertusForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
self
,
skip_prefixes
=
([
"lm_head."
]
if
self
.
config
.
tie_word_embeddings
else
None
),
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/arcee.py
View file @
df704163
...
...
@@ -394,7 +394,7 @@ class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -425,4 +425,4 @@ class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
)
# AutoWeightLoader handles weight name remapping, including fusing
# separate q_proj, k_proj, v_proj into qkv_proj
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/arctic.py
View file @
df704163
...
...
@@ -406,7 +406,7 @@ class ArcticModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -460,7 +460,7 @@ class ArcticForCausalLM(nn.Module, SupportsPP, SupportsQuant):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -591,4 +591,4 @@ class ArcticForCausalLM(nn.Module, SupportsPP, SupportsQuant):
)
weight_loader
(
param
,
loaded_weight
)
loaded_params
.
add
(
name
)
return
loaded_params
return
loaded_params
\ No newline at end of file
vllm/model_executor/models/aria.py
View file @
df704163
...
...
@@ -629,7 +629,7 @@ class AriaForConditionalGeneration(nn.Module, SupportsMultiModal):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -656,4 +656,4 @@ class AriaForConditionalGeneration(nn.Module, SupportsMultiModal):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]]):
loader
=
AutoWeightsLoader
(
self
)
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
\ No newline at end of file
vllm/model_executor/models/audioflamingo3.py
View file @
df704163
...
...
@@ -609,7 +609,7 @@ class AudioFlamingo3ForConditionalGeneration(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -634,4 +634,4 @@ class AudioFlamingo3ForConditionalGeneration(
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/aya_vision.py
View file @
df704163
...
...
@@ -420,7 +420,7 @@ class AyaVisionForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsP
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -441,4 +441,4 @@ class AyaVisionForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsP
self
,
hidden_states
:
torch
.
Tensor
,
)
->
torch
.
Tensor
|
None
:
return
self
.
language_model
.
compute_logits
(
hidden_states
)
return
self
.
language_model
.
compute_logits
(
hidden_states
)
\ No newline at end of file
vllm/model_executor/models/bagel.py
View file @
df704163
...
...
@@ -507,7 +507,7 @@ class BagelForConditionalGeneration(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -581,4 +581,4 @@ class BagelForConditionalGeneration(
# Skip vit_pos_embed.pos_embed as it's handled by PositionEmbedding module
loader
=
AutoWeightsLoader
(
self
,
skip_prefixes
=
[
"vit_pos_embed.pos_embed"
])
return
loader
.
load_weights
(
filtered_weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
return
loader
.
load_weights
(
filtered_weights
,
mapper
=
self
.
hf_to_vllm_mapper
)
\ No newline at end of file
vllm/model_executor/models/baichuan.py
View file @
df704163
...
...
@@ -334,7 +334,7 @@ class BaiChuanModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -534,7 +534,7 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/bailing_moe.py
View file @
df704163
...
...
@@ -440,7 +440,7 @@ class BailingMoeModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
position_ids
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -611,7 +611,7 @@ class BailingMoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -640,4 +640,4 @@ class BailingMoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
class
BailingMoeV2ForCausalLM
(
BailingMoeForCausalLM
):
pass
pass
\ No newline at end of file
vllm/model_executor/models/bamba.py
View file @
df704163
...
...
@@ -311,7 +311,7 @@ class BambaModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -493,7 +493,7 @@ class BambaForCausalLM(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -514,4 +514,4 @@ class BambaForCausalLM(
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/bert_with_rope.py
View file @
df704163
...
...
@@ -475,7 +475,7 @@ class BertWithRope(nn.Module, SupportsQuant):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -726,4 +726,4 @@ class GteNewForSequenceClassification(nn.Module, SupportsCrossEncoding):
positions
=
positions
,
inputs_embeds
=
inputs_embeds
,
intermediate_tensors
=
intermediate_tensors
,
)
)
\ No newline at end of file
vllm/model_executor/models/blip2.py
View file @
df704163
...
...
@@ -641,7 +641,7 @@ class Blip2ForConditionalGeneration(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -727,4 +727,4 @@ class Blip2ForConditionalGeneration(
"the number of tokens per image."
)
num_images
=
num_vision_tokens
/
self
.
_vision_tokens_per_image
return
num_images
*
self
.
config
.
num_query_tokens
return
num_images
*
self
.
config
.
num_query_tokens
\ No newline at end of file
vllm/model_executor/models/bloom.py
View file @
df704163
...
...
@@ -294,7 +294,7 @@ class BloomModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
position_ids
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -412,7 +412,7 @@ class BloomForCausalLM(nn.Module, SupportsPP, SupportsQuant):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/chameleon.py
View file @
df704163
...
...
@@ -994,7 +994,7 @@ class ChameleonForConditionalGeneration(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -1100,4 +1100,4 @@ class ChameleonForConditionalGeneration(
weight_loader
=
getattr
(
param
,
"weight_loader"
,
default_weight_loader
)
weight_loader
(
param
,
loaded_weight
)
loaded_params
.
add
(
name
)
return
loaded_params
return
loaded_params
\ No newline at end of file
vllm/model_executor/models/chatglm.py
View file @
df704163
...
...
@@ -381,7 +381,7 @@ class ChatGLMModel(nn.Module, SupportsQuant):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -554,7 +554,7 @@ class ChatGLMForCausalLM(ChatGLMBaseModel, SupportsLoRA, SupportsPP, SupportsQua
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/cohere2_vision.py
View file @
df704163
...
...
@@ -446,7 +446,7 @@ class Cohere2VisionForConditionalGeneration(nn.Module, SupportsMultiModal, Suppo
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -467,4 +467,4 @@ class Cohere2VisionForConditionalGeneration(nn.Module, SupportsMultiModal, Suppo
self
,
hidden_states
:
torch
.
Tensor
,
)
->
torch
.
Tensor
|
None
:
return
self
.
language_model
.
compute_logits
(
hidden_states
)
return
self
.
language_model
.
compute_logits
(
hidden_states
)
\ No newline at end of file
vllm/model_executor/models/commandr.py
View file @
df704163
...
...
@@ -312,7 +312,7 @@ class CohereModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -438,7 +438,7 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
@
torch
.
no_grad
()
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -466,4 +466,4 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
loader
=
AutoWeightsLoader
(
self
,
skip_prefixes
=
[
"lm_head"
,
"rotary_emb.inv_freq"
]
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/dbrx.py
View file @
df704163
...
...
@@ -361,7 +361,7 @@ class DbrxModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
position_ids
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -462,7 +462,7 @@ class DbrxForCausalLM(nn.Module, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
|
None
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -481,4 +481,4 @@ class DbrxForCausalLM(nn.Module, SupportsPP):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
vllm/model_executor/models/deepseek.py
deleted
100644 → 0
View file @
d7db129a
This diff is collapsed.
Click to expand it.
Prev
1
2
3
4
5
…
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment