Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d76fc11e
Commit
d76fc11e
authored
Jan 28, 2026
by
zhuwenwen
Browse files
Merge tag 'v0.15.0rc1' into v0.15.0rc1-dev
parents
38166ec4
58996f35
Changes
313
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
64 additions
and
27 deletions
+64
-27
vllm/model_executor/models/deepseek_ocr.py
vllm/model_executor/models/deepseek_ocr.py
+1
-1
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+2
-2
vllm/model_executor/models/deepseek_vl2.py
vllm/model_executor/models/deepseek_vl2.py
+1
-1
vllm/model_executor/models/dots1.py
vllm/model_executor/models/dots1.py
+2
-2
vllm/model_executor/models/dots_ocr.py
vllm/model_executor/models/dots_ocr.py
+1
-1
vllm/model_executor/models/eagle2_5_vl.py
vllm/model_executor/models/eagle2_5_vl.py
+1
-2
vllm/model_executor/models/ernie45_moe.py
vllm/model_executor/models/ernie45_moe.py
+2
-2
vllm/model_executor/models/ernie45_vl.py
vllm/model_executor/models/ernie45_vl.py
+1
-1
vllm/model_executor/models/ernie45_vl_moe.py
vllm/model_executor/models/ernie45_vl_moe.py
+2
-2
vllm/model_executor/models/ernie_mtp.py
vllm/model_executor/models/ernie_mtp.py
+1
-1
vllm/model_executor/models/exaone.py
vllm/model_executor/models/exaone.py
+1
-1
vllm/model_executor/models/exaone4.py
vllm/model_executor/models/exaone4.py
+1
-1
vllm/model_executor/models/exaone_moe.py
vllm/model_executor/models/exaone_moe.py
+1
-1
vllm/model_executor/models/falcon.py
vllm/model_executor/models/falcon.py
+1
-1
vllm/model_executor/models/falcon_h1.py
vllm/model_executor/models/falcon_h1.py
+2
-2
vllm/model_executor/models/fuyu.py
vllm/model_executor/models/fuyu.py
+1
-1
vllm/model_executor/models/gemma.py
vllm/model_executor/models/gemma.py
+2
-2
vllm/model_executor/models/gemma2.py
vllm/model_executor/models/gemma2.py
+1
-1
vllm/model_executor/models/gemma3.py
vllm/model_executor/models/gemma3.py
+1
-1
vllm/model_executor/models/gemma3_mm.py
vllm/model_executor/models/gemma3_mm.py
+39
-1
No files found.
vllm/model_executor/models/deepseek_ocr.py
View file @
d76fc11e
...
...
@@ -562,7 +562,7 @@ class DeepseekOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, Supports
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/deepseek_v2.py
View file @
d76fc11e
...
...
@@ -1101,7 +1101,7 @@ class DeepseekV2Model(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -1284,7 +1284,7 @@ class DeepseekV2ForCausalLM(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/deepseek_vl2.py
View file @
d76fc11e
...
...
@@ -614,7 +614,7 @@ class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/dots1.py
View file @
d76fc11e
...
...
@@ -394,7 +394,7 @@ class Dots1Model(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -538,7 +538,7 @@ class Dots1ForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/dots_ocr.py
View file @
d76fc11e
...
...
@@ -754,7 +754,7 @@ class DotsOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/eagle2_5_vl.py
View file @
d76fc11e
...
...
@@ -432,7 +432,7 @@ class Eagle2_5_VLForConditionalGeneration(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -440,7 +440,6 @@ class Eagle2_5_VLForConditionalGeneration(
)
->
IntermediateTensors
:
"""Forward pass through the model."""
if
intermediate_tensors
is
not
None
:
input_ids
=
None
inputs_embeds
=
None
forward_kwargs
=
{
...
...
vllm/model_executor/models/ernie45_moe.py
View file @
d76fc11e
...
...
@@ -466,7 +466,7 @@ class Ernie4_5_MoeModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -727,7 +727,7 @@ class Ernie4_5_MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA, MixtureOfExpe
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/ernie45_vl.py
View file @
d76fc11e
...
...
@@ -1650,7 +1650,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/ernie45_vl_moe.py
View file @
d76fc11e
...
...
@@ -565,7 +565,7 @@ class Ernie4_5_VLMoeModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -646,7 +646,7 @@ class Ernie4_5_VLMoeForCausalLM(nn.Module, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/ernie_mtp.py
View file @
d76fc11e
...
...
@@ -164,7 +164,7 @@ class ErnieMTP(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
hidden_states
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
...
...
vllm/model_executor/models/exaone.py
View file @
d76fc11e
...
...
@@ -496,7 +496,7 @@ class ExaoneForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/exaone4.py
View file @
d76fc11e
...
...
@@ -490,7 +490,7 @@ class Exaone4ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/exaone_moe.py
View file @
d76fc11e
...
...
@@ -549,7 +549,7 @@ class ExaoneMoeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/falcon.py
View file @
d76fc11e
...
...
@@ -423,7 +423,7 @@ class FalconModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/falcon_h1.py
View file @
d76fc11e
...
...
@@ -459,7 +459,7 @@ class FalconH1Model(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -602,7 +602,7 @@ class FalconH1ForCausalLM(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/fuyu.py
View file @
d76fc11e
...
...
@@ -340,7 +340,7 @@ class FuyuForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/gemma.py
View file @
d76fc11e
...
...
@@ -297,7 +297,7 @@ class GemmaModel(nn.Module):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -400,7 +400,7 @@ class GemmaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/gemma2.py
View file @
d76fc11e
...
...
@@ -410,7 +410,7 @@ class Gemma2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/gemma3.py
View file @
d76fc11e
...
...
@@ -494,7 +494,7 @@ class Gemma3ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/gemma3_mm.py
View file @
d76fc11e
...
...
@@ -618,7 +618,7 @@ class Gemma3ForConditionalGeneration(
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
@@ -656,3 +656,41 @@ class Gemma3ForConditionalGeneration(
connector
=
"multi_modal_projector"
,
tower_model
=
"vision_tower"
,
)
def get_num_mm_encoder_tokens(self, num_image_tokens: int) -> int:
    """Map a prompt-side image token count to a vision-encoder token count.

    Args:
        num_image_tokens: How many image placeholder tokens the prompt
            holds (typically mm_tokens_per_image for each image).

    Returns:
        The placeholder count scaled up by a fixed factor of 16.
    """
    # The encoder emits more tokens than the prompt has placeholders.
    # The factor is hard-coded; the original note ties it to
    # mm_tokens_per_image = 256 (tokens_per_side = sqrt(256) = 16) and
    # states that it was confirmed by empirical testing.
    # NOTE(review): presumably only valid for that default configuration --
    # confirm before relying on it with other image or patch sizes.
    encoder_tokens_per_placeholder = 16
    return num_image_tokens * encoder_tokens_per_placeholder
def get_num_mm_connector_tokens(self, num_vision_tokens: int) -> int:
    """Report the token count produced by the multimodal connector.

    Args:
        num_vision_tokens: Token count coming out of the vision encoder.

    Returns:
        The same count, passed through unchanged.
    """
    # Identity mapping: the input count is returned as-is.
    # NOTE(review): this treats the connector as 1:1 in token count --
    # confirm against the projector implementation, which is not visible here.
    connector_tokens = num_vision_tokens
    return connector_tokens
Prev
1
…
4
5
6
7
8
9
10
11
12
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment