Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
192ad464
Unverified
Commit
192ad464
authored
Feb 05, 2026
by
Isotr0py
Committed by
GitHub
Feb 04, 2026
Browse files
[Bugfix] Fix interns1-pro initialization and PP (#33793)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
0e922986
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
43 additions
and
22 deletions
+43
-22
tests/models/multimodal/processing/test_common.py
tests/models/multimodal/processing/test_common.py
+4
-0
tests/models/multimodal/processing/test_tensor_schema.py
tests/models/multimodal/processing/test_tensor_schema.py
+3
-0
tests/models/registry.py
tests/models/registry.py
+0
-2
vllm/model_executor/models/interns1_pro.py
vllm/model_executor/models/interns1_pro.py
+26
-12
vllm/model_executor/models/qwen3_vl.py
vllm/model_executor/models/qwen3_vl.py
+5
-4
vllm/model_executor/models/qwen3_vl_moe.py
vllm/model_executor/models/qwen3_vl_moe.py
+5
-4
No files found.
tests/models/multimodal/processing/test_common.py
View file @
192ad464
...
...
@@ -124,6 +124,7 @@ MM_DATA_PATCHES = {
"glm4v_moe"
:
glm4_1v_patch_mm_data
,
"glm_ocr"
:
glm4_1v_patch_mm_data
,
"glmasr"
:
glmasr_patch_mm_data
,
"interns1_pro"
:
qwen3_vl_patch_mm_data
,
"molmo2"
:
qwen3_vl_patch_mm_data
,
"qwen3_vl"
:
qwen3_vl_patch_mm_data
,
"qwen3_vl_moe"
:
qwen3_vl_patch_mm_data
,
...
...
@@ -439,6 +440,9 @@ def test_processing_correctness(
"Qwen-VL tokenizer requires downloading a font file from "
"servers that often refuse connections in CI"
)
if
model_id
==
"internlm/Intern-S1-Pro"
:
# FIXME(Isotr0py): Fix later.
pytest
.
skip
(
"Tokenization issue. Fix later"
)
_test_processing_correctness
(
model_id
,
...
...
tests/models/multimodal/processing/test_tensor_schema.py
View file @
192ad464
...
...
@@ -160,6 +160,9 @@ def test_model_tensor_schema(model_id: str):
pytest
.
skip
(
"Kimi-K2.5's offline inference has issues about vision chunks. Fix later."
)
if
model_id
==
"internlm/Intern-S1-Pro"
:
# FIXME(Isotr0py): Fix later.
pytest
.
skip
(
"Intern-S1-Pro has issue to pass the test."
)
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model_id
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
...
...
tests/models/registry.py
View file @
192ad464
...
...
@@ -758,8 +758,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"InternS1ProForConditionalGeneration"
:
_HfExamplesInfo
(
"internlm/Intern-S1-Pro"
,
trust_remote_code
=
True
,
min_transformers_version
=
"5.0.0"
,
is_available_online
=
False
,
),
"InternVLChatModel"
:
_HfExamplesInfo
(
"OpenGVLab/InternVL2-1B"
,
...
...
vllm/model_executor/models/interns1_pro.py
View file @
192ad464
...
...
@@ -32,7 +32,6 @@ import torch
from
torch
import
nn
from
transformers
import
AutoProcessor
,
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
get_ep_group
,
...
...
@@ -41,8 +40,8 @@ from vllm.distributed import (
)
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe.config
import
RoutingMethodType
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
...
...
@@ -188,7 +187,6 @@ class InternS1ProMoeSparseMoeBlock(nn.Module):
enable_eplb
=
self
.
enable_eplb
,
num_redundant_experts
=
self
.
n_redundant_experts
,
is_sequence_parallel
=
self
.
is_sequence_parallel
,
routing_method_type
=
RoutingMethodType
.
Renormalize
,
custom_routing_function
=
self
.
_custom_routing_function
,
)
...
...
@@ -479,7 +477,7 @@ class InternS1ProMoeLLMModel(Qwen3MoeLLMModel):
class
InternS1ProMoeLLMForCausalLM
(
Qwen3MoeForCausalLM
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
super
(
Qwen3MoeForCausalLM
,
self
).
__init__
()
self
.
config
=
vllm_config
.
model_config
.
hf_config
.
text_config
self
.
quant_config
=
vllm_config
.
quant_config
self
.
model
=
InternS1ProMoeLLMModel
(
...
...
@@ -567,15 +565,10 @@ class InternS1ProForConditionalGeneration(
"lm_head."
:
"language_model.lm_head."
,
"model.language_model."
:
"language_model.model."
,
},
orig_to_new_suffix
=
{
# Handle FOPE rotary embeddings
".rotary_emb.sin_coef"
:
".layers.0.self_attn.rotary_emb.sin_coef"
,
".rotary_emb.cos_coef"
:
".layers.0.self_attn.rotary_emb.cos_coef"
,
},
)
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
super
(
Qwen3VLForConditionalGeneration
,
self
).
__init__
()
config
:
PretrainedConfig
=
vllm_config
.
model_config
.
hf_config
multimodal_config
=
vllm_config
.
model_config
.
multimodal_config
...
...
@@ -595,7 +588,6 @@ class InternS1ProForConditionalGeneration(
self
.
visual
=
Qwen3_VisionTransformer
(
config
.
vision_config
,
norm_eps
=
getattr
(
config
,
"rms_norm_eps"
,
1e-6
),
multimodal_config
=
multimodal_config
,
prefix
=
maybe_prefix
(
prefix
,
"visual"
),
)
...
...
@@ -624,10 +616,32 @@ class InternS1ProForConditionalGeneration(
# Set MoE hyperparameters
self
.
set_moe_parameters
()
def get_frope_params_map(self) -> dict[str, str]:
    """Build the name mapping for the rotary-embedding coefficient weights.

    Walks the parameters of ``self.language_model.model`` and, for every
    parameter whose name contains ``rotary_emb.sin_coef`` or
    ``rotary_emb.cos_coef``, maps the fixed checkpoint-side key
    ``language_model.model.rotary_emb.<coef>`` to that parameter's name
    prefixed with ``language_model.model.``.

    Returns:
        A dict with at most two entries (one per coefficient). A key is
        absent when no parameter name matches it; when several parameter
        names match the same coefficient, the last one yielded by
        ``named_parameters()`` wins.
    """
    mapper: dict[str, str] = {}
    # Only the parameter names matter here; the tensors are ignored.
    for name, _ in self.language_model.model.named_parameters():
        for coef in ("sin_coef", "cos_coef"):
            if f"rotary_emb.{coef}" in name:
                mapper[f"language_model.model.rotary_emb.{coef}"] = (
                    f"language_model.model.{name}"
                )
    return mapper
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]]):
"""load weights"""
skip_prefixes
=
[
"model.time_series."
]
if
self
.
visual
is
None
:
skip_prefixes
.
append
(
"visual."
)
# FIXME(Isotr0py): See if we can avoid tying FoPE to PP layers
weights_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
"model.visual."
:
"visual."
,
"lm_head."
:
"language_model.lm_head."
,
"model.language_model."
:
"language_model.model."
,
},
orig_to_new_suffix
=
self
.
get_frope_params_map
(),
)
loader
=
AutoWeightsLoader
(
self
,
skip_prefixes
=
skip_prefixes
)
return
loader
.
load_weights
(
weights
,
mapper
=
self
.
hf_to_vllm
_mapper
)
return
loader
.
load_weights
(
weights
,
mapper
=
weights
_mapper
)
vllm/model_executor/models/qwen3_vl.py
View file @
192ad464
...
...
@@ -1114,10 +1114,11 @@ class Qwen3VLMultiModalProcessor(BaseMultiModalProcessor[Qwen3VLProcessingInfo])
class
Qwen3LLMModel
(
Qwen3Model
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
if
not
get_pp_group
().
is_first_rank
:
assert
self
.
start_layer
>=
len
(
vllm_config
.
model_config
.
hf_config
.
vision_config
.
deepstack_visual_indexes
),
(
vision_config
=
vllm_config
.
model_config
.
hf_config
.
vision_config
if
not
get_pp_group
().
is_first_rank
and
hasattr
(
vision_config
,
"deepstack_visual_indexes"
):
assert
self
.
start_layer
>=
len
(
vision_config
.
deepstack_visual_indexes
),
(
"start_layer should be greater than or equal to "
"len(deepstack_visual_indexes)"
)
...
...
vllm/model_executor/models/qwen3_vl_moe.py
View file @
192ad464
...
...
@@ -95,10 +95,11 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
prefix
=
prefix
,
decoder_layer_type
=
decoder_layer_type
,
)
if
not
get_pp_group
().
is_first_rank
:
assert
self
.
start_layer
>=
len
(
vllm_config
.
model_config
.
hf_config
.
vision_config
.
deepstack_visual_indexes
),
(
vision_config
=
vllm_config
.
model_config
.
hf_config
.
vision_config
if
not
get_pp_group
().
is_first_rank
and
hasattr
(
vision_config
,
"deepstack_visual_indexes"
):
assert
self
.
start_layer
>=
len
(
vision_config
.
deepstack_visual_indexes
),
(
"start_layer should be greater than or equal to "
"len(deepstack_visual_indexes)"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment