Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
db4ede97
Unverified
Commit
db4ede97
authored
Feb 07, 2026
by
Jee Jee Li
Committed by
GitHub
Feb 07, 2026
Browse files
[Model] Enable Step3p5ForCausalLM testing (#33755)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
2cb2340f
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
32 deletions
+28
-32
docs/models/supported_models.md
docs/models/supported_models.md
+1
-1
tests/models/registry.py
tests/models/registry.py
+15
-6
vllm/model_executor/models/step3p5.py
vllm/model_executor/models/step3p5.py
+12
-25
No files found.
docs/models/supported_models.md
View file @
db4ede97
...
...
@@ -471,7 +471,7 @@ th {
|
`StableLMEpochForCausalLM`
| StableLM Epoch |
`stabilityai/stablelm-zephyr-3b`
, etc. | | ✅︎ |
|
`Starcoder2ForCausalLM`
| Starcoder2 |
`bigcode/starcoder2-3b`
,
`bigcode/starcoder2-7b`
,
`bigcode/starcoder2-15b`
, etc. | | ✅︎ |
|
`Step1ForCausalLM`
| Step-Audio |
`stepfun-ai/Step-Audio-EditX`
, etc. | ✅︎ | ✅︎ |
|
`Step3p5ForCausalLM`
| Step-3.5-flash |
`stepfun-ai/
s
tep-3.5-
f
lash`
, etc. | | ✅︎ |
|
`Step3p5ForCausalLM`
| Step-3.5-flash |
`stepfun-ai/
S
tep-3.5-
F
lash`
, etc. | | ✅︎ |
|
`TeleChatForCausalLM`
| TeleChat |
`chuhac/TeleChat2-35B`
, etc. | ✅︎ | ✅︎ |
|
`TeleChat2ForCausalLM`
| TeleChat2 |
`Tele-AI/TeleChat2-3B`
,
`Tele-AI/TeleChat2-7B`
,
`Tele-AI/TeleChat2-35B`
, etc. | ✅︎ | ✅︎ |
|
`TeleFLMForCausalLM`
| TeleFLM |
`CofeAI/FLM-2-52B-Instruct-2407`
,
`CofeAI/Tele-FLM`
, etc. | ✅︎ | ✅︎ |
...
...
tests/models/registry.py
View file @
db4ede97
...
...
@@ -481,16 +481,21 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"ByteDance-Seed/Seed-OSS-36B-Instruct"
,
trust_remote_code
=
True
,
),
"SmolLM3ForCausalLM"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolLM3-3B"
),
"StableLMEpochForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-zephyr-3b"
),
"StableLmForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-3b-4e1t"
),
"Starcoder2ForCausalLM"
:
_HfExamplesInfo
(
"bigcode/starcoder2-3b"
),
"Step1ForCausalLM"
:
_HfExamplesInfo
(
"stepfun-ai/Step-Audio-EditX"
,
trust_remote_code
=
True
),
"Step3p5ForCausalLM"
:
_HfExamplesInfo
(
"stepfun-ai/step-3.5-flash"
,
is_available_online
=
False
"stepfun-ai/Step-3.5-Flash"
,
use_original_num_layers
=
True
,
# Initialize at least one MoE layer
hf_overrides
=
{
"num_hidden_layers"
:
4
,
},
),
"SmolLM3ForCausalLM"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolLM3-3B"
),
"StableLMEpochForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-zephyr-3b"
),
"StableLmForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-3b-4e1t"
),
"Starcoder2ForCausalLM"
:
_HfExamplesInfo
(
"bigcode/starcoder2-3b"
),
"Step3TextForCausalLM"
:
_HfExamplesInfo
(
"stepfun-ai/step3"
,
trust_remote_code
=
True
),
"SolarForCausalLM"
:
_HfExamplesInfo
(
"upstage/solar-pro-preview-instruct"
,
trust_remote_code
=
True
...
...
@@ -1129,8 +1134,12 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
),
"Step3p5MTP"
:
_HfExamplesInfo
(
"stepfun-ai/Step-3.5-Flash"
,
trust_remote_code
=
True
,
speculative_model
=
"stepfun-ai/Step-3.5-Flash"
,
use_original_num_layers
=
True
,
# Initialize at least one MoE layer
hf_overrides
=
{
"num_hidden_layers"
:
4
,
},
is_available_online
=
False
,
),
}
...
...
vllm/model_executor/models/step3p5.py
View file @
db4ede97
...
...
@@ -36,7 +36,6 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
from
vllm.model_executor.layers.quantization.base_config
import
QuantizationConfig
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
DEFAULT_VOCAB_PADDING_SIZE
,
ParallelLMHead
,
VocabParallelEmbedding
,
)
...
...
@@ -770,37 +769,17 @@ class Step3p5ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
):
super
().
__init__
()
config
=
vllm_config
.
model_config
.
hf_config
lora_config
=
vllm_config
.
lora_config
self
.
config
=
config
self
.
vllm_config
=
vllm_config
self
.
model
=
Step3p5Model
(
vllm_config
=
vllm_config
,
prefix
=
maybe_prefix
(
prefix
,
"model"
)
)
self
.
moe_layers
:
list
[
FusedMoEBlock
]
=
[]
for
layer
in
self
.
model
.
layers
:
if
isinstance
(
layer
,
PPMissingLayer
):
continue
assert
isinstance
(
layer
,
Step3p5DecoderLayer
)
if
hasattr
(
layer
,
"moe"
)
and
isinstance
(
layer
.
moe
,
FusedMoEBlock
):
self
.
moe_layers
.
append
(
layer
.
moe
)
if
get_pp_group
().
is_last_rank
:
self
.
unpadded_vocab_size
=
config
.
vocab_size
if
lora_config
:
self
.
unpadded_vocab_size
+=
lora_config
.
lora_extra_vocab_size
self
.
lm_head
=
ParallelLMHead
(
self
.
unpadded_
vocab_size
,
config
.
vocab_size
,
config
.
hidden_size
,
org_num_embeddings
=
config
.
vocab_size
,
padding_size
=
DEFAULT_VOCAB_PADDING_SIZE
if
not
lora_config
else
lora_config
.
lora_vocab_padding_size
,
)
self
.
logits_processor
=
LogitsProcessor
(
self
.
unpadded_vocab_size
,
config
.
vocab_size
quant_config
=
vllm_config
.
quant_config
,
prefix
=
maybe_prefix
(
prefix
,
"lm_head"
),
)
self
.
logits_processor
=
LogitsProcessor
(
config
.
vocab_size
)
else
:
self
.
lm_head
=
PPMissingLayer
()
...
...
@@ -809,6 +788,14 @@ class Step3p5ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
)
# Set MoE hyperparameters
self
.
moe_layers
:
list
[
FusedMoEBlock
]
=
[]
for
layer
in
self
.
model
.
layers
:
if
isinstance
(
layer
,
PPMissingLayer
):
continue
assert
isinstance
(
layer
,
Step3p5DecoderLayer
)
if
hasattr
(
layer
,
"moe"
)
and
isinstance
(
layer
.
moe
,
FusedMoEBlock
):
self
.
moe_layers
.
append
(
layer
.
moe
)
self
.
expert_weights
=
[]
assert
len
(
self
.
moe_layers
)
>
0
,
"No MoE layers found in the model."
example_layer
=
self
.
moe_layers
[
0
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment