Unverified Commit db4ede97, authored by Jee Jee Li, committed by GitHub
Browse files

[Model] Enable Step3p5ForCausalLM testing (#33755)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent 2cb2340f
......@@ -471,7 +471,7 @@ th {
| `StableLMEpochForCausalLM` | StableLM Epoch | `stabilityai/stablelm-zephyr-3b`, etc. | | ✅︎ |
| `Starcoder2ForCausalLM` | Starcoder2 | `bigcode/starcoder2-3b`, `bigcode/starcoder2-7b`, `bigcode/starcoder2-15b`, etc. | | ✅︎ |
| `Step1ForCausalLM` | Step-Audio | `stepfun-ai/Step-Audio-EditX`, etc. | ✅︎ | ✅︎ |
| `Step3p5ForCausalLM` | Step-3.5-flash | `stepfun-ai/step-3.5-flash`, etc. | | ✅︎ |
| `Step3p5ForCausalLM` | Step-3.5-flash | `stepfun-ai/Step-3.5-Flash`, etc. | | ✅︎ |
| `TeleChatForCausalLM` | TeleChat | `chuhac/TeleChat2-35B`, etc. | ✅︎ | ✅︎ |
| `TeleChat2ForCausalLM` | TeleChat2 | `Tele-AI/TeleChat2-3B`, `Tele-AI/TeleChat2-7B`, `Tele-AI/TeleChat2-35B`, etc. | ✅︎ | ✅︎ |
| `TeleFLMForCausalLM` | TeleFLM | `CofeAI/FLM-2-52B-Instruct-2407`, `CofeAI/Tele-FLM`, etc. | ✅︎ | ✅︎ |
......
......@@ -481,16 +481,21 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"ByteDance-Seed/Seed-OSS-36B-Instruct",
trust_remote_code=True,
),
"SmolLM3ForCausalLM": _HfExamplesInfo("HuggingFaceTB/SmolLM3-3B"),
"StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b"),
"StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"),
"Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
"Step1ForCausalLM": _HfExamplesInfo(
"stepfun-ai/Step-Audio-EditX", trust_remote_code=True
),
"Step3p5ForCausalLM": _HfExamplesInfo(
"stepfun-ai/step-3.5-flash", is_available_online=False
"stepfun-ai/Step-3.5-Flash",
use_original_num_layers=True,
# Initialize at least one MoE layer
hf_overrides={
"num_hidden_layers": 4,
},
),
"SmolLM3ForCausalLM": _HfExamplesInfo("HuggingFaceTB/SmolLM3-3B"),
"StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b"),
"StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"),
"Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
"Step3TextForCausalLM": _HfExamplesInfo("stepfun-ai/step3", trust_remote_code=True),
"SolarForCausalLM": _HfExamplesInfo(
"upstage/solar-pro-preview-instruct", trust_remote_code=True
......@@ -1129,8 +1134,12 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
),
"Step3p5MTP": _HfExamplesInfo(
"stepfun-ai/Step-3.5-Flash",
trust_remote_code=True,
speculative_model="stepfun-ai/Step-3.5-Flash",
use_original_num_layers=True,
# Initialize at least one MoE layer
hf_overrides={
"num_hidden_layers": 4,
},
is_available_online=False,
),
}
......
......@@ -36,7 +36,6 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
from vllm.model_executor.layers.rotary_embedding import get_rope
from vllm.model_executor.layers.vocab_parallel_embedding import (
DEFAULT_VOCAB_PADDING_SIZE,
ParallelLMHead,
VocabParallelEmbedding,
)
......@@ -770,37 +769,17 @@ class Step3p5ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
):
super().__init__()
config = vllm_config.model_config.hf_config
lora_config = vllm_config.lora_config
self.config = config
self.vllm_config = vllm_config
self.model = Step3p5Model(
vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
)
self.moe_layers: list[FusedMoEBlock] = []
for layer in self.model.layers:
if isinstance(layer, PPMissingLayer):
continue
assert isinstance(layer, Step3p5DecoderLayer)
if hasattr(layer, "moe") and isinstance(layer.moe, FusedMoEBlock):
self.moe_layers.append(layer.moe)
if get_pp_group().is_last_rank:
self.unpadded_vocab_size = config.vocab_size
if lora_config:
self.unpadded_vocab_size += lora_config.lora_extra_vocab_size
self.lm_head = ParallelLMHead(
self.unpadded_vocab_size,
config.vocab_size,
config.hidden_size,
org_num_embeddings=config.vocab_size,
padding_size=DEFAULT_VOCAB_PADDING_SIZE
if not lora_config
else lora_config.lora_vocab_padding_size,
)
self.logits_processor = LogitsProcessor(
self.unpadded_vocab_size, config.vocab_size
quant_config=vllm_config.quant_config,
prefix=maybe_prefix(prefix, "lm_head"),
)
self.logits_processor = LogitsProcessor(config.vocab_size)
else:
self.lm_head = PPMissingLayer()
......@@ -809,6 +788,14 @@ class Step3p5ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
)
# Set MoE hyperparameters
self.moe_layers: list[FusedMoEBlock] = []
for layer in self.model.layers:
if isinstance(layer, PPMissingLayer):
continue
assert isinstance(layer, Step3p5DecoderLayer)
if hasattr(layer, "moe") and isinstance(layer.moe, FusedMoEBlock):
self.moe_layers.append(layer.moe)
self.expert_weights = []
assert len(self.moe_layers) > 0, "No MoE layers found in the model."
example_layer = self.moe_layers[0]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment