Unverified commit db4ede97, authored by Jee Jee Li and committed by GitHub
Browse files

[Model] Enable Step3p5ForCausalLM testing (#33755)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent 2cb2340f
...@@ -471,7 +471,7 @@ th { ...@@ -471,7 +471,7 @@ th {
| `StableLMEpochForCausalLM` | StableLM Epoch | `stabilityai/stablelm-zephyr-3b`, etc. | | ✅︎ | | `StableLMEpochForCausalLM` | StableLM Epoch | `stabilityai/stablelm-zephyr-3b`, etc. | | ✅︎ |
| `Starcoder2ForCausalLM` | Starcoder2 | `bigcode/starcoder2-3b`, `bigcode/starcoder2-7b`, `bigcode/starcoder2-15b`, etc. | | ✅︎ | | `Starcoder2ForCausalLM` | Starcoder2 | `bigcode/starcoder2-3b`, `bigcode/starcoder2-7b`, `bigcode/starcoder2-15b`, etc. | | ✅︎ |
| `Step1ForCausalLM` | Step-Audio | `stepfun-ai/Step-Audio-EditX`, etc. | ✅︎ | ✅︎ | | `Step1ForCausalLM` | Step-Audio | `stepfun-ai/Step-Audio-EditX`, etc. | ✅︎ | ✅︎ |
| `Step3p5ForCausalLM` | Step-3.5-flash | `stepfun-ai/step-3.5-flash`, etc. | | ✅︎ | | `Step3p5ForCausalLM` | Step-3.5-flash | `stepfun-ai/Step-3.5-Flash`, etc. | | ✅︎ |
| `TeleChatForCausalLM` | TeleChat | `chuhac/TeleChat2-35B`, etc. | ✅︎ | ✅︎ | | `TeleChatForCausalLM` | TeleChat | `chuhac/TeleChat2-35B`, etc. | ✅︎ | ✅︎ |
| `TeleChat2ForCausalLM` | TeleChat2 | `Tele-AI/TeleChat2-3B`, `Tele-AI/TeleChat2-7B`, `Tele-AI/TeleChat2-35B`, etc. | ✅︎ | ✅︎ | | `TeleChat2ForCausalLM` | TeleChat2 | `Tele-AI/TeleChat2-3B`, `Tele-AI/TeleChat2-7B`, `Tele-AI/TeleChat2-35B`, etc. | ✅︎ | ✅︎ |
| `TeleFLMForCausalLM` | TeleFLM | `CofeAI/FLM-2-52B-Instruct-2407`, `CofeAI/Tele-FLM`, etc. | ✅︎ | ✅︎ | | `TeleFLMForCausalLM` | TeleFLM | `CofeAI/FLM-2-52B-Instruct-2407`, `CofeAI/Tele-FLM`, etc. | ✅︎ | ✅︎ |
......
...@@ -481,16 +481,21 @@ _TEXT_GENERATION_EXAMPLE_MODELS = { ...@@ -481,16 +481,21 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"ByteDance-Seed/Seed-OSS-36B-Instruct", "ByteDance-Seed/Seed-OSS-36B-Instruct",
trust_remote_code=True, trust_remote_code=True,
), ),
"SmolLM3ForCausalLM": _HfExamplesInfo("HuggingFaceTB/SmolLM3-3B"),
"StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b"),
"StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"),
"Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
"Step1ForCausalLM": _HfExamplesInfo( "Step1ForCausalLM": _HfExamplesInfo(
"stepfun-ai/Step-Audio-EditX", trust_remote_code=True "stepfun-ai/Step-Audio-EditX", trust_remote_code=True
), ),
"Step3p5ForCausalLM": _HfExamplesInfo( "Step3p5ForCausalLM": _HfExamplesInfo(
"stepfun-ai/step-3.5-flash", is_available_online=False "stepfun-ai/Step-3.5-Flash",
use_original_num_layers=True,
# Initialize at least one MoE layer
hf_overrides={
"num_hidden_layers": 4,
},
), ),
"SmolLM3ForCausalLM": _HfExamplesInfo("HuggingFaceTB/SmolLM3-3B"),
"StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b"),
"StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"),
"Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
"Step3TextForCausalLM": _HfExamplesInfo("stepfun-ai/step3", trust_remote_code=True), "Step3TextForCausalLM": _HfExamplesInfo("stepfun-ai/step3", trust_remote_code=True),
"SolarForCausalLM": _HfExamplesInfo( "SolarForCausalLM": _HfExamplesInfo(
"upstage/solar-pro-preview-instruct", trust_remote_code=True "upstage/solar-pro-preview-instruct", trust_remote_code=True
...@@ -1129,8 +1134,12 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = { ...@@ -1129,8 +1134,12 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
), ),
"Step3p5MTP": _HfExamplesInfo( "Step3p5MTP": _HfExamplesInfo(
"stepfun-ai/Step-3.5-Flash", "stepfun-ai/Step-3.5-Flash",
trust_remote_code=True,
speculative_model="stepfun-ai/Step-3.5-Flash", speculative_model="stepfun-ai/Step-3.5-Flash",
use_original_num_layers=True,
# Initialize at least one MoE layer
hf_overrides={
"num_hidden_layers": 4,
},
is_available_online=False, is_available_online=False,
), ),
} }
......
...@@ -36,7 +36,6 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor ...@@ -36,7 +36,6 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.rotary_embedding import get_rope
from vllm.model_executor.layers.vocab_parallel_embedding import ( from vllm.model_executor.layers.vocab_parallel_embedding import (
DEFAULT_VOCAB_PADDING_SIZE,
ParallelLMHead, ParallelLMHead,
VocabParallelEmbedding, VocabParallelEmbedding,
) )
...@@ -770,37 +769,17 @@ class Step3p5ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts): ...@@ -770,37 +769,17 @@ class Step3p5ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
): ):
super().__init__() super().__init__()
config = vllm_config.model_config.hf_config config = vllm_config.model_config.hf_config
lora_config = vllm_config.lora_config
self.config = config
self.vllm_config = vllm_config
self.model = Step3p5Model( self.model = Step3p5Model(
vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model") vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
) )
self.moe_layers: list[FusedMoEBlock] = []
for layer in self.model.layers:
if isinstance(layer, PPMissingLayer):
continue
assert isinstance(layer, Step3p5DecoderLayer)
if hasattr(layer, "moe") and isinstance(layer.moe, FusedMoEBlock):
self.moe_layers.append(layer.moe)
if get_pp_group().is_last_rank: if get_pp_group().is_last_rank:
self.unpadded_vocab_size = config.vocab_size
if lora_config:
self.unpadded_vocab_size += lora_config.lora_extra_vocab_size
self.lm_head = ParallelLMHead( self.lm_head = ParallelLMHead(
self.unpadded_vocab_size, config.vocab_size,
config.hidden_size, config.hidden_size,
org_num_embeddings=config.vocab_size, quant_config=vllm_config.quant_config,
padding_size=DEFAULT_VOCAB_PADDING_SIZE prefix=maybe_prefix(prefix, "lm_head"),
if not lora_config
else lora_config.lora_vocab_padding_size,
)
self.logits_processor = LogitsProcessor(
self.unpadded_vocab_size, config.vocab_size
) )
self.logits_processor = LogitsProcessor(config.vocab_size)
else: else:
self.lm_head = PPMissingLayer() self.lm_head = PPMissingLayer()
...@@ -809,6 +788,14 @@ class Step3p5ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts): ...@@ -809,6 +788,14 @@ class Step3p5ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
) )
# Set MoE hyperparameters # Set MoE hyperparameters
self.moe_layers: list[FusedMoEBlock] = []
for layer in self.model.layers:
if isinstance(layer, PPMissingLayer):
continue
assert isinstance(layer, Step3p5DecoderLayer)
if hasattr(layer, "moe") and isinstance(layer.moe, FusedMoEBlock):
self.moe_layers.append(layer.moe)
self.expert_weights = [] self.expert_weights = []
assert len(self.moe_layers) > 0, "No MoE layers found in the model." assert len(self.moe_layers) > 0, "No MoE layers found in the model."
example_layer = self.moe_layers[0] example_layer = self.moe_layers[0]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment