Unverified commit dc45efc8, authored by Dezhan, committed by GitHub
Browse files

[BugFix] Fix Llama4 Pipeline Parallelism Assert Error (#28577)


Co-authored-by: Dezhan Tu <dztu@meta.com>
parent fb8851f2
......@@ -53,6 +53,7 @@ from vllm.model_executor.models.utils import sequence_parallel_chunk
from .llama import LlamaForCausalLM, LlamaMLP, LlamaModel
from .utils import (
AutoWeightsLoader,
PPMissingLayer,
extract_layer_index,
fast_topk,
is_pp_missing_parameter,
......@@ -729,6 +730,9 @@ class Llama4ForCausalLM(LlamaForCausalLM, MixtureOfExperts):
self.moe_layers = []
example_moe = None
for layer in self.model.layers:
if isinstance(layer, PPMissingLayer):
continue
assert isinstance(layer, Llama4DecoderLayer)
if isinstance(layer.feed_forward, Llama4MoE):
# Pick last one layer since the first ones may be dense layers.
......@@ -765,6 +769,9 @@ class Llama4ForCausalLM(LlamaForCausalLM, MixtureOfExperts):
self.num_local_physical_experts = num_local_physical_experts
self.num_redundant_experts = num_physical_experts - self.num_logical_experts
for layer in self.model.layers:
if isinstance(layer, PPMissingLayer):
continue
if isinstance(layer.feed_forward, Llama4MoE):
moe = layer.feed_forward
moe.n_local_physical_experts = num_local_physical_experts
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment