[BugFix] Fix Llama4 Pipeline Parallelism Assert Error (#28577)

Co-authored-by: Dezhan Tu <dztu@meta.com>

[BugFix] Fix Llama4 Pipeline Parallelism Assert Error (#28577)
Co-authored-by: Dezhan Tu <dztu@meta.com>
dc45efc8 · Dezhan · GitHub · fb8851f2 · dc45efc8
Commit dc45efc8 (unverified) — authored Nov 20, 2025 by Dezhan; committed by GitHub on Nov 20, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 7 additions and 0 deletions

vllm/model_executor/models/llama4.py → vllm/model_executor/models/llama4.py (+7 -0)

No files found.
--- a/vllm/model_executor/models/llama4.py
+++ b/vllm/model_executor/models/llama4.py
@@ -53,6 +53,7 @@ from vllm.model_executor.models.utils import sequence_parallel_chunk
 from .llama import LlamaForCausalLM, LlamaMLP, LlamaModel
 from .utils import (
    AutoWeightsLoader,
+    PPMissingLayer,
    extract_layer_index,
    fast_topk,
    is_pp_missing_parameter,
@@ -729,6 +730,9 @@ class Llama4ForCausalLM(LlamaForCausalLM, MixtureOfExperts):
        self.moe_layers = []
        example_moe = None
        for layer in self.model.layers:
+            if isinstance(layer, PPMissingLayer):
+                continue
+
            assert isinstance(layer, Llama4DecoderLayer)
            if isinstance(layer.feed_forward, Llama4MoE):
                # Pick last one layer since the first ones may be dense layers.
@@ -765,6 +769,9 @@ class Llama4ForCausalLM(LlamaForCausalLM, MixtureOfExperts):
        self.num_local_physical_experts = num_local_physical_experts
        self.num_redundant_experts = num_physical_experts - self.num_logical_experts
        for layer in self.model.layers:
+            if isinstance(layer, PPMissingLayer):
+                continue
+
            if isinstance(layer.feed_forward, Llama4MoE):
                moe = layer.feed_forward
                moe.n_local_physical_experts = num_local_physical_experts