Fix models which use `layer_type_validation` for Transformers v5 (#37398)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

Fix models which use `layer_type_validation` for Transformers v5 (#37398)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
5ce2d10e · Harry Mellor · GitHub · 738d0a28 · 5ce2d10e · 5ce2d10e
Unverified commit 5ce2d10e, authored Mar 18, 2026 by Harry Mellor; committed by GitHub on Mar 18, 2026.
4 changed files
--- a/vllm/transformers_utils/configs/olmo_hybrid.py
+++ b/vllm/transformers_utils/configs/olmo_hybrid.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project


-from transformers.configuration_utils import PretrainedConfig, layer_type_validation
+from transformers.configuration_utils import PretrainedConfig


 class OlmoHybridConfig(PretrainedConfig):
@@ -228,6 +228,14 @@ class OlmoHybridConfig(PretrainedConfig):
            if "full_attention" not in layer_types:
                layer_types[-1] = "full_attention"

+        if hasattr(self, "validate_layer_type"):
+            # Transformers v5
+            self.layer_types = layer_types
+            self.validate_layer_type()
+        else:
+            # Transformers v4
+            from transformers.configuration_utils import layer_type_validation
+
            layer_type_validation(layer_types, num_hidden_layers)
        if "linear_attention" not in layer_types:
            raise ValueError(

--- a/vllm/transformers_utils/configs/qwen3_5.py
+++ b/vllm/transformers_utils/configs/qwen3_5.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 """Qwen3.5 model configuration"""

-from transformers.configuration_utils import PretrainedConfig, layer_type_validation
+from transformers.configuration_utils import PretrainedConfig


 class Qwen3_5TextConfig(PretrainedConfig):
@@ -68,10 +68,6 @@ class Qwen3_5TextConfig(PretrainedConfig):
        eos_token_id=None,
        **kwargs,
    ):
-        kwargs["ignore_keys_at_rope_validation"] = [
-            "mrope_section",
-            "mrope_interleaved",
-        ]
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
@@ -98,6 +94,17 @@ class Qwen3_5TextConfig(PretrainedConfig):
                else "full_attention"
                for i in range(self.num_hidden_layers)
            ]
+        if hasattr(self, "validate_layer_type"):
+            # Transformers v5
+            kwargs["ignore_keys_at_rope_validation"] = {
+                "mrope_section",
+                "mrope_interleaved",
+            }
+            self.validate_layer_type()
+        else:
+            # Transformers v4
+            from transformers.configuration_utils import layer_type_validation
+
            layer_type_validation(self.layer_types, self.num_hidden_layers)

        # linear attention part

--- a/vllm/transformers_utils/configs/qwen3_5_moe.py
+++ b/vllm/transformers_utils/configs/qwen3_5_moe.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 """Qwen3.5-MoE model configuration"""

-from transformers.configuration_utils import PretrainedConfig, layer_type_validation
+from transformers.configuration_utils import PretrainedConfig


 class Qwen3_5MoeTextConfig(PretrainedConfig):
@@ -75,10 +75,6 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
        eos_token_id=None,
        **kwargs,
    ):
-        kwargs["ignore_keys_at_rope_validation"] = [
-            "mrope_section",
-            "mrope_interleaved",
-        ]
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
@@ -104,6 +100,17 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
                else "full_attention"
                for i in range(self.num_hidden_layers)
            ]
+        if hasattr(self, "validate_layer_type"):
+            # Transformers v5
+            kwargs["ignore_keys_at_rope_validation"] = {
+                "mrope_section",
+                "mrope_interleaved",
+            }
+            self.validate_layer_type()
+        else:
+            # Transformers v4
+            from transformers.configuration_utils import layer_type_validation
+
            layer_type_validation(self.layer_types, self.num_hidden_layers)

        # linear attention part

--- a/vllm/transformers_utils/configs/qwen3_next.py
+++ b/vllm/transformers_utils/configs/qwen3_next.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 """Qwen3-Next model configuration"""

-from transformers.configuration_utils import PretrainedConfig, layer_type_validation
+from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging

 logger = logging.get_logger(__name__)
@@ -253,6 +253,13 @@ class Qwen3NextConfig(PretrainedConfig):
                "linear_attention" if bool((i + 1) % 4) else "full_attention"
                for i in range(self.num_hidden_layers)
            ]
+        if hasattr(self, "validate_layer_type"):
+            # Transformers v5
+            self.validate_layer_type()
+        else:
+            # Transformers v4
+            from transformers.configuration_utils import layer_type_validation
+
            layer_type_validation(self.layer_types)

        # linear attention part