Fix models which use `layer_type_validation` for Transformers v5 (#37398)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

Fix models which use `layer_type_validation` for Transformers v5 (#37398)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
5ce2d10e · Harry Mellor · GitHub · 738d0a28 · 5ce2d10e · 5ce2d10e
Unverified commit 5ce2d10e, authored Mar 18, 2026 by Harry Mellor; committed by GitHub on Mar 18, 2026.
4 changed files
--- a/vllm/transformers_utils/configs/olmo_hybrid.py
+++ b/vllm/transformers_utils/configs/olmo_hybrid.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from transformers.configuration_utils import PretrainedConfig, layer_type_validation
+from transformers.configuration_utils import PretrainedConfig
 class OlmoHybridConfig(PretrainedConfig):
@@ -228,7 +228,15 @@ class OlmoHybridConfig(PretrainedConfig):
            if "full_attention" not in layer_types:
                layer_types[-1] = "full_attention"
-        layer_type_validation(layer_types, num_hidden_layers)
+        if hasattr(self, "validate_layer_type"):
+            # Transformers v5
+            self.layer_types = layer_types
+            self.validate_layer_type()
+        else:
+            # Transformers v4
+            from transformers.configuration_utils import layer_type_validation
+            layer_type_validation(layer_types, num_hidden_layers)
        if "linear_attention" not in layer_types:
            raise ValueError(
                "OLMoHybrid expects at least one 'linear_attention' layer."

--- a/vllm/transformers_utils/configs/qwen3_5.py
+++ b/vllm/transformers_utils/configs/qwen3_5.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 """Qwen3.5 model configuration"""
-from transformers.configuration_utils import PretrainedConfig, layer_type_validation
+from transformers.configuration_utils import PretrainedConfig
 class Qwen3_5TextConfig(PretrainedConfig):
@@ -68,10 +68,6 @@ class Qwen3_5TextConfig(PretrainedConfig):
        eos_token_id=None,
        **kwargs,
    ):
-        kwargs["ignore_keys_at_rope_validation"] = [
-            "mrope_section",
-            "mrope_interleaved",
-        ]
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
@@ -98,7 +94,18 @@ class Qwen3_5TextConfig(PretrainedConfig):
                else "full_attention"
                for i in range(self.num_hidden_layers)
            ]
-        layer_type_validation(self.layer_types, self.num_hidden_layers)
+        if hasattr(self, "validate_layer_type"):
+            # Transformers v5
+            kwargs["ignore_keys_at_rope_validation"] = {
+                "mrope_section",
+                "mrope_interleaved",
+            }
+            self.validate_layer_type()
+        else:
+            # Transformers v4
+            from transformers.configuration_utils import layer_type_validation
+            layer_type_validation(self.layer_types, self.num_hidden_layers)
        # linear attention part
        self.linear_conv_kernel_dim = linear_conv_kernel_dim

--- a/vllm/transformers_utils/configs/qwen3_5_moe.py
+++ b/vllm/transformers_utils/configs/qwen3_5_moe.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 """Qwen3.5-MoE model configuration"""
-from transformers.configuration_utils import PretrainedConfig, layer_type_validation
+from transformers.configuration_utils import PretrainedConfig
 class Qwen3_5MoeTextConfig(PretrainedConfig):
@@ -75,10 +75,6 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
        eos_token_id=None,
        **kwargs,
    ):
-        kwargs["ignore_keys_at_rope_validation"] = [
-            "mrope_section",
-            "mrope_interleaved",
-        ]
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
@@ -104,7 +100,18 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
                else "full_attention"
                for i in range(self.num_hidden_layers)
            ]
-        layer_type_validation(self.layer_types, self.num_hidden_layers)
+        if hasattr(self, "validate_layer_type"):
+            # Transformers v5
+            kwargs["ignore_keys_at_rope_validation"] = {
+                "mrope_section",
+                "mrope_interleaved",
+            }
+            self.validate_layer_type()
+        else:
+            # Transformers v4
+            from transformers.configuration_utils import layer_type_validation
+            layer_type_validation(self.layer_types, self.num_hidden_layers)
        # linear attention part
        self.linear_conv_kernel_dim = linear_conv_kernel_dim

--- a/vllm/transformers_utils/configs/qwen3_next.py
+++ b/vllm/transformers_utils/configs/qwen3_next.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 """Qwen3-Next model configuration"""
-from transformers.configuration_utils import PretrainedConfig, layer_type_validation
+from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
 logger = logging.get_logger(__name__)
@@ -253,7 +253,14 @@ class Qwen3NextConfig(PretrainedConfig):
                "linear_attention" if bool((i + 1) % 4) else "full_attention"
                for i in range(self.num_hidden_layers)
            ]
-        layer_type_validation(self.layer_types)
+        if hasattr(self, "validate_layer_type"):
+            # Transformers v5
+            self.validate_layer_type()
+        else:
+            # Transformers v4
+            from transformers.configuration_utils import layer_type_validation
+            layer_type_validation(self.layer_types)
        # linear attention part
        self.linear_conv_kernel_dim = linear_conv_kernel_dim