Unverified commit 5ce2d10e, authored by Harry Mellor and committed by GitHub
Browse files

Fix models which use `layer_type_validation` for Transformers v5 (#37398)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 738d0a28
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from transformers.configuration_utils import PretrainedConfig, layer_type_validation from transformers.configuration_utils import PretrainedConfig
class OlmoHybridConfig(PretrainedConfig): class OlmoHybridConfig(PretrainedConfig):
...@@ -228,7 +228,15 @@ class OlmoHybridConfig(PretrainedConfig): ...@@ -228,7 +228,15 @@ class OlmoHybridConfig(PretrainedConfig):
if "full_attention" not in layer_types: if "full_attention" not in layer_types:
layer_types[-1] = "full_attention" layer_types[-1] = "full_attention"
layer_type_validation(layer_types, num_hidden_layers) if hasattr(self, "validate_layer_type"):
# Transformers v5
self.layer_types = layer_types
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(layer_types, num_hidden_layers)
if "linear_attention" not in layer_types: if "linear_attention" not in layer_types:
raise ValueError( raise ValueError(
"OLMoHybrid expects at least one 'linear_attention' layer." "OLMoHybrid expects at least one 'linear_attention' layer."
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
# limitations under the License. # limitations under the License.
"""Qwen3.5 model configuration""" """Qwen3.5 model configuration"""
from transformers.configuration_utils import PretrainedConfig, layer_type_validation from transformers.configuration_utils import PretrainedConfig
class Qwen3_5TextConfig(PretrainedConfig): class Qwen3_5TextConfig(PretrainedConfig):
...@@ -68,10 +68,6 @@ class Qwen3_5TextConfig(PretrainedConfig): ...@@ -68,10 +68,6 @@ class Qwen3_5TextConfig(PretrainedConfig):
eos_token_id=None, eos_token_id=None,
**kwargs, **kwargs,
): ):
kwargs["ignore_keys_at_rope_validation"] = [
"mrope_section",
"mrope_interleaved",
]
self.vocab_size = vocab_size self.vocab_size = vocab_size
self.max_position_embeddings = max_position_embeddings self.max_position_embeddings = max_position_embeddings
self.hidden_size = hidden_size self.hidden_size = hidden_size
...@@ -98,7 +94,18 @@ class Qwen3_5TextConfig(PretrainedConfig): ...@@ -98,7 +94,18 @@ class Qwen3_5TextConfig(PretrainedConfig):
else "full_attention" else "full_attention"
for i in range(self.num_hidden_layers) for i in range(self.num_hidden_layers)
] ]
layer_type_validation(self.layer_types, self.num_hidden_layers) if hasattr(self, "validate_layer_type"):
# Transformers v5
kwargs["ignore_keys_at_rope_validation"] = {
"mrope_section",
"mrope_interleaved",
}
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(self.layer_types, self.num_hidden_layers)
# linear attention part # linear attention part
self.linear_conv_kernel_dim = linear_conv_kernel_dim self.linear_conv_kernel_dim = linear_conv_kernel_dim
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
# limitations under the License. # limitations under the License.
"""Qwen3.5-MoE model configuration""" """Qwen3.5-MoE model configuration"""
from transformers.configuration_utils import PretrainedConfig, layer_type_validation from transformers.configuration_utils import PretrainedConfig
class Qwen3_5MoeTextConfig(PretrainedConfig): class Qwen3_5MoeTextConfig(PretrainedConfig):
...@@ -75,10 +75,6 @@ class Qwen3_5MoeTextConfig(PretrainedConfig): ...@@ -75,10 +75,6 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
eos_token_id=None, eos_token_id=None,
**kwargs, **kwargs,
): ):
kwargs["ignore_keys_at_rope_validation"] = [
"mrope_section",
"mrope_interleaved",
]
self.vocab_size = vocab_size self.vocab_size = vocab_size
self.max_position_embeddings = max_position_embeddings self.max_position_embeddings = max_position_embeddings
self.hidden_size = hidden_size self.hidden_size = hidden_size
...@@ -104,7 +100,18 @@ class Qwen3_5MoeTextConfig(PretrainedConfig): ...@@ -104,7 +100,18 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
else "full_attention" else "full_attention"
for i in range(self.num_hidden_layers) for i in range(self.num_hidden_layers)
] ]
layer_type_validation(self.layer_types, self.num_hidden_layers) if hasattr(self, "validate_layer_type"):
# Transformers v5
kwargs["ignore_keys_at_rope_validation"] = {
"mrope_section",
"mrope_interleaved",
}
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(self.layer_types, self.num_hidden_layers)
# linear attention part # linear attention part
self.linear_conv_kernel_dim = linear_conv_kernel_dim self.linear_conv_kernel_dim = linear_conv_kernel_dim
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
# limitations under the License. # limitations under the License.
"""Qwen3-Next model configuration""" """Qwen3-Next model configuration"""
from transformers.configuration_utils import PretrainedConfig, layer_type_validation from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging from transformers.utils import logging
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
...@@ -253,7 +253,14 @@ class Qwen3NextConfig(PretrainedConfig): ...@@ -253,7 +253,14 @@ class Qwen3NextConfig(PretrainedConfig):
"linear_attention" if bool((i + 1) % 4) else "full_attention" "linear_attention" if bool((i + 1) % 4) else "full_attention"
for i in range(self.num_hidden_layers) for i in range(self.num_hidden_layers)
] ]
layer_type_validation(self.layer_types) if hasattr(self, "validate_layer_type"):
# Transformers v5
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(self.layer_types)
# linear attention part # linear attention part
self.linear_conv_kernel_dim = linear_conv_kernel_dim self.linear_conv_kernel_dim = linear_conv_kernel_dim
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment