Unverified commit 5ce2d10e, authored by Harry Mellor, committed by GitHub
Browse files

Fix models which use `layer_type_validation` for Transformers v5 (#37398)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 738d0a28
......@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from transformers.configuration_utils import PretrainedConfig, layer_type_validation
from transformers.configuration_utils import PretrainedConfig
class OlmoHybridConfig(PretrainedConfig):
......@@ -228,6 +228,14 @@ class OlmoHybridConfig(PretrainedConfig):
if "full_attention" not in layer_types:
layer_types[-1] = "full_attention"
if hasattr(self, "validate_layer_type"):
# Transformers v5
self.layer_types = layer_types
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(layer_types, num_hidden_layers)
if "linear_attention" not in layer_types:
raise ValueError(
......
......@@ -16,7 +16,7 @@
# limitations under the License.
"""Qwen3.5 model configuration"""
from transformers.configuration_utils import PretrainedConfig, layer_type_validation
from transformers.configuration_utils import PretrainedConfig
class Qwen3_5TextConfig(PretrainedConfig):
......@@ -68,10 +68,6 @@ class Qwen3_5TextConfig(PretrainedConfig):
eos_token_id=None,
**kwargs,
):
kwargs["ignore_keys_at_rope_validation"] = [
"mrope_section",
"mrope_interleaved",
]
self.vocab_size = vocab_size
self.max_position_embeddings = max_position_embeddings
self.hidden_size = hidden_size
......@@ -98,6 +94,17 @@ class Qwen3_5TextConfig(PretrainedConfig):
else "full_attention"
for i in range(self.num_hidden_layers)
]
if hasattr(self, "validate_layer_type"):
# Transformers v5
kwargs["ignore_keys_at_rope_validation"] = {
"mrope_section",
"mrope_interleaved",
}
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(self.layer_types, self.num_hidden_layers)
# linear attention part
......
......@@ -16,7 +16,7 @@
# limitations under the License.
"""Qwen3.5-MoE model configuration"""
from transformers.configuration_utils import PretrainedConfig, layer_type_validation
from transformers.configuration_utils import PretrainedConfig
class Qwen3_5MoeTextConfig(PretrainedConfig):
......@@ -75,10 +75,6 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
eos_token_id=None,
**kwargs,
):
kwargs["ignore_keys_at_rope_validation"] = [
"mrope_section",
"mrope_interleaved",
]
self.vocab_size = vocab_size
self.max_position_embeddings = max_position_embeddings
self.hidden_size = hidden_size
......@@ -104,6 +100,17 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
else "full_attention"
for i in range(self.num_hidden_layers)
]
if hasattr(self, "validate_layer_type"):
# Transformers v5
kwargs["ignore_keys_at_rope_validation"] = {
"mrope_section",
"mrope_interleaved",
}
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(self.layer_types, self.num_hidden_layers)
# linear attention part
......
......@@ -16,7 +16,7 @@
# limitations under the License.
"""Qwen3-Next model configuration"""
from transformers.configuration_utils import PretrainedConfig, layer_type_validation
from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging
logger = logging.get_logger(__name__)
......@@ -253,6 +253,13 @@ class Qwen3NextConfig(PretrainedConfig):
"linear_attention" if bool((i + 1) % 4) else "full_attention"
for i in range(self.num_hidden_layers)
]
if hasattr(self, "validate_layer_type"):
# Transformers v5
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(self.layer_types)
# linear attention part
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment