Unverified commit cf3eacfe, authored by Harry Mellor, committed by GitHub
Browse files

Standardise `get_rope` to use `rope_parameters["partial_rotary_factor"]`, not `rotary_dim` (#30389)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 92fea56f
...@@ -196,7 +196,6 @@ class Step3TextAttention(nn.Module): ...@@ -196,7 +196,6 @@ class Step3TextAttention(nn.Module):
) )
self.rotary_emb = get_rope( self.rotary_emb = get_rope(
self.head_dim, self.head_dim,
rotary_dim=self.head_dim,
max_position=max_position_embedding, max_position=max_position_embedding,
rope_parameters=rope_parameters, rope_parameters=rope_parameters,
) )
......
...@@ -230,7 +230,6 @@ class Zamba2Attention(nn.Module): ...@@ -230,7 +230,6 @@ class Zamba2Attention(nn.Module):
if config.use_mem_rope: if config.use_mem_rope:
self.rotary_emb = get_rope( self.rotary_emb = get_rope(
head_size=self.attention_head_dim, head_size=self.attention_head_dim,
rotary_dim=self.attention_head_dim,
max_position=config.max_position_embeddings, max_position=config.max_position_embeddings,
rope_parameters=config.rope_parameters, rope_parameters=config.rope_parameters,
is_neox_style=True, is_neox_style=True,
......
...@@ -306,8 +306,13 @@ def patch_rope_parameters(config: PretrainedConfig) -> None: ...@@ -306,8 +306,13 @@ def patch_rope_parameters(config: PretrainedConfig) -> None:
"""Provide backwards compatibility for RoPE.""" """Provide backwards compatibility for RoPE."""
from vllm.config.utils import getattr_iter from vllm.config.utils import getattr_iter
rope_theta_names = ("rope_theta", "rotary_emb_base") # Older custom models may use non-standard field names
rope_theta = getattr_iter(config, rope_theta_names, None) # which need patching for both Transformers v4 and v5.
names = ["rope_theta", "rotary_emb_base"]
rope_theta = getattr_iter(config, names, None, warn=True)
names = ["partial_rotary_factor", "rotary_pct", "rotary_emb_fraction"]
partial_rotary_factor = getattr_iter(config, names, None, warn=True)
if Version(version("transformers")) < Version("5.0.0.dev0"): if Version(version("transformers")) < Version("5.0.0.dev0"):
# Transformers v4 installed, legacy config fields may be present # Transformers v4 installed, legacy config fields may be present
if (rope_scaling := getattr(config, "rope_scaling", None)) is not None: if (rope_scaling := getattr(config, "rope_scaling", None)) is not None:
...@@ -316,14 +321,18 @@ def patch_rope_parameters(config: PretrainedConfig) -> None: ...@@ -316,14 +321,18 @@ def patch_rope_parameters(config: PretrainedConfig) -> None:
if not hasattr(config, "rope_parameters"): if not hasattr(config, "rope_parameters"):
config.rope_parameters = {"rope_type": "default"} config.rope_parameters = {"rope_type": "default"}
config.rope_parameters["rope_theta"] = rope_theta config.rope_parameters["rope_theta"] = rope_theta
partial_rotary_factor_names = ("partial_rotary_factor", "rotary_pct")
partial_rotary_factor = getattr_iter(config, partial_rotary_factor_names, None)
if partial_rotary_factor is not None: if partial_rotary_factor is not None:
if not hasattr(config, "rope_parameters"): if not hasattr(config, "rope_parameters"):
config.rope_parameters = {"rope_type": "default"} config.rope_parameters = {"rope_type": "default"}
config.rope_parameters["partial_rotary_factor"] = partial_rotary_factor config.rope_parameters["partial_rotary_factor"] = partial_rotary_factor
elif rope_theta is not None or hasattr(config, "rope_parameters"): elif rope_theta is not None or hasattr(config, "rope_parameters"):
# Transformers v5 installed # Transformers v5 installed
# Patch these fields in case they used non-standard names
if rope_theta is not None:
config.rope_theta = rope_theta
if partial_rotary_factor is not None:
config.partial_rotary_factor = partial_rotary_factor
# Standardize and validate RoPE parameters
config.standardize_rope_params() config.standardize_rope_params()
config.validate_rope() config.validate_rope()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment