Unverified commit 2f186635, authored by Roger Wang, committed by GitHub
Browse files

[Bugfix] Fix Qwen3.5 config loading (#34554)


Signed-off-by: Roger Wang <hey@rogerw.io>
parent 342a7cda
...@@ -72,10 +72,6 @@ class Qwen3_5TextConfig(PretrainedConfig): ...@@ -72,10 +72,6 @@ class Qwen3_5TextConfig(PretrainedConfig):
"mrope_section", "mrope_section",
"mrope_interleaved", "mrope_interleaved",
] ]
self.pad_token_id = pad_token_id
self.bos_token_id = bos_token_id
self.eos_token_id = eos_token_id
self.tie_word_embeddings = tie_word_embeddings
self.vocab_size = vocab_size self.vocab_size = vocab_size
self.max_position_embeddings = max_position_embeddings self.max_position_embeddings = max_position_embeddings
self.hidden_size = hidden_size self.hidden_size = hidden_size
...@@ -111,6 +107,13 @@ class Qwen3_5TextConfig(PretrainedConfig): ...@@ -111,6 +107,13 @@ class Qwen3_5TextConfig(PretrainedConfig):
self.linear_num_key_heads = linear_num_key_heads self.linear_num_key_heads = linear_num_key_heads
self.linear_num_value_heads = linear_num_value_heads self.linear_num_value_heads = linear_num_value_heads
super().__init__(**kwargs) super().__init__(**kwargs)
# Set these AFTER super().__init__() because transformers v4's
# PretrainedConfig.__init__ has these as explicit params with different
# defaults (e.g. tie_word_embeddings=True) that would overwrite our values.
self.pad_token_id = pad_token_id
self.bos_token_id = bos_token_id
self.eos_token_id = eos_token_id
self.tie_word_embeddings = tie_word_embeddings
class Qwen3_5VisionConfig(PretrainedConfig): class Qwen3_5VisionConfig(PretrainedConfig):
...@@ -182,8 +185,9 @@ class Qwen3_5Config(PretrainedConfig): ...@@ -182,8 +185,9 @@ class Qwen3_5Config(PretrainedConfig):
self.video_token_id = video_token_id self.video_token_id = video_token_id
self.vision_start_token_id = vision_start_token_id self.vision_start_token_id = vision_start_token_id
self.vision_end_token_id = vision_end_token_id self.vision_end_token_id = vision_end_token_id
self.tie_word_embeddings = tie_word_embeddings
super().__init__(**kwargs) super().__init__(**kwargs)
# Set after super().__init__() to avoid v4 PretrainedConfig overwrite
self.tie_word_embeddings = tie_word_embeddings
__all__ = ["Qwen3_5Config", "Qwen3_5TextConfig"] __all__ = ["Qwen3_5Config", "Qwen3_5TextConfig"]
...@@ -79,10 +79,6 @@ class Qwen3_5MoeTextConfig(PretrainedConfig): ...@@ -79,10 +79,6 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
"mrope_section", "mrope_section",
"mrope_interleaved", "mrope_interleaved",
] ]
self.pad_token_id = pad_token_id
self.bos_token_id = bos_token_id
self.eos_token_id = eos_token_id
self.tie_word_embeddings = tie_word_embeddings
self.vocab_size = vocab_size self.vocab_size = vocab_size
self.max_position_embeddings = max_position_embeddings self.max_position_embeddings = max_position_embeddings
self.hidden_size = hidden_size self.hidden_size = hidden_size
...@@ -123,6 +119,13 @@ class Qwen3_5MoeTextConfig(PretrainedConfig): ...@@ -123,6 +119,13 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
self.output_router_logits = output_router_logits self.output_router_logits = output_router_logits
self.router_aux_loss_coef = router_aux_loss_coef self.router_aux_loss_coef = router_aux_loss_coef
super().__init__(**kwargs) super().__init__(**kwargs)
# Set these AFTER super().__init__() because transformers v4's
# PretrainedConfig.__init__ has these as explicit params with different
# defaults (e.g. tie_word_embeddings=True) that would overwrite our values.
self.pad_token_id = pad_token_id
self.bos_token_id = bos_token_id
self.eos_token_id = eos_token_id
self.tie_word_embeddings = tie_word_embeddings
class Qwen3_5MoeVisionConfig(PretrainedConfig): class Qwen3_5MoeVisionConfig(PretrainedConfig):
...@@ -194,8 +197,9 @@ class Qwen3_5MoeConfig(PretrainedConfig): ...@@ -194,8 +197,9 @@ class Qwen3_5MoeConfig(PretrainedConfig):
self.video_token_id = video_token_id self.video_token_id = video_token_id
self.vision_start_token_id = vision_start_token_id self.vision_start_token_id = vision_start_token_id
self.vision_end_token_id = vision_end_token_id self.vision_end_token_id = vision_end_token_id
self.tie_word_embeddings = tie_word_embeddings
super().__init__(**kwargs) super().__init__(**kwargs)
# Set after super().__init__() to avoid v4 PretrainedConfig overwrite
self.tie_word_embeddings = tie_word_embeddings
__all__ = ["Qwen3_5MoeConfig", "Qwen3_5MoeTextConfig"] __all__ = ["Qwen3_5MoeConfig", "Qwen3_5MoeTextConfig"]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment