"xcode/Samples/vscode:/vscode.git/clone" did not exist on "6149876141b4a5d16d1481835bf5519618183980"
Unverified commit 909b1471, authored by Michael Goin and committed by GitHub
Browse files

[Bugfix] Fix prefix creation for Qwen3.5 (#34723)


Signed-off-by: mgoin <mgoin64@gmail.com>
parent a88b3be7
...@@ -676,9 +676,10 @@ class Qwen3_5ForCausalLMBase( ...@@ -676,9 +676,10 @@ class Qwen3_5ForCausalLMBase(
super().__init__() super().__init__()
self.config = config self.config = config
self.scheduler_config = scheduler_config self.scheduler_config = scheduler_config
self.model = Qwen3_5Model( # Deal with the case where the prefix is already "language_model" since
vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model") # Qwen/Qwen3.5-397B-A17B has naming like: model.language_model.layers.0
) model_prefix = prefix if "model" in prefix else "model"
self.model = Qwen3_5Model(vllm_config=vllm_config, prefix=model_prefix)
if get_pp_group().is_last_rank: if get_pp_group().is_last_rank:
if config.tie_word_embeddings: if config.tie_word_embeddings:
...@@ -754,7 +755,7 @@ class Qwen3_5MoeForCausalLM(Qwen3_5ForCausalLMBase, QwenNextMixtureOfExperts): ...@@ -754,7 +755,7 @@ class Qwen3_5MoeForCausalLM(Qwen3_5ForCausalLMBase, QwenNextMixtureOfExperts):
dummy_inputs=Qwen3VLDummyInputsBuilder, dummy_inputs=Qwen3VLDummyInputsBuilder,
) )
class Qwen3_5ForConditionalGeneration(Qwen3VLForConditionalGeneration, IsHybrid): class Qwen3_5ForConditionalGeneration(Qwen3VLForConditionalGeneration, IsHybrid):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"):
# protocols have not __init__ method, so we need to use nn.Module.__init__ # protocols have not __init__ method, so we need to use nn.Module.__init__
nn.Module.__init__(self) nn.Module.__init__(self)
config: Qwen3_5Config = vllm_config.model_config.hf_config config: Qwen3_5Config = vllm_config.model_config.hf_config
...@@ -962,7 +963,7 @@ class Qwen3_5_MoeMixtureOfExperts(MixtureOfExperts): ...@@ -962,7 +963,7 @@ class Qwen3_5_MoeMixtureOfExperts(MixtureOfExperts):
class Qwen3_5MoeForConditionalGeneration( class Qwen3_5MoeForConditionalGeneration(
Qwen3_5ForConditionalGeneration, Qwen3_5_MoeMixtureOfExperts Qwen3_5ForConditionalGeneration, Qwen3_5_MoeMixtureOfExperts
): ):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"):
# protocols have not __init__ method, so we need to use nn.Module.__init__ # protocols have not __init__ method, so we need to use nn.Module.__init__
nn.Module.__init__(self) nn.Module.__init__(self)
config: Qwen3_5MoeConfig = vllm_config.model_config.hf_config config: Qwen3_5MoeConfig = vllm_config.model_config.hf_config
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment