Unverified commit 73a484ca, authored by Tsukasa OI and committed by GitHub
Browse files

[Model][Quantization] Fix / Add GGUF support for Qwen2 MoE models (#30307)


Signed-off-by: Tsukasa OI <floss_llm@irq.a4lg.com>
parent b37bf51e
...@@ -367,6 +367,8 @@ class Qwen2MoeModel(nn.Module): ...@@ -367,6 +367,8 @@ class Qwen2MoeModel(nn.Module):
self.embed_tokens = VocabParallelEmbedding( self.embed_tokens = VocabParallelEmbedding(
config.vocab_size, config.vocab_size,
config.hidden_size, config.hidden_size,
quant_config=quant_config,
prefix=f"{prefix}.embed_tokens",
) )
self.start_layer, self.end_layer, self.layers = make_layers( self.start_layer, self.end_layer, self.layers = make_layers(
config.num_hidden_layers, config.num_hidden_layers,
...@@ -512,6 +514,12 @@ class Qwen2MoeModel(nn.Module): ...@@ -512,6 +514,12 @@ class Qwen2MoeModel(nn.Module):
continue continue
else: else:
name = remapped_kv_scale_name name = remapped_kv_scale_name
# GGUF: make sure that shared_expert_gate is a 2D tensor.
if (
"mlp.shared_expert_gate" in name
and len(loaded_weight.shape) == 1
):
loaded_weight = loaded_weight[None, :]
param = params_dict[name] param = params_dict[name]
weight_loader = getattr( weight_loader = getattr(
param, "weight_loader", default_weight_loader param, "weight_loader", default_weight_loader
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment