Unverified commit 58d5b3f5, authored by Tsukasa OI, committed by GitHub
Browse files

[Model][Quantization] Restore MoE + GGUF models support (incl. Qwen3 MoE) by...


[Model][Quantization] Restore MoE + GGUF models support (incl. Qwen3 MoE) by allowing Sideload Parameters (#30116)
Signed-off-by: Tsukasa OI <floss_llm@irq.a4lg.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent c2e1987a
...@@ -82,6 +82,7 @@ class GGUFConfig(QuantizationConfig): ...@@ -82,6 +82,7 @@ class GGUFConfig(QuantizationConfig):
return UnquantizedEmbeddingMethod() return UnquantizedEmbeddingMethod()
return GGUFEmbeddingMethod(self) return GGUFEmbeddingMethod(self)
elif isinstance(layer, FusedMoE): elif isinstance(layer, FusedMoE):
# TODO: Select UnquantizedFusedMoEMethod on unquantized layers.
return GGUFMoEMethod(self, layer.moe_config) return GGUFMoEMethod(self, layer.moe_config)
return None return None
......
...@@ -4,6 +4,7 @@ import os ...@@ -4,6 +4,7 @@ import os
from collections.abc import Generator from collections.abc import Generator
import gguf import gguf
import regex as re
import torch import torch
import torch.nn as nn import torch.nn as nn
from huggingface_hub import hf_hub_download from huggingface_hub import hf_hub_download
...@@ -94,6 +95,7 @@ class GGUFModelLoader(BaseModelLoader): ...@@ -94,6 +95,7 @@ class GGUFModelLoader(BaseModelLoader):
hasattr(config, "vision_config") and config.vision_config is not None hasattr(config, "vision_config") and config.vision_config is not None
) )
gguf_to_hf_name_map = {} gguf_to_hf_name_map = {}
sideload_params: list[re.Pattern] = []
# hack: ggufs have a different name than transformers # hack: ggufs have a different name than transformers
if model_type == "cohere": if model_type == "cohere":
model_type = "command-r" model_type = "command-r"
...@@ -118,6 +120,12 @@ class GGUFModelLoader(BaseModelLoader): ...@@ -118,6 +120,12 @@ class GGUFModelLoader(BaseModelLoader):
gguf_to_hf_name_map[f"blk.{idx}.ffn_up_exps.weight"] = ( gguf_to_hf_name_map[f"blk.{idx}.ffn_up_exps.weight"] = (
f"model.layers.{idx}.mlp.experts.0.up_proj.weight" f"model.layers.{idx}.mlp.experts.0.up_proj.weight"
) )
sideload_params.append(
re.compile(
f"model\\.layers\\.{idx}"
r"\.mlp\.experts\.[0-9]+\.(gate|up|down)_proj\.weight"
)
)
if model_type in ("qwen2_moe", "qwen3_moe"): if model_type in ("qwen2_moe", "qwen3_moe"):
model_type = model_type.replace("_", "") model_type = model_type.replace("_", "")
# GGUF layer map assumes that we will have a merged expert weights # GGUF layer map assumes that we will have a merged expert weights
...@@ -132,6 +140,12 @@ class GGUFModelLoader(BaseModelLoader): ...@@ -132,6 +140,12 @@ class GGUFModelLoader(BaseModelLoader):
gguf_to_hf_name_map[f"blk.{idx}.ffn_up_exps.weight"] = ( gguf_to_hf_name_map[f"blk.{idx}.ffn_up_exps.weight"] = (
f"model.layers.{idx}.mlp.experts.0.up_proj.weight" f"model.layers.{idx}.mlp.experts.0.up_proj.weight"
) )
sideload_params.append(
re.compile(
f"model\\.layers\\.{idx}"
r"\.mlp\.experts\.[0-9]+\.(gate|up|down)_proj\.weight"
)
)
arch = None arch = None
for key, value in gguf.MODEL_ARCH_NAMES.items(): for key, value in gguf.MODEL_ARCH_NAMES.items():
...@@ -241,7 +255,15 @@ class GGUFModelLoader(BaseModelLoader): ...@@ -241,7 +255,15 @@ class GGUFModelLoader(BaseModelLoader):
# Parameter not in manual overrides either # Parameter not in manual overrides either
unmapped_params.append(hf_name) unmapped_params.append(hf_name)
# All parameters must be mapped: both vision/projector and backbone # All parameters (except those initialized by other means) must be mapped:
# both vision/projector and backbone
if unmapped_params:
unmapped_params = list(
filter(
lambda x: not any(re.fullmatch(p, x) for p in sideload_params),
unmapped_params,
)
)
if unmapped_params: if unmapped_params:
raise RuntimeError( raise RuntimeError(
f"Failed to map GGUF parameters " f"Failed to map GGUF parameters "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment