Unverified commit 6f786f2c, authored by Thomas and committed by GitHub
Browse files

[Bugfix][Model] Fix Devstral Small 2 HF format weight loading (#39293)


Signed-off-by: thomasmaindron <thomasmaindron@users.noreply.github.com>
Co-authored-by: thomasmaindron <thomasmaindron@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent 4eee77b8
...@@ -416,6 +416,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = { ...@@ -416,6 +416,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"MiniMaxAI/MiniMax-M2", "MiniMaxAI/MiniMax-M2",
trust_remote_code=True, trust_remote_code=True,
), ),
"Ministral3ForCausalLM": _HfExamplesInfo("mistralai/Ministral-3-3B-Instruct-2512"),
"MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"), "MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"),
"MistralLarge3ForCausalLM": _HfExamplesInfo( "MistralLarge3ForCausalLM": _HfExamplesInfo(
"mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4" "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4"
......
...@@ -382,7 +382,14 @@ class Mistral3ForConditionalGeneration( ...@@ -382,7 +382,14 @@ class Mistral3ForConditionalGeneration(
# Some PEFT LoRAs are trained against the text submodule directly # Some PEFT LoRAs are trained against the text submodule directly
# and produce names like `base_model.model.model.layers.*`. # and produce names like `base_model.model.model.layers.*`.
"model.": "language_model.model.", "model.": "language_model.model.",
} },
orig_to_new_suffix={
# FP8 quantized HF checkpoints use "activation_scale" and
# "weight_scale_inv" but vLLM's FP8 linear layers register
# them as "input_scale" and "weight_scale"
".activation_scale": ".input_scale",
".weight_scale_inv": ".weight_scale",
},
) )
@classmethod @classmethod
...@@ -402,13 +409,8 @@ class Mistral3ForConditionalGeneration( ...@@ -402,13 +409,8 @@ class Mistral3ForConditionalGeneration(
self.config = config self.config = config
self.multimodal_config = multimodal_config self.multimodal_config = multimodal_config
# NOTE: These are special cases for Pixtral-12B in the HF-format # NOTE: This is a special case for Pixtral-12B in the HF-format
# https://huggingface.co/mistral-community/pixtral-12b/blob/main/config.json # noqa # https://huggingface.co/mistral-community/pixtral-12b/blob/main/config.json # noqa
if (
config.text_config.architectures is None
and config.text_config.model_type == "mistral"
):
config.text_config.architectures = ["MistralForCausalLM"]
if ( if (
config.projector_hidden_act is None config.projector_hidden_act is None
and config.vision_config.hidden_act == "gelu" and config.vision_config.hidden_act == "gelu"
......
...@@ -160,6 +160,7 @@ _TEXT_GENERATION_MODELS = { ...@@ -160,6 +160,7 @@ _TEXT_GENERATION_MODELS = {
"MiniMaxText01ForCausalLM": ("minimax_text_01", "MiniMaxText01ForCausalLM"), "MiniMaxText01ForCausalLM": ("minimax_text_01", "MiniMaxText01ForCausalLM"),
"MiniMaxM1ForCausalLM": ("minimax_text_01", "MiniMaxText01ForCausalLM"), "MiniMaxM1ForCausalLM": ("minimax_text_01", "MiniMaxText01ForCausalLM"),
"MiniMaxM2ForCausalLM": ("minimax_m2", "MiniMaxM2ForCausalLM"), "MiniMaxM2ForCausalLM": ("minimax_m2", "MiniMaxM2ForCausalLM"),
"Ministral3ForCausalLM": ("mistral", "MistralForCausalLM"),
"MistralForCausalLM": ("mistral", "MistralForCausalLM"), "MistralForCausalLM": ("mistral", "MistralForCausalLM"),
"MistralLarge3ForCausalLM": ("mistral_large_3", "MistralLarge3ForCausalLM"), "MistralLarge3ForCausalLM": ("mistral_large_3", "MistralLarge3ForCausalLM"),
"MixtralForCausalLM": ("mixtral", "MixtralForCausalLM"), "MixtralForCausalLM": ("mixtral", "MixtralForCausalLM"),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment