[Bugfix][Model] Fix Devstral Small 2 HF format weight loading (#39293)

Signed-off-by: thomasmaindron <thomasmaindron@users.noreply.github.com>
Co-authored-by: thomasmaindron <thomasmaindron@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

[Bugfix][Model] Fix Devstral Small 2 HF format weight loading (#39293)
Signed-off-by: thomasmaindron <thomasmaindron@users.noreply.github.com>
Co-authored-by: thomasmaindron <thomasmaindron@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
6f786f2c · Thomas · GitHub · 4eee77b8 · 6f786f2c · 6f786f2c
Unverified Commit 6f786f2c authored Apr 14, 2026 by Thomas Committed by GitHub Apr 14, 2026
3 changed files
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -416,6 +416,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
        "MiniMaxAI/MiniMax-M2",
        trust_remote_code=True,
    ),
+    "Ministral3ForCausalLM": _HfExamplesInfo("mistralai/Ministral-3-3B-Instruct-2512"),
    "MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"),
    "MistralLarge3ForCausalLM": _HfExamplesInfo(
        "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4"

--- a/vllm/model_executor/models/mistral3.py
+++ b/vllm/model_executor/models/mistral3.py
@@ -382,7 +382,14 @@ class Mistral3ForConditionalGeneration(
            # Some PEFT LoRAs are trained against the text submodule directly
            # and produce names like `base_model.model.model.layers.*`.
            "model.": "language_model.model.",
-        }
+        },
+        orig_to_new_suffix={
+            # FP8 quantized HF checkpoints use "activation_scale" and
+            # "weight_scale_inv", but vLLM's FP8 linear layers register
+            # them as "input_scale" and "weight_scale".
+            ".activation_scale": ".input_scale",
+            ".weight_scale_inv": ".weight_scale",
+        },
    )
    @classmethod
@@ -402,13 +409,8 @@ class Mistral3ForConditionalGeneration(
        self.config = config
        self.multimodal_config = multimodal_config
-        # NOTE: These are special cases for Pixtral-12B in the HF-format
+        # NOTE: This is a special case for Pixtral-12B in the HF-format
        # https://huggingface.co/mistral-community/pixtral-12b/blob/main/config.json  # noqa
-        if (
-            config.text_config.architectures is None
-            and config.text_config.model_type == "mistral"
-        ):
-            config.text_config.architectures = ["MistralForCausalLM"]
        if (
            config.projector_hidden_act is None
            and config.vision_config.hidden_act == "gelu"

--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -160,6 +160,7 @@ _TEXT_GENERATION_MODELS = {
    "MiniMaxText01ForCausalLM": ("minimax_text_01", "MiniMaxText01ForCausalLM"),
    "MiniMaxM1ForCausalLM": ("minimax_text_01", "MiniMaxText01ForCausalLM"),
    "MiniMaxM2ForCausalLM": ("minimax_m2", "MiniMaxM2ForCausalLM"),
+    "Ministral3ForCausalLM": ("mistral", "MistralForCausalLM"),
    "MistralForCausalLM": ("mistral", "MistralForCausalLM"),
    "MistralLarge3ForCausalLM": ("mistral_large_3", "MistralLarge3ForCausalLM"),
    "MixtralForCausalLM": ("mixtral", "MixtralForCausalLM"),