[Bugfix] add support for 'num_attention_groups' in ModelArchConfigConvertorBase for Step3p5 (#39796)

[Bugfix] add support for 'num_attention_groups' in ModelArchConfigConvertorBase for Step3p5 (#39796)

Signed-off-by: realliujiaxu <realliujiaxu@163.com>

[Bugfix] add support for 'num_attention_groups' in...
[Bugfix] add support for 'num_attention_groups' in ModelArchConfigConvertorBase for Step3p5 (#39796) Signed-off-by: realliujiaxu <realliujiaxu@163.com>
78453792 · realliujiaxu · GitHub · 4b7ca37b · 78453792 · 78453792
Unverified Commit 78453792 authored Apr 16, 2026 by realliujiaxu Committed by GitHub Apr 16, 2026
3 changed files
--- a/tests/config/base_model_arch_groundtruth.json
+++ b/tests/config/base_model_arch_groundtruth.json
@@ -356,6 +356,23 @@
        "is_multimodal_model": false,
        "dtype": "torch.float32"
    },
+    "stepfun-ai/Step-3.5-Flash": {
+        "architectures": [
+            "Step3p5ForCausalLM"
+        ],
+        "model_type": "step3p5",
+        "text_model_type": "step3p5",
+        "hidden_size": 4096,
+        "total_num_hidden_layers": 45,
+        "total_num_attention_heads": 64,
+        "head_size": 128,
+        "vocab_size": 128896,
+        "total_num_kv_heads": 8,
+        "num_experts": 288,
+        "is_deepseek_mla": false,
+        "is_multimodal_model": false,
+        "dtype": "torch.bfloat16"
+    },
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": {
        "architectures": [
            "NemotronHForCausalLM"

--- a/tests/config/test_model_arch_config.py
+++ b/tests/config/test_model_arch_config.py
@@ -16,6 +16,7 @@ BASE_TRUST_REMOTE_CODE_MODELS = {
    "nvidia/Llama-3_3-Nemotron-Super-49B-v1",
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
    "XiaomiMiMo/MiMo-7B-RL",
+    "stepfun-ai/Step-3.5-Flash",
    # Excluded: Not available online right now
    # "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1",
    "meituan-longcat/LongCat-Flash-Chat",

--- a/vllm/transformers_utils/model_arch_config_convertor.py
+++ b/vllm/transformers_utils/model_arch_config_convertor.py
@@ -77,6 +77,8 @@ class ModelArchConfigConvertorBase:
            "num_key_value_heads",
            # For ChatGLM:
            "multi_query_group_num",
+            # For Step3p5:
+            "num_attention_groups",
        ]
        # For non-grouped-query attention models, the number of KV heads is
        # equal to the number of attention heads.