[Bugfix] Fix KV head calculation for MPT models when using GQA (#5142)

a3e8a05d · Bruce Fontaine · GitHub · e441bad6 · a3e8a05d
Commit a3e8a05d (unverified), authored Jun 17, 2024 by Bruce Fontaine; committed by GitHub on Jun 17, 2024.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 5 additions and 1 deletion

vllm/config.py vllm/config.py +5 -1

No files found.
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -302,7 +302,11 @@ class ModelConfig:
            return 1

        # For DBRX and MPT
-        if self.hf_config.model_type in ["dbrx", "mpt"]:
+        if self.hf_config.model_type == "mpt":
+            if "kv_n_heads" in self.hf_config.attn_config:
+                return self.hf_config.attn_config["kv_n_heads"]
+            return self.hf_config.num_attention_heads
+        if self.hf_config.model_type == "dbrx":
            return getattr(self.hf_config.attn_config, "kv_n_heads",
                           self.hf_config.num_attention_heads)