norm / vllm · Commits · 621980bd

Commit 621980bd (unverified)
Authored Aug 05, 2023 by Wen Sun · Committed by GitHub Aug 04, 2023
fix: incorrect bigcode attention heads num (#676)
parent aa84c92e
Changes: 1 changed file, with 5 additions and 3 deletions.

vllm/config.py (+5 -3)
@@ -98,9 +98,11 @@ class ModelConfig:
         # Note: for falcon, when new_decoder_architecture is True, the
         # multi_query flag is ignored and we use n_head_kv for the number of
         # KV heads.
-        if (getattr(self.hf_config, "multi_query", False) and
-                (self.hf_config.model_type == "falcon" and
-                 not getattr(self.hf_config, "new_decoder_architecture", False))):
+        new_decoder_arch_falcon = (
+            self.hf_config.model_type == "falcon"
+            and getattr(self.hf_config, "new_decoder_architecture", False))
+        if not new_decoder_arch_falcon and getattr(self.hf_config,
+                                                   "multi_query", False):
             # Multi-query attention, only one KV head.
             return 1
         # For Falcon:
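Why this fixes bigcode: the old condition returned one KV head only for falcon checkpoints without the new decoder architecture, so gpt_bigcode models with multi_query set fell through and were treated as having one KV head per attention head. The new code first decides whether the model is a new-decoder-architecture falcon, and otherwise honors multi_query for any model type. Below is a minimal sketch of the two predicates, not vLLM code: the SimpleNamespace stand-in for hf_config, the total_heads fallback, and the 48-head StarCoder-like numbers are illustrative assumptions (the real method goes on to consult n_head_kv and other config fields).

from types import SimpleNamespace

def num_kv_heads_old(cfg, total_heads):
    # Pre-fix logic: returns 1 only for falcon checkpoints without the
    # new decoder architecture, so bigcode's multi_query flag is ignored.
    if (getattr(cfg, "multi_query", False) and
            (cfg.model_type == "falcon" and
             not getattr(cfg, "new_decoder_architecture", False))):
        return 1
    return total_heads  # simplified fallback for this sketch

def num_kv_heads_new(cfg, total_heads):
    # Post-fix logic: any multi-query model gets 1 KV head, unless it is
    # a new-decoder-architecture falcon (which uses n_head_kv instead).
    new_decoder_arch_falcon = (
        cfg.model_type == "falcon"
        and getattr(cfg, "new_decoder_architecture", False))
    if not new_decoder_arch_falcon and getattr(cfg, "multi_query", False):
        return 1
    return total_heads  # simplified fallback for this sketch

# Illustrative StarCoder-like config: 48 attention heads, multi-query on.
bigcode = SimpleNamespace(model_type="gpt_bigcode", multi_query=True)
print(num_kv_heads_old(bigcode, 48))  # 48 -- the bug reported in #676
print(num_kv_heads_new(bigcode, 48))  # 1  -- multi-query: one shared KV head

Falcon behavior is unchanged by the commit: an old-architecture falcon with multi_query still gets 1 KV head, and a new-decoder-architecture falcon skips this branch so the method can use n_head_kv, as the in-code comment describes.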