support bitsandbytes quantization with qwen model (#10549)

Signed-off-by: Ubuntu <zixuanzhang@bytedance.com>

support bitsandbytes quantization with qwen model (#10549)
Signed-off-by: Ubuntu <zixuanzhang@bytedance.com>
948c8595 · zixuanzhang226 · GitHub · 97814fbf · 948c8595
Unverified Commit 948c8595 authored Nov 22, 2024 by zixuanzhang226 Committed by GitHub Nov 22, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 0 deletions

vllm/model_executor/models/qwen.py vllm/model_executor/models/qwen.py +12 -0

No files found.
--- a/vllm/model_executor/models/qwen.py
+++ b/vllm/model_executor/models/qwen.py
@@ -1028,6 +1028,18 @@ class QWenLLM(QWenBaseModel):
    embedding_modules = {}
    embedding_padding_modules = []
+    default_bitsandbytes_target_modules = [
+        ".c_attn.",
+        ".c_proj.",
+        ".w1.",
+        ".w2.",
+    ]
+    bitsandbytes_stacked_params_mapping = {
+        # shard_name: (weight_name, index)
+        "w2": ("gate_up_proj", 0),
+        "w1": ("gate_up_proj", 1),
+    }
 class QWenVL(QWenBaseModel, SupportsMultiModal):
    packed_modules_mapping = {