[Model] support bitsandbytes quantization with minicpm3 model (#10682)

Signed-off-by: Ubuntu <zixuanzhang@bytedance.com>

[Model] support bitsandbytes quantization with minicpm3 model (#10682)
Signed-off-by: Ubuntu <zixuanzhang@bytedance.com>
70dc14fb · zixuanzhang226 · GitHub · cb4e1c3f · 70dc14fb
Unverified Commit 70dc14fb authored Nov 27, 2024 by zixuanzhang226 Committed by GitHub Nov 27, 2024
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 0 deletions

vllm/model_executor/models/minicpm3.py vllm/model_executor/models/minicpm3.py +6 -0

No files found.
--- a/vllm/model_executor/models/minicpm3.py
+++ b/vllm/model_executor/models/minicpm3.py
@@ -241,6 +241,12 @@ class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
    # `embedding_modules` and `embedding_padding_modules`
    # are inherited from MiniCPMForCausalLM
+    bitsandbytes_stacked_params_mapping = {
+        # shard_name: (weight_name, index)
+        "gate_proj": ("gate_up_proj", 0),
+        "up_proj": ("gate_up_proj", 1),
+    }
+
    def _init_model(self, *, vllm_config: VllmConfig, prefix: str = ""):
        self.model = MiniCPM3Model(vllm_config=vllm_config,
                                   prefix=maybe_prefix(prefix, "model"))