[Gemma2] add bitsandbytes support for Gemma2 (#8338)

1bf2dd9d · Blueyo0 · GitHub · 5a60699c · 1bf2dd9d
Unverified Commit 1bf2dd9d authored Sep 12, 2024 by Blueyo0 Committed by GitHub Sep 11, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 0 deletions

vllm/model_executor/models/gemma2.py vllm/model_executor/models/gemma2.py +8 -0

No files found.
--- a/vllm/model_executor/models/gemma2.py
+++ b/vllm/model_executor/models/gemma2.py
@@ -312,6 +312,14 @@ class Gemma2ForCausalLM(nn.Module, SupportsLoRA):
    # Gemma does not apply LoRA to the embedding layer.
    embedding_modules = {}
    embedding_padding_modules = []
+    bitsandbytes_stacked_params_mapping = {
+        # shard_name: (weight_name, index)
+        "q_proj": ("qkv_proj", 0),
+        "k_proj": ("qkv_proj", 1),
+        "v_proj": ("qkv_proj", 2),
+        "gate_proj": ("gate_up_proj", 0),
+        "up_proj": ("gate_up_proj", 1),
+    }

    def __init__(
        self,