[Quantization] Add prefix for Command A quantized model (#17017)

6b2427f9 · Chen Xia · GitHub · b07d7416 · 6b2427f9
Unverified Commit 6b2427f9 authored Apr 23, 2025 by Chen Xia Committed by GitHub Apr 23, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 8 additions and 1 deletion

vllm/model_executor/models/commandr.py vllm/model_executor/models/commandr.py +8 -1

No files found.
--- a/vllm/model_executor/models/commandr.py
+++ b/vllm/model_executor/models/commandr.py
@@ -89,6 +89,7 @@ class CohereMLP(nn.Module):
        self,
        config: CohereConfig,
        quant_config: Optional[QuantizationConfig] = None,
+        prefix: str = "",
    ):
        super().__init__()
        self.config = config
@@ -99,12 +100,14 @@ class CohereMLP(nn.Module):
            [self.intermediate_size] * 2,
            bias=False,
            quant_config=quant_config,
+            prefix=f"{prefix}.gate_up_proj",
        )
        self.down_proj = RowParallelLinear(
            self.intermediate_size,
            self.hidden_size,
            bias=False,
            quant_config=quant_config,
+            prefix=f"{prefix}.down_proj",
        )
        self.act_fn = SiluAndMul()

@@ -158,12 +161,14 @@ class CohereAttention(nn.Module):
            self.total_num_kv_heads,
            bias=False,
            quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
        )
        self.o_proj = RowParallelLinear(
            self.total_num_heads * self.head_dim,
            self.hidden_size,
            bias=False,
            quant_config=quant_config,
+            prefix=f"{prefix}.o_proj",
        )
        self.rotary_emb = get_rope(
            self.head_dim,
@@ -244,7 +249,9 @@ class CohereDecoderLayer(nn.Module):
                                         quant_config=quant_config,
                                         prefix=f"{prefix}.self_attn")

-        self.mlp = CohereMLP(config, quant_config=quant_config)
+        self.mlp = CohereMLP(config,
+                             quant_config=quant_config,
+                             prefix=f"{prefix}.mlp")
        self.input_layernorm = LayerNorm(param_shape=(config.hidden_size),
                                         eps=config.layer_norm_eps)