[Model] Ignore rotary embed load for Cohere model (#17319)

97cc8729 · Ekagra Ranjan · GitHub · 44641092 · 97cc8729
Unverified commit 97cc8729, authored Apr 29, 2025 by Ekagra Ranjan; committed by GitHub on Apr 29, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 0 deletions

vllm/model_executor/models/commandr.py +4 -0

No files found.
--- a/vllm/model_executor/models/commandr.py
+++ b/vllm/model_executor/models/commandr.py
@@ -418,6 +418,10 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
        loaded_params: Set[str] = set()
        for name, loaded_weight in weights:

+            # Skip loading rotary embeddings since vLLM has its own
+            if "rotary_emb.inv_freq" in name:
+                continue
+
            if (self.quant_config is not None and
                (scale_name := self.quant_config.get_cache_scale(name))):
                # Loading kv cache quantization scales