Unverified commit 97cc8729, authored by Ekagra Ranjan and committed by GitHub
Browse files

[Model] Ignore rotary embed load for Cohere model (#17319)

parent 44641092
......@@ -418,6 +418,10 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
loaded_params: Set[str] = set()
for name, loaded_weight in weights:
# Skip loading rotary embeddings since vLLM has its own
if "rotary_emb.inv_freq" in name:
continue
if (self.quant_config is not None and
(scale_name := self.quant_config.get_cache_scale(name))):
# Loading kv cache quantization scales
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment