Unverified Commit 24cde76a authored by Woosuk Kwon, committed by GitHub

[Minor] Add comment on skipping rope caches (#2004)

parent 1aa13615
...
@@ -322,9 +322,10 @@ class LlamaForCausalLM(nn.Module):
                 model_name_or_path, cache_dir, load_format, revision):
             if "rotary_emb.inv_freq" in name:
                 continue
-            if "rotary_emb.cos_cached" in name:
-                continue
-            if "rotary_emb.sin_cached" in name:
+            if ("rotary_emb.cos_cached" in name
+                    or "rotary_emb.sin_cached" in name):
+                # Models trained using ColossalAI may include these tensors in
+                # the checkpoint. Skip them.
                 continue
             for (param_name, weight_name, shard_id) in stacked_params_mapping:
                 if weight_name not in name:
...
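For context, here is a minimal, self-contained sketch of the pattern this change touches; it is not vLLM's actual loader. The rope cache tensors ("rotary_emb.inv_freq", "rotary_emb.cos_cached", "rotary_emb.sin_cached") are derived buffers recomputed when the model is constructed, so checkpoints that happen to serialize them (e.g. ones produced by ColossalAI) must have those entries skipped during weight loading. The function and variable names below are illustrative, not part of the vLLM API.

import torch.nn as nn

# Substrings of checkpoint tensor names that refer to derived rope buffers,
# not learnable weights (illustrative constant, not from vLLM).
_SKIPPED_SUBSTRINGS = (
    "rotary_emb.inv_freq",
    "rotary_emb.cos_cached",
    "rotary_emb.sin_cached",
)

def load_filtered_weights(model: nn.Module, state_dict: dict) -> None:
    """Copy checkpoint tensors into the model, ignoring rope cache buffers."""
    params = dict(model.named_parameters())
    for name, tensor in state_dict.items():
        if any(key in name for key in _SKIPPED_SUBSTRINGS):
            # These buffers are recomputed at model construction time,
            # so stale copies in the checkpoint are simply skipped.
            continue
        if name in params:
            params[name].data.copy_(tensor)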