Unverified commit fd8a0b29, authored by Xinyuan Tong, committed by GitHub
Browse files

fix: correct scale parameter remapping logic in Llama4ForConditionalGeneration (#11282)

parent afc35ccc
...@@ -710,7 +710,7 @@ class Llama4ForConditionalGeneration(nn.Module): ...@@ -710,7 +710,7 @@ class Llama4ForConditionalGeneration(nn.Module):
"""Handle scale parameter remapping. Returns True if handled.""" """Handle scale parameter remapping. Returns True if handled."""
if "scale" in name and "expert" not in name: if "scale" in name and "expert" not in name:
remapped_name = maybe_remap_kv_scale_name(name, params_dict) remapped_name = maybe_remap_kv_scale_name(name, params_dict)
return remapped_name is not None and remapped_name != name return remapped_name != name
return False return False
def _handle_stacked_params( def _handle_stacked_params(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment