Unverified Commit 21decb77 authored by Suraj Patil, committed by GitHub

handle torch_dtype in low cpu mem usage (#16580)

parent 8bf6d28c
@@ -2165,7 +2165,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMixin):
         for k in loaded_state_dict_keys:
             submodule, param_name = find_submodule_and_param_name(model, k)
             if submodule is not None:
-                new_val = state_dict[k]
+                param_dtype = getattr(submodule, param_name).dtype
+                new_val = state_dict[k].to(param_dtype)
                 if isinstance(getattr(submodule, param_name), torch.nn.Parameter):
                     new_val = torch.nn.Parameter(new_val)
                 setattr(submodule, param_name, new_val)
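Below is a minimal, self-contained sketch of the same idea, outside the library's actual loading path. `TinyModel`, the float32 checkpoint, and the simplified `find_submodule_and_param_name` helper are illustrative assumptions, but the cast-before-assign loop mirrors the change above: without the `.to(param_dtype)` cast, a model instantiated in half precision would silently end up with float32 parameters after loading.

# Illustrative sketch only, not the library's loading code.
import torch
import torch.nn as nn


class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 4)


def find_submodule_and_param_name(model, key):
    # Split a flat state-dict key like "linear.weight" into (submodule, "weight").
    if "." in key:
        module_path, param_name = key.rsplit(".", 1)
        return model.get_submodule(module_path), param_name
    return model, key


# Model instantiated in half precision, checkpoint tensors stored in float32.
model = TinyModel().to(torch.float16)
state_dict = {k: v.float() for k, v in TinyModel().state_dict().items()}

for k in state_dict:
    submodule, param_name = find_submodule_and_param_name(model, k)
    if submodule is not None:
        # Cast the loaded tensor to the dtype of the parameter it replaces,
        # mirroring the fix in the diff above.
        param_dtype = getattr(submodule, param_name).dtype
        new_val = state_dict[k].to(param_dtype)
        if isinstance(getattr(submodule, param_name), torch.nn.Parameter):
            new_val = torch.nn.Parameter(new_val)
        setattr(submodule, param_name, new_val)

assert model.linear.weight.dtype == torch.float16

Casting to the dtype of the parameter being replaced, rather than to a single global dtype, preserves whatever per-parameter precision the model was constructed with.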