Fix dtype in randomly initialized head (#19690)

344e2664 · Sylvain Gugger · GitHub · 07f66902 · 344e2664
Unverified Commit 344e2664 authored Oct 17, 2022 by Sylvain Gugger Committed by GitHub Oct 17, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

src/transformers/modeling_utils.py src/transformers/modeling_utils.py +2 -2

No files found.
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -2446,9 +2446,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
                param = model_state_dict[key]
                if param.device == torch.device("meta"):
                    if not load_in_8bit:
-                        set_module_tensor_to_device(model, key, "cpu", torch.empty(*param.size()))
+                        set_module_tensor_to_device(model, key, "cpu", torch.empty(*param.size(), dtype=dtype))
                    else:
-                        set_module_8bit_tensor_to_device(model, key, "cpu", torch.empty(*param.size()))
+                        set_module_8bit_tensor_to_device(model, key, "cpu", torch.empty(*param.size(), dtype=dtype))

        # retrieve unintialized modules and initialize before maybe overriding that with the pretrained weights.
        if _fast_init: