Unverified Commit d8269eb4 authored by Sanchit Gandhi, committed by GitHub

[Flax `.from_pretrained`] Raise a warning if model weights are not in float32 (#16762)

* [Flax] Raise a warning if model weights are not in float32

* apply suggestions and a few small changes

* reorder wording for better readability
parent 195fbbb6
@@ -657,6 +657,29 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin):
f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference."
)
# dictionary of key: dtypes for the model params
param_dtypes = jax.tree_map(lambda x: x.dtype, state)
# extract keys of parameters not in jnp.float32
fp16_params = [k for k in param_dtypes if param_dtypes[k] == jnp.float16]
bf16_params = [k for k in param_dtypes if param_dtypes[k] == jnp.bfloat16]
# raise a warning if any of the parameters are not in jnp.float32
if len(fp16_params) > 0:
logger.warning(
f"Some of the weights of {model.__class__.__name__} were initialized in float16 precision from "
f"the model checkpoint at {pretrained_model_name_or_path}:\n{fp16_params}\n"
"You should probably UPCAST the model weights to float32 if this was not intended. "
"See [`~FlaxPreTrainedModel.to_fp32`] for further information on how to do this."
)
if len(bf16_params) > 0:
logger.warning(
f"Some of the weights of {model.__class__.__name__} were initialized in bfloat16 precision from "
f"the model checkpoint at {pretrained_model_name_or_path}:\n{bf16_params}\n"
"You should probably UPCAST the model weights to float32 if this was not intended. "
"See [`~FlaxPreTrainedModel.to_fp32`] for further information on how to do this."
)
# set correct parameters
model.params = unflatten_dict(state)
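
For context, a minimal sketch of how a user could act on the new warning by upcasting the loaded parameters back to float32. The model class and checkpoint name below are illustrative only; `to_fp32` is the method referenced in the warning message, and the `jax.tree_map` variant is an equivalent manual upcast.

```python
import jax
import jax.numpy as jnp
from transformers import FlaxBertModel

# Illustrative model/checkpoint; any Flax checkpoint whose weights were saved in
# float16 or bfloat16 would trigger the warning added in this commit.
model = FlaxBertModel.from_pretrained("bert-base-uncased")

# `to_fp32` casts every parameter leaf to float32 and returns the new param tree.
model.params = model.to_fp32(model.params)

# Equivalent manual upcast with jax.tree_map, for reference:
model.params = jax.tree_map(lambda p: p.astype(jnp.float32), model.params)
```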