Unverified commit cf9e7cb0, authored by Joao Gante, committed by GitHub

TF: embeddings out of bounds check factored into function (#23427)

parent 45e3d649
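Every hunk below makes the same substitution: the inline `tf.debugging.assert_less` guard is replaced by one shared helper, `check_embeddings_within_bounds`, imported from `...tf_utils`. The helper's definition is not part of this diff; the following is a minimal sketch of what it presumably contains, reconstructed from the inline code it replaces (the body and exact message wording are assumptions):

import tensorflow as tf


def check_embeddings_within_bounds(tensor: tf.Tensor, embed_dim: int) -> None:
    """Raise if any index in `tensor` falls outside an embedding matrix with `embed_dim` rows.

    tf.gather, on which TF embedding layers are based, won't check positive out-of-bound
    indices on GPU, returning zeros instead -- the dangerous silent behavior this guards against.
    """
    tf.debugging.assert_less(
        tensor,
        tf.cast(embed_dim, dtype=tensor.dtype),
        message=(
            "input ids must be smaller than the embedding layer's input dimension (got"
            f" {tf.math.reduce_max(tensor)} >= {embed_dim})"
        ),
    )

Each call site passes the tensor to validate and the embedding matrix's first dimension, so the guard stays in effect everywhere while its logic lives in a single place.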
@@ -44,7 +44,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     ModelOutput,
@@ -188,16 +188,7 @@ class TFAlbertEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -40,7 +40,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ContextManagers,
     add_code_sample_docstrings,
@@ -763,16 +763,7 @@ class TFBartEncoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         embed_pos = self.embed_positions(input_shape)
@@ -965,16 +956,7 @@ class TFBartDecoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         hidden_states = inputs_embeds
...
@@ -49,7 +49,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     MULTIPLE_CHOICE_DUMMY_INPUTS,
@@ -198,16 +198,7 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -38,7 +38,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ContextManagers,
     add_code_sample_docstrings,
@@ -746,16 +746,7 @@ class TFBlenderbotEncoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         embed_pos = self.embed_positions(input_shape)
@@ -956,16 +947,7 @@ class TFBlenderbotDecoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         hidden_states = inputs_embeds
...
@@ -37,7 +37,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ContextManagers,
     add_code_sample_docstrings,
@@ -752,16 +752,7 @@ class TFBlenderbotSmallEncoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         embed_pos = self.embed_positions(input_shape)
@@ -961,16 +952,7 @@ class TFBlenderbotSmallDecoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
...
@@ -29,7 +29,7 @@ from ...modeling_tf_utils import (
     shape_list,
     unpack_inputs,
 )
-from ...tf_utils import stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, stable_softmax
 from ...utils import (
     ModelOutput,
     add_start_docstrings,
@@ -316,16 +316,7 @@ class TFBlipTextEmbeddings(tf.keras.layers.Layer):
             raise ValueError("You have to specify either input_ids or inputs_embeds")

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -32,7 +32,7 @@ from ...modeling_tf_utils import (
     shape_list,
     unpack_inputs,
 )
-from ...tf_utils import invert_attention_mask, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, invert_attention_mask, stable_softmax
 from ...utils import add_start_docstrings_to_model_forward, logging
 from .configuration_blip import BlipTextConfig

@@ -112,16 +112,7 @@ class TFBlipTextEmbeddings(tf.keras.layers.Layer):
             position_ids = self.position_ids[:, past_key_values_length : seq_length + past_key_values_length]

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = self.word_embeddings(input_ids)

         embeddings = inputs_embeds
...
@@ -46,7 +46,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     MULTIPLE_CHOICE_DUMMY_INPUTS,
@@ -239,16 +239,7 @@ class TFCamembertEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -34,7 +34,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ModelOutput,
     add_start_docstrings,
@@ -238,16 +238,7 @@ class TFCLIPTextEmbeddings(tf.keras.layers.Layer):
             raise ValueError("You have to specify either input_ids or inputs_embeds")

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -42,7 +42,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     add_code_sample_docstrings,
@@ -124,16 +124,7 @@ class TFConvBertEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -32,7 +32,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
 from .configuration_ctrl import CTRLConfig

@@ -336,16 +336,7 @@ class TFCTRLMainLayer(tf.keras.layers.Layer):
         position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]])

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.w.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.w.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.w.vocab_size)
             inputs_embeds = self.w(input_ids, mode="embedding")
         seq_len = input_shape[-1]
         mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
...
@@ -39,7 +39,7 @@ from ...modeling_tf_utils import (
     get_initializer,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
 from .configuration_deberta import DebertaConfig

@@ -778,16 +778,7 @@ class TFDebertaEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -38,7 +38,7 @@ from ...modeling_tf_utils import (
     get_initializer,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
 from .configuration_deberta_v2 import DebertaV2Config

@@ -867,16 +867,7 @@ class TFDebertaV2Embeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -43,7 +43,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     add_code_sample_docstrings,
@@ -109,16 +109,7 @@ class TFEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -44,7 +44,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     MULTIPLE_CHOICE_DUMMY_INPUTS,
@@ -528,16 +528,7 @@ class TFElectraEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -40,7 +40,7 @@ from ...modeling_tf_utils import (
     shape_list,
     unpack_inputs,
 )
-from ...tf_utils import stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, stable_softmax
 from ...utils import logging
 from .configuration_esm import EsmConfig

@@ -214,16 +214,7 @@ class TFEsmEmbeddings(Layer):
                 position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = self.word_embeddings(input_ids)

         # Note that if we want to support ESM-1 (not 1b!) in future then we need to support an
...
@@ -46,7 +46,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     ModelOutput,
@@ -578,16 +578,7 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer):

         # embeddings
         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.embeddings.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.embeddings.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.embeddings.vocab_size)
             inputs_embeds = self.embeddings(input_ids)

         tensor = inputs_embeds + tf.gather(self.position_embeddings, position_ids)
...
@@ -42,7 +42,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     ModelOutput,
@@ -109,16 +109,7 @@ class TFFunnelEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is not None and inputs_embeds is not None)

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(self.weight, input_ids)

         final_embeddings = self.LayerNorm(inputs=inputs_embeds)
...
@@ -39,7 +39,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     ModelOutput,
@@ -437,16 +437,7 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
             position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]])

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = self.wte(input_ids, mode="embedding")

         position_embeds = tf.gather(self.wpe, position_ids)
...
@@ -43,7 +43,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import logging
 from .configuration_gptj import GPTJConfig

@@ -437,16 +437,7 @@ class TFGPTJMainLayer(tf.keras.layers.Layer):
             position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]])

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.wte.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.wte.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.wte.vocab_size)
             inputs_embeds = self.wte(input_ids, mode="embedding")

         if token_type_ids is not None:
...
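For context on why the check exists at all (restating the comment deleted in every file above): on GPU, tf.gather does not validate positive out-of-range indices and silently returns zeros for them. A small illustration of that failure mode, assuming a GPU device is available (on CPU the same call raises InvalidArgumentError):

import tensorflow as tf

params = tf.constant([[1.0, 1.0], [2.0, 2.0]])  # a 2-row "embedding matrix"
indices = tf.constant([0, 5])  # index 5 is out of bounds for 2 rows

# On CPU this raises InvalidArgumentError; on GPU it silently yields a zero
# row for index 5 -- the behavior check_embeddings_within_bounds turns into
# an explicit error at every call site in this commit.
out = tf.gather(params, indices)
print(out)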