Unverified commit cf9e7cb0, authored by Joao Gante, committed by GitHub

TF: embeddings out of bounds check factored into function (#23427)

parent 45e3d649
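Every hunk below makes the same substitution: the inline `tf.debugging.assert_less` guard is replaced by one shared helper, `check_embeddings_within_bounds`, imported from `...tf_utils`. The helper's definition is not part of this diff; the following is a minimal sketch of what it presumably contains, reconstructed from the inline code it replaces (the body and exact message wording are assumptions):

import tensorflow as tf


def check_embeddings_within_bounds(tensor: tf.Tensor, embed_dim: int) -> None:
    """Raise if any index in `tensor` falls outside an embedding matrix with `embed_dim` rows.

    tf.gather, on which TF embedding layers are based, won't check positive out-of-bound
    indices on GPU, returning zeros instead -- the dangerous silent behavior this guards against.
    """
    tf.debugging.assert_less(
        tensor,
        tf.cast(embed_dim, dtype=tensor.dtype),
        message=(
            "input ids must be smaller than the embedding layer's input dimension (got"
            f" {tf.math.reduce_max(tensor)} >= {embed_dim})"
        ),
    )

Each call site passes the tensor to validate and the embedding matrix's first dimension, so the guard stays in effect everywhere while its logic lives in a single place.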
@@ -44,7 +44,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     ModelOutput,
@@ -188,16 +188,7 @@ class TFAlbertEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -40,7 +40,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ContextManagers,
     add_code_sample_docstrings,
@@ -763,16 +763,7 @@ class TFBartEncoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         embed_pos = self.embed_positions(input_shape)
@@ -965,16 +956,7 @@ class TFBartDecoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         hidden_states = inputs_embeds
...
@@ -49,7 +49,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     MULTIPLE_CHOICE_DUMMY_INPUTS,
@@ -198,16 +198,7 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -38,7 +38,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ContextManagers,
     add_code_sample_docstrings,
@@ -746,16 +746,7 @@ class TFBlenderbotEncoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         embed_pos = self.embed_positions(input_shape)
@@ -956,16 +947,7 @@ class TFBlenderbotDecoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         hidden_states = inputs_embeds
...
@@ -37,7 +37,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ContextManagers,
     add_code_sample_docstrings,
@@ -752,16 +752,7 @@ class TFBlenderbotSmallEncoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         embed_pos = self.embed_positions(input_shape)
@@ -961,16 +952,7 @@ class TFBlenderbotSmallDecoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
...
@@ -29,7 +29,7 @@ from ...modeling_tf_utils import (
     shape_list,
     unpack_inputs,
 )
-from ...tf_utils import stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, stable_softmax
 from ...utils import (
     ModelOutput,
     add_start_docstrings,
@@ -316,16 +316,7 @@ class TFBlipTextEmbeddings(tf.keras.layers.Layer):
             raise ValueError("You have to specify either input_ids or inputs_embeds")

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -32,7 +32,7 @@ from ...modeling_tf_utils import (
     shape_list,
     unpack_inputs,
 )
-from ...tf_utils import invert_attention_mask, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, invert_attention_mask, stable_softmax
 from ...utils import add_start_docstrings_to_model_forward, logging
 from .configuration_blip import BlipTextConfig

@@ -112,16 +112,7 @@ class TFBlipTextEmbeddings(tf.keras.layers.Layer):
             position_ids = self.position_ids[:, past_key_values_length : seq_length + past_key_values_length]

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = self.word_embeddings(input_ids)

         embeddings = inputs_embeds
...
@@ -46,7 +46,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     MULTIPLE_CHOICE_DUMMY_INPUTS,
@@ -239,16 +239,7 @@ class TFCamembertEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -34,7 +34,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ModelOutput,
     add_start_docstrings,
@@ -238,16 +238,7 @@ class TFCLIPTextEmbeddings(tf.keras.layers.Layer):
             raise ValueError("You have to specify either input_ids or inputs_embeds")

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -42,7 +42,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     add_code_sample_docstrings,
@@ -124,16 +124,7 @@ class TFConvBertEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -32,7 +32,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
 from .configuration_ctrl import CTRLConfig

@@ -336,16 +336,7 @@ class TFCTRLMainLayer(tf.keras.layers.Layer):
         position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]])

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.w.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.w.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.w.vocab_size)
             inputs_embeds = self.w(input_ids, mode="embedding")
         seq_len = input_shape[-1]
         mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
...
@@ -39,7 +39,7 @@ from ...modeling_tf_utils import (
     get_initializer,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
 from .configuration_deberta import DebertaConfig

@@ -778,16 +778,7 @@ class TFDebertaEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -38,7 +38,7 @@ from ...modeling_tf_utils import (
     get_initializer,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
 from .configuration_deberta_v2 import DebertaV2Config

@@ -867,16 +867,7 @@ class TFDebertaV2Embeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -43,7 +43,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     add_code_sample_docstrings,
@@ -109,16 +109,7 @@ class TFEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -44,7 +44,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     MULTIPLE_CHOICE_DUMMY_INPUTS,
@@ -528,16 +528,7 @@ class TFElectraEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -40,7 +40,7 @@ from ...modeling_tf_utils import (
     shape_list,
     unpack_inputs,
 )
-from ...tf_utils import stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, stable_softmax
 from ...utils import logging
 from .configuration_esm import EsmConfig

@@ -214,16 +214,7 @@ class TFEsmEmbeddings(Layer):
                 position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = self.word_embeddings(input_ids)

         # Note that if we want to support ESM-1 (not 1b!) in future then we need to support an
...
@@ -46,7 +46,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     ModelOutput,
@@ -578,16 +578,7 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer):

         # embeddings
         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.embeddings.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.embeddings.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.embeddings.vocab_size)
             inputs_embeds = self.embeddings(input_ids)

         tensor = inputs_embeds + tf.gather(self.position_embeddings, position_ids)
...
@@ -42,7 +42,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     ModelOutput,
@@ -109,16 +109,7 @@ class TFFunnelEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is not None and inputs_embeds is not None)

         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(self.weight, input_ids)

         final_embeddings = self.LayerNorm(inputs=inputs_embeds)
...
@@ -39,7 +39,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     ModelOutput,
@@ -437,16 +437,7 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
             position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]])

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = self.wte(input_ids, mode="embedding")

         position_embeds = tf.gather(self.wpe, position_ids)
...
@@ -43,7 +43,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import logging
 from .configuration_gptj import GPTJConfig

@@ -437,16 +437,7 @@ class TFGPTJMainLayer(tf.keras.layers.Layer):
             position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]])

         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.wte.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.wte.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.wte.vocab_size)
             inputs_embeds = self.wte(input_ids, mode="embedding")

         if token_type_ids is not None:
...
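For context on why the check exists at all (restating the comment deleted in every file above): on GPU, tf.gather does not validate positive out-of-range indices and silently returns zeros for them. A small illustration of that failure mode, assuming a GPU device is available (on CPU the same call raises InvalidArgumentError):

import tensorflow as tf

params = tf.constant([[1.0, 1.0], [2.0, 2.0]])  # a 2-row "embedding matrix"
indices = tf.constant([0, 5])  # index 5 is out of bounds for 2 rows

# On CPU this raises InvalidArgumentError; on GPU it silently yields a zero
# row for index 5 -- the behavior check_embeddings_within_bounds turns into
# an explicit error at every call site in this commit.
out = tf.gather(params, indices)
print(out)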