Unverified commit 1b5ab39c, authored by Joao Gante, committed by GitHub

TF: check embeddings range (#19102)

parent cf6308ef
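
The diff below inserts the same runtime guard ahead of every TF embedding lookup. As a minimal sketch of the failure mode being addressed (toy sizes, not taken from the commit itself): on GPU, tf.gather silently returns zeros for positive out-of-bound indices, while the added assertion fails loudly on every device.

import tensorflow as tf

vocab_size = 10
weight = tf.random.normal((vocab_size, 4))  # toy embedding matrix
input_ids = tf.constant([[1, 2, 99]])       # 99 is >= vocab_size, i.e. out of bounds

# The guard added throughout this commit: raises InvalidArgumentError in
# eager mode (and adds an assert op in graph mode) instead of failing silently.
try:
    tf.debugging.assert_less(
        input_ids,
        tf.cast(vocab_size, dtype=input_ids.dtype),
        message="input_ids must be smaller than the embedding layer's input dimension",
    )
except tf.errors.InvalidArgumentError as err:
    print(err.message)

# Without the guard, on GPU the lookup below would return a zero vector for
# index 99; on CPU the same call raises instead.
# inputs_embeds = tf.gather(params=weight, indices=input_ids)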
@@ -3054,6 +3054,7 @@ class TFWrappedEmbeddings:
     def __init__(self, layer, abs_scope_name=None):
         self._layer = layer
         self._abs_scope_name = abs_scope_name
+        self.vocab_size = self._layer.vocab_size

     def call(self, inputs, mode="embedding"):
         if self._abs_scope_name is None:
...
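Forwarding `vocab_size` through the wrapper lets main layers that embed via a `TFWrappedEmbeddings` instance run the same bounds check, e.g. `self.w.vocab_size` in the TFCTRLMainLayer hunk and `self.wte.vocab_size` in the TFGPTJMainLayer hunk below.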
@@ -190,6 +190,16 @@ class TFAlbertEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -200,6 +200,16 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -726,6 +726,16 @@ class TFBlenderbotEncoder(tf.keras.layers.Layer):
             raise ValueError("You have to specify either input_ids or inputs_embeds")

         if inputs_embeds is None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})"
+                ),
+            )
             inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         embed_pos = self.embed_positions(input_shape)
@@ -923,6 +933,16 @@ class TFBlenderbotDecoder(tf.keras.layers.Layer):
         positions = self.embed_positions(input_shape, position_ids=position_ids)

         if inputs_embeds is None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})"
+                ),
+            )
             inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         hidden_states = inputs_embeds
...
@@ -731,6 +731,16 @@ class TFBlenderbotSmallEncoder(tf.keras.layers.Layer):
             raise ValueError("You have to specify either input_ids or inputs_embeds")

         if inputs_embeds is None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})"
+                ),
+            )
             inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         embed_pos = self.embed_positions(input_shape)
@@ -921,6 +931,16 @@ class TFBlenderbotSmallDecoder(tf.keras.layers.Layer):
         past_key_values_length = shape_list(past_key_values[0][0])[2] if past_key_values is not None else 0

         if inputs_embeds is None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})"
+                ),
+            )
             inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale

         # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
...
@@ -241,6 +241,16 @@ class TFCLIPTextEmbeddings(tf.keras.layers.Layer):
             raise ValueError("You have to specify either input_ids or inputs_embeds")

         if inputs_embeds is None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -126,6 +126,16 @@ class TFConvBertEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -338,6 +338,16 @@ class TFCTRLMainLayer(tf.keras.layers.Layer):
         position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]])

         if inputs_embeds is None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.w.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.w.vocab_size})"
+                ),
+            )
             inputs_embeds = self.w(input_ids, mode="embedding")
         seq_len = input_shape[-1]
         mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
...
@@ -783,6 +783,16 @@ class TFDebertaEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -872,6 +872,16 @@ class TFDebertaV2Embeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -110,6 +110,16 @@ class TFEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)

         if input_ids is not None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -530,6 +530,16 @@ class TFElectraEmbeddings(tf.keras.layers.Layer):
             raise ValueError("Need to provide either `input_ids` or `input_embeds`.")

         if input_ids is not None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -573,6 +573,16 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer):
         # embeddings

         if inputs_embeds is None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.embeddings.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.embeddings.vocab_size})"
+                ),
+            )
             inputs_embeds = self.embeddings(input_ids)

         tensor = inputs_embeds + tf.gather(self.position_embeddings, position_ids)
...
@@ -110,6 +110,16 @@ class TFFunnelEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is not None and inputs_embeds is not None)

         if input_ids is not None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = tf.gather(self.weight, input_ids)

         final_embeddings = self.LayerNorm(inputs=inputs_embeds)
...
@@ -442,6 +442,16 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
         position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]])

         if inputs_embeds is None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = self.wte(input_ids, mode="embedding")

         position_embeds = tf.gather(self.wpe, position_ids)
...
@@ -440,6 +440,16 @@ class TFGPTJMainLayer(tf.keras.layers.Layer):
         position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]])

         if inputs_embeds is None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.wte.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.wte.vocab_size})"
+                ),
+            )
             inputs_embeds = self.wte(input_ids, mode="embedding")

         if token_type_ids is not None:
...
@@ -141,6 +141,16 @@ class TFLayoutLMEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)

         if input_ids is not None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...
@@ -240,6 +240,16 @@ class TFLayoutLMv3TextEmbeddings(tf.keras.layers.Layer):
             token_type_ids = tf.zeros(input_shape, dtype=position_ids.dtype)

         if inputs_embeds is None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.word_embeddings.input_dim, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.word_embeddings.input_dim})"
+                ),
+            )
             inputs_embeds = self.word_embeddings(input_ids)

         token_type_embeddings = self.token_type_embeddings(token_type_ids)
...
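The LayoutLMv3 hunk is the only one in this diff that reads the bound from `input_dim` rather than a `vocab_size` attribute, which suggests its word embeddings are a stock `tf.keras.layers.Embedding`. A small sketch of that attribute (toy sizes, not from the commit):

import tensorflow as tf

# A standard Keras embedding layer exposes its vocabulary size as `input_dim`.
word_embeddings = tf.keras.layers.Embedding(input_dim=10, output_dim=4)
print(word_embeddings.input_dim)  # 10 -> valid ids are 0..9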
@@ -1737,6 +1737,16 @@ class TFLEDEncoder(tf.keras.layers.Layer):
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
             input_shape = shape_list(input_ids)
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})"
+                ),
+            )
             inputs_embeds = self.embed_tokens(input_ids)
         elif inputs_embeds is not None:
             input_shape = shape_list(inputs_embeds)[:-1]
@@ -2012,6 +2022,16 @@ class TFLEDDecoder(tf.keras.layers.Layer):
         positions = self.embed_positions(input_shape, past_key_values_length)

         if inputs_embeds is None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})"
+                ),
+            )
             inputs_embeds = self.embed_tokens(input_ids)

         hidden_states = inputs_embeds
...
@@ -540,6 +540,16 @@ class TFLongformerEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)

         if input_ids is not None:
+            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
+            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
+            tf.debugging.assert_less(
+                input_ids,
+                tf.cast(self.vocab_size, dtype=input_ids.dtype),
+                message=(
+                    "input_ids must be smaller than the embedding layer's input dimension (got"
+                    f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})"
+                ),
+            )
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

         input_shape = shape_list(inputs_embeds)[:-1]
...