chenpangpang / transformers · Commit cf9e7cb0

Unverified commit cf9e7cb0, authored May 17, 2023 by Joao Gante, committed by GitHub on May 17, 2023.
TF: embeddings out of bounds check factored into function (#23427)
parent 45e3d649
Changes: 47 files
Showing 20 changed files with 45 additions and 269 deletions (+45 / −269) on this page.
src/transformers/models/groupvit/modeling_tf_groupvit.py (+2, −11)
src/transformers/models/layoutlm/modeling_tf_layoutlm.py (+2, −11)
src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py (+2, −10)
src/transformers/models/led/modeling_tf_led.py (+3, −21)
src/transformers/models/longformer/modeling_tf_longformer.py (+2, −11)
src/transformers/models/lxmert/modeling_tf_lxmert.py (+2, −11)
src/transformers/models/marian/modeling_tf_marian.py (+3, −21)
src/transformers/models/mbart/modeling_tf_mbart.py (+3, −21)
src/transformers/models/mobilebert/modeling_tf_mobilebert.py (+2, −11)
src/transformers/models/mpnet/modeling_tf_mpnet.py (+2, −11)
src/transformers/models/openai/modeling_tf_openai.py (+3, −21)
src/transformers/models/opt/modeling_tf_opt.py (+2, −11)
src/transformers/models/pegasus/modeling_tf_pegasus.py (+3, −21)
src/transformers/models/rembert/modeling_tf_rembert.py (+2, −11)
src/transformers/models/roberta/modeling_tf_roberta.py (+2, −11)
src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py (+2, −11)
src/transformers/models/roformer/modeling_tf_roformer.py (+2, −11)
src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py (+2, −11)
src/transformers/models/t5/modeling_tf_t5.py (+2, −11)
src/transformers/models/tapas/modeling_tf_tapas.py (+2, −11)
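Every diff below makes the same change: the inline tf.debugging.assert_less guard (and the comment explaining it) is deleted from each model file and replaced with a call to the new check_embeddings_within_bounds helper imported from ...tf_utils. The helper itself is added in src/transformers/tf_utils.py, which is not among the 20 files shown on this page, so the following is only a plausible sketch reconstructed from the inline code being removed and from the call sites below; the exact signature and error message may differ.

import tensorflow as tf


def check_embeddings_within_bounds(tensor: tf.Tensor, embed_dim: int, tensor_name: str = "input_ids") -> None:
    # Sketch only: the real helper lives in src/transformers/tf_utils.py and may differ in wording.
    # tf.gather, on which TF embedding layers are based, won't check positive out-of-bound indices on
    # GPU, returning zeros instead; this asserts that every index is below the embedding table size.
    tf.debugging.assert_less(
        tensor,
        tf.cast(embed_dim, dtype=tensor.dtype),
        message=(
            f"{tensor_name} must be smaller than the embedding layer's input dimension (got"
            f" {tf.math.reduce_max(tensor)} >= {embed_dim})"
        ),
    )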
src/transformers/models/groupvit/modeling_tf_groupvit.py

@@ -33,7 +33,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ModelOutput,
     add_start_docstrings,
@@ -572,16 +572,7 @@ class TFGroupViTTextEmbeddings(tf.keras.layers.Layer):
             raise ValueError("You have to specify either input_ids or inputs_embeds")
 
         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
 
         input_shape = shape_list(inputs_embeds)[:-1]
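The comment deleted in every hunk explains why the guard exists: on GPU, tf.gather silently returns zeros for positive out-of-bound indices instead of raising. A minimal standalone demonstration, assuming the sketched helper above, of how the check surfaces a bad token id:

import tensorflow as tf

vocab_size = 10
input_ids = tf.constant([[1, 2, 42]])  # 42 is out of range for a 10-row embedding table

# On GPU, tf.gather would silently return a zero vector for index 42; the guard raises instead.
try:
    check_embeddings_within_bounds(input_ids, vocab_size)  # sketched helper defined above
except tf.errors.InvalidArgumentError as err:
    print("caught out-of-bounds input_ids:", err.message)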
src/transformers/models/layoutlm/modeling_tf_layoutlm.py

@@ -41,7 +41,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
 from .configuration_layoutlm import LayoutLMConfig
@@ -140,16 +140,7 @@ class TFLayoutLMEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)
 
         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
 
         input_shape = shape_list(inputs_embeds)[:-1]
src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py

@@ -36,6 +36,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
+from ...tf_utils import check_embeddings_within_bounds
 from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, replace_return_docstrings
 from .configuration_layoutlmv3 import LayoutLMv3Config
@@ -240,16 +241,7 @@ class TFLayoutLMv3TextEmbeddings(tf.keras.layers.Layer):
             token_type_ids = tf.zeros(input_shape, dtype=position_ids.dtype)
 
         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.word_embeddings.input_dim, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.word_embeddings.input_dim})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.word_embeddings.input_dim)
             inputs_embeds = self.word_embeddings(input_ids)
 
         token_type_embeddings = self.token_type_embeddings(token_type_ids)
src/transformers/models/led/modeling_tf_led.py

@@ -33,7 +33,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ContextManagers,
     ModelOutput,
@@ -1746,16 +1746,7 @@ class TFLEDEncoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids)
         elif inputs_embeds is not None:
             input_shape = shape_list(inputs_embeds)[:-1]
@@ -2038,16 +2029,7 @@ class TFLEDDecoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids)
 
         hidden_states = inputs_embeds
src/transformers/models/longformer/modeling_tf_longformer.py

@@ -34,7 +34,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     ModelOutput,
@@ -538,16 +538,7 @@ class TFLongformerEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)
 
         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
 
         input_shape = shape_list(inputs_embeds)[:-1]
src/transformers/models/lxmert/modeling_tf_lxmert.py

@@ -32,7 +32,7 @@ from ...modeling_tf_utils import (
     shape_list,
     unpack_inputs,
 )
-from ...tf_utils import stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, stable_softmax
 from ...utils import (
     ModelOutput,
     add_code_sample_docstrings,
@@ -232,16 +232,7 @@ class TFLxmertEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)
 
         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
 
         input_shape = shape_list(inputs_embeds)[:-1]
src/transformers/models/marian/modeling_tf_marian.py

@@ -37,7 +37,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ContextManagers,
     add_code_sample_docstrings,
@@ -778,16 +778,7 @@ class TFMarianEncoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
 
         embed_pos = self.embed_positions(input_shape)
@@ -990,16 +981,7 @@ class TFMarianDecoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
 
         hidden_states = inputs_embeds
src/transformers/models/mbart/modeling_tf_mbart.py

@@ -37,7 +37,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ContextManagers,
     add_code_sample_docstrings,
@@ -770,16 +770,7 @@ class TFMBartEncoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
 
         embed_pos = self.embed_positions(input_shape)
@@ -989,16 +980,7 @@ class TFMBartDecoder(tf.keras.layers.Layer):
            if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
 
         hidden_states = inputs_embeds
src/transformers/models/mobilebert/modeling_tf_mobilebert.py

@@ -46,7 +46,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     ModelOutput,
@@ -212,16 +212,7 @@ class TFMobileBertEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)
 
         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
 
         input_shape = shape_list(inputs_embeds)[:-1]
src/transformers/models/mpnet/modeling_tf_mpnet.py

@@ -45,7 +45,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     add_code_sample_docstrings,
@@ -144,16 +144,7 @@ class TFMPNetEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)
 
         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
            inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
 
         input_shape = shape_list(inputs_embeds)[:-1]
src/transformers/models/openai/modeling_tf_openai.py

@@ -35,7 +35,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ModelOutput,
     add_code_sample_docstrings,
@@ -295,30 +295,12 @@ class TFOpenAIGPTMainLayer(tf.keras.layers.Layer):
         position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]])
 
         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = self.tokens_embed(input_ids, mode="embedding")
         position_embeds = tf.gather(self.positions_embed, position_ids)
         if token_type_ids is not None:
             token_type_ids = tf.reshape(token_type_ids, [-1, shape_list(token_type_ids)[-1]])
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                token_type_ids,
-                tf.cast(self.config.vocab_size, dtype=token_type_ids.dtype),
-                message=(
-                    "token_type_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(token_type_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(token_type_ids, self.config.vocab_size, "token_type_ids")
             token_type_embeds = self.tokens_embed(token_type_ids, mode="embedding")
         else:
             token_type_embeds = 0
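Note that the OpenAI GPT hunk above is the only place on this page where the guard is also applied to token_type_ids, passing a third argument; this suggests the helper takes an optional tensor name (defaulting to "input_ids", as assumed in the sketch earlier) that only changes which tensor is named in the error message.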
src/transformers/models/opt/modeling_tf_opt.py

@@ -33,7 +33,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
@@ -631,16 +631,7 @@ class TFOPTDecoder(tf.keras.layers.Layer):
         past_key_values_length = shape_list(past_key_values[0][0])[2] if past_key_values is not None else 0
 
         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.embed_tokens.vocab_size)
             inputs_embeds = self.embed_tokens(input_ids)
 
         if attention_mask is None:
src/transformers/models/pegasus/modeling_tf_pegasus.py

@@ -38,7 +38,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ContextManagers,
     add_code_sample_docstrings,
@@ -782,16 +782,7 @@ class TFPegasusEncoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
 
         embed_pos = self.embed_positions(input_shape)
@@ -997,16 +988,7 @@ class TFPegasusDecoder(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
 
         hidden_states = inputs_embeds
src/transformers/models/rembert/modeling_tf_rembert.py

@@ -45,7 +45,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     MULTIPLE_CHOICE_DUMMY_INPUTS,
@@ -122,16 +122,7 @@ class TFRemBertEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)
 
         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
 
         input_shape = shape_list(inputs_embeds)[:-1]
src/transformers/models/roberta/modeling_tf_roberta.py

@@ -46,7 +46,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     MULTIPLE_CHOICE_DUMMY_INPUTS,
@@ -144,16 +144,7 @@ class TFRobertaEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)
 
         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
 
         input_shape = shape_list(inputs_embeds)[:-1]
src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py

@@ -46,7 +46,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     MULTIPLE_CHOICE_DUMMY_INPUTS,
@@ -149,16 +149,7 @@ class TFRobertaPreLayerNormEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)
 
         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
 
         input_shape = shape_list(inputs_embeds)[:-1]
src/transformers/models/roformer/modeling_tf_roformer.py

@@ -46,7 +46,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     add_code_sample_docstrings,
@@ -175,16 +175,7 @@ class TFRoFormerEmbeddings(tf.keras.layers.Layer):
         assert not (input_ids is None and inputs_embeds is None)
 
         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
 
         input_shape = shape_list(inputs_embeds)[:-1]
src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py

@@ -36,7 +36,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
@@ -1030,16 +1030,7 @@ class TFSpeech2TextDecoder(tf.keras.layers.Layer):
         past_key_values_length = shape_list(past_key_values[0][0])[2] if past_key_values is not None else 0
 
         if inputs_embeds is None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.embed_tokens.vocab_size)
             inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
         else:
             inputs_embeds = inputs_embeds
src/transformers/models/t5/modeling_tf_t5.py

@@ -40,7 +40,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     DUMMY_INPUTS,
     DUMMY_MASK,
@@ -686,16 +686,7 @@ class TFT5MainLayer(tf.keras.layers.Layer):
             if hasattr(self.embed_tokens, "load_weight_prefix"):
                 context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/"))
             with ContextManagers(context):
-                # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-                # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-                tf.debugging.assert_less(
-                    input_ids,
-                    tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype),
-                    message=(
-                        "input_ids must be smaller than the embedding layer's input dimension (got"
-                        f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})"
-                    ),
-                )
+                check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim)
                 inputs_embeds = self.embed_tokens(input_ids)
 
         batch_size, seq_length = input_shape
src/transformers/models/tapas/modeling_tf_tapas.py

@@ -38,7 +38,7 @@ from ...modeling_tf_utils import (
     keras_serializable,
     unpack_inputs,
 )
-from ...tf_utils import shape_list, stable_softmax
+from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
 from ...utils import (
     ModelOutput,
     add_start_docstrings,
@@ -231,16 +231,7 @@ class TFTapasEmbeddings(tf.keras.layers.Layer):
             position_ids = tf.math.minimum(self.max_position_embeddings - 1, position - first_position)
 
         if input_ids is not None:
-            # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound
-            # indices on GPU, returning zeros instead. This is a dangerous silent behavior.
-            tf.debugging.assert_less(
-                input_ids,
-                tf.cast(self.config.vocab_size, dtype=input_ids.dtype),
-                message=(
-                    "input_ids must be smaller than the embedding layer's input dimension (got"
-                    f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})"
-                ),
-            )
+            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
             inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
 
         position_embeddings = tf.gather(self.position_embeddings, indices=position_ids)