Unverified commit de4159a3 authored by Matt, committed by GitHub

More TF int dtype fixes (#20384)

* Add a test to ensure int dummy inputs are int64

* Move the test into the existing int64 test and update a lot of existing dummies

* Fix remaining dummies

* Fix remaining dummies

* Test for int64 serving sigs as well

* Update core tests to use tf.int64

* Add better messages to the assertions

* Update all serving sigs to int64

* Fix more sneakily hidden tf.int32s

* Add an optional int32 signature in save_pretrained

* make fixup

* Add Amy's suggestions

* Switch all serving sigs back to tf.int32

* Switch all dummies to tf.int32

* Adjust tests to check for tf.int32 instead of tf.int64

* Fix base dummy_inputs dtype

* Start casting to tf.int32 in input_processing

* Change dtype for unpack_inputs test

* Add proper tf.int32 test

* Make the alternate serving signature int64 (a sketch of this dtype arrangement follows the commit list)
parent 72b19ca6
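The thrust of the commit list above: integer dummy inputs and the default serving signature settle on tf.int32, with an optional tf.int64 signature exported alongside for callers that feed 64-bit ids. A minimal sketch of that arrangement, assuming a generic `model` that accepts a dict of tensors and an illustrative `export_dir` (neither is the actual save_pretrained code from this commit):

import tensorflow as tf

def make_serving_fn(model, dtype):
    # Illustrative helper: build a serving function whose signature pins `dtype`.
    spec = {
        "input_ids": tf.TensorSpec((None, None), dtype, name="input_ids"),
        "attention_mask": tf.TensorSpec((None, None), dtype, name="attention_mask"),
    }

    @tf.function(input_signature=[spec])
    def serving(inputs):
        return model(inputs)

    return serving

tf.saved_model.save(
    model,
    export_dir,
    signatures={
        "serving_default": make_serving_fn(model, tf.int32).get_concrete_function(),
        # Alternate int64 signature; the key name here is made up for the sketch.
        "int64_serving": make_serving_fn(model, tf.int64).get_concrete_function(),
    },
)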
@@ -998,10 +998,10 @@ class TFLayoutLMv3PreTrainedModel(TFPreTrainedModel):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"),
-                "bbox": tf.TensorSpec((None, None, 4), tf.int64, name="bbox"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "bbox": tf.TensorSpec((None, None, 4), tf.int32, name="bbox"),
                 "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
             }
         ]
     )
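For readers skimming the hunks: the input_signature dicts above pin the dtypes a SavedModel serving function will accept, so a dtype mismatch fails at the signature boundary rather than deep inside the graph. Roughly how a consumer hits them (the path and tensors here are illustrative):

import tensorflow as tf

loaded = tf.saved_model.load("path/to/saved_model")  # illustrative path
serving_fn = loaded.signatures["serving_default"]
input_ids = tf.constant([[7, 6, 0, 0, 1]], dtype=tf.int32)       # must match the tf.int32 spec
attention_mask = tf.constant([[1, 1, 0, 0, 1]], dtype=tf.int32)
outputs = serving_fn(input_ids=input_ids, attention_mask=attention_mask)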
@@ -1323,10 +1323,10 @@ class TFLEDPreTrainedModel(TFPreTrainedModel):
     @property
     def dummy_inputs(self):
-        input_ids = tf.convert_to_tensor([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0]], dtype=tf.int64)
+        input_ids = tf.convert_to_tensor([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0]], dtype=tf.int32)
         # make sure global layers are initialized
-        attention_mask = tf.convert_to_tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0]], dtype=tf.int64)
-        global_attention_mask = tf.convert_to_tensor([[0, 0, 0, 0, 1], [0, 0, 1, 0, 0]], dtype=tf.int64)
+        attention_mask = tf.convert_to_tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0]], dtype=tf.int32)
+        global_attention_mask = tf.convert_to_tensor([[0, 0, 0, 0, 1], [0, 0, 1, 0, 0]], dtype=tf.int32)
         dummy_inputs = {
             "input_ids": input_ids,
             "attention_mask": attention_mask,
@@ -1338,10 +1338,10 @@ class TFLEDPreTrainedModel(TFPreTrainedModel):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"),
-                "decoder_input_ids": tf.TensorSpec((None, None), tf.int64, name="decoder_input_ids"),
-                "decoder_attention_mask": tf.TensorSpec((None, None), tf.int64, name="decoder_attention_mask"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+                "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
+                "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),
             }
         ]
     )
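The dummy_inputs properties changed in these hunks are what the TF models run through themselves at load time to build their weights, which is why their dtypes have to agree with the serving specs. A toy version of the pattern (the class below is a stand-in, not a transformers model):

import tensorflow as tf

class ToyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.embed = tf.keras.layers.Embedding(100, 8)

    @property
    def dummy_inputs(self):
        # Integer dummies are tf.int32, matching the int32 serving signature.
        return {"input_ids": tf.constant([[7, 6, 0, 0, 1]], dtype=tf.int32)}

    def call(self, inputs):
        return self.embed(inputs["input_ids"])

model = ToyModel()
_ = model(model.dummy_inputs)  # running the dummies once builds the weights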
@@ -1884,14 +1884,14 @@ class TFLongformerPreTrainedModel(TFPreTrainedModel):
     @property
     def dummy_inputs(self):
-        input_ids = tf.convert_to_tensor([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]], dtype=tf.int64)
+        input_ids = tf.convert_to_tensor([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]], dtype=tf.int32)
         # make sure global layers are initialized
-        attention_mask = tf.convert_to_tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]], dtype=tf.int64)
+        attention_mask = tf.convert_to_tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]], dtype=tf.int32)
         global_attention_mask = tf.convert_to_tensor(
-            [[0, 0, 0, 0, 1], [0, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=tf.int64
+            [[0, 0, 0, 0, 1], [0, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=tf.int32
         )
         global_attention_mask = tf.convert_to_tensor(
-            [[0, 0, 0, 0, 1], [0, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=tf.int64
+            [[0, 0, 0, 0, 1], [0, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=tf.int32
         )
         return {
             "input_ids": input_ids,
@@ -1902,8 +1902,8 @@ class TFLongformerPreTrainedModel(TFPreTrainedModel):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
             }
         ]
     )
@@ -2497,9 +2497,9 @@ class TFLongformerForMultipleChoice(TFLongformerPreTrainedModel, TFMultipleChoic
     @property
     def dummy_inputs(self):
-        input_ids = tf.convert_to_tensor(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int64)
+        input_ids = tf.convert_to_tensor(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)
         # make sure global layers are initialized
-        global_attention_mask = tf.convert_to_tensor([[[0, 0, 0, 1], [0, 0, 0, 1]]] * 2, dtype=tf.int64)
+        global_attention_mask = tf.convert_to_tensor([[[0, 0, 0, 1], [0, 0, 0, 1]]] * 2, dtype=tf.int32)
         return {"input_ids": input_ids, "global_attention_mask": global_attention_mask}

     @unpack_inputs
@@ -2591,8 +2591,8 @@ class TFLongformerForMultipleChoice(TFLongformerPreTrainedModel, TFMultipleChoic
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None, None), tf.int64, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None, None), tf.int64, name="attention_mask"),
+                "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
             }
         ]
     )
@@ -655,7 +655,7 @@ class TFLxmertMainLayer(tf.keras.layers.Layer):
         """
         batch_size = 2
         num_visual_features = 10
-        input_ids = tf.constant([[3, 5, 6], [2, 3, 4]])
+        input_ids = tf.constant([[3, 5, 6], [2, 3, 4]], dtype=tf.int32)
         visual_feats = tf.random.uniform((batch_size, num_visual_features, self.config.visual_feat_dim))
         visual_pos = tf.random.uniform((batch_size, num_visual_features, 4))
@@ -817,12 +817,12 @@ class TFLxmertPreTrainedModel(TFPreTrainedModel):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
                 "visual_feats": tf.TensorSpec((None, None, None), tf.float32, name="visual_feats"),
                 "visual_pos": tf.TensorSpec((None, None, None), tf.float32, name="visual_pos"),
-                "visual_attention_mask": tf.TensorSpec((None, None), tf.int64, name="visual_attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None), tf.int64, name="token_type_ids"),
+                "visual_attention_mask": tf.TensorSpec((None, None), tf.int32, name="visual_attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
             }
         ]
     )
@@ -1246,7 +1246,7 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel):
         """
         batch_size = 2
         num_visual_features = 10
-        input_ids = tf.constant([[3, 5, 6], [2, 3, 4]])
+        input_ids = tf.constant([[3, 5, 6], [2, 3, 4]], dtype=tf.int32)
         visual_feats = tf.random.uniform((batch_size, num_visual_features, self.config.visual_feat_dim))
         visual_pos = tf.random.uniform((batch_size, num_visual_features, 4))
@@ -507,7 +507,7 @@ class TFMarianPreTrainedModel(TFPreTrainedModel):
         decoder_input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32)
         dummy_inputs = {
             "decoder_input_ids": decoder_input_ids,
-            "attention_mask": tf.math.not_equal(input_ids, pad_token),
+            "attention_mask": tf.cast(input_ids != pad_token, tf.int32),
             "input_ids": input_ids,
         }
         return dummy_inputs
@@ -474,7 +474,7 @@ class TFMBartPreTrainedModel(TFPreTrainedModel):
         decoder_input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32)
         dummy_inputs = {
             "decoder_input_ids": decoder_input_ids,
-            "attention_mask": tf.math.not_equal(input_ids, pad_token),
+            "attention_mask": tf.cast(input_ids != pad_token, tf.int32),
             "input_ids": input_ids,
         }
         return dummy_inputs
@@ -1549,7 +1549,7 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic
         Returns:
             tf.Tensor with dummy inputs
         """
-        return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
+        return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)}

     @unpack_inputs
     @add_start_docstrings_to_model_forward(
@@ -79,8 +79,8 @@ class TFMPNetPreTrainedModel(TFPreTrainedModel):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
             }
         ]
     )
@@ -992,7 +992,7 @@ class TFMPNetForMultipleChoice(TFMPNetPreTrainedModel, TFMultipleChoiceLoss):
         Returns:
             tf.Tensor with dummy inputs
         """
-        return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
+        return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)}

     @unpack_inputs
     @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@@ -379,8 +379,8 @@ class TFOpenAIGPTPreTrainedModel(TFPreTrainedModel):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
             }
         ]
     )
@@ -415,9 +415,9 @@ class TFOPTPreTrainedModel(TFPreTrainedModel):
     @property
     def dummy_inputs(self):
         pad_token = 1
-        input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32)
+        input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32)
         dummy_inputs = {
-            "attention_mask": tf.math.not_equal(input_ids, pad_token),
+            "attention_mask": tf.cast(input_ids != pad_token, tf.int32),
             "input_ids": input_ids,
         }
         return dummy_inputs
@@ -425,8 +425,8 @@ class TFOPTPreTrainedModel(TFPreTrainedModel):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
             }
         ]
     )
@@ -505,11 +505,11 @@ class TFPegasusPreTrainedModel(TFPreTrainedModel):
     @property
     def dummy_inputs(self):
         pad_token = 1
-        input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32)
-        decoder_input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32)
+        input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32)
+        decoder_input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32)
         dummy_inputs = {
             "decoder_input_ids": decoder_input_ids,
-            "attention_mask": tf.math.not_equal(input_ids, pad_token),
+            "attention_mask": tf.cast(input_ids != pad_token, tf.int32),
             "input_ids": input_ids,
         }
         return dummy_inputs
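One detail worth spelling out in the Marian, MBart, OPT and Pegasus hunks above: tf.math.not_equal returns a tf.bool tensor, so the old dummy attention masks never matched an integer serving spec; the rewritten lines cast the comparison to tf.int32. A quick check with a toy tensor (not from the repo):

import tensorflow as tf

input_ids = tf.constant([[3, 5, 1], [2, 1, 1]], dtype=tf.int32)  # toy ids, 1 = pad
pad_token = 1
bool_mask = tf.math.not_equal(input_ids, pad_token)    # dtype: tf.bool
int_mask = tf.cast(input_ids != pad_token, tf.int32)   # dtype: tf.int32
print(bool_mask.dtype, int_mask.dtype)  # <dtype: 'bool'> <dtype: 'int32'>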
@@ -830,7 +830,7 @@ class TFRemBertPreTrainedModel(TFPreTrainedModel):
         Returns:
             `Dict[str, tf.Tensor]`: The dummy inputs.
         """
-        dummy = {"input_ids": tf.constant(DUMMY_INPUTS)}
+        dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)}
         # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized
         if self.config.add_cross_attention:
             batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape
@@ -1361,7 +1361,7 @@ class TFRemBertForMultipleChoice(TFRemBertPreTrainedModel, TFMultipleChoiceLoss)
         Returns:
             tf.Tensor with dummy inputs
         """
-        return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
+        return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)}

     @unpack_inputs
     @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@@ -1445,9 +1445,9 @@ class TFRemBertForMultipleChoice(TFRemBertPreTrainedModel, TFMultipleChoiceLoss)
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None, None), tf.int64, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None, None), tf.int64, name="attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None, None), tf.int64, name="token_type_ids"),
+                "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
             }
         ]
     )
@@ -795,7 +795,7 @@ class TFRobertaPreTrainedModel(TFPreTrainedModel):
         Returns:
             `Dict[str, tf.Tensor]`: The dummy inputs.
         """
-        dummy = {"input_ids": tf.constant(DUMMY_INPUTS)}
+        dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)}
         # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized
         if self.config.add_cross_attention:
             batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape
@@ -808,8 +808,8 @@ class TFRobertaPreTrainedModel(TFPreTrainedModel):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
             }
         ]
     )
@@ -1436,7 +1436,7 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss)
         Returns:
             tf.Tensor with dummy inputs
         """
-        return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
+        return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)}

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@@ -1141,7 +1141,7 @@ class TFRoFormerForMultipleChoice(TFRoFormerPreTrainedModel, TFMultipleChoiceLos
         Returns:
             tf.Tensor with dummy inputs
         """
-        return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
+        return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)}

     @unpack_inputs
     @add_start_docstrings_to_model_forward(
@@ -1221,9 +1221,9 @@ class TFRoFormerForMultipleChoice(TFRoFormerPreTrainedModel, TFMultipleChoiceLos
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None, None), tf.int64, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None, None), tf.int64, name="attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None, None), tf.int64, name="token_type_ids"),
+                "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
             }
         ]
     )
@@ -595,9 +595,9 @@ class TFSpeech2TextPreTrainedModel(TFPreTrainedModel):
         input_signature=[
             {
                 "input_features": tf.TensorSpec((None, None, None), tf.float32, name="input_features"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"),
-                "decoder_input_ids": tf.TensorSpec((None, None), tf.int64, name="decoder_input_ids"),
-                "decoder_attention_mask": tf.TensorSpec((None, None), tf.int64, name="decoder_attention_mask"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+                "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
+                "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),
             }
         ]
     )
@@ -878,8 +878,8 @@ class TFT5PreTrainedModel(TFPreTrainedModel):
     @property
     def dummy_inputs(self):
-        inputs = tf.constant(DUMMY_INPUTS)
-        input_mask = tf.constant(DUMMY_MASK)
+        inputs = tf.constant(DUMMY_INPUTS, dtype=tf.int32)
+        input_mask = tf.constant(DUMMY_MASK, dtype=tf.int32)
         dummy_inputs = {
             "input_ids": inputs,
             "decoder_input_ids": inputs,
@@ -890,10 +890,10 @@ class TFT5PreTrainedModel(TFPreTrainedModel):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"),
-                "decoder_input_ids": tf.TensorSpec((None, None), tf.int64, name="decoder_input_ids"),
-                "decoder_attention_mask": tf.TensorSpec((None, None), tf.int64, name="decoder_attention_mask"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+                "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
+                "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),
             }
         ]
     )
@@ -1575,7 +1575,7 @@ class TFT5EncoderModel(TFT5PreTrainedModel):
     @property
     def dummy_inputs(self):
-        return {"input_ids": tf.constant(DUMMY_INPUTS)}
+        return {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)}

     def get_encoder(self):
         return self.encoder
@@ -865,9 +865,9 @@ class TFTapasPreTrainedModel(TFPreTrainedModel):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
                 "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None, None), tf.int64, name="token_type_ids"),
+                "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
             }
         ]
     )
@@ -687,7 +687,7 @@ class TFTransfoXLPreTrainedModel(TFPreTrainedModel):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
             }
         ]
     )
@@ -261,7 +261,7 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos
         Returns:
             `Dict[str, tf.Tensor]`: The dummy inputs.
         """
-        decoder_input_ids = tf.constant(DUMMY_INPUTS)
+        decoder_input_ids = tf.constant(DUMMY_INPUTS, dtype=tf.int32)
         batch_size, seq_len = decoder_input_ids.shape
         VISION_DUMMY_INPUTS = tf.random.uniform(
@@ -1345,8 +1345,8 @@ class TFWav2Vec2PreTrainedModel(TFPreTrainedModel):
         input_signature=[
             {
                 "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None), tf.int64, name="token_type_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
             }
         ]
     )
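Per the commit list, the tests were also adjusted to assert tf.int32 with clearer failure messages; a hedged sketch of what such a check might look like (the helper below is illustrative, not the repo's test code):

import tensorflow as tf

def assert_int32_dummies(model):
    # Every integer-typed dummy input should be tf.int32 so it matches
    # the int32 serving signature.
    for name, tensor in model.dummy_inputs.items():
        if tensor.dtype.is_integer:
            assert tensor.dtype == tf.int32, (
                f"Dummy input '{name}' has dtype {tensor.dtype}, expected tf.int32"
            )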