Unverified Commit 8e838466 authored by amyeroberts's avatar amyeroberts Committed by GitHub
Browse files

Update serving code to enable `saved_model=True` (#18153)



* Add serving_output and serving methods to some vision models

* Add serving outputs for DeiT

* Don't convert hidden states - differing shapes

* Make saveable

* Fix up

* Make swin saveable

* Add in tests

* Fix funnel tests (can't convert to tensor)

* Fix numpy call

* Tidy up a bit

* Add in hidden states - resnet

* Remove numpy

* Fix failing tests - tensor shape and skipping tests

* Remove duplicated function

* PR comments - formatting and var names

* PR comments
Add suggestions made by Joao Gante:
* Use tf.shape instead of shape_list
* Use @tooslow decorator on tests
* Simplify some of the logic

* PR comments
Address Yih-Dar Sheih comments - making tensor names consistent and make types float

* Types consistent with docs; disable test on swin (slow)

* CI trigger

* Change input_features to float32

* Add serving_output for segformer

* Fixup
Co-authored-by: default avatarAmy Roberts <amyeroberts@users.noreply.github.com>
parent 07505358
...@@ -383,7 +383,8 @@ class TFConvNextPreTrainedModel(TFPreTrainedModel): ...@@ -383,7 +383,8 @@ class TFConvNextPreTrainedModel(TFPreTrainedModel):
inputs (`Dict[str, tf.Tensor]`): inputs (`Dict[str, tf.Tensor]`):
The input of the saved model as a dictionary of tensors. The input of the saved model as a dictionary of tensors.
""" """
return self.call(inputs) output = self.call(inputs)
return self.serving_output(output)
CONVNEXT_START_DOCSTRING = r""" CONVNEXT_START_DOCSTRING = r"""
...@@ -492,6 +493,14 @@ class TFConvNextModel(TFConvNextPreTrainedModel): ...@@ -492,6 +493,14 @@ class TFConvNextModel(TFConvNextPreTrainedModel):
hidden_states=outputs.hidden_states, hidden_states=outputs.hidden_states,
) )
def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling:
# hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFBaseModelOutputWithPooling(
last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output,
hidden_states=output.hidden_states,
)
@add_start_docstrings( @add_start_docstrings(
""" """
...@@ -584,3 +593,7 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas ...@@ -584,3 +593,7 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas
logits=logits, logits=logits,
hidden_states=outputs.hidden_states, hidden_states=outputs.hidden_states,
) )
def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
# hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFSequenceClassifierOutput(logits=output.logits, hidden_states=output.hidden_states)
...@@ -801,8 +801,8 @@ class TFData2VecVisionPreTrainedModel(TFPreTrainedModel): ...@@ -801,8 +801,8 @@ class TFData2VecVisionPreTrainedModel(TFPreTrainedModel):
inputs (`Dict[str, tf.Tensor]`): inputs (`Dict[str, tf.Tensor]`):
The input of the saved model as a dictionary of tensors. The input of the saved model as a dictionary of tensors.
""" """
output = self.call(inputs)
return self.call(inputs) return self.serving_output(output)
DATA2VEC_VISION_START_DOCSTRING = r""" DATA2VEC_VISION_START_DOCSTRING = r"""
...@@ -910,6 +910,17 @@ class TFData2VecVisionModel(TFData2VecVisionPreTrainedModel): ...@@ -910,6 +910,17 @@ class TFData2VecVisionModel(TFData2VecVisionPreTrainedModel):
return outputs return outputs
def serving_output(self, output: TFData2VecVisionModelOutputWithPooling) -> TFData2VecVisionModelOutputWithPooling:
hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFData2VecVisionModelOutputWithPooling(
last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output,
hidden_states=hidden_states,
attentions=attentions,
)
@add_start_docstrings( @add_start_docstrings(
""" """
...@@ -983,6 +994,12 @@ class TFData2VecVisionForImageClassification(TFData2VecVisionPreTrainedModel, TF ...@@ -983,6 +994,12 @@ class TFData2VecVisionForImageClassification(TFData2VecVisionPreTrainedModel, TF
attentions=outputs.attentions, attentions=outputs.attentions,
) )
def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
class TFData2VecVisionConvModule(tf.keras.layers.Layer): class TFData2VecVisionConvModule(tf.keras.layers.Layer):
""" """
...@@ -1443,3 +1460,9 @@ class TFData2VecVisionForSemanticSegmentation(TFData2VecVisionPreTrainedModel): ...@@ -1443,3 +1460,9 @@ class TFData2VecVisionForSemanticSegmentation(TFData2VecVisionPreTrainedModel):
hidden_states=outputs.hidden_states if output_hidden_states else None, hidden_states=outputs.hidden_states if output_hidden_states else None,
attentions=outputs.attentions, attentions=outputs.attentions,
) )
def serving_output(self, output: TFSemanticSegmenterOutput) -> TFSemanticSegmenterOutput:
hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSemanticSegmenterOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
...@@ -193,7 +193,7 @@ class TFDeiTPatchEmbeddings(tf.keras.layers.Layer): ...@@ -193,7 +193,7 @@ class TFDeiTPatchEmbeddings(tf.keras.layers.Layer):
raise ValueError( raise ValueError(
"Make sure that the channel dimension of the pixel values match with the one set in the configuration." "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
) )
if height != self.image_size[0] or width != self.image_size[1]: if tf.executing_eagerly() and (height != self.image_size[0] or width != self.image_size[1]):
raise ValueError( raise ValueError(
f"Input image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})." f"Input image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})."
) )
...@@ -680,14 +680,14 @@ class TFDeiTModel(TFDeiTPreTrainedModel): ...@@ -680,14 +680,14 @@ class TFDeiTModel(TFDeiTPreTrainedModel):
return outputs return outputs
def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutputWithPooling( return TFBaseModelOutputWithPooling(
last_hidden_state=output.last_hidden_state, last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output, pooler_output=output.pooler_output,
hidden_states=hs, hidden_states=hidden_states,
attentions=attns, attentions=attentions,
) )
...@@ -864,6 +864,12 @@ class TFDeiTForMaskedImageModeling(TFDeiTPreTrainedModel): ...@@ -864,6 +864,12 @@ class TFDeiTForMaskedImageModeling(TFDeiTPreTrainedModel):
attentions=outputs.attentions, attentions=outputs.attentions,
) )
def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput:
hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMaskedLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
@add_start_docstrings( @add_start_docstrings(
""" """
...@@ -961,6 +967,12 @@ class TFDeiTForImageClassification(TFDeiTPreTrainedModel, TFSequenceClassificati ...@@ -961,6 +967,12 @@ class TFDeiTForImageClassification(TFDeiTPreTrainedModel, TFSequenceClassificati
attentions=outputs.attentions, attentions=outputs.attentions,
) )
def serving_output(self, output: TFImageClassifierOutput) -> TFImageClassifierOutput:
hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFImageClassifierOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
@add_start_docstrings( @add_start_docstrings(
""" """
...@@ -1041,3 +1053,17 @@ class TFDeiTForImageClassificationWithTeacher(TFDeiTPreTrainedModel): ...@@ -1041,3 +1053,17 @@ class TFDeiTForImageClassificationWithTeacher(TFDeiTPreTrainedModel):
hidden_states=outputs.hidden_states, hidden_states=outputs.hidden_states,
attentions=outputs.attentions, attentions=outputs.attentions,
) )
def serving_output(
self, output: TFDeiTForImageClassificationWithTeacherOutput
) -> TFDeiTForImageClassificationWithTeacherOutput:
hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFDeiTForImageClassificationWithTeacherOutput(
logits=output.logits,
cls_logits=output.cls_logits,
distillation_logits=output.distillation_logits,
hidden_states=hidden_states,
attentions=attentions,
)
...@@ -1127,12 +1127,14 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel): ...@@ -1127,12 +1127,14 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel):
training=training, training=training,
) )
# Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output
def serving_output(self, output): def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None # different dimensions
return TFBaseModelOutput(
return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) last_hidden_state=output.last_hidden_state,
hidden_states=output.hidden_states,
attentions=output.attentions,
)
@add_start_docstrings( @add_start_docstrings(
...@@ -1175,12 +1177,14 @@ class TFFunnelModel(TFFunnelPreTrainedModel): ...@@ -1175,12 +1177,14 @@ class TFFunnelModel(TFFunnelPreTrainedModel):
training=training, training=training,
) )
# Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output
def serving_output(self, output): def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None # different dimensions
return TFBaseModelOutput(
return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) last_hidden_state=output.last_hidden_state,
hidden_states=output.hidden_states,
attentions=output.attentions,
)
@add_start_docstrings( @add_start_docstrings(
...@@ -1249,10 +1253,11 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel): ...@@ -1249,10 +1253,11 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
) )
def serving_output(self, output): def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None # different dimensions
return TFFunnelForPreTrainingOutput(
return TFFunnelForPreTrainingOutput(logits=output.logits, hidden_states=hs, attentions=attns) logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
)
@add_start_docstrings("""Funnel Model with a `language modeling` head on top.""", FUNNEL_START_DOCSTRING) @add_start_docstrings("""Funnel Model with a `language modeling` head on top.""", FUNNEL_START_DOCSTRING)
...@@ -1322,12 +1327,10 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss) ...@@ -1322,12 +1327,10 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss)
attentions=outputs.attentions, attentions=outputs.attentions,
) )
# Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output
def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None # different dimensions
return TFMaskedLMOutput(logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions)
return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns)
@add_start_docstrings( @add_start_docstrings(
...@@ -1398,12 +1401,12 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass ...@@ -1398,12 +1401,12 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass
attentions=outputs.attentions, attentions=outputs.attentions,
) )
# Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output
def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None # different dimensions
return TFSequenceClassifierOutput(
return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
)
@add_start_docstrings( @add_start_docstrings(
...@@ -1503,9 +1506,9 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss): ...@@ -1503,9 +1506,9 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
@tf.function( @tf.function(
input_signature=[ input_signature=[
{ {
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
"token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
} }
] ]
) )
...@@ -1514,12 +1517,12 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss): ...@@ -1514,12 +1517,12 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
return self.serving_output(output=output) return self.serving_output(output=output)
# Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output
def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None # different dimensions
return TFMultipleChoiceModelOutput(
return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
)
@add_start_docstrings( @add_start_docstrings(
...@@ -1592,12 +1595,12 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat ...@@ -1592,12 +1595,12 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat
attentions=outputs.attentions, attentions=outputs.attentions,
) )
# Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output
def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None # different dimensions
return TFTokenClassifierOutput(
return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
)
@add_start_docstrings( @add_start_docstrings(
...@@ -1683,11 +1686,12 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL ...@@ -1683,11 +1686,12 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL
attentions=outputs.attentions, attentions=outputs.attentions,
) )
# Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output
def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None # different dimensions
return TFQuestionAnsweringModelOutput( return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns start_logits=output.start_logits,
end_logits=output.end_logits,
hidden_states=output.hidden_states,
attentions=output.attentions,
) )
...@@ -227,12 +227,13 @@ def _compute_mask_indices( ...@@ -227,12 +227,13 @@ def _compute_mask_indices(
f" `sequence_length`: {sequence_length}`" f" `sequence_length`: {sequence_length}`"
) )
# compute number of masked spans in batch # compute number of masked spans in batch
num_masked_spans = int(mask_prob * sequence_length / mask_length + tf.random.uniform((1,))) num_masked_spans = mask_prob * sequence_length / mask_length + tf.random.uniform((1,))
num_masked_spans = max(num_masked_spans, min_masks) num_masked_spans = tf.maximum(num_masked_spans, min_masks)
num_masked_spans = tf.cast(num_masked_spans, tf.int32)
# make sure num masked indices <= sequence_length # make sure num masked indices <= sequence_length
if num_masked_spans * mask_length > sequence_length: num_masked_spans = tf.math.minimum(sequence_length // mask_length, num_masked_spans)
num_masked_spans = sequence_length // mask_length num_masked_spans = tf.squeeze(num_masked_spans)
# SpecAugment mask to fill # SpecAugment mask to fill
spec_aug_mask = tf.zeros((batch_size, sequence_length), dtype=tf.int32) spec_aug_mask = tf.zeros((batch_size, sequence_length), dtype=tf.int32)
...@@ -256,7 +257,7 @@ def _compute_mask_indices( ...@@ -256,7 +257,7 @@ def _compute_mask_indices(
# scatter indices to mask # scatter indices to mask
spec_aug_mask = _scatter_values_on_batch_indices( spec_aug_mask = _scatter_values_on_batch_indices(
tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, spec_aug_mask.shape tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, tf.shape(spec_aug_mask)
) )
return spec_aug_mask return spec_aug_mask
...@@ -1319,7 +1320,15 @@ class TFHubertPreTrainedModel(TFPreTrainedModel): ...@@ -1319,7 +1320,15 @@ class TFHubertPreTrainedModel(TFPreTrainedModel):
"to train/fine-tine this model, you need a GPU or a TPU" "to train/fine-tine this model, you need a GPU or a TPU"
) )
@tf.function @tf.function(
input_signature=[
{
"input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
"token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
}
]
)
def serving(self, inputs): def serving(self, inputs):
output = self.call(input_values=inputs, training=False) output = self.call(input_values=inputs, training=False)
...@@ -1511,10 +1520,11 @@ class TFHubertModel(TFHubertPreTrainedModel): ...@@ -1511,10 +1520,11 @@ class TFHubertModel(TFHubertPreTrainedModel):
return outputs return outputs
def serving_output(self, output): def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutput(
return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) last_hidden_state=output.last_hidden_state, hidden_states=hidden_states, attentions=attentions
)
@add_start_docstrings( @add_start_docstrings(
...@@ -1685,6 +1695,6 @@ class TFHubertForCTC(TFHubertPreTrainedModel): ...@@ -1685,6 +1695,6 @@ class TFHubertForCTC(TFHubertPreTrainedModel):
) )
def serving_output(self, output: TFCausalLMOutput) -> TFCausalLMOutput: def serving_output(self, output: TFCausalLMOutput) -> TFCausalLMOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFCausalLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) return TFCausalLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
...@@ -371,7 +371,8 @@ class TFRegNetPreTrainedModel(TFPreTrainedModel): ...@@ -371,7 +371,8 @@ class TFRegNetPreTrainedModel(TFPreTrainedModel):
inputs (`Dict[str, tf.Tensor]`): inputs (`Dict[str, tf.Tensor]`):
The input of the saved model as a dictionary of tensors. The input of the saved model as a dictionary of tensors.
""" """
return self.call(inputs) output = self.call(inputs)
return self.serving_output(output)
REGNET_START_DOCSTRING = r""" REGNET_START_DOCSTRING = r"""
...@@ -444,6 +445,16 @@ class TFRegNetModel(TFRegNetPreTrainedModel): ...@@ -444,6 +445,16 @@ class TFRegNetModel(TFRegNetPreTrainedModel):
hidden_states=outputs.hidden_states, hidden_states=outputs.hidden_states,
) )
def serving_output(
self, output: TFBaseModelOutputWithPoolingAndNoAttention
) -> TFBaseModelOutputWithPoolingAndNoAttention:
# hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFBaseModelOutputWithPoolingAndNoAttention(
last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output,
hidden_states=output.hidden_states,
)
@add_start_docstrings( @add_start_docstrings(
""" """
...@@ -506,3 +517,7 @@ class TFRegNetForImageClassification(TFRegNetPreTrainedModel, TFSequenceClassifi ...@@ -506,3 +517,7 @@ class TFRegNetForImageClassification(TFRegNetPreTrainedModel, TFSequenceClassifi
return ((loss,) + output) if loss is not None else output return ((loss,) + output) if loss is not None else output
return TFSequenceClassifierOutput(loss=loss, logits=logits, hidden_states=outputs.hidden_states) return TFSequenceClassifierOutput(loss=loss, logits=logits, hidden_states=outputs.hidden_states)
def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
# hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFSequenceClassifierOutput(logits=output.logits, hidden_states=output.hidden_states)
...@@ -263,10 +263,7 @@ class TFResNetEncoder(tf.keras.layers.Layer): ...@@ -263,10 +263,7 @@ class TFResNetEncoder(tf.keras.layers.Layer):
if not return_dict: if not return_dict:
return tuple(v for v in [hidden_state, hidden_states] if v is not None) return tuple(v for v in [hidden_state, hidden_states] if v is not None)
return TFBaseModelOutputWithNoAttention( return TFBaseModelOutputWithNoAttention(last_hidden_state=hidden_state, hidden_states=hidden_states)
last_hidden_state=hidden_state,
hidden_states=hidden_states,
)
class TFResNetPreTrainedModel(TFPreTrainedModel): class TFResNetPreTrainedModel(TFPreTrainedModel):
...@@ -288,6 +285,17 @@ class TFResNetPreTrainedModel(TFPreTrainedModel): ...@@ -288,6 +285,17 @@ class TFResNetPreTrainedModel(TFPreTrainedModel):
VISION_DUMMY_INPUTS = tf.random.uniform(shape=(3, self.config.num_channels, 224, 224), dtype=tf.float32) VISION_DUMMY_INPUTS = tf.random.uniform(shape=(3, self.config.num_channels, 224, 224), dtype=tf.float32)
return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)}
@tf.function(
input_signature=[
{
"pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
RESNET_START_DOCSTRING = r""" RESNET_START_DOCSTRING = r"""
This model is a TensorFlow This model is a TensorFlow
...@@ -413,6 +421,16 @@ class TFResNetModel(TFResNetPreTrainedModel): ...@@ -413,6 +421,16 @@ class TFResNetModel(TFResNetPreTrainedModel):
) )
return resnet_outputs return resnet_outputs
def serving_output(
self, output: TFBaseModelOutputWithPoolingAndNoAttention
) -> TFBaseModelOutputWithPoolingAndNoAttention:
# hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFBaseModelOutputWithPoolingAndNoAttention(
last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output,
hidden_states=output.hidden_states,
)
@add_start_docstrings( @add_start_docstrings(
""" """
...@@ -477,3 +495,7 @@ class TFResNetForImageClassification(TFResNetPreTrainedModel, TFSequenceClassifi ...@@ -477,3 +495,7 @@ class TFResNetForImageClassification(TFResNetPreTrainedModel, TFSequenceClassifi
return (loss,) + output if loss is not None else output return (loss,) + output if loss is not None else output
return TFImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states) return TFImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states)
def serving_output(self, output: TFImageClassifierOutputWithNoAttention) -> TFImageClassifierOutputWithNoAttention:
# hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFImageClassifierOutputWithNoAttention(logits=output.logits, hidden_states=output.hidden_states)
...@@ -544,7 +544,9 @@ class TFSegformerPreTrainedModel(TFPreTrainedModel): ...@@ -544,7 +544,9 @@ class TFSegformerPreTrainedModel(TFPreTrainedModel):
inputs (`Dict[str, tf.Tensor]`): inputs (`Dict[str, tf.Tensor]`):
The input of the saved model as a dictionary of tensors. The input of the saved model as a dictionary of tensors.
""" """
return self.call(inputs) output = self.call(inputs)
return self.serving_output(output)
SEGFORMER_START_DOCSTRING = r""" SEGFORMER_START_DOCSTRING = r"""
...@@ -628,6 +630,14 @@ class TFSegformerModel(TFSegformerPreTrainedModel): ...@@ -628,6 +630,14 @@ class TFSegformerModel(TFSegformerPreTrainedModel):
) )
return outputs return outputs
def serving_output(self, output: TFBaseModelOutput) -> TFBaseModelOutput:
# hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFBaseModelOutput(
last_hidden_state=output.last_hidden_state,
hidden_states=output.hidden_states,
attentions=output.attentions,
)
@add_start_docstrings( @add_start_docstrings(
""" """
...@@ -692,6 +702,12 @@ class TFSegformerForImageClassification(TFSegformerPreTrainedModel, TFSequenceCl ...@@ -692,6 +702,12 @@ class TFSegformerForImageClassification(TFSegformerPreTrainedModel, TFSequenceCl
loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions
) )
def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
# hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFSequenceClassifierOutput(
logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
)
class TFSegformerMLP(tf.keras.layers.Layer): class TFSegformerMLP(tf.keras.layers.Layer):
""" """
...@@ -876,3 +892,9 @@ class TFSegformerForSemanticSegmentation(TFSegformerPreTrainedModel): ...@@ -876,3 +892,9 @@ class TFSegformerForSemanticSegmentation(TFSegformerPreTrainedModel):
hidden_states=outputs.hidden_states if output_hidden_states else None, hidden_states=outputs.hidden_states if output_hidden_states else None,
attentions=outputs.attentions, attentions=outputs.attentions,
) )
def serving_output(self, output: TFSemanticSegmenterOutput) -> TFSemanticSegmenterOutput:
# hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFSemanticSegmenterOutput(
logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
)
...@@ -143,7 +143,8 @@ class TFConv1dSubsampler(tf.keras.layers.Layer): ...@@ -143,7 +143,8 @@ class TFConv1dSubsampler(tf.keras.layers.Layer):
] ]
def call(self, input_features: tf.Tensor) -> tf.Tensor: def call(self, input_features: tf.Tensor) -> tf.Tensor:
hidden_states = tf.identity(input_features) # TF Conv1D assumes Batch x Time x Channels, same as the input # TF Conv1D assumes Batch x Time x Channels, same as the input
hidden_states = tf.cast(input_features, tf.float32)
for i, conv in enumerate(self.conv_layers): for i, conv in enumerate(self.conv_layers):
# equivalent to `padding=k // 2` on PT's `nn.Conv1d` # equivalent to `padding=k // 2` on PT's `nn.Conv1d`
pad_len = self.kernel_sizes[i] // 2 pad_len = self.kernel_sizes[i] // 2
...@@ -187,23 +188,20 @@ class TFSpeech2TextSinusoidalPositionalEmbedding(tf.keras.layers.Layer): ...@@ -187,23 +188,20 @@ class TFSpeech2TextSinusoidalPositionalEmbedding(tf.keras.layers.Layer):
# zero pad # zero pad
emb = tf.concat([emb, tf.zeros(num_embeddings, 1)], axis=1) emb = tf.concat([emb, tf.zeros(num_embeddings, 1)], axis=1)
if padding_idx is not None: if padding_idx is not None:
emb = tf.concat([emb[:padding_idx, :], tf.zeros((1, emb.shape[1])), emb[padding_idx + 1 :, :]], axis=0) emb = tf.concat([emb[:padding_idx, :], tf.zeros((1, tf.shape(emb)[1])), emb[padding_idx + 1 :, :]], axis=0)
return emb return emb
def _resize_embeddings(self):
"""Recreates (and effectivelly resizes) the sinusoidal embeddings"""
self.embeddings = self.add_weight(
name="weights", # name also used in PT
shape=self.embedding_weights.shape,
)
self.embeddings.assign(self.embedding_weights)
def build(self, input_shape: tf.TensorShape): def build(self, input_shape: tf.TensorShape):
""" """
Build shared token embedding layer Shared weights logic adapted from Build shared token embedding layer Shared weights logic adapted from
https://github.com/tensorflow/models/blob/a009f4fb9d2fc4949e32192a944688925ef78659/official/transformer/v2/embedding_layer.py#L24 https://github.com/tensorflow/models/blob/a009f4fb9d2fc4949e32192a944688925ef78659/official/transformer/v2/embedding_layer.py#L24
""" """
self._resize_embeddings() self.embeddings = self.add_weight(
name="weights", # name also used in PT
shape=tf.shape(self.embedding_weights),
trainable=False,
)
self.embeddings.assign(self.embedding_weights)
super().build(input_shape) super().build(input_shape)
def call(self, input_ids: tf.Tensor, past_key_values_length: int = 0) -> tf.Tensor: def call(self, input_ids: tf.Tensor, past_key_values_length: int = 0) -> tf.Tensor:
...@@ -215,7 +213,7 @@ class TFSpeech2TextSinusoidalPositionalEmbedding(tf.keras.layers.Layer): ...@@ -215,7 +213,7 @@ class TFSpeech2TextSinusoidalPositionalEmbedding(tf.keras.layers.Layer):
max_pos = self.padding_idx + 1 + seq_len max_pos = self.padding_idx + 1 + seq_len
if max_pos > shape_list(self.embeddings)[0]: if max_pos > shape_list(self.embeddings)[0]:
self.embedding_weights = self._get_embedding(max_pos + self.offset, self.embedding_dim, self.padding_idx) self.embedding_weights = self._get_embedding(max_pos + self.offset, self.embedding_dim, self.padding_idx)
self._resize_embeddings() self.embeddings.assign(self.embedding_weights)
return tf.reshape(tf.gather(self.embeddings, tf.reshape(position_ids, (-1,)), axis=0), (bsz, seq_len, -1)) return tf.reshape(tf.gather(self.embeddings, tf.reshape(position_ids, (-1,)), axis=0), (bsz, seq_len, -1))
@staticmethod @staticmethod
...@@ -608,7 +606,7 @@ class TFSpeech2TextPreTrainedModel(TFPreTrainedModel): ...@@ -608,7 +606,7 @@ class TFSpeech2TextPreTrainedModel(TFPreTrainedModel):
@tf.function( @tf.function(
input_signature=[ input_signature=[
{ {
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), "input_features": tf.TensorSpec((None, None, None), tf.float32, name="input_features"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
"decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
"decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),
...@@ -791,7 +789,6 @@ class TFSpeech2TextEncoder(tf.keras.layers.Layer): ...@@ -791,7 +789,6 @@ class TFSpeech2TextEncoder(tf.keras.layers.Layer):
), ),
axis=-1, axis=-1,
) )
attention_mask = tf.scatter_nd(indices=indices, updates=tf.ones(bsz), shape=[bsz, feature_vector_length]) attention_mask = tf.scatter_nd(indices=indices, updates=tf.ones(bsz), shape=[bsz, feature_vector_length])
attention_mask = tf.cast(tf.reverse(tf.math.cumsum(tf.reverse(attention_mask, [-1]), -1), [-1]), tf.int64) attention_mask = tf.cast(tf.reverse(tf.math.cumsum(tf.reverse(attention_mask, [-1]), -1), [-1]), tf.int64)
return attention_mask return attention_mask
...@@ -845,10 +842,10 @@ class TFSpeech2TextEncoder(tf.keras.layers.Layer): ...@@ -845,10 +842,10 @@ class TFSpeech2TextEncoder(tf.keras.layers.Layer):
# subsample attention mask if necessary # subsample attention mask if necessary
if attention_mask is not None: if attention_mask is not None:
attention_mask = self._get_feature_vector_attention_mask(inputs_embeds.shape[1], attention_mask) attention_mask = self._get_feature_vector_attention_mask(tf.shape(inputs_embeds)[1], attention_mask)
padding_mask = tf.cast(tf.math.not_equal(attention_mask, 1), tf.int64) padding_mask = tf.cast(tf.math.not_equal(attention_mask, 1), tf.int64)
else: else:
padding_mask = tf.zeros(inputs_embeds.shape[:-1], dtype=tf.int64) padding_mask = tf.zeros(tf.shape(inputs_embeds)[:-1], dtype=tf.int64)
embed_pos = self.embed_positions(padding_mask) embed_pos = self.embed_positions(padding_mask)
...@@ -942,22 +939,6 @@ class TFSpeech2TextDecoder(tf.keras.layers.Layer): ...@@ -942,22 +939,6 @@ class TFSpeech2TextDecoder(tf.keras.layers.Layer):
def set_embed_tokens(self, embed_tokens): def set_embed_tokens(self, embed_tokens):
self.embed_tokens = embed_tokens self.embed_tokens = embed_tokens
def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
# create causal mask
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
combined_attention_mask = None
if input_shape[-1] > 1:
combined_attention_mask = _make_causal_mask(input_shape, past_key_values_length=past_key_values_length)
if attention_mask is not None:
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
expanded_attn_mask = _expand_mask(attention_mask, tgt_len=input_shape[-1])
combined_attention_mask = (
expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
)
return combined_attention_mask
@unpack_inputs @unpack_inputs
def call( def call(
self, self,
...@@ -1053,10 +1034,17 @@ class TFSpeech2TextDecoder(tf.keras.layers.Layer): ...@@ -1053,10 +1034,17 @@ class TFSpeech2TextDecoder(tf.keras.layers.Layer):
else: else:
inputs_embeds = inputs_embeds inputs_embeds = inputs_embeds
attention_mask = self._prepare_decoder_attention_mask( # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
attention_mask, input_shape, inputs_embeds, past_key_values_length if input_shape[-1] > 1:
combined_attention_mask = _make_causal_mask(input_shape, past_key_values_length=past_key_values_length)
else:
combined_attention_mask = _expand_mask(
tf.ones((input_shape[0], input_shape[1] + past_key_values_length)), tgt_len=input_shape[-1]
) )
if attention_mask is not None:
combined_attention_mask = combined_attention_mask + _expand_mask(attention_mask, tgt_len=input_shape[-1])
# expand encoder attention mask # expand encoder attention mask
if encoder_hidden_states is not None and encoder_attention_mask is not None: if encoder_hidden_states is not None and encoder_attention_mask is not None:
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
...@@ -1100,7 +1088,7 @@ class TFSpeech2TextDecoder(tf.keras.layers.Layer): ...@@ -1100,7 +1088,7 @@ class TFSpeech2TextDecoder(tf.keras.layers.Layer):
hidden_states, layer_self_attn, layer_cross_attn, present_key_value = decoder_layer( hidden_states, layer_self_attn, layer_cross_attn, present_key_value = decoder_layer(
hidden_states, hidden_states,
attention_mask=attention_mask, attention_mask=combined_attention_mask,
encoder_hidden_states=encoder_hidden_states, encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask, encoder_attention_mask=encoder_attention_mask,
layer_head_mask=head_mask[idx] if head_mask is not None else None, layer_head_mask=head_mask[idx] if head_mask is not None else None,
...@@ -1203,7 +1191,7 @@ class TFSpeech2TextMainLayer(tf.keras.layers.Layer): ...@@ -1203,7 +1191,7 @@ class TFSpeech2TextMainLayer(tf.keras.layers.Layer):
# downsample encoder attention mask # downsample encoder attention mask
if attention_mask is not None: if attention_mask is not None:
encoder_attention_mask = self.encoder._get_feature_vector_attention_mask( encoder_attention_mask = self.encoder._get_feature_vector_attention_mask(
encoder_outputs[0].shape[1], attention_mask tf.shape(encoder_outputs[0])[1], attention_mask
) )
else: else:
encoder_attention_mask = None encoder_attention_mask = None
...@@ -1465,8 +1453,8 @@ class TFSpeech2TextForConditionalGeneration(TFSpeech2TextPreTrainedModel, TFCaus ...@@ -1465,8 +1453,8 @@ class TFSpeech2TextForConditionalGeneration(TFSpeech2TextPreTrainedModel, TFCaus
enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None
enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None
return TFSeq2SeqModelOutput( return TFSeq2SeqLMOutput(
last_hidden_state=output.last_hidden_state, logits=output.logits,
past_key_values=pkv, past_key_values=pkv,
decoder_hidden_states=dec_hs, decoder_hidden_states=dec_hs,
decoder_attentions=dec_attns, decoder_attentions=dec_attns,
......
...@@ -227,7 +227,7 @@ def window_reverse(windows: tf.Tensor, window_size: int, height: int, width: int ...@@ -227,7 +227,7 @@ def window_reverse(windows: tf.Tensor, window_size: int, height: int, width: int
Merges windows to produce higher resolution features. Merges windows to produce higher resolution features.
""" """
x = shape_list(windows)[0] x = shape_list(windows)[0]
y = tf.cast(height * width / window_size / window_size, tf.int32) y = tf.cast(height * width / (window_size * window_size), tf.int32)
batch_size = int(x / y) batch_size = int(x / y)
windows = tf.reshape( windows = tf.reshape(
windows, (batch_size, height // window_size, width // window_size, window_size, window_size, -1) windows, (batch_size, height // window_size, width // window_size, window_size, window_size, -1)
...@@ -541,7 +541,7 @@ class TFSwinSelfAttention(tf.keras.layers.Layer): ...@@ -541,7 +541,7 @@ class TFSwinSelfAttention(tf.keras.layers.Layer):
attention_scores = attention_scores + tf.expand_dims(relative_position_bias, 0) attention_scores = attention_scores + tf.expand_dims(relative_position_bias, 0)
if attention_mask is not None: if attention_mask is not None:
# Apply the attention mask is (precomputed for all layers in SwinModel forward() function) # Apply the attention mask is (precomputed for all layers in SwinModel call() function)
mask_shape = shape_list(attention_mask)[0] mask_shape = shape_list(attention_mask)[0]
attention_scores = tf.reshape( attention_scores = tf.reshape(
attention_scores, (batch_size // mask_shape, mask_shape, self.num_attention_heads, dim, dim) attention_scores, (batch_size // mask_shape, mask_shape, self.num_attention_heads, dim, dim)
...@@ -647,10 +647,10 @@ class TFSwinLayer(tf.keras.layers.Layer): ...@@ -647,10 +647,10 @@ class TFSwinLayer(tf.keras.layers.Layer):
) -> None: ) -> None:
super().__init__(**kwargs) super().__init__(**kwargs)
self.chunk_size_feed_forward = config.chunk_size_feed_forward self.chunk_size_feed_forward = config.chunk_size_feed_forward
self.shift_size = shift_size min_res = tf.reduce_min(input_resolution)
self.window_size = config.window_size self.window_size = min_res if min_res <= config.window_size else config.window_size
self.shift_size = 0 if min_res <= self.window_size else shift_size
self.input_resolution = input_resolution self.input_resolution = input_resolution
self.set_shift_and_window_size(input_resolution)
self.layernorm_before = tf.keras.layers.LayerNormalization( self.layernorm_before = tf.keras.layers.LayerNormalization(
epsilon=config.layer_norm_eps, name="layernorm_before" epsilon=config.layer_norm_eps, name="layernorm_before"
...@@ -659,7 +659,7 @@ class TFSwinLayer(tf.keras.layers.Layer): ...@@ -659,7 +659,7 @@ class TFSwinLayer(tf.keras.layers.Layer):
self.drop_path = ( self.drop_path = (
TFSwinDropPath(config.drop_path_rate, name="drop_path") TFSwinDropPath(config.drop_path_rate, name="drop_path")
if config.drop_path_rate > 0.0 if config.drop_path_rate > 0.0
else tf.identity(name="drop_path") else tf.keras.layers.Activation("linear", name="drop_path")
) )
self.layernorm_after = tf.keras.layers.LayerNormalization( self.layernorm_after = tf.keras.layers.LayerNormalization(
epsilon=config.layer_norm_eps, name="layernorm_after" epsilon=config.layer_norm_eps, name="layernorm_after"
...@@ -667,36 +667,20 @@ class TFSwinLayer(tf.keras.layers.Layer): ...@@ -667,36 +667,20 @@ class TFSwinLayer(tf.keras.layers.Layer):
self.intermediate = TFSwinIntermediate(config, dim, name="intermediate") self.intermediate = TFSwinIntermediate(config, dim, name="intermediate")
self.swin_output = TFSwinOutput(config, dim, name="output") self.swin_output = TFSwinOutput(config, dim, name="output")
def set_shift_and_window_size(self, input_resolution: Tuple[int, int]) -> None: def get_attn_mask(self, height: int, width: int, window_size: int, shift_size: int) -> Optional[tf.Tensor]:
if min(input_resolution) <= self.window_size:
# if window size is larger than input resolution, we don't partition windows
self.shift_size = 0
self.window_size = min(input_resolution)
def get_attn_mask(self, height: int, width: int) -> Optional[tf.Tensor]:
if self.shift_size > 0:
# calculate attention mask for SW-MSA
img_mask = tf.zeros((height, width)) img_mask = tf.zeros((height, width))
height_slices = ( height_slices = ((0, -window_size), (-window_size, -shift_size), (-shift_size, -1))
(0, -self.window_size), width_slices = ((0, -window_size), (-window_size, -shift_size), (-shift_size, -1))
(-self.window_size, -self.shift_size),
(-self.shift_size, -1),
)
width_slices = (
(0, -self.window_size),
(-self.window_size, -self.shift_size),
(-self.shift_size, -1),
)
# calculate attention mask for SW-MSA
if shift_size > 0:
count = 0 count = 0
for height_slice in height_slices: for height_slice in height_slices:
for width_slice in width_slices: for width_slice in width_slices:
indices = [ height_inds = tf.range(height_slice[0] % height, height_slice[1] % height + 1)
[i, j] width_inds = tf.range(width_slice[0] % width, width_slice[1] % width + 1)
for i in range(height_slice[0] % height, height_slice[1] % height + 1) indices = tf.reshape(tf.stack(tf.meshgrid(height_inds, width_inds), axis=-1), (-1, 2))
for j in range(width_slice[0] % width, width_slice[1] % width + 1) if len(indices) >= 1:
]
if indices:
updates = tf.ones((len(indices),), dtype=img_mask.dtype) * count updates = tf.ones((len(indices),), dtype=img_mask.dtype) * count
img_mask = tf.tensor_scatter_nd_update(img_mask, indices, updates) img_mask = tf.tensor_scatter_nd_update(img_mask, indices, updates)
count += 1 count += 1
...@@ -709,14 +693,12 @@ class TFSwinLayer(tf.keras.layers.Layer): ...@@ -709,14 +693,12 @@ class TFSwinLayer(tf.keras.layers.Layer):
attn_mask = tf.expand_dims(mask_windows, 1) - tf.expand_dims(mask_windows, 2) attn_mask = tf.expand_dims(mask_windows, 1) - tf.expand_dims(mask_windows, 2)
attn_mask = tf.where(attn_mask != 0, float(-100.0), attn_mask) attn_mask = tf.where(attn_mask != 0, float(-100.0), attn_mask)
attn_mask = tf.where(attn_mask == 0, float(0.0), attn_mask) attn_mask = tf.where(attn_mask == 0, float(0.0), attn_mask)
else:
attn_mask = None
return attn_mask return attn_mask
def maybe_pad(self, hidden_states: tf.Tensor, height: int, width: int) -> Tuple[tf.Tensor, tf.Tensor]: def maybe_pad(self, hidden_states: tf.Tensor, height: int, width: int) -> Tuple[tf.Tensor, tf.Tensor]:
pad_right = (self.window_size - width % self.window_size) % self.window_size pad_right = (self.window_size - width % self.window_size) % self.window_size
pad_bottom = (self.window_size - height % self.window_size) % self.window_size pad_bottom = (self.window_size - height % self.window_size) % self.window_size
pad_values = tf.constant([[0, 0], [0, pad_bottom], [0, pad_right], [0, 0]]) pad_values = [[0, 0], [0, pad_bottom], [0, pad_right], [0, 0]]
hidden_states = tf.pad(hidden_states, pad_values) hidden_states = tf.pad(hidden_states, pad_values)
pad_values = tf.reshape(pad_values, (-1,)) pad_values = tf.reshape(pad_values, (-1,))
return hidden_states, pad_values return hidden_states, pad_values
...@@ -729,7 +711,11 @@ class TFSwinLayer(tf.keras.layers.Layer): ...@@ -729,7 +711,11 @@ class TFSwinLayer(tf.keras.layers.Layer):
output_attentions: bool = False, output_attentions: bool = False,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
self.set_shift_and_window_size(input_dimensions) # if window size is larger than input resolution, we don't partition windows
min_res = tf.reduce_min(input_dimensions)
shift_size = 0 if min_res <= self.window_size else self.shift_size
window_size = min_res if min_res <= self.window_size else self.window_size
height, width = input_dimensions height, width = input_dimensions
batch_size, _, channels = shape_list(hidden_states) batch_size, _, channels = shape_list(hidden_states)
shortcut = hidden_states shortcut = hidden_states
...@@ -741,15 +727,17 @@ class TFSwinLayer(tf.keras.layers.Layer): ...@@ -741,15 +727,17 @@ class TFSwinLayer(tf.keras.layers.Layer):
_, height_pad, width_pad, _ = shape_list(hidden_states) _, height_pad, width_pad, _ = shape_list(hidden_states)
# cyclic shift # cyclic shift
if self.shift_size > 0: if shift_size > 0:
shifted_hidden_states = tf.roll(hidden_states, shift=(-self.shift_size, -self.shift_size), axis=(1, 2)) shifted_hidden_states = tf.roll(hidden_states, shift=(-shift_size, -shift_size), axis=(1, 2))
else: else:
shifted_hidden_states = hidden_states shifted_hidden_states = hidden_states
# partition windows # partition windows
hidden_states_windows = window_partition(shifted_hidden_states, self.window_size) hidden_states_windows = window_partition(shifted_hidden_states, window_size)
hidden_states_windows = tf.reshape(hidden_states_windows, (-1, self.window_size * self.window_size, channels)) hidden_states_windows = tf.reshape(hidden_states_windows, (-1, window_size * window_size, channels))
attn_mask = self.get_attn_mask(height_pad, width_pad) attn_mask = self.get_attn_mask(
height=height_pad, width=width_pad, window_size=window_size, shift_size=shift_size
)
attention_outputs = self.attention( attention_outputs = self.attention(
hidden_states_windows, attn_mask, head_mask, output_attentions=output_attentions, training=training hidden_states_windows, attn_mask, head_mask, output_attentions=output_attentions, training=training
...@@ -757,12 +745,12 @@ class TFSwinLayer(tf.keras.layers.Layer): ...@@ -757,12 +745,12 @@ class TFSwinLayer(tf.keras.layers.Layer):
attention_output = attention_outputs[0] attention_output = attention_outputs[0]
attention_windows = tf.reshape(attention_output, (-1, self.window_size, self.window_size, channels)) attention_windows = tf.reshape(attention_output, (-1, window_size, window_size, channels))
shifted_windows = window_reverse(attention_windows, self.window_size, height_pad, width_pad) shifted_windows = window_reverse(attention_windows, window_size, height_pad, width_pad)
# reverse cyclic shift # reverse cyclic shift
if self.shift_size > 0: if shift_size > 0:
attention_windows = tf.roll(shifted_windows, shift=(self.shift_size, self.shift_size), axis=(1, 2)) attention_windows = tf.roll(shifted_windows, shift=(shift_size, shift_size), axis=(1, 2))
else: else:
attention_windows = shifted_windows attention_windows = shifted_windows
...@@ -961,6 +949,17 @@ class TFSwinPreTrainedModel(TFPreTrainedModel): ...@@ -961,6 +949,17 @@ class TFSwinPreTrainedModel(TFPreTrainedModel):
) )
return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)}
@tf.function(
input_signature=[
{
"pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
SWIN_START_DOCSTRING = r""" SWIN_START_DOCSTRING = r"""
This model is a Tensorflow This model is a Tensorflow
...@@ -1223,6 +1222,16 @@ class TFSwinModel(TFSwinPreTrainedModel): ...@@ -1223,6 +1222,16 @@ class TFSwinModel(TFSwinPreTrainedModel):
return swin_outputs return swin_outputs
def serving_output(self, output: TFSwinModelOutput) -> TFSwinModelOutput:
# hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFSwinModelOutput(
last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output,
hidden_states=output.hidden_states,
attentions=output.attentions,
reshaped_hidden_states=output.reshaped_hidden_states,
)
class TFSwinPixelShuffle(tf.keras.layers.Layer): class TFSwinPixelShuffle(tf.keras.layers.Layer):
"""TF layer implementation of torch.nn.PixelShuffle""" """TF layer implementation of torch.nn.PixelShuffle"""
...@@ -1377,6 +1386,15 @@ class TFSwinForMaskedImageModeling(TFSwinPreTrainedModel): ...@@ -1377,6 +1386,15 @@ class TFSwinForMaskedImageModeling(TFSwinPreTrainedModel):
reshaped_hidden_states=outputs.reshaped_hidden_states, reshaped_hidden_states=outputs.reshaped_hidden_states,
) )
def serving_output(self, output: TFSwinMaskedImageModelingOutput) -> TFSwinMaskedImageModelingOutput:
# hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFSwinMaskedImageModelingOutput(
logits=output.logits,
hidden_states=output.hidden_states,
attentions=output.attentions,
reshaped_hidden_states=output.reshaped_hidden_states,
)
@add_start_docstrings( @add_start_docstrings(
""" """
...@@ -1396,7 +1414,7 @@ class TFSwinForImageClassification(TFSwinPreTrainedModel, TFSequenceClassificati ...@@ -1396,7 +1414,7 @@ class TFSwinForImageClassification(TFSwinPreTrainedModel, TFSequenceClassificati
self.classifier = ( self.classifier = (
tf.keras.layers.Dense(config.num_labels, name="classifier") tf.keras.layers.Dense(config.num_labels, name="classifier")
if config.num_labels > 0 if config.num_labels > 0
else tf.identity(name="classifier") else tf.keras.layers.Activation("linear", name="classifier")
) )
@add_start_docstrings_to_model_forward(SWIN_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(SWIN_INPUTS_DOCSTRING)
...@@ -1452,3 +1470,12 @@ class TFSwinForImageClassification(TFSwinPreTrainedModel, TFSequenceClassificati ...@@ -1452,3 +1470,12 @@ class TFSwinForImageClassification(TFSwinPreTrainedModel, TFSequenceClassificati
attentions=outputs.attentions, attentions=outputs.attentions,
reshaped_hidden_states=outputs.reshaped_hidden_states, reshaped_hidden_states=outputs.reshaped_hidden_states,
) )
def serving_output(self, output: TFSwinImageClassifierOutput) -> TFSwinImageClassifierOutput:
# hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
return TFSwinImageClassifierOutput(
logits=output.logits,
hidden_states=output.hidden_states,
attentions=output.attentions,
reshaped_hidden_states=output.reshaped_hidden_states,
)
...@@ -862,6 +862,19 @@ class TFTapasPreTrainedModel(TFPreTrainedModel): ...@@ -862,6 +862,19 @@ class TFTapasPreTrainedModel(TFPreTrainedModel):
config_class = TapasConfig config_class = TapasConfig
base_model_prefix = "tapas" base_model_prefix = "tapas"
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
"token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
TAPAS_START_DOCSTRING = r""" TAPAS_START_DOCSTRING = r"""
...@@ -1021,14 +1034,14 @@ class TFTapasModel(TFTapasPreTrainedModel): ...@@ -1021,14 +1034,14 @@ class TFTapasModel(TFTapasPreTrainedModel):
return outputs return outputs
def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutputWithPooling( return TFBaseModelOutputWithPooling(
last_hidden_state=output.last_hidden_state, last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output, pooler_output=output.pooler_output,
hidden_states=hs, hidden_states=hidden_states,
attentions=attns, attentions=attentions,
) )
...@@ -1128,10 +1141,10 @@ class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss): ...@@ -1128,10 +1141,10 @@ class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss):
) )
def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) return TFMaskedLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
class TFTapasComputeTokenLogits(tf.keras.layers.Layer): class TFTapasComputeTokenLogits(tf.keras.layers.Layer):
...@@ -1557,11 +1570,14 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel): ...@@ -1557,11 +1570,14 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
) )
def serving_output(self, output: TFTableQuestionAnsweringOutput) -> TFTableQuestionAnsweringOutput: def serving_output(self, output: TFTableQuestionAnsweringOutput) -> TFTableQuestionAnsweringOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTableQuestionAnsweringOutput( return TFTableQuestionAnsweringOutput(
logits=output.logits, logits_aggregation=output.logits_aggregation, hidden_states=hs, attentions=attns logits=output.logits,
logits_aggregation=output.logits_aggregation,
hidden_states=hidden_states,
attentions=attentions,
) )
...@@ -1667,10 +1683,10 @@ class TFTapasForSequenceClassification(TFTapasPreTrainedModel, TFSequenceClassif ...@@ -1667,10 +1683,10 @@ class TFTapasForSequenceClassification(TFTapasPreTrainedModel, TFSequenceClassif
) )
def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
""" TAPAS utilities.""" """ TAPAS utilities."""
......
...@@ -722,8 +722,8 @@ class TFViTMAEPreTrainedModel(TFPreTrainedModel): ...@@ -722,8 +722,8 @@ class TFViTMAEPreTrainedModel(TFPreTrainedModel):
inputs (`Dict[str, tf.Tensor]`): inputs (`Dict[str, tf.Tensor]`):
The input of the saved model as a dictionary of tensors. The input of the saved model as a dictionary of tensors.
""" """
output = self.call(inputs)
return self.call(inputs) return self.serving_output(output)
VIT_MAE_START_DOCSTRING = r""" VIT_MAE_START_DOCSTRING = r"""
...@@ -842,6 +842,18 @@ class TFViTMAEModel(TFViTMAEPreTrainedModel): ...@@ -842,6 +842,18 @@ class TFViTMAEModel(TFViTMAEPreTrainedModel):
return outputs return outputs
def serving_output(self, output: TFViTMAEModelOutput) -> TFViTMAEModelOutput:
hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFViTMAEModelOutput(
last_hidden_state=output.last_hidden_state,
mask=output.mask,
ids_restore=output.ids_restore,
hidden_states=hidden_states,
attentions=attentions,
)
class TFViTMAEDecoder(tf.keras.layers.Layer): class TFViTMAEDecoder(tf.keras.layers.Layer):
def __init__(self, config, num_patches, **kwargs): def __init__(self, config, num_patches, **kwargs):
...@@ -1143,3 +1155,15 @@ class TFViTMAEForPreTraining(TFViTMAEPreTrainedModel): ...@@ -1143,3 +1155,15 @@ class TFViTMAEForPreTraining(TFViTMAEPreTrainedModel):
hidden_states=outputs.hidden_states, hidden_states=outputs.hidden_states,
attentions=outputs.attentions, attentions=outputs.attentions,
) )
def serving_output(self, output: TFViTMAEForPreTrainingOutput) -> TFViTMAEForPreTrainingOutput:
hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFViTMAEForPreTrainingOutput(
logits=output.logits,
mask=output.mask,
ids_restore=output.ids_restore,
hidden_states=hidden_states,
attentions=attentions,
)
...@@ -268,12 +268,13 @@ def _compute_mask_indices( ...@@ -268,12 +268,13 @@ def _compute_mask_indices(
f" `sequence_length`: {sequence_length}`" f" `sequence_length`: {sequence_length}`"
) )
# compute number of masked spans in batch # compute number of masked spans in batch
num_masked_spans = int(mask_prob * sequence_length / mask_length + tf.random.uniform((1,))) num_masked_spans = mask_prob * sequence_length / mask_length + tf.random.uniform((1,))
num_masked_spans = max(num_masked_spans, min_masks) num_masked_spans = tf.maximum(num_masked_spans, min_masks)
num_masked_spans = tf.cast(num_masked_spans, tf.int32)
# make sure num masked indices <= sequence_length # make sure num masked indices <= sequence_length
if num_masked_spans * mask_length > sequence_length: num_masked_spans = tf.math.minimum(sequence_length // mask_length, num_masked_spans)
num_masked_spans = sequence_length // mask_length num_masked_spans = tf.squeeze(num_masked_spans)
# SpecAugment mask to fill # SpecAugment mask to fill
spec_aug_mask = tf.zeros((batch_size, sequence_length), dtype=tf.int32) spec_aug_mask = tf.zeros((batch_size, sequence_length), dtype=tf.int32)
...@@ -297,7 +298,7 @@ def _compute_mask_indices( ...@@ -297,7 +298,7 @@ def _compute_mask_indices(
# scatter indices to mask # scatter indices to mask
spec_aug_mask = _scatter_values_on_batch_indices( spec_aug_mask = _scatter_values_on_batch_indices(
tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, spec_aug_mask.shape tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, tf.shape(spec_aug_mask)
) )
return spec_aug_mask return spec_aug_mask
...@@ -1352,7 +1353,15 @@ class TFWav2Vec2PreTrainedModel(TFPreTrainedModel): ...@@ -1352,7 +1353,15 @@ class TFWav2Vec2PreTrainedModel(TFPreTrainedModel):
"to train/fine-tine this model, you need a GPU or a TPU" "to train/fine-tine this model, you need a GPU or a TPU"
) )
@tf.function @tf.function(
input_signature=[
{
"input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
"token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
}
]
)
def serving(self, inputs): def serving(self, inputs):
output = self.call(input_values=inputs, training=False) output = self.call(input_values=inputs, training=False)
...@@ -1544,14 +1553,14 @@ class TFWav2Vec2Model(TFWav2Vec2PreTrainedModel): ...@@ -1544,14 +1553,14 @@ class TFWav2Vec2Model(TFWav2Vec2PreTrainedModel):
return outputs return outputs
def serving_output(self, output): def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFWav2Vec2BaseModelOutput( return TFWav2Vec2BaseModelOutput(
last_hidden_state=output.last_hidden_state, last_hidden_state=output.last_hidden_state,
extract_features=output.extract_features, extract_features=output.extract_features,
hidden_states=hs, hidden_states=hidden_states,
attentions=attns, attentions=attentions,
) )
...@@ -1726,6 +1735,6 @@ class TFWav2Vec2ForCTC(TFWav2Vec2PreTrainedModel): ...@@ -1726,6 +1735,6 @@ class TFWav2Vec2ForCTC(TFWav2Vec2PreTrainedModel):
) )
def serving_output(self, output: TFCausalLMOutput) -> TFCausalLMOutput: def serving_output(self, output: TFCausalLMOutput) -> TFCausalLMOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFCausalLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) return TFCausalLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
...@@ -18,7 +18,7 @@ import unittest ...@@ -18,7 +18,7 @@ import unittest
import numpy as np import numpy as np
from transformers import BartConfig, BartTokenizer, is_tf_available from transformers import BartConfig, BartTokenizer, is_tf_available
from transformers.testing_utils import require_tf, slow from transformers.testing_utils import require_tf, slow, tooslow
from transformers.utils import cached_property from transformers.utils import cached_property
from ...test_configuration_common import ConfigTester from ...test_configuration_common import ConfigTester
...@@ -293,8 +293,8 @@ class TFBartModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, unittest.TestC ...@@ -293,8 +293,8 @@ class TFBartModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, unittest.TestC
models_equal = False models_equal = False
self.assertTrue(models_equal) self.assertTrue(models_equal)
@tooslow
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI
pass pass
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
import unittest import unittest
from transformers import BlenderbotConfig, BlenderbotTokenizer, is_tf_available from transformers import BlenderbotConfig, BlenderbotTokenizer, is_tf_available
from transformers.testing_utils import require_tf, require_tokenizers, slow from transformers.testing_utils import require_tf, require_tokenizers, slow, tooslow
from transformers.utils import cached_property from transformers.utils import cached_property
from ...test_configuration_common import ConfigTester from ...test_configuration_common import ConfigTester
...@@ -213,8 +213,8 @@ class TFBlenderbotModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -213,8 +213,8 @@ class TFBlenderbotModelTest(TFModelTesterMixin, unittest.TestCase):
name = model.get_bias() name = model.get_bias()
assert name is None assert name is None
@tooslow
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI
pass pass
def test_resize_token_embeddings(self): def test_resize_token_embeddings(self):
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
import unittest import unittest
from transformers import BlenderbotSmallConfig, BlenderbotSmallTokenizer, is_tf_available from transformers import BlenderbotSmallConfig, BlenderbotSmallTokenizer, is_tf_available
from transformers.testing_utils import require_tf, require_tokenizers, slow from transformers.testing_utils import require_tf, require_tokenizers, slow, tooslow
from transformers.utils import cached_property from transformers.utils import cached_property
from ...test_configuration_common import ConfigTester from ...test_configuration_common import ConfigTester
...@@ -278,8 +278,8 @@ class TFBlenderbotSmallModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -278,8 +278,8 @@ class TFBlenderbotSmallModelTest(TFModelTesterMixin, unittest.TestCase):
models_equal = False models_equal = False
self.assertTrue(models_equal) self.assertTrue(models_equal)
@tooslow
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI
pass pass
......
...@@ -606,11 +606,21 @@ class TFCLIPModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -606,11 +606,21 @@ class TFCLIPModelTest(TFModelTesterMixin, unittest.TestCase):
model = TFCLIPModel.from_pretrained(model_name) model = TFCLIPModel.from_pretrained(model_name)
self.assertIsNotNone(model) self.assertIsNotNone(model)
@unittest.skip(reason="Currently `saved_model` doesn't work with nested outputs.")
@slow
def test_saved_model_creation(self):
pass
@unittest.skip(reason="Currently `saved_model` doesn't work with nested outputs.") @unittest.skip(reason="Currently `saved_model` doesn't work with nested outputs.")
@slow @slow
def test_saved_model_creation_extended(self): def test_saved_model_creation_extended(self):
pass pass
@unittest.skip(reason="`saved_model` doesn't work with nested outputs so no preparation happens.")
@slow
def test_prepare_serving_output(self):
pass
# We will verify our results on an image of cute cats # We will verify our results on an image of cute cats
def prepare_img(): def prepare_img():
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
import unittest import unittest
from transformers import FunnelConfig, is_tf_available from transformers import FunnelConfig, is_tf_available
from transformers.testing_utils import require_tf from transformers.testing_utils import require_tf, tooslow
from ...test_configuration_common import ConfigTester from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
...@@ -371,8 +371,8 @@ class TFFunnelModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -371,8 +371,8 @@ class TFFunnelModelTest(TFModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_question_answering(*config_and_inputs) self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
@tooslow
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI
pass pass
def test_compile_tf_model(self): def test_compile_tf_model(self):
...@@ -407,6 +407,6 @@ class TFFunnelBaseModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -407,6 +407,6 @@ class TFFunnelBaseModelTest(TFModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs) self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
@tooslow
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI
pass pass
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
import unittest import unittest
from transformers import LEDConfig, is_tf_available from transformers import LEDConfig, is_tf_available
from transformers.testing_utils import require_tf, slow from transformers.testing_utils import require_tf, slow, tooslow
from ...test_configuration_common import ConfigTester from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor
...@@ -365,8 +365,8 @@ class TFLEDModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -365,8 +365,8 @@ class TFLEDModelTest(TFModelTesterMixin, unittest.TestCase):
# TODO JP: Make LED XLA compliant # TODO JP: Make LED XLA compliant
pass pass
@tooslow
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI
pass pass
def test_generate_with_headmasking(self): def test_generate_with_headmasking(self):
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
import unittest import unittest
from transformers import is_tf_available from transformers import is_tf_available
from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow
from ...test_configuration_common import ConfigTester from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
...@@ -326,8 +326,8 @@ class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -326,8 +326,8 @@ class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs) self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
@tooslow
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI
pass pass
def test_xla_mode(self): def test_xla_mode(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment