Unverified Commit 812045ad authored by Julien Plu, committed by GitHub

New serving (#9419)

* Add a serving method

* Add albert

* Add serving for BERT and BART

* Add more models

* Finish the serving addition

* Temp fix

* Restore DPR

* Fix funnel attribute

* Fix attributes GPT2

* Fix OpenAIGPT attribute

* Fix T5 attributes

* Fix Bart attributes

* Fix TransfoXL attributes

* Add versioning

* better test

* Update template

* Fix Flaubert

* Fix T5

* Apply style

* Remove unused imports

* Deactivate extra parameters

* Remove too long test + saved_model default to False

* Ignore the saved model test for some models

* Fix some inputs

* Fix mpnet serving

* Trigger CI

* Address all comments
parent 390cf16b
@@ -272,14 +272,13 @@ def booleans_processing(config, **kwargs):
if kwargs["output_hidden_states"] is not None
else config.output_hidden_states
)
final_booleans["return_dict"] = (
kwargs["return_dict"] if kwargs["return_dict"] is not None else config.return_dict
)
if "use_cache" in kwargs:
final_booleans["use_cache"] = kwargs["use_cache"] if kwargs["use_cache"] is not None else config.use_cache
else:
if (
kwargs["output_attentions"] is not None
@@ -294,11 +293,8 @@ def booleans_processing(config, **kwargs):
final_booleans["output_attentions"] = config.output_attentions
final_booleans["output_hidden_states"] = config.output_hidden_states
if kwargs["return_dict"] is not None:
logger.warning("The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.")
final_booleans["return_dict"] = True
if "use_cache" in kwargs:
@@ -568,7 +564,9 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
Returns:
:obj:`Dict[str, tf.Tensor]`: The dummy inputs.
"""
return {
"input_ids": tf.constant(DUMMY_INPUTS),
}
def __init__(self, config, *inputs, **kwargs):
super().__init__(*inputs, **kwargs)
@@ -584,6 +582,37 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
self.config = config
self.name_or_path = config.name_or_path
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
"token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
}
]
)
def serving(self, inputs):
"""
Method used for serving the model.
Args:
inputs (:obj:`Dict[str, tf.Tensor]`):
The input of the saved model as a dictionary of tensors.
"""
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
"""
Prepare the output of the saved model. Each model must implement this function.
Args:
output (:obj:`~transformers.TFBaseModelOutput`):
The output returned by the model.
"""
raise NotImplementedError
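For orientation, here is a small sketch of how the new entry point behaves on a concrete subclass once the per-model `serving_output` overrides below are in place. The `TFBertModel` checkpoint and the token ids are only illustrative assumptions; any of the TF models touched by this commit works the same way.

import tensorflow as tf
from transformers import TFBertModel

model = TFBertModel.from_pretrained("bert-base-uncased")  # example checkpoint

# Shapes follow the (batch, sequence) TensorSpecs declared on `serving`.
dummy = {
    "input_ids": tf.constant([[101, 2023, 102]], dtype=tf.int32),
    "attention_mask": tf.constant([[1, 1, 1]], dtype=tf.int32),
    "token_type_ids": tf.constant([[0, 0, 0]], dtype=tf.int32),
}

# `serving` runs the model in graph mode and routes the result through the
# subclass's `serving_output`, which converts the optional hidden_states and
# attentions tuples to dense tensors (or leaves them as None when the config
# flags are off).
out = model.serving(dummy)
print(out.last_hidden_state.shape, out.pooler_output.shape)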
def get_input_embeddings(self) -> tf.keras.layers.Layer:
"""
Returns the model's input embeddings.
@@ -808,7 +837,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
"""
raise NotImplementedError
def save_pretrained(self, save_directory, saved_model=False, version=1):
"""
Save a model and its configuration file to a directory, so that it can be re-loaded using the
:func:`~transformers.TFPreTrainedModel.from_pretrained` class method.
@@ -816,12 +845,23 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
Arguments:
save_directory (:obj:`str`):
Directory to which to save. Will be created if it doesn't exist.
saved_model (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether or not to save the model in the TensorFlow SavedModel format as well.
version (:obj:`int`, `optional`, defaults to 1):
The version of the saved model. A saved model needs to be versioned in order to be properly loaded by
TensorFlow Serving as detailed in the official documentation
https://www.tensorflow.org/tfx/serving/serving_basic
"""
if os.path.isfile(save_directory):
logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
return
os.makedirs(save_directory, exist_ok=True)
if saved_model:
saved_model_dir = os.path.join(save_directory, "saved_model", str(version))
self.save(saved_model_dir, include_optimizer=False, signatures=self.serving)
logger.info(f"Saved model created in {saved_model_dir}")
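The docstring above describes the new export path; the following is a minimal usage sketch (checkpoint name, directory, and input text are only examples, not part of this diff). TensorFlow Serving can then be pointed at the `saved_model` sub-directory, as described in the linked serving_basic guide.

import tensorflow as tf
from transformers import BertTokenizer, TFBertModel

model = TFBertModel.from_pretrained("bert-base-uncased")

# Writes config.json and tf_model.h5 as before, plus a TF Serving-ready
# SavedModel under exported_bert/saved_model/1 (the `version` argument).
model.save_pretrained("exported_bert", saved_model=True, version=1)

# The exported signature is the @tf.function `serving` defined above.
loaded = tf.saved_model.load("exported_bert/saved_model/1")
serve = loaded.signatures["serving_default"]

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
batch = tokenizer("Hello world!", return_tensors="tf")
outputs = serve(
    input_ids=tf.cast(batch["input_ids"], tf.int32),
    attention_mask=tf.cast(batch["attention_mask"], tf.int32),
    token_type_ids=tf.cast(batch["token_type_ids"], tf.int32),
)
print(list(outputs.keys()))  # e.g. ['last_hidden_state', 'pooler_output']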
# Save configuration file
self.config.save_pretrained(save_directory)
@@ -1033,7 +1073,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
# Load from a PyTorch checkpoint
return load_pytorch_checkpoint_in_tf2_model(model, resolved_archive_file, allow_missing_keys=True)
model(model.dummy_inputs) # build the network with dummy inputs
assert os.path.isfile(resolved_archive_file), "Error retrieving file {}".format(resolved_archive_file)
# 'by_name' allow us to do transfer learning by skipping/adding layers
@@ -1046,7 +1086,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
"If you tried to load a TF 2.0 model from a PyTorch checkpoint, please set from_pt=True. "
)
model(model.dummy_inputs) # Make sure restore ops are run
if cls._keys_to_ignore_on_load_missing is not None:
for pat in cls._keys_to_ignore_on_load_missing:
...
@@ -803,6 +803,17 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
return outputs
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutputWithPooling(
last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -928,6 +939,17 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel):
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFAlbertForPreTrainingOutput(
prediction_logits=output.prediction_logits,
sop_logits=output.sop_logits,
hidden_states=hs,
attentions=attns,
)
class TFAlbertSOPHead(tf.keras.layers.Layer):
def __init__(self, config, **kwargs):
@@ -1058,6 +1080,16 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss)
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMaskedLMOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1154,6 +1186,16 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1249,6 +1291,16 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTokenClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1357,6 +1409,17 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits,
end_logits=output.end_logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1486,3 +1549,27 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
"token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMultipleChoiceModelOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
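The multiple-choice heads override `serving` because their inputs carry an extra choices dimension: the rank-3 specs above encode (batch_size, num_choices, sequence_length) instead of the default (batch_size, sequence_length). A hedged sketch of what such a signature expects (checkpoint and token ids are arbitrary examples):

import tensorflow as tf
from transformers import TFAlbertForMultipleChoice

# Example checkpoint; the multiple-choice head is freshly initialized, which is
# enough to sanity-check the shapes accepted by the serving signature.
model = TFAlbertForMultipleChoice.from_pretrained("albert-base-v2")

# (batch, num_choices, seq_len) = (1, 2, 4); the token ids are arbitrary.
inputs = {
    "input_ids": tf.constant([[[2, 45, 89, 3], [2, 45, 120, 3]]], dtype=tf.int32),
    "attention_mask": tf.ones((1, 2, 4), dtype=tf.int32),
    "token_type_ids": tf.zeros((1, 2, 4), dtype=tf.int32),
}

logits = model.serving(inputs).logits
print(logits.shape)  # (1, 2): one score per choice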
@@ -481,6 +481,21 @@ class TFBartPretrainedModel(TFPreTrainedModel):
}
return dummy_inputs
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
"decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
"decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
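Because BART is an encoder-decoder model, its base class widens the signature with explicit decoder inputs: the exported graph performs a single forward pass and cannot loop to generate, so a client must supply `decoder_input_ids` itself. A small sketch of the tensors such a signature expects (checkpoint and values are illustrative); the same dictionary can also be fed to the model's regular call, as done here:

import tensorflow as tf
from transformers import BartTokenizer, TFBartModel

model = TFBartModel.from_pretrained("facebook/bart-base")  # example checkpoint
tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")

enc = tokenizer("Serving BART", return_tensors="tf")

# Encoder inputs come from the tokenizer; decoder inputs must be provided
# explicitly, here just the decoder start token for a single step.
inputs = {
    "input_ids": tf.cast(enc["input_ids"], tf.int32),
    "attention_mask": tf.cast(enc["attention_mask"], tf.int32),
    "decoder_input_ids": tf.constant([[model.config.decoder_start_token_id]], dtype=tf.int32),
    "decoder_attention_mask": tf.constant([[1]], dtype=tf.int32),
}

outputs = model(inputs)  # one forward pass with the serving-style inputs
print(outputs.last_hidden_state.shape)  # (1, 1, d_model)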
class TFPretrainedBartModel(TFBartPretrainedModel):
def __init_subclass__(self):
@@ -1102,6 +1117,23 @@ class TFBartModel(TFBartPretrainedModel):
encoder_attentions=inputs["encoder_outputs"].attentions,
)
def serving_output(self, output):
pkv = (tf.tuple(output.past_key_values)[1] if self.config.use_cache else None,)
dec_hs = tf.convert_to_tensor(output.decoder_hidden_states) if self.config.output_hidden_states else None
dec_attns = tf.convert_to_tensor(output.decoder_attentions) if self.config.output_attentions else None
enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None
enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None
return TFSeq2SeqModelOutput(
last_hidden_state=output.last_hidden_state,
past_key_values=pkv,
decoder_hidden_states=dec_hs,
decoder_attentions=dec_attns,
encoder_last_hidden_state=output.encoder_last_hidden_state,
encoder_hidden_states=enc_hs,
encoder_attentions=enc_attns,
)
def get_input_embeddings(self):
return self.shared
@@ -1248,6 +1280,23 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel):
encoder_attentions=outputs.encoder_attentions, # 2 of e out
)
def serving_output(self, output):
pkv = (tf.tuple(output.past_key_values)[1] if self.config.use_cache else None,)
dec_hs = tf.convert_to_tensor(output.decoder_hidden_states) if self.config.output_hidden_states else None
dec_attns = tf.convert_to_tensor(output.decoder_attentions) if self.config.output_attentions else None
enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None
enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None
return TFSeq2SeqLMOutput(
logits=output.logits,
past_key_values=pkv,
decoder_hidden_states=dec_hs,
decoder_attentions=dec_attns,
encoder_last_hidden_state=output.encoder_last_hidden_state,
encoder_hidden_states=enc_hs,
encoder_attentions=enc_attns,
)
def prepare_inputs_for_generation(self, decoder_input_ids, past, attention_mask, use_cache, **kwargs) -> Dict:
assert past is not None and len(past) in {1, 2}, f"past has to be an iterable of length 1,2 got {past}"
if len(past) == 1:
...
@@ -888,6 +888,17 @@ class TFBertModel(TFBertPreTrainedModel):
return outputs
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutputWithPooling(
last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -999,6 +1010,17 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBertForPreTrainingOutput(
prediction_logits=output.prediction_logits,
seq_relationship_logits=output.seq_relationship_logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings("""Bert Model with a `language modeling` head on top. """, BERT_START_DOCSTRING)
class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
@@ -1102,6 +1124,16 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMaskedLMOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
# names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model
@@ -1205,6 +1237,16 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFCausalLMOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""Bert Model with a `next sentence prediction (classification)` head on top. """,
@@ -1302,6 +1344,16 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredi
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFNextSentencePredictorOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1397,6 +1449,16 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1525,6 +1587,30 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
attentions=outputs.attentions,
)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
"token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMultipleChoiceModelOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1625,6 +1711,16 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTokenClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1737,3 +1833,14 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits,
end_logits=output.end_logits,
hidden_states=hs,
attentions=attns,
)
@@ -594,6 +594,18 @@ class TFCTRLModel(TFCTRLPreTrainedModel):
)
return outputs
def serving_output(self, output):
pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutputWithPast(
last_hidden_state=output.last_hidden_state,
past_key_values=pkv,
hidden_states=hs,
attentions=attns,
)
class TFCTRLLMHead(tf.keras.layers.Layer):
def __init__(self, config, input_embeddings, **kwargs):
@@ -729,6 +741,18 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss):
attentions=transformer_outputs.attentions,
)
def serving_output(self, output):
pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFCausalLMOutputWithPast(
logits=output.logits,
past_key_values=pkv,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -885,3 +909,13 @@ class TFCTRLForSequenceClassification(TFCTRLPreTrainedModel, TFSequenceClassific
hidden_states=transformer_outputs.hidden_states,
attentions=transformer_outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@@ -484,6 +484,19 @@ class TFDistilBertPreTrainedModel(TFPreTrainedModel):
config_class = DistilBertConfig
base_model_prefix = "distilbert"
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
DISTILBERT_START_DOCSTRING = r"""
@@ -615,6 +628,16 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel):
)
return outputs
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutput(
last_hidden_state=output.last_hidden_state,
hidden_states=hs,
attentions=attns,
)
class TFDistilBertLMHead(tf.keras.layers.Layer):
def __init__(self, config, input_embeddings, **kwargs):
@@ -730,6 +753,16 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel, TFMaskedLanguageModel
attentions=distilbert_output.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMaskedLMOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -824,6 +857,16 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque
attentions=distilbert_output.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -908,6 +951,16 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTokenClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1031,6 +1084,29 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic
attentions=distilbert_output.attentions,
)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMultipleChoiceModelOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1130,3 +1206,14 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn
hidden_states=distilbert_output.hidden_states,
attentions=distilbert_output.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits,
end_logits=output.end_logits,
hidden_states=hs,
attentions=attns,
)
@@ -240,7 +240,6 @@ class TFDPRSpanPredictorLayer(tf.keras.layers.Layer):
self,
input_ids: tf.Tensor = None,
attention_mask: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
output_attentions: bool = False,
output_hidden_states: bool = False,
@@ -257,7 +256,6 @@ class TFDPRSpanPredictorLayer(tf.keras.layers.Layer):
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
inputs_embeds=inputs_embeds,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
@@ -425,6 +423,19 @@ class TFDPRPretrainedReader(TFPreTrainedModel):
config_class = DPRConfig
base_model_prefix = "reader"
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
###############
# Actual Models
@@ -643,6 +654,16 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder):
pooler_output=outputs.pooler_output, hidden_states=outputs.hidden_states, attentions=outputs.attentions
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFDPRContextEncoderOutput(
pooler_output=output.pooler_output,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"The bare DPRQuestionEncoder transformer outputting pooler outputs as question representations.",
@@ -730,6 +751,16 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder):
pooler_output=outputs.pooler_output, hidden_states=outputs.hidden_states, attentions=outputs.attentions
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFDPRQuestionEncoderOutput(
pooler_output=output.pooler_output,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"The bare DPRReader transformer outputting span predictions.",
@@ -749,7 +780,6 @@ class TFDPRReader(TFDPRPretrainedReader):
self,
input_ids=None,
attention_mask: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
output_attentions: bool = None,
output_hidden_states: bool = None,
@@ -782,7 +812,6 @@ class TFDPRReader(TFDPRPretrainedReader):
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
inputs_embeds=inputs_embeds,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
@@ -803,16 +832,24 @@ class TFDPRReader(TFDPRPretrainedReader):
if inputs["attention_mask"] is None:
inputs["attention_mask"] = tf.ones(input_shape, dtype=tf.dtypes.int32)
return self.span_predictor(
input_ids=inputs["input_ids"],
attention_mask=inputs["attention_mask"],
inputs_embeds=inputs["inputs_embeds"],
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=inputs["return_dict"],
training=inputs["training"],
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFDPRReaderOutput(
start_logits=output.start_logits,
end_logits=output.end_logits,
relevance_logits=output.relevance_logits,
hidden_states=hs,
attentions=attns,
)
@@ -800,6 +800,16 @@ class TFElectraModel(TFElectraPreTrainedModel):
return outputs
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutput(
last_hidden_state=output.last_hidden_state,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -886,6 +896,16 @@ class TFElectraForPreTraining(TFElectraPreTrainedModel):
attentions=discriminator_hidden_states.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFElectraForPreTrainingOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
class TFElectraMaskedLMHead(tf.keras.layers.Layer):
def __init__(self, config, input_embeddings, **kwargs):
@@ -1012,6 +1032,16 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel, TFMaskedLanguageModelingLos
attentions=generator_hidden_states.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMaskedLMOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
class TFElectraClassificationHead(tf.keras.layers.Layer):
"""Head for sentence-level classification tasks."""
@@ -1123,6 +1153,16 @@ class TFElectraForSequenceClassification(TFElectraPreTrainedModel, TFSequenceCla
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1249,6 +1289,30 @@ class TFElectraForMultipleChoice(TFElectraPreTrainedModel, TFMultipleChoiceLoss)
attentions=outputs.attentions,
)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
"token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMultipleChoiceModelOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1340,6 +1404,16 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific
attentions=discriminator_hidden_states.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTokenClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1447,3 +1521,14 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin
hidden_states=discriminator_hidden_states.hidden_states,
attentions=discriminator_hidden_states.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits,
end_logits=output.end_logits,
hidden_states=hs,
attentions=attns,
)
@@ -288,6 +288,16 @@ class TFFlaubertModel(TFFlaubertPreTrainedModel):
return outputs
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutput(
last_hidden_state=output.last_hidden_state,
hidden_states=hs,
attentions=attns,
)
# Copied from transformers.models.xlm.modeling_tf_xlm.TFXLMMultiHeadAttention with XLM->Flaubert
class TFFlaubertMultiHeadAttention(tf.keras.layers.Layer):
@@ -850,6 +860,16 @@ class TFFlaubertWithLMHeadModel(TFFlaubertPreTrainedModel):
logits=outputs, hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFFlaubertWithLMHeadModelOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...
@@ -1189,6 +1189,16 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel):
training=inputs["training"],
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutput(
last_hidden_state=output.last_hidden_state,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"The bare Funnel Transformer Model transformer outputting raw hidden-states without any specific head on top.",
@@ -1243,6 +1253,16 @@ class TFFunnelModel(TFFunnelPreTrainedModel):
training=inputs["training"],
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutput(
last_hidden_state=output.last_hidden_state,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1320,6 +1340,16 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
attentions=discriminator_hidden_states.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFFunnelForPreTrainingOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings("""Funnel Model with a `language modeling` head on top. """, FUNNEL_START_DOCSTRING)
class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss):
@@ -1404,6 +1434,16 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss)
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMaskedLMOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1487,6 +1527,16 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1602,6 +1652,30 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
attentions=outputs.attentions,
)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
"token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMultipleChoiceModelOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1688,6 +1762,16 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTokenClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1785,3 +1869,14 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits,
end_logits=output.end_logits,
hidden_states=hs,
attentions=attns,
)
@@ -416,6 +416,19 @@ class TFGPT2PreTrainedModel(TFPreTrainedModel):
# names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model
_keys_to_ignore_on_load_unexpected = [r"h.\d+.attn.bias"]
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
@dataclass
class TFGPT2DoubleHeadsModelOutput(ModelOutput):
@@ -617,6 +630,18 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
return outputs
def serving_output(self, output):
pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutputWithPast(
last_hidden_state=output.last_hidden_state,
past_key_values=pkv,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -723,6 +748,18 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
attentions=transformer_outputs.attentions,
)
def serving_output(self, output):
pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFCausalLMOutputWithPast(
logits=output.logits,
past_key_values=pkv,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -861,6 +898,33 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
attentions=transformer_outputs.attentions,
)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
"mc_token_ids": tf.TensorSpec((None, None), tf.int32, name="mc_token_ids"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFGPT2DoubleHeadsModelOutput(
logits=output.logits,
mc_logits=output.mc_logits,
past_key_values=pkv,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
@@ -1015,3 +1079,15 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific
hidden_states=transformer_outputs.hidden_states,
attentions=transformer_outputs.attentions,
)
def serving_output(self, output):
pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutputWithPast(
logits=output.logits,
past_key_values=pkv,
hidden_states=hs,
attentions=attns,
)
@@ -2028,6 +2028,25 @@ class TFLEDModel(TFLEDPreTrainedModel):
encoder_global_attentions=inputs["encoder_outputs"].global_attentions,
)
def serving_output(self, output):
pkv = (tf.tuple(output.past_key_values)[1] if self.config.use_cache else None,)
dec_hs = tf.convert_to_tensor(output.decoder_hidden_states) if self.config.output_hidden_states else None
dec_attns = tf.convert_to_tensor(output.decoder_attentions) if self.config.output_attentions else None
enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None
enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None
enc_g_attns = tf.convert_to_tensor(output.encoder_global_attentions) if self.config.output_attentions else None
return TFLEDSeq2SeqModelOutput(
last_hidden_state=output.last_hidden_state,
past_key_values=pkv,
decoder_hidden_states=dec_hs,
decoder_attentions=dec_attns,
encoder_last_hidden_state=output.encoder_last_hidden_state,
encoder_hidden_states=enc_hs,
encoder_attentions=enc_attns,
encoder_global_attentions=enc_g_attns,
)
def get_input_embeddings(self):
return self.shared
@@ -2177,6 +2196,25 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel):
encoder_global_attentions=outputs.encoder_global_attentions,
)
def serving_output(self, output):
pkv = (tf.tuple(output.past_key_values)[1] if self.config.use_cache else None,)
dec_hs = tf.convert_to_tensor(output.decoder_hidden_states) if self.config.output_hidden_states else None
dec_attns = tf.convert_to_tensor(output.decoder_attentions) if self.config.output_attentions else None
enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None
enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None
enc_g_attns = tf.convert_to_tensor(output.encoder_global_attentions) if self.config.output_attentions else None
return TFLEDSeq2SeqLMOutput(
logits=output.logits,
past_key_values=pkv,
decoder_hidden_states=dec_hs,
decoder_attentions=dec_attns,
encoder_last_hidden_state=output.encoder_last_hidden_state,
encoder_hidden_states=enc_hs,
encoder_attentions=enc_attns,
encoder_global_attentions=enc_g_attns,
)
def prepare_inputs_for_generation(self, decoder_input_ids, past, attention_mask, use_cache, **kwargs) -> Dict:
assert past is not None and len(past) in {1, 2}, f"past has to be an iterable of length 1,2 got {past}"
if len(past) == 1:
...
...@@ -1831,6 +1831,19 @@ class TFLongformerPreTrainedModel(TFPreTrainedModel):
"global_attention_mask": global_attention_mask,
}
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
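The serving method above wires the exported signature through call and serving_output. A hypothetical usage sketch, assuming the allenai/longformer-base-4096 checkpoint and an illustrative export directory, showing how the signature could be exported with stock TensorFlow APIs and called back:

import tensorflow as tf
from transformers import TFLongformerModel

model = TFLongformerModel.from_pretrained("allenai/longformer-base-4096")
# Export using the serving tf.function defined above as the SavedModel signature.
tf.saved_model.save(model, "exported_longformer", signatures=model.serving)

loaded = tf.saved_model.load("exported_longformer")
serving_fn = loaded.signatures["serving_default"]
outputs = serving_fn(
    input_ids=tf.constant([[0, 10, 20, 2]], dtype=tf.int32),
    attention_mask=tf.constant([[1, 1, 1, 1]], dtype=tf.int32),
)
print(list(outputs))  # flattened output names produced by serving_output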
LONGFORMER_START_DOCSTRING = r""" LONGFORMER_START_DOCSTRING = r"""
...@@ -1999,6 +2012,19 @@ class TFLongformerModel(TFLongformerPreTrainedModel): ...@@ -1999,6 +2012,19 @@ class TFLongformerModel(TFLongformerPreTrainedModel):
return outputs return outputs
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None
return TFLongformerBaseModelOutputWithPooling(
last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output,
hidden_states=hs,
attentions=attns,
global_attentions=g_attns,
)
@add_start_docstrings(
"""Longformer Model with a `language modeling` head on top. """,
...@@ -2096,6 +2122,19 @@ class TFLongformerForMaskedLM(TFLongformerPreTrainedModel, TFMaskedLanguageModel
global_attentions=outputs.global_attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None
return TFLongformerMaskedLMOutput(
loss=None,
logits=output.logits,
hidden_states=hs,
attentions=attns,
global_attentions=g_attns,
)
@add_start_docstrings(
"""
...@@ -2226,6 +2265,19 @@ class TFLongformerForQuestionAnswering(TFLongformerPreTrainedModel, TFQuestionAn
global_attentions=outputs.global_attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None
return TFLongformerQuestionAnsweringModelOutput(
start_logits=output.start_logits,
end_logits=output.end_logits,
hidden_states=hs,
attentions=attns,
global_attentions=g_attns,
)
class TFLongformerClassificationHead(tf.keras.layers.Layer):
"""Head for sentence-level classification tasks."""
...@@ -2349,6 +2401,18 @@ class TFLongformerForSequenceClassification(TFLongformerPreTrainedModel, TFSeque
global_attentions=outputs.global_attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None
return TFLongformerSequenceClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
global_attentions=g_attns,
)
@add_start_docstrings(
"""
...@@ -2484,6 +2548,31 @@ class TFLongformerForMultipleChoice(TFLongformerPreTrainedModel, TFMultipleChoic
global_attentions=outputs.global_attentions,
)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None
return TFLongformerMultipleChoiceModelOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
global_attentions=g_attns,
)
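The multiple-choice heads override serving with rank-3 specs because their inputs carry a choices dimension: (batch_size, num_choices, sequence_length). A small sketch of feed tensors matching those specs; the concrete sizes are illustrative:

import tensorflow as tf

batch_size, num_choices, seq_len = 2, 4, 16
mc_inputs = {
    "input_ids": tf.zeros((batch_size, num_choices, seq_len), dtype=tf.int32),
    "attention_mask": tf.ones((batch_size, num_choices, seq_len), dtype=tf.int32),
}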
@add_start_docstrings(
"""
...@@ -2578,3 +2667,15 @@ class TFLongformerForTokenClassification(TFLongformerPreTrainedModel, TFTokenCla
attentions=outputs.attentions,
global_attentions=outputs.global_attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None
return TFLongformerTokenClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
global_attentions=g_attns,
)
...@@ -851,6 +851,23 @@ class TFLxmertPreTrainedModel(TFPreTrainedModel):
def dummy_inputs(self) -> Dict[str, tf.Tensor]:
return getattr(self, self.base_model_prefix).dummy_inputs
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
"visual_feats": tf.TensorSpec((None, None, None), tf.float32, name="visual_feats"),
"visual_pos": tf.TensorSpec((None, None, None), tf.float32, name="visual_pos"),
"visual_attention_mask": tf.TensorSpec((None, None), tf.int32, name="visual_attention_mask"),
"token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
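LXMERT's serving signature is multimodal, so it also declares visual features, box positions, and a visual attention mask. A sketch of tensors matching those specs; the region count and feature dimension below are assumptions for illustration, not values taken from the config:

import tensorflow as tf

batch, seq_len, num_boxes, feat_dim = 2, 8, 36, 2048  # illustrative sizes
lxmert_inputs = {
    "input_ids": tf.zeros((batch, seq_len), dtype=tf.int32),
    "attention_mask": tf.ones((batch, seq_len), dtype=tf.int32),
    "visual_feats": tf.random.uniform((batch, num_boxes, feat_dim), dtype=tf.float32),
    "visual_pos": tf.random.uniform((batch, num_boxes, 4), dtype=tf.float32),
    "visual_attention_mask": tf.ones((batch, num_boxes), dtype=tf.int32),
    "token_type_ids": tf.zeros((batch, seq_len), dtype=tf.int32),
}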
LXMERT_START_DOCSTRING = r""" LXMERT_START_DOCSTRING = r"""
...@@ -1014,6 +1031,24 @@ class TFLxmertModel(TFLxmertPreTrainedModel): ...@@ -1014,6 +1031,24 @@ class TFLxmertModel(TFLxmertPreTrainedModel):
return outputs return outputs
def serving_output(self, output):
l_hs = tf.convert_to_tensor(output.language_hidden_states) if self.config.output_hidden_states else None
v_hs = tf.convert_to_tensor(output.vision_hidden_states) if self.config.output_hidden_states else None
l_attns = tf.convert_to_tensor(output.language_attentions) if self.config.output_attentions else None
v_attns = tf.convert_to_tensor(output.vision_attentions) if self.config.output_attentions else None
c_enc_attns = tf.convert_to_tensor(output.cross_encoder_attentions) if self.config.output_attentions else None
return TFLxmertModelOutput(
pooled_output=output.pooled_output,
language_output=output.language_output,
vision_output=output.vision_output,
language_hidden_states=l_hs,
vision_hidden_states=v_hs,
language_attentions=l_attns,
vision_attentions=v_attns,
cross_encoder_attentions=c_enc_attns,
)
class TFLxmertPooler(tf.keras.layers.Layer):
def __init__(self, config, **kwargs):
...@@ -1431,3 +1466,21 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel):
vision_attentions=lxmert_output.vision_attentions,
cross_encoder_attentions=lxmert_output.cross_encoder_attentions,
)
def serving_output(self, output):
l_hs = tf.convert_to_tensor(output.language_hidden_states) if self.config.output_hidden_states else None
v_hs = tf.convert_to_tensor(output.vision_hidden_states) if self.config.output_hidden_states else None
l_attns = tf.convert_to_tensor(output.language_attentions) if self.config.output_attentions else None
v_attns = tf.convert_to_tensor(output.vision_attentions) if self.config.output_attentions else None
c_enc_attns = tf.convert_to_tensor(output.cross_encoder_attentions) if self.config.output_attentions else None
return TFLxmertForPreTrainingOutput(
prediction_logits=output.prediction_logits,
cross_relationship_score=output.cross_relationship_score,
question_answering_score=output.question_answering_score,
language_hidden_states=l_hs,
vision_hidden_states=v_hs,
language_attentions=l_attns,
vision_attentions=v_attns,
cross_encoder_attentions=c_enc_attns,
)
...@@ -1012,6 +1012,17 @@ class TFMobileBertModel(TFMobileBertPreTrainedModel):
return outputs
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutputWithPooling(
last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -1109,6 +1120,17 @@ class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel):
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMobileBertForPreTrainingOutput(
prediction_logits=output.prediction_logits,
seq_relationship_logits=output.seq_relationship_logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings("""MobileBert Model with a `language modeling` head on top. """, MOBILEBERT_START_DOCSTRING) @add_start_docstrings("""MobileBert Model with a `language modeling` head on top. """, MOBILEBERT_START_DOCSTRING)
class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModelingLoss): class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModelingLoss):
...@@ -1207,6 +1229,16 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel ...@@ -1207,6 +1229,16 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel
attentions=outputs.attentions, attentions=outputs.attentions,
) )
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMaskedLMOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
class TFMobileBertOnlyNSPHead(tf.keras.layers.Layer):
def __init__(self, config, **kwargs):
...@@ -1314,6 +1346,16 @@ class TFMobileBertForNextSentencePrediction(TFMobileBertPreTrainedModel, TFNextS
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFNextSentencePredictorOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -1416,6 +1458,16 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -1530,6 +1582,17 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits,
end_logits=output.end_logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -1666,6 +1729,30 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic
attentions=outputs.attentions,
)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
"token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMultipleChoiceModelOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -1767,3 +1854,13 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTokenClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
...@@ -71,6 +71,19 @@ class TFMPNetPreTrainedModel(TFPreTrainedModel):
config_class = MPNetConfig
base_model_prefix = "mpnet"
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
class TFMPNetEmbeddings(tf.keras.layers.Layer):
"""Construct the embeddings from word, position embeddings."""
...@@ -792,6 +805,17 @@ class TFMPNetModel(TFMPNetPreTrainedModel):
)
return outputs
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutputWithPooling(
last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output,
hidden_states=hs,
attentions=attns,
)
class TFMPNetLMHead(tf.keras.layers.Layer):
"""MPNet head for masked and permuted language modeling"""
...@@ -918,6 +942,16 @@ class TFMPNetForMaskedLM(TFMPNetPreTrainedModel, TFMaskedLanguageModelingLoss):
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMaskedLMOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
class TFMPNetClassificationHead(tf.keras.layers.Layer):
"""Head for sentence-level classification tasks."""
...@@ -1035,6 +1069,16 @@ class TFMPNetForSequenceClassification(TFMPNetPreTrainedModel, TFSequenceClassif
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -1159,6 +1203,29 @@ class TFMPNetForMultipleChoice(TFMPNetPreTrainedModel, TFMultipleChoiceLoss):
attentions=outputs.attentions,
)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMultipleChoiceModelOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -1254,6 +1321,16 @@ class TFMPNetForTokenClassification(TFMPNetPreTrainedModel, TFTokenClassificatio
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTokenClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -1361,3 +1438,14 @@ class TFMPNetForQuestionAnswering(TFMPNetPreTrainedModel, TFQuestionAnsweringLos
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits,
end_logits=output.end_logits,
hidden_states=hs,
attentions=attns,
)
...@@ -363,6 +363,19 @@ class TFOpenAIGPTPreTrainedModel(TFPreTrainedModel):
config_class = OpenAIGPTConfig
base_model_prefix = "transformer"
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
@dataclass
class TFOpenAIGPTDoubleHeadsModelOutput(ModelOutput):
...@@ -543,6 +556,16 @@ class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel):
)
return outputs
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutput(
last_hidden_state=output.last_hidden_state,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -636,6 +659,16 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel, TFCausalLanguageModelin
attentions=transformer_outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFCausalLMOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -764,6 +797,31 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
attentions=transformer_outputs.attentions,
)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
"mc_token_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFOpenAIGPTDoubleHeadsModelOutput(
logits=output.logits,
mc_logits=output.mc_logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -914,3 +972,13 @@ class TFOpenAIGPTForSequenceClassification(TFOpenAIGPTPreTrainedModel, TFSequenc
hidden_states=transformer_outputs.hidden_states,
attentions=transformer_outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
...@@ -628,6 +628,19 @@ class TFRobertaPreTrainedModel(TFPreTrainedModel):
config_class = RobertaConfig
base_model_prefix = "roberta"
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
ROBERTA_START_DOCSTRING = r""" ROBERTA_START_DOCSTRING = r"""
...@@ -779,6 +792,17 @@ class TFRobertaModel(TFRobertaPreTrainedModel): ...@@ -779,6 +792,17 @@ class TFRobertaModel(TFRobertaPreTrainedModel):
return outputs return outputs
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutputWithPooling(
last_hidden_state=output.last_hidden_state,
pooler_output=output.pooler_output,
hidden_states=hs,
attentions=attns,
)
class TFRobertaLMHead(tf.keras.layers.Layer):
"""Roberta Head for masked language modeling."""
...@@ -906,6 +930,16 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel, TFMaskedLanguageModelingLos
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMaskedLMOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
class TFRobertaClassificationHead(tf.keras.layers.Layer):
"""Head for sentence-level classification tasks."""
...@@ -1022,6 +1056,16 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel, TFSequenceCla
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFSequenceClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -1146,6 +1190,29 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss)
attentions=outputs.attentions,
)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFMultipleChoiceModelOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -1242,6 +1309,16 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel, TFTokenClassific
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTokenClassifierOutput(
logits=output.logits,
hidden_states=hs,
attentions=attns,
)
@add_start_docstrings(
"""
...@@ -1349,3 +1426,14 @@ class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel, TFQuestionAnswerin
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits,
end_logits=output.end_logits,
hidden_states=hs,
attentions=attns,
)
...@@ -825,6 +825,21 @@ class TFT5PreTrainedModel(TFPreTrainedModel):
}
return dummy_inputs
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
"decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
"decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
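The T5 signature is a seq2seq one: it expects encoder token ids and mask plus decoder token ids and mask. A sketch of a feed that satisfies those specs; the token values below are placeholders, not real vocabulary ids:

import tensorflow as tf

t5_serving_inputs = {
    "input_ids": tf.constant([[37, 423, 215, 1]], dtype=tf.int32),
    "attention_mask": tf.constant([[1, 1, 1, 1]], dtype=tf.int32),
    "decoder_input_ids": tf.constant([[0, 37, 423]], dtype=tf.int32),
    "decoder_attention_mask": tf.constant([[1, 1, 1]], dtype=tf.int32),
}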
def _shift_right(self, input_ids):
decoder_start_token_id = self.config.decoder_start_token_id
pad_token_id = self.config.pad_token_id
...@@ -1165,6 +1180,23 @@ class TFT5Model(TFT5PreTrainedModel):
encoder_attentions=inputs["encoder_outputs"].attentions,
)
def serving_output(self, output):
pkv = tf.convert_to_tensor(output.past_key_values[1:]) if self.config.use_cache else None
dec_hs = tf.convert_to_tensor(output.decoder_hidden_states) if self.config.output_hidden_states else None
dec_attns = tf.convert_to_tensor(output.decoder_attentions) if self.config.output_attentions else None
enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None
enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None
return TFSeq2SeqModelOutput(
last_hidden_state=output.last_hidden_state,
past_key_values=pkv,
decoder_hidden_states=dec_hs,
decoder_attentions=dec_attns,
encoder_last_hidden_state=output.encoder_last_hidden_state,
encoder_hidden_states=enc_hs,
encoder_attentions=enc_attns,
)
@add_start_docstrings("""T5 Model with a `language modeling` head on top. """, T5_START_DOCSTRING) @add_start_docstrings("""T5 Model with a `language modeling` head on top. """, T5_START_DOCSTRING)
class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModelingLoss): class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModelingLoss):
...@@ -1372,6 +1404,23 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling ...@@ -1372,6 +1404,23 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling
encoder_attentions=inputs["encoder_outputs"].attentions, encoder_attentions=inputs["encoder_outputs"].attentions,
) )
def serving_output(self, output):
pkv = tf.convert_to_tensor(output.past_key_values[1:]) if self.config.use_cache else None
dec_hs = tf.convert_to_tensor(output.decoder_hidden_states) if self.config.output_hidden_states else None
dec_attns = tf.convert_to_tensor(output.decoder_attentions) if self.config.output_attentions else None
enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None
enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None
return TFSeq2SeqLMOutput(
logits=output.logits,
past_key_values=pkv,
decoder_hidden_states=dec_hs,
decoder_attentions=dec_attns,
encoder_last_hidden_state=output.encoder_last_hidden_state,
encoder_hidden_states=enc_hs,
encoder_attentions=enc_attns,
)
def prepare_inputs_for_generation(self, inputs, past, attention_mask, use_cache, **kwargs):
assert past is not None, "past has to be defined for encoder_outputs"
...@@ -1522,3 +1571,13 @@ class TFT5EncoderModel(TFT5PreTrainedModel):
hidden_states=encoder_outputs.hidden_states,
attentions=encoder_outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFBaseModelOutput(
last_hidden_state=output.last_hidden_state,
hidden_states=hs,
attentions=attns,
)
...@@ -659,6 +659,18 @@ class TFTransfoXLPreTrainedModel(TFPreTrainedModel):
config_class = TransfoXLConfig
base_model_prefix = "transformer"
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
@dataclass
class TFTransfoXLModelOutput(ModelOutput):
...@@ -885,6 +897,17 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel):
return outputs
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTransfoXLModelOutput(
last_hidden_state=output.last_hidden_state,
mems=tf.convert_to_tensor(output.mems),
hidden_states=hs,
attentions=attns,
)
class TFTransfoXLMHead(tf.keras.layers.Layer):
def __init__(self, config, input_embeddings, **kwargs):
...@@ -1002,6 +1025,17 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
attentions=transformer_outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTransfoXLLMHeadModelOutput(
prediction_scores=output.prediction_scores,
mems=tf.convert_to_tensor(output.mems),
hidden_states=hs,
attentions=attns,
)
def prepare_inputs_for_generation(self, inputs, past, **model_kwargs):
inputs = {"input_ids": inputs}
...@@ -1156,3 +1190,14 @@ class TFTransfoXLForSequenceClassification(TFTransfoXLPreTrainedModel, TFSequenc
hidden_states=transformer_outputs.hidden_states,
attentions=transformer_outputs.attentions,
)
def serving_output(self, output):
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFTransfoXLSequenceClassifierOutputWithPast(
logits=output.logits,
mems=tf.convert_to_tensor(output.mems),
hidden_states=hs,
attentions=attns,
)