# @add_start_docstrings("""XLNet Model with a span classification head on top for extractive question-answering tasks like SQuAD (linear layers on top of
# the hidden-states output to compute `span start logits` and `span end logits`). """,
# XLNET_START_DOCSTRING, XLNET_INPUTS_DOCSTRING)
# XLNET_START_DOCSTRING, XLNET_INPUTS_DOCSTRING)
# class XLNetForSequenceClassification(XLNetPreTrainedModel):
# class TFXLNetForQuestionAnswering(TFXLNetPreTrainedModel):
# @add_start_docstrings("""XLNet Model with a span classification head on top for extractive question-answering tasks like SQuAD (linear layers on top of
# @add_start_docstrings("""XLNet Model with a span classification head on top for extractive question-answering tasks like SQuAD (linear layers on top of
# the hidden-states output to compute `span start logits` and `span end logits`). """,
# the hidden-states output to compute `span start logits` and `span end logits`). """,
# XLNET_START_DOCSTRING, XLNET_INPUTS_DOCSTRING)
# XLNET_START_DOCSTRING, XLNET_INPUTS_DOCSTRING)
# class XLNetForQuestionAnswering(XLNetPreTrainedModel):
# class TFXLNetForQuestionAnswering(TFXLNetPreTrainedModel):
# r"""
# r"""
# **start_positions**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
# Labels for position (index) of the start of the labelled span for computing the token classification loss.
# Positions are clamped to the length of the sequence (`sequence_length`).
# Positions outside of the sequence are not taken into account for computing the loss.
# **end_positions**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
# Labels for position (index) of the end of the labelled span for computing the token classification loss.
# Positions are clamped to the length of the sequence (`sequence_length`).
# Positions outside of the sequence are not taken into account for computing the loss.
# **is_impossible**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
# Labels whether a question has an answer or no answer (SQuAD 2.0)
# **cls_index**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
# Labels for position (index) of the classification token to use as input for computing plausibility of the answer.
# **p_mask**: (`optional`) ``torch.FloatTensor`` of shape ``(batch_size, sequence_length)``:
# **p_mask**: (`optional`) ``torch.FloatTensor`` of shape ``(batch_size, sequence_length)``:
# Optional mask of tokens which can't be in answers (e.g. [CLS], [PAD], ...).
# Optional mask of tokens which can't be in answers (e.g. [CLS], [PAD], ...).
# 1.0 means the token should be masked. 0.0 means the token is not masked.
# 1.0 means the token should be masked. 0.0 means the token is not masked.
...
@@ -1054,29 +1063,18 @@ class TFXLNetModel(TFXLNetPreTrainedModel):
...
@@ -1054,29 +1063,18 @@ class TFXLNetModel(TFXLNetPreTrainedModel):
@add_start_docstrings("""XLNet Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of
the hidden-states output to compute `span start logits` and `span end logits`). """,
@add_start_docstrings("""XLNet Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of
@add_start_docstrings("""XLNet Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of
the hidden-states output to compute `span start logits` and `span end logits`). """,
the hidden-states output to compute `span start logits` and `span end logits`). """,