"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "31b0560ab4e5d5d3652dd931c11e630dbfbb3900"
Commit 2831826b authored by Minh Chien Vu, committed by GitHub

Add Doc Test for BERT (#16523)



* Add doctest BERT

* make fixup

* fix typo

* change checkpoints

* make fixup

* define doctest output value, update doctest for mobilebert

* solve fix-copies

* update QA target start index and end index

* change checkpoint for docs and reuse defined variable

* Update src/transformers/models/bert/modeling_tf_bert.py
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* Apply suggestions from code review
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* Apply suggestions from code review
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* make fixup
Co-authored-by: default avatarYih-Dar <2521628+ydshieh@users.noreply.github.com>
parent 098b0026
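The heavy lifting in this change is done by `add_code_sample_docstrings`, which stamps a task-specific, doctest-checked usage example into each model's forward docstring, parameterized by the checkpoint and the expected output/loss constants added below. A rough sketch of the idea (simplified for illustration; not the library's actual implementation):

```python
# Simplified illustration of the idea behind `add_code_sample_docstrings`:
# a decorator that appends a checkpoint-specific, doctest-style example to the
# decorated function's docstring. The real transformers decorator is far more
# elaborate (task-specific templates, PT/TF variants); this is only a sketch.
def add_code_sample(checkpoint, expected_output, expected_loss):
    example = (
        "\n    Example:\n\n"
        f'    >>> model = AutoModel.from_pretrained("{checkpoint}")  # doctest: +SKIP\n'
        f"    # the doctest compares the printed result with {expected_output!r}\n"
        f"    # and the rounded loss with {expected_loss!r}\n"
    )

    def decorator(fn):
        fn.__doc__ = (fn.__doc__ or "") + example
        return fn

    return decorator
```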
src/transformers/models/bert/modeling_bert.py
@@ -63,6 +63,26 @@ _CHECKPOINT_FOR_DOC = "bert-base-uncased"
_CONFIG_FOR_DOC = "BertConfig"
_TOKENIZER_FOR_DOC = "BertTokenizer"

+# TokenClassification docstring
+_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english"
+_TOKEN_CLASS_EXPECTED_OUTPUT = (
+    "['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC'] "
+)
+_TOKEN_CLASS_EXPECTED_LOSS = 0.01
+
+# QuestionAnswering docstring
+_CHECKPOINT_FOR_QA = "deepset/bert-base-cased-squad2"
+_QA_EXPECTED_OUTPUT = "'a nice puppet'"
+_QA_EXPECTED_LOSS = 7.41
+_QA_TARGET_START_INDEX = 14
+_QA_TARGET_END_INDEX = 15
+
+# SequenceClassification docstring
+_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "textattack/bert-base-uncased-yelp-polarity"
+_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
+_SEQ_CLASS_EXPECTED_LOSS = 0.01
+
BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "bert-base-uncased",
    "bert-large-uncased",
@@ -1156,7 +1176,12 @@ class BertLMHeadModel(BertPreTrainedModel):
        self.cls.predictions.decoder = new_embeddings

    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
-   @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC)
+   @add_code_sample_docstrings(
+       processor_class=_TOKENIZER_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_DOC,
+       output_type=CausalLMOutputWithCrossAttentions,
+       config_class=_CONFIG_FOR_DOC,
+   )
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
@@ -1176,48 +1201,27 @@ class BertLMHeadModel(BertPreTrainedModel):
    ) -> Union[Tuple[torch.Tensor], CausalLMOutputWithCrossAttentions]:
        r"""
        encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
            the model is configured as a decoder.
        encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
            the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in
            `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring). Tokens with indices set to `-100` are
            ignored (masked); the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
        past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 tensors of shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`):
            Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.

            If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
            don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
            `decoder_input_ids` of shape `(batch_size, sequence_length)`.
        use_cache (`bool`, *optional*):
            If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
            `past_key_values`).
-
-       Returns:
-
-       Example:
-
-       ```python
-       >>> from transformers import BertTokenizer, BertLMHeadModel, BertConfig
-       >>> import torch
-
-       >>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
-       >>> config = BertConfig.from_pretrained("bert-base-cased")
-       >>> config.is_decoder = True
-       >>> model = BertLMHeadModel.from_pretrained("bert-base-cased", config=config)
-
-       >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
-       >>> outputs = model(**inputs)
-
-       >>> prediction_logits = outputs.logits
-       ```
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if labels is not None:
@@ -1315,6 +1319,8 @@ class BertForMaskedLM(BertPreTrainedModel):
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=MaskedLMOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output="'paris'",
+       expected_loss=0.88,
    )
    def forward(
        self,
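The `expected_output="'paris'"` / `expected_loss=0.88` pair corresponds to a fill-mask check along these lines (a sketch assuming a capital-of-France prompt; the doc template's actual prompt is not shown in this diff):

```python
import torch
from transformers import BertForMaskedLM, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForMaskedLM.from_pretrained("bert-base-uncased")

# Hypothetical prompt; 'paris' is the expected top prediction for the mask.
inputs = tokenizer("The capital of France is [MASK].", return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

# Pick the highest-scoring token at the masked position.
mask_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]
predicted_id = logits[0, mask_index].argmax(dim=-1)
print(tokenizer.decode(predicted_id))  # expected_output: 'paris'
```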
@@ -1517,9 +1523,11 @@ class BertForSequenceClassification(BertPreTrainedModel):
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
+       expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
    )
    def forward(
        self,
@@ -1716,9 +1724,11 @@ class BertForTokenClassification(BertPreTrainedModel):
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
        output_type=TokenClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
+       expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
    )
    def forward(
        self,
@@ -1797,9 +1807,13 @@ class BertForQuestionAnswering(BertPreTrainedModel):
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_QA,
        output_type=QuestionAnsweringModelOutput,
        config_class=_CONFIG_FOR_DOC,
+       qa_target_start_index=_QA_TARGET_START_INDEX,
+       qa_target_end_index=_QA_TARGET_END_INDEX,
+       expected_output=_QA_EXPECTED_OUTPUT,
+       expected_loss=_QA_EXPECTED_LOSS,
    )
    def forward(
        self,
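The QA target indices feed the loss half of the generated example. Roughly, the doctest exercises both directions like this (a sketch assuming the familiar Jim Henson example; the exact indices depend on the template's tokenization):

```python
import torch
from transformers import BertForQuestionAnswering, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("deepset/bert-base-cased-squad2")
model = BertForQuestionAnswering.from_pretrained("deepset/bert-base-cased-squad2")

question, context = "Who was Jim Henson?", "Jim Henson was a nice puppet"
inputs = tokenizer(question, context, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# Decode the highest-scoring answer span from the start/end logits.
start = outputs.start_logits.argmax()
end = outputs.end_logits.argmax()
answer_ids = inputs.input_ids[0, start : end + 1]
print(tokenizer.decode(answer_ids))  # expected_output: 'a nice puppet'

# The expected loss is computed by supplying gold span indices as labels.
target_start = torch.tensor([14])  # _QA_TARGET_START_INDEX
target_end = torch.tensor([15])  # _QA_TARGET_END_INDEX
loss = model(**inputs, start_positions=target_start, end_positions=target_end).loss
print(round(loss.item(), 2))  # expected_loss: 7.41
```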
src/transformers/models/bert/modeling_tf_bert.py
@@ -65,10 +65,29 @@ from .configuration_bert import BertConfig
logger = logging.get_logger(__name__)

-_CHECKPOINT_FOR_DOC = "bert-base-cased"
+_CHECKPOINT_FOR_DOC = "bert-base-uncased"
_CONFIG_FOR_DOC = "BertConfig"
_TOKENIZER_FOR_DOC = "BertTokenizer"

+# TokenClassification docstring
+_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english"
+_TOKEN_CLASS_EXPECTED_OUTPUT = (
+    "['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC'] "
+)
+_TOKEN_CLASS_EXPECTED_LOSS = 0.01
+
+# QuestionAnswering docstring
+_CHECKPOINT_FOR_QA = "ydshieh/bert-base-cased-squad2"
+_QA_EXPECTED_OUTPUT = "'a nice puppet'"
+_QA_EXPECTED_LOSS = 7.41
+_QA_TARGET_START_INDEX = 14
+_QA_TARGET_END_INDEX = 15
+
+# SequenceClassification docstring
+_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "ydshieh/bert-base-uncased-yelp-polarity"
+_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
+_SEQ_CLASS_EXPECTED_LOSS = 0.01
+
TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "bert-base-uncased",
    "bert-large-uncased",
@@ -1197,11 +1216,11 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
        >>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        >>> model = TFBertForPreTraining.from_pretrained("bert-base-uncased")
-       >>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[
-       ...     None, :
-       >>> ]  # Batch size 1
+       >>> input_ids = tokenizer("Hello, my dog is cute", add_special_tokens=True, return_tensors="tf")
+       >>> # Batch size 1
        >>> outputs = model(input_ids)
-       >>> prediction_scores, seq_relationship_scores = outputs[:2]
+       >>> prediction_logits, seq_relationship_logits = outputs[:2]
        ```"""
        outputs = self.bert(
            input_ids=input_ids,
@@ -1285,6 +1304,8 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFMaskedLMOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output="'paris'",
+       expected_loss=0.88,
    )
    def call(
        self,
@@ -1606,9 +1627,11 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassificationLoss):
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
        output_type=TFSequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
+       expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
    )
    def call(
        self,
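On the TF side the sequence-classification expectations are checked the same way; a sketch with a hypothetical review sentence (the template's own text may differ):

```python
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification

ckpt = "ydshieh/bert-base-uncased-yelp-polarity"
tokenizer = BertTokenizer.from_pretrained(ckpt)
model = TFBertForSequenceClassification.from_pretrained(ckpt)

# Hypothetical positive review; 'LABEL_1' is the expected predicted class.
inputs = tokenizer("The food was great and the service was excellent!", return_tensors="tf")

logits = model(**inputs).logits
predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
print(model.config.id2label[predicted_class_id])  # expected_output: 'LABEL_1'
```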
@@ -1833,9 +1856,11 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationLoss):
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
        output_type=TFTokenClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
+       expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
    )
    def call(
        self,
@@ -1923,9 +1948,11 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss):
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_QA,
        output_type=TFQuestionAnsweringModelOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output=_QA_EXPECTED_OUTPUT,
+       expected_loss=_QA_EXPECTED_LOSS,
    )
    def call(
        self,
src/transformers/models/mobilebert/modeling_mobilebert.py
@@ -59,6 +59,23 @@ _CHECKPOINT_FOR_DOC = "google/mobilebert-uncased"
_CONFIG_FOR_DOC = "MobileBertConfig"
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"

+# TokenClassification docstring
+_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "mrm8488/mobilebert-finetuned-ner"
+_TOKEN_CLASS_EXPECTED_OUTPUT = "['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']"
+_TOKEN_CLASS_EXPECTED_LOSS = 0.03
+
+# QuestionAnswering docstring
+_CHECKPOINT_FOR_QA = "csarron/mobilebert-uncased-squad-v2"
+_QA_EXPECTED_OUTPUT = "'a nice puppet'"
+_QA_EXPECTED_LOSS = 3.98
+_QA_TARGET_START_INDEX = 12
+_QA_TARGET_END_INDEX = 13
+
+# SequenceClassification docstring
+_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "lordtt13/emo-mobilebert"
+_SEQ_CLASS_EXPECTED_OUTPUT = "'others'"
+_SEQ_CLASS_EXPECTED_LOSS = "4.72"
+
MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = ["google/mobilebert-uncased"]
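The MobileBERT expectations follow the same pattern with MobileBERT-specific checkpoints. A sketch covering both the predicted label ('others') and the loss side, with a hypothetical input and target:

```python
import torch
from transformers import MobileBertForSequenceClassification, MobileBertTokenizer

tokenizer = MobileBertTokenizer.from_pretrained("lordtt13/emo-mobilebert")
model = MobileBertForSequenceClassification.from_pretrained("lordtt13/emo-mobilebert")

inputs = tokenizer("What a lovely day!", return_tensors="pt")  # hypothetical input

with torch.no_grad():
    logits = model(**inputs).logits

# Predicted emotion label, compared against _SEQ_CLASS_EXPECTED_OUTPUT.
print(model.config.id2label[int(logits.argmax(dim=-1))])

# Supplying a gold label id yields the loss compared against _SEQ_CLASS_EXPECTED_LOSS.
labels = torch.tensor([model.config.label2id.get("others", 0)])  # hypothetical target
loss = model(**inputs, labels=labels).loss
print(round(loss.item(), 2))
```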
@@ -962,9 +979,8 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
        >>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
-       >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(
-       ...     0
-       >>> )  # Batch size 1
+       >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)
+       >>> # Batch size 1
        >>> outputs = model(input_ids)
        >>> prediction_logits = outputs.prediction_logits
@@ -1039,6 +1055,8 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=MaskedLMOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output="'paris'",
+       expected_loss=0.57,
    )
    def forward(
        self,
@@ -1229,9 +1247,11 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
+       expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
    )
    def forward(
        self,
@@ -1330,9 +1350,13 @@ class MobileBertForQuestionAnswering(MobileBertPreTrainedModel):
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_QA,
        output_type=QuestionAnsweringModelOutput,
        config_class=_CONFIG_FOR_DOC,
+       qa_target_start_index=_QA_TARGET_START_INDEX,
+       qa_target_end_index=_QA_TARGET_END_INDEX,
+       expected_output=_QA_EXPECTED_OUTPUT,
+       expected_loss=_QA_EXPECTED_LOSS,
    )
    def forward(
        self,
@@ -1536,9 +1560,11 @@ class MobileBertForTokenClassification(MobileBertPreTrainedModel):
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
        output_type=TokenClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
+       expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
    )
    def forward(
        self,
src/transformers/models/mobilebert/modeling_tf_mobilebert.py
@@ -63,6 +63,23 @@ _CHECKPOINT_FOR_DOC = "google/mobilebert-uncased"
_CONFIG_FOR_DOC = "MobileBertConfig"
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"

+# TokenClassification docstring
+_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "vumichien/mobilebert-finetuned-ner"
+_TOKEN_CLASS_EXPECTED_OUTPUT = "['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']"
+_TOKEN_CLASS_EXPECTED_LOSS = 0.03
+
+# QuestionAnswering docstring
+_CHECKPOINT_FOR_QA = "vumichien/mobilebert-uncased-squad-v2"
+_QA_EXPECTED_OUTPUT = "'a nice puppet'"
+_QA_EXPECTED_LOSS = 3.98
+_QA_TARGET_START_INDEX = 12
+_QA_TARGET_END_INDEX = 13
+
+# SequenceClassification docstring
+_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "vumichien/emo-mobilebert"
+_SEQ_CLASS_EXPECTED_OUTPUT = "'others'"
+_SEQ_CLASS_EXPECTED_LOSS = "4.72"
+
TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "google/mobilebert-uncased",
    # See all MobileBERT models at https://huggingface.co/models?filter=mobilebert
@@ -1075,6 +1092,8 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModelingLoss):
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFMaskedLMOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output="'paris'",
+       expected_loss=0.57,
    )
    def call(
        self,
@@ -1265,9 +1284,11 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSequenceClassificationLoss):
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
        output_type=TFSequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
+       expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
    )
    def call(
        self,
@@ -1357,9 +1378,13 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAnsweringLoss):
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_QA,
        output_type=TFQuestionAnsweringModelOutput,
        config_class=_CONFIG_FOR_DOC,
+       qa_target_start_index=_QA_TARGET_START_INDEX,
+       qa_target_end_index=_QA_TARGET_END_INDEX,
+       expected_output=_QA_EXPECTED_OUTPUT,
+       expected_loss=_QA_EXPECTED_LOSS,
    )
    def call(
        self,
@@ -1601,9 +1626,11 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenClassificationLoss):
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-       checkpoint=_CHECKPOINT_FOR_DOC,
+       checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
        output_type=TFTokenClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+       expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
+       expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
    )
    def call(
        self,
utils/documentation_tests.txt
@@ -5,6 +5,8 @@ docs/source/en/model_doc/tapex.mdx
src/transformers/generation_utils.py
src/transformers/models/bart/modeling_bart.py
src/transformers/models/beit/modeling_beit.py
+src/transformers/models/bert/modeling_bert.py
+src/transformers/models/bert/modeling_tf_bert.py
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
src/transformers/models/blenderbot/modeling_blenderbot.py
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
@@ -16,6 +18,8 @@ src/transformers/models/glpn/modeling_glpn.py
src/transformers/models/hubert/modeling_hubert.py
src/transformers/models/marian/modeling_marian.py
src/transformers/models/mbart/modeling_mbart.py
+src/transformers/models/mobilebert/modeling_mobilebert.py
+src/transformers/models/mobilebert/modeling_tf_mobilebert.py
src/transformers/models/pegasus/modeling_pegasus.py
src/transformers/models/plbart/modeling_plbart.py
src/transformers/models/poolformer/modeling_poolformer.py
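Registering the four modeling files in `utils/documentation_tests.txt` opts them into the documentation test run. A minimal local approximation (assuming pytest; the CI's exact invocation and preparation steps may differ):

```python
# Minimal sketch of running the registered doctests locally with pytest's
# doctest collection; the CI job's exact flags may differ from these.
import subprocess

files = [
    "src/transformers/models/bert/modeling_bert.py",
    "src/transformers/models/bert/modeling_tf_bert.py",
    "src/transformers/models/mobilebert/modeling_mobilebert.py",
    "src/transformers/models/mobilebert/modeling_tf_mobilebert.py",
]

subprocess.run(
    ["python", "-m", "pytest", "--doctest-modules", *files, "-sv", "--doctest-continue-on-failure"],
    check=True,
)
```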