Unverified Commit 790f1c95 authored by Sylvain Gugger's avatar Sylvain Gugger Committed by GitHub
Browse files

Fix template for inputs docstrings (#12976)

parent 75b8990d
...@@ -1789,7 +1789,7 @@ BIG_BIRD_START_DOCSTRING = r""" ...@@ -1789,7 +1789,7 @@ BIG_BIRD_START_DOCSTRING = r"""
BIG_BIRD_INPUTS_DOCSTRING = r""" BIG_BIRD_INPUTS_DOCSTRING = r"""
Args: Args:
input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`): input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
Indices of input sequence tokens in the vocabulary. Indices of input sequence tokens in the vocabulary.
Indices can be obtained using :class:`transformers.BigBirdTokenizer`. See Indices can be obtained using :class:`transformers.BigBirdTokenizer`. See
...@@ -1797,14 +1797,14 @@ BIG_BIRD_INPUTS_DOCSTRING = r""" ...@@ -1797,14 +1797,14 @@ BIG_BIRD_INPUTS_DOCSTRING = r"""
details. details.
`What are input IDs? <../glossary.html#input-ids>`__ `What are input IDs? <../glossary.html#input-ids>`__
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`): attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
- 1 for tokens that are **not masked**, - 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**. - 0 for tokens that are **masked**.
`What are attention masks? <../glossary.html#attention-mask>`__ `What are attention masks? <../glossary.html#attention-mask>`__
token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
1]``: 1]``:
...@@ -1812,7 +1812,7 @@ BIG_BIRD_INPUTS_DOCSTRING = r""" ...@@ -1812,7 +1812,7 @@ BIG_BIRD_INPUTS_DOCSTRING = r"""
- 1 corresponds to a `sentence B` token. - 1 corresponds to a `sentence B` token.
`What are token type IDs? <../glossary.html#token-type-ids>`_ `What are token type IDs? <../glossary.html#token-type-ids>`_
position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0, Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
config.max_position_embeddings - 1]``. config.max_position_embeddings - 1]``.
...@@ -1823,7 +1823,7 @@ BIG_BIRD_INPUTS_DOCSTRING = r""" ...@@ -1823,7 +1823,7 @@ BIG_BIRD_INPUTS_DOCSTRING = r"""
- 1 indicates the head is **not masked**, - 1 indicates the head is **not masked**,
- 0 indicates the head is **masked**. - 0 indicates the head is **masked**.
inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
This is useful if you want more control over how to convert `input_ids` indices into associated vectors This is useful if you want more control over how to convert `input_ids` indices into associated vectors
than the model's internal embedding lookup matrix. than the model's internal embedding lookup matrix.
...@@ -1967,7 +1967,7 @@ class BigBirdModel(BigBirdPreTrainedModel): ...@@ -1967,7 +1967,7 @@ class BigBirdModel(BigBirdPreTrainedModel):
self.attention_type = value self.attention_type = value
self.encoder.set_attention_type(value) self.encoder.set_attention_type(value)
@add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -2374,7 +2374,7 @@ class BigBirdForMaskedLM(BigBirdPreTrainedModel): ...@@ -2374,7 +2374,7 @@ class BigBirdForMaskedLM(BigBirdPreTrainedModel):
def set_output_embeddings(self, new_embeddings): def set_output_embeddings(self, new_embeddings):
self.cls.predictions.decoder = new_embeddings self.cls.predictions.decoder = new_embeddings
@add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -2832,7 +2832,7 @@ class BigBirdForTokenClassification(BigBirdPreTrainedModel): ...@@ -2832,7 +2832,7 @@ class BigBirdForTokenClassification(BigBirdPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -2940,7 +2940,7 @@ class BigBirdForQuestionAnswering(BigBirdPreTrainedModel): ...@@ -2940,7 +2940,7 @@ class BigBirdForQuestionAnswering(BigBirdPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="google/bigbird-base-trivia-itc", checkpoint="google/bigbird-base-trivia-itc",
......
...@@ -928,7 +928,7 @@ CANINE_START_DOCSTRING = r""" ...@@ -928,7 +928,7 @@ CANINE_START_DOCSTRING = r"""
CANINE_INPUTS_DOCSTRING = r""" CANINE_INPUTS_DOCSTRING = r"""
Args: Args:
input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`): input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
Indices of input sequence tokens in the vocabulary. Indices of input sequence tokens in the vocabulary.
Indices can be obtained using :class:`transformers.CanineTokenizer`. See Indices can be obtained using :class:`transformers.CanineTokenizer`. See
...@@ -936,14 +936,14 @@ CANINE_INPUTS_DOCSTRING = r""" ...@@ -936,14 +936,14 @@ CANINE_INPUTS_DOCSTRING = r"""
details. details.
`What are input IDs? <../glossary.html#input-ids>`__ `What are input IDs? <../glossary.html#input-ids>`__
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`): attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
- 1 for tokens that are **not masked**, - 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**. - 0 for tokens that are **masked**.
`What are attention masks? <../glossary.html#attention-mask>`__ `What are attention masks? <../glossary.html#attention-mask>`__
token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
1]``: 1]``:
...@@ -951,7 +951,7 @@ CANINE_INPUTS_DOCSTRING = r""" ...@@ -951,7 +951,7 @@ CANINE_INPUTS_DOCSTRING = r"""
- 1 corresponds to a `sentence B` token. - 1 corresponds to a `sentence B` token.
`What are token type IDs? <../glossary.html#token-type-ids>`_ `What are token type IDs? <../glossary.html#token-type-ids>`_
position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0, Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
config.max_position_embeddings - 1]``. config.max_position_embeddings - 1]``.
...@@ -962,7 +962,7 @@ CANINE_INPUTS_DOCSTRING = r""" ...@@ -962,7 +962,7 @@ CANINE_INPUTS_DOCSTRING = r"""
- 1 indicates the head is **not masked**, - 1 indicates the head is **not masked**,
- 0 indicates the head is **masked**. - 0 indicates the head is **masked**.
inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
This is useful if you want more control over how to convert `input_ids` indices into associated vectors This is useful if you want more control over how to convert `input_ids` indices into associated vectors
than the model's internal embedding lookup matrix. than the model's internal embedding lookup matrix.
...@@ -1088,7 +1088,7 @@ class CanineModel(CaninePreTrainedModel): ...@@ -1088,7 +1088,7 @@ class CanineModel(CaninePreTrainedModel):
# `repeated`: [batch_size, char_seq_len, molecule_hidden_size] # `repeated`: [batch_size, char_seq_len, molecule_hidden_size]
return torch.cat([repeated, remainder_repeated], dim=-2) return torch.cat([repeated, remainder_repeated], dim=-2)
@add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -1458,7 +1458,7 @@ class CanineForTokenClassification(CaninePreTrainedModel): ...@@ -1458,7 +1458,7 @@ class CanineForTokenClassification(CaninePreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -1545,7 +1545,7 @@ class CanineForQuestionAnswering(CaninePreTrainedModel): ...@@ -1545,7 +1545,7 @@ class CanineForQuestionAnswering(CaninePreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
......
...@@ -689,7 +689,7 @@ CONVBERT_START_DOCSTRING = r""" ...@@ -689,7 +689,7 @@ CONVBERT_START_DOCSTRING = r"""
CONVBERT_INPUTS_DOCSTRING = r""" CONVBERT_INPUTS_DOCSTRING = r"""
Args: Args:
input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`): input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
Indices of input sequence tokens in the vocabulary. Indices of input sequence tokens in the vocabulary.
Indices can be obtained using :class:`transformers.ConvBertTokenizer`. See Indices can be obtained using :class:`transformers.ConvBertTokenizer`. See
...@@ -697,7 +697,7 @@ CONVBERT_INPUTS_DOCSTRING = r""" ...@@ -697,7 +697,7 @@ CONVBERT_INPUTS_DOCSTRING = r"""
details. details.
`What are input IDs? <../glossary.html#input-ids>`__ `What are input IDs? <../glossary.html#input-ids>`__
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`): attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
...@@ -705,7 +705,7 @@ CONVBERT_INPUTS_DOCSTRING = r""" ...@@ -705,7 +705,7 @@ CONVBERT_INPUTS_DOCSTRING = r"""
- 0 for tokens that are **masked**. - 0 for tokens that are **masked**.
`What are attention masks? <../glossary.html#attention-mask>`__ `What are attention masks? <../glossary.html#attention-mask>`__
token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
1]``: 1]``:
...@@ -714,7 +714,7 @@ CONVBERT_INPUTS_DOCSTRING = r""" ...@@ -714,7 +714,7 @@ CONVBERT_INPUTS_DOCSTRING = r"""
- 1 corresponds to a `sentence B` token. - 1 corresponds to a `sentence B` token.
`What are token type IDs? <../glossary.html#token-type-ids>`_ `What are token type IDs? <../glossary.html#token-type-ids>`_
position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0, Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
config.max_position_embeddings - 1]``. config.max_position_embeddings - 1]``.
...@@ -726,7 +726,7 @@ CONVBERT_INPUTS_DOCSTRING = r""" ...@@ -726,7 +726,7 @@ CONVBERT_INPUTS_DOCSTRING = r"""
- 1 indicates the head is **not masked**, - 1 indicates the head is **not masked**,
- 0 indicates the head is **masked**. - 0 indicates the head is **masked**.
inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
This is useful if you want more control over how to convert `input_ids` indices into associated vectors This is useful if you want more control over how to convert `input_ids` indices into associated vectors
than the model's internal embedding lookup matrix. than the model's internal embedding lookup matrix.
...@@ -1163,7 +1163,7 @@ class ConvBertForTokenClassification(ConvBertPreTrainedModel): ...@@ -1163,7 +1163,7 @@ class ConvBertForTokenClassification(ConvBertPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -1250,7 +1250,7 @@ class ConvBertForQuestionAnswering(ConvBertPreTrainedModel): ...@@ -1250,7 +1250,7 @@ class ConvBertForQuestionAnswering(ConvBertPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
......
...@@ -794,7 +794,7 @@ DEBERTA_START_DOCSTRING = r""" ...@@ -794,7 +794,7 @@ DEBERTA_START_DOCSTRING = r"""
DEBERTA_INPUTS_DOCSTRING = r""" DEBERTA_INPUTS_DOCSTRING = r"""
Args: Args:
input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`): input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
Indices of input sequence tokens in the vocabulary. Indices of input sequence tokens in the vocabulary.
Indices can be obtained using :class:`transformers.DebertaTokenizer`. See Indices can be obtained using :class:`transformers.DebertaTokenizer`. See
...@@ -802,14 +802,14 @@ DEBERTA_INPUTS_DOCSTRING = r""" ...@@ -802,14 +802,14 @@ DEBERTA_INPUTS_DOCSTRING = r"""
details. details.
`What are input IDs? <../glossary.html#input-ids>`__ `What are input IDs? <../glossary.html#input-ids>`__
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`): attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
- 1 for tokens that are **not masked**, - 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**. - 0 for tokens that are **masked**.
`What are attention masks? <../glossary.html#attention-mask>`__ `What are attention masks? <../glossary.html#attention-mask>`__
token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
1]``: 1]``:
...@@ -817,12 +817,12 @@ DEBERTA_INPUTS_DOCSTRING = r""" ...@@ -817,12 +817,12 @@ DEBERTA_INPUTS_DOCSTRING = r"""
- 1 corresponds to a `sentence B` token. - 1 corresponds to a `sentence B` token.
`What are token type IDs? <../glossary.html#token-type-ids>`_ `What are token type IDs? <../glossary.html#token-type-ids>`_
position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0, Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
config.max_position_embeddings - 1]``. config.max_position_embeddings - 1]``.
`What are position IDs? <../glossary.html#position-ids>`_ `What are position IDs? <../glossary.html#position-ids>`_
inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
This is useful if you want more control over how to convert `input_ids` indices into associated vectors This is useful if you want more control over how to convert `input_ids` indices into associated vectors
than the model's internal embedding lookup matrix. than the model's internal embedding lookup matrix.
......
...@@ -915,7 +915,7 @@ DEBERTA_START_DOCSTRING = r""" ...@@ -915,7 +915,7 @@ DEBERTA_START_DOCSTRING = r"""
DEBERTA_INPUTS_DOCSTRING = r""" DEBERTA_INPUTS_DOCSTRING = r"""
Args: Args:
input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`): input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
Indices of input sequence tokens in the vocabulary. Indices of input sequence tokens in the vocabulary.
Indices can be obtained using :class:`transformers.DebertaV2Tokenizer`. See Indices can be obtained using :class:`transformers.DebertaV2Tokenizer`. See
...@@ -923,14 +923,14 @@ DEBERTA_INPUTS_DOCSTRING = r""" ...@@ -923,14 +923,14 @@ DEBERTA_INPUTS_DOCSTRING = r"""
details. details.
`What are input IDs? <../glossary.html#input-ids>`__ `What are input IDs? <../glossary.html#input-ids>`__
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`): attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
- 1 for tokens that are **not masked**, - 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**. - 0 for tokens that are **masked**.
`What are attention masks? <../glossary.html#attention-mask>`__ `What are attention masks? <../glossary.html#attention-mask>`__
token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
1]``: 1]``:
...@@ -938,12 +938,12 @@ DEBERTA_INPUTS_DOCSTRING = r""" ...@@ -938,12 +938,12 @@ DEBERTA_INPUTS_DOCSTRING = r"""
- 1 corresponds to a `sentence B` token. - 1 corresponds to a `sentence B` token.
`What are token type IDs? <../glossary.html#token-type-ids>`_ `What are token type IDs? <../glossary.html#token-type-ids>`_
position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0, Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
config.max_position_embeddings - 1]``. config.max_position_embeddings - 1]``.
`What are position IDs? <../glossary.html#position-ids>`_ `What are position IDs? <../glossary.html#position-ids>`_
inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
This is useful if you want more control over how to convert `input_ids` indices into associated vectors This is useful if you want more control over how to convert `input_ids` indices into associated vectors
than the model's internal embedding lookup matrix. than the model's internal embedding lookup matrix.
......
...@@ -466,7 +466,7 @@ class DeiTModel(DeiTPreTrainedModel): ...@@ -466,7 +466,7 @@ class DeiTModel(DeiTPreTrainedModel):
for layer, heads in heads_to_prune.items(): for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads) self.encoder.layer[layer].attention.prune_heads(heads)
@add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC)
def forward( def forward(
self, self,
...@@ -570,7 +570,7 @@ class DeiTForImageClassification(DeiTPreTrainedModel): ...@@ -570,7 +570,7 @@ class DeiTForImageClassification(DeiTPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC)
def forward( def forward(
self, self,
...@@ -707,7 +707,7 @@ class DeiTForImageClassificationWithTeacher(DeiTPreTrainedModel): ...@@ -707,7 +707,7 @@ class DeiTForImageClassificationWithTeacher(DeiTPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=DeiTForImageClassificationWithTeacherOutput, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=DeiTForImageClassificationWithTeacherOutput, config_class=_CONFIG_FOR_DOC)
def forward( def forward(
self, self,
......
...@@ -774,7 +774,7 @@ class IBertModel(IBertPreTrainedModel): ...@@ -774,7 +774,7 @@ class IBertModel(IBertPreTrainedModel):
for layer, heads in heads_to_prune.items(): for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads) self.encoder.layer[layer].attention.prune_heads(heads)
@add_start_docstrings_to_model_forward(IBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(IBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
......
...@@ -509,7 +509,7 @@ class MPNetModel(MPNetPreTrainedModel): ...@@ -509,7 +509,7 @@ class MPNetModel(MPNetPreTrainedModel):
for layer, heads in heads_to_prune.items(): for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads) self.encoder.layer[layer].attention.prune_heads(heads)
@add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -867,7 +867,7 @@ class MPNetForTokenClassification(MPNetPreTrainedModel): ...@@ -867,7 +867,7 @@ class MPNetForTokenClassification(MPNetPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
......
...@@ -680,7 +680,7 @@ REMBERT_START_DOCSTRING = r""" ...@@ -680,7 +680,7 @@ REMBERT_START_DOCSTRING = r"""
REMBERT_INPUTS_DOCSTRING = r""" REMBERT_INPUTS_DOCSTRING = r"""
Args: Args:
input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`): input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
Indices of input sequence tokens in the vocabulary. Indices of input sequence tokens in the vocabulary.
Indices can be obtained using :class:`transformers.RemBertTokenizer`. See Indices can be obtained using :class:`transformers.RemBertTokenizer`. See
...@@ -688,14 +688,14 @@ REMBERT_INPUTS_DOCSTRING = r""" ...@@ -688,14 +688,14 @@ REMBERT_INPUTS_DOCSTRING = r"""
details. details.
`What are input IDs? <../glossary.html#input-ids>`__ `What are input IDs? <../glossary.html#input-ids>`__
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`): attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
- 1 for tokens that are **not masked**, - 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**. - 0 for tokens that are **masked**.
`What are attention masks? <../glossary.html#attention-mask>`__ `What are attention masks? <../glossary.html#attention-mask>`__
token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
1]``: 1]``:
...@@ -703,7 +703,7 @@ REMBERT_INPUTS_DOCSTRING = r""" ...@@ -703,7 +703,7 @@ REMBERT_INPUTS_DOCSTRING = r"""
- 1 corresponds to a `sentence B` token. - 1 corresponds to a `sentence B` token.
`What are token type IDs? <../glossary.html#token-type-ids>`_ `What are token type IDs? <../glossary.html#token-type-ids>`_
position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0, Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
config.max_position_embeddings - 1]``. config.max_position_embeddings - 1]``.
...@@ -714,7 +714,7 @@ REMBERT_INPUTS_DOCSTRING = r""" ...@@ -714,7 +714,7 @@ REMBERT_INPUTS_DOCSTRING = r"""
- 1 indicates the head is **not masked**, - 1 indicates the head is **not masked**,
- 0 indicates the head is **masked**. - 0 indicates the head is **masked**.
inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
This is useful if you want more control over how to convert `input_ids` indices into associated vectors This is useful if you want more control over how to convert `input_ids` indices into associated vectors
than the model's internal embedding lookup matrix. than the model's internal embedding lookup matrix.
...@@ -772,7 +772,7 @@ class RemBertModel(RemBertPreTrainedModel): ...@@ -772,7 +772,7 @@ class RemBertModel(RemBertPreTrainedModel):
for layer, heads in heads_to_prune.items(): for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads) self.encoder.layer[layer].attention.prune_heads(heads)
@add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="rembert", checkpoint="rembert",
...@@ -925,7 +925,7 @@ class RemBertForMaskedLM(RemBertPreTrainedModel): ...@@ -925,7 +925,7 @@ class RemBertForMaskedLM(RemBertPreTrainedModel):
def set_output_embeddings(self, new_embeddings): def set_output_embeddings(self, new_embeddings):
self.cls.predictions.decoder = new_embeddings self.cls.predictions.decoder = new_embeddings
@add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="rembert", checkpoint="rembert",
...@@ -1343,7 +1343,7 @@ class RemBertForTokenClassification(RemBertPreTrainedModel): ...@@ -1343,7 +1343,7 @@ class RemBertForTokenClassification(RemBertPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="rembert", checkpoint="rembert",
...@@ -1431,7 +1431,7 @@ class RemBertForQuestionAnswering(RemBertPreTrainedModel): ...@@ -1431,7 +1431,7 @@ class RemBertForQuestionAnswering(RemBertPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="rembert", checkpoint="rembert",
......
...@@ -730,7 +730,7 @@ class RobertaModel(RobertaPreTrainedModel): ...@@ -730,7 +730,7 @@ class RobertaModel(RobertaPreTrainedModel):
for layer, heads in heads_to_prune.items(): for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads) self.encoder.layer[layer].attention.prune_heads(heads)
@add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
......
...@@ -744,7 +744,7 @@ ROFORMER_START_DOCSTRING = r""" ...@@ -744,7 +744,7 @@ ROFORMER_START_DOCSTRING = r"""
ROFORMER_INPUTS_DOCSTRING = r""" ROFORMER_INPUTS_DOCSTRING = r"""
Args: Args:
input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`): input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
Indices of input sequence tokens in the vocabulary. Indices of input sequence tokens in the vocabulary.
Indices can be obtained using :class:`transformers.RoFormerTokenizer`. See Indices can be obtained using :class:`transformers.RoFormerTokenizer`. See
...@@ -752,14 +752,14 @@ ROFORMER_INPUTS_DOCSTRING = r""" ...@@ -752,14 +752,14 @@ ROFORMER_INPUTS_DOCSTRING = r"""
details. details.
`What are input IDs? <../glossary.html#input-ids>`__ `What are input IDs? <../glossary.html#input-ids>`__
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`): attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
- 1 for tokens that are **not masked**, - 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**. - 0 for tokens that are **masked**.
`What are attention masks? <../glossary.html#attention-mask>`__ `What are attention masks? <../glossary.html#attention-mask>`__
token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
1]``: 1]``:
...@@ -773,7 +773,7 @@ ROFORMER_INPUTS_DOCSTRING = r""" ...@@ -773,7 +773,7 @@ ROFORMER_INPUTS_DOCSTRING = r"""
- 1 indicates the head is **not masked**, - 1 indicates the head is **not masked**,
- 0 indicates the head is **masked**. - 0 indicates the head is **masked**.
inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
This is useful if you want more control over how to convert `input_ids` indices into associated vectors This is useful if you want more control over how to convert `input_ids` indices into associated vectors
than the model's internal embedding lookup matrix. than the model's internal embedding lookup matrix.
...@@ -832,7 +832,7 @@ class RoFormerModel(RoFormerPreTrainedModel): ...@@ -832,7 +832,7 @@ class RoFormerModel(RoFormerPreTrainedModel):
for layer, heads in heads_to_prune.items(): for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads) self.encoder.layer[layer].attention.prune_heads(heads)
@add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -981,7 +981,7 @@ class RoFormerForMaskedLM(RoFormerPreTrainedModel): ...@@ -981,7 +981,7 @@ class RoFormerForMaskedLM(RoFormerPreTrainedModel):
def set_output_embeddings(self, new_embeddings): def set_output_embeddings(self, new_embeddings):
self.cls.predictions.decoder = new_embeddings self.cls.predictions.decoder = new_embeddings
@add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -1412,7 +1412,7 @@ class RoFormerForTokenClassification(RoFormerPreTrainedModel): ...@@ -1412,7 +1412,7 @@ class RoFormerForTokenClassification(RoFormerPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -1499,7 +1499,7 @@ class RoFormerForQuestionAnswering(RoFormerPreTrainedModel): ...@@ -1499,7 +1499,7 @@ class RoFormerForQuestionAnswering(RoFormerPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
......
...@@ -569,7 +569,7 @@ class SqueezeBertModel(SqueezeBertPreTrainedModel): ...@@ -569,7 +569,7 @@ class SqueezeBertModel(SqueezeBertPreTrainedModel):
for layer, heads in heads_to_prune.items(): for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads) self.encoder.layer[layer].attention.prune_heads(heads)
@add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -662,7 +662,7 @@ class SqueezeBertForMaskedLM(SqueezeBertPreTrainedModel): ...@@ -662,7 +662,7 @@ class SqueezeBertForMaskedLM(SqueezeBertPreTrainedModel):
def set_output_embeddings(self, new_embeddings): def set_output_embeddings(self, new_embeddings):
self.cls.predictions.decoder = new_embeddings self.cls.predictions.decoder = new_embeddings
@add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -741,7 +741,7 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel): ...@@ -741,7 +741,7 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -839,7 +839,7 @@ class SqueezeBertForMultipleChoice(SqueezeBertPreTrainedModel): ...@@ -839,7 +839,7 @@ class SqueezeBertForMultipleChoice(SqueezeBertPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward( @add_start_docstrings_to_model_forward(
SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)") SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
) )
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
...@@ -932,7 +932,7 @@ class SqueezeBertForTokenClassification(SqueezeBertPreTrainedModel): ...@@ -932,7 +932,7 @@ class SqueezeBertForTokenClassification(SqueezeBertPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -1019,7 +1019,7 @@ class SqueezeBertForQuestionAnswering(SqueezeBertPreTrainedModel): ...@@ -1019,7 +1019,7 @@ class SqueezeBertForQuestionAnswering(SqueezeBertPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
......
...@@ -452,7 +452,7 @@ class ViTModel(ViTPreTrainedModel): ...@@ -452,7 +452,7 @@ class ViTModel(ViTPreTrainedModel):
for layer, heads in heads_to_prune.items(): for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads) self.encoder.layer[layer].attention.prune_heads(heads)
@add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC)
def forward( def forward(
self, self,
...@@ -555,7 +555,7 @@ class ViTForImageClassification(ViTPreTrainedModel): ...@@ -555,7 +555,7 @@ class ViTForImageClassification(ViTPreTrainedModel):
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC)
def forward( def forward(
self, self,
......
...@@ -861,7 +861,7 @@ XLNET_START_DOCSTRING = r""" ...@@ -861,7 +861,7 @@ XLNET_START_DOCSTRING = r"""
XLNET_INPUTS_DOCSTRING = r""" XLNET_INPUTS_DOCSTRING = r"""
Args: Args:
input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`): input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
Indices of input sequence tokens in the vocabulary. Indices of input sequence tokens in the vocabulary.
Indices can be obtained using :class:`transformers.XLNetTokenizer`. See Indices can be obtained using :class:`transformers.XLNetTokenizer`. See
......
...@@ -696,7 +696,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel): ...@@ -696,7 +696,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
{{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING = r""" {{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING = r"""
Args: Args:
input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`): input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
Indices of input sequence tokens in the vocabulary. Indices of input sequence tokens in the vocabulary.
Indices can be obtained using :class:`transformers.{{cookiecutter.camelcase_modelname}}Tokenizer`. Indices can be obtained using :class:`transformers.{{cookiecutter.camelcase_modelname}}Tokenizer`.
...@@ -704,14 +704,14 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel): ...@@ -704,14 +704,14 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
:func:`transformers.PreTrainedTokenizer.__call__` for details. :func:`transformers.PreTrainedTokenizer.__call__` for details.
`What are input IDs? <../glossary.html#input-ids>`__ `What are input IDs? <../glossary.html#input-ids>`__
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`): attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
- 1 for tokens that are **not masked**, - 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**. - 0 for tokens that are **masked**.
`What are attention masks? <../glossary.html#attention-mask>`__ `What are attention masks? <../glossary.html#attention-mask>`__
token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
1]``: 1]``:
...@@ -719,7 +719,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel): ...@@ -719,7 +719,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
- 1 corresponds to a `sentence B` token. - 1 corresponds to a `sentence B` token.
`What are token type IDs? <../glossary.html#token-type-ids>`_ `What are token type IDs? <../glossary.html#token-type-ids>`_
position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`): position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
Indices of positions of each input sequence tokens in the position embeddings. Indices of positions of each input sequence tokens in the position embeddings.
Selected in the range ``[0, config.max_position_embeddings - 1]``. Selected in the range ``[0, config.max_position_embeddings - 1]``.
...@@ -730,7 +730,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel): ...@@ -730,7 +730,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
- 1 indicates the head is **not masked**, - 1 indicates the head is **not masked**,
- 0 indicates the head is **masked**. - 0 indicates the head is **masked**.
inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
This is useful if you want more control over how to convert `input_ids` indices into associated vectors This is useful if you want more control over how to convert `input_ids` indices into associated vectors
than the model's internal embedding lookup matrix. than the model's internal embedding lookup matrix.
...@@ -788,7 +788,7 @@ class {{cookiecutter.camelcase_modelname}}Model({{cookiecutter.camelcase_modelna ...@@ -788,7 +788,7 @@ class {{cookiecutter.camelcase_modelname}}Model({{cookiecutter.camelcase_modelna
for layer, heads in heads_to_prune.items(): for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads) self.encoder.layer[layer].attention.prune_heads(heads)
@add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -947,7 +947,7 @@ class {{cookiecutter.camelcase_modelname}}ForMaskedLM({{cookiecutter.camelcase_m ...@@ -947,7 +947,7 @@ class {{cookiecutter.camelcase_modelname}}ForMaskedLM({{cookiecutter.camelcase_m
def set_output_embeddings(self, new_embeddings): def set_output_embeddings(self, new_embeddings):
self.cls.predictions.decoder = new_embeddings self.cls.predictions.decoder = new_embeddings
@add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -1385,7 +1385,7 @@ class {{cookiecutter.camelcase_modelname}}ForTokenClassification({{cookiecutter. ...@@ -1385,7 +1385,7 @@ class {{cookiecutter.camelcase_modelname}}ForTokenClassification({{cookiecutter.
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
...@@ -1472,7 +1472,7 @@ class {{cookiecutter.camelcase_modelname}}ForQuestionAnswering({{cookiecutter.ca ...@@ -1472,7 +1472,7 @@ class {{cookiecutter.camelcase_modelname}}ForQuestionAnswering({{cookiecutter.ca
self.init_weights() self.init_weights()
@add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment