Fix documentation of bbox in LayoutLM (#9017)

35bffd70 · Sylvain Gugger · GitHub · c95de29e · 35bffd70
Unverified Commit 35bffd70 authored Dec 10, 2020 by Sylvain Gugger Committed by GitHub Dec 10, 2020
Show whitespace changes
Inline Side-by-side

Showing with 8 additions and 10 deletions

src/transformers/models/layoutlm/modeling_layoutlm.py src/transformers/models/layoutlm/modeling_layoutlm.py +8 -10

No files found.
--- a/src/transformers/models/layoutlm/modeling_layoutlm.py
+++ b/src/transformers/models/layoutlm/modeling_layoutlm.py
@@ -562,7 +562,7 @@ LAYOUTLM_START_DOCSTRING = r"""

 LAYOUTLM_INPUTS_DOCSTRING = r"""
    Args:
-        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
+        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using :class:`transformers.LayoutLMTokenizer`. See
@@ -570,22 +570,20 @@ LAYOUTLM_INPUTS_DOCSTRING = r"""
            details.

            `What are input IDs? <../glossary.html#input-ids>`__
-        bbox (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        bbox (:obj:`torch.LongTensor` of shape :obj:`({0}, 4)`, `optional`):
            Bounding Boxes of each input sequence tokens. Selected in the range ``[0, config.max_2d_position_embeddings
            - 1]``.
-
-            `What are bboxes? <../glossary.html#position-ids>`_
-        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
+        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
            Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: ``1`` for
            tokens that are NOT MASKED, ``0`` for MASKED tokens.

            `What are attention masks? <../glossary.html#attention-mask>`__
-        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
            1]``: ``0`` corresponds to a `sentence A` token, ``1`` corresponds to a `sentence B` token

            `What are token type IDs? <../glossary.html#token-type-ids>`_
-        position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
            config.max_position_embeddings - 1]``.

@@ -643,7 +641,7 @@ class LayoutLMModel(LayoutLMPreTrainedModel):
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint="layoutlm-base-uncased",
@@ -784,7 +782,7 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel):
    def set_output_embeddings(self, new_embeddings):
        self.cls.predictions.decoder = new_embeddings

-    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint="layoutlm-base-uncased",
@@ -872,7 +870,7 @@ class LayoutLMForTokenClassification(LayoutLMPreTrainedModel):
    def get_input_embeddings(self):
        return self.layoutlm.embeddings.word_embeddings

-    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint="layoutlm-base-uncased",