chenpangpang / transformers · commit 35bffd70 (unverified)

Fix documention of book in LayoutLM (#9017)

Authored Dec 10, 2020 by Sylvain Gugger; committed by GitHub on Dec 10, 2020.
Parent: c95de29e
Showing 1 changed file with 8 additions and 10 deletions.

src/transformers/models/layoutlm/modeling_layoutlm.py (+8, -10)
...
@@ -562,7 +562,7 @@ LAYOUTLM_START_DOCSTRING = r"""
 LAYOUTLM_INPUTS_DOCSTRING = r"""
     Args:
-        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
+        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
             Indices of input sequence tokens in the vocabulary.

             Indices can be obtained using :class:`transformers.LayoutLMTokenizer`. See
...
@@ -570,22 +570,20 @@ LAYOUTLM_INPUTS_DOCSTRING = r"""
             details.

             `What are input IDs? <../glossary.html#input-ids>`__
-        bbox (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        bbox (:obj:`torch.LongTensor` of shape :obj:`({0}, 4)`, `optional`):
             Bounding Boxes of each input sequence tokens. Selected in the range ``[0, config.max_2d_position_embeddings
             - 1]``.

             `What are bboxes? <../glossary.html#position-ids>`_
-        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
+        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
             Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: ``1`` for
             tokens that are NOT MASKED, ``0`` for MASKED tokens.

             `What are attention masks? <../glossary.html#attention-mask>`__
-        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
             1]``: ``0`` corresponds to a `sentence A` token, ``1`` corresponds to a `sentence B` token

             `What are token type IDs? <../glossary.html#token-type-ids>`_
-        position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
             config.max_position_embeddings - 1]``.
...
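The two hunks above move the surrounding parentheses out of the format argument and into the docstring template itself, which is what lets `bbox` document its extra trailing dimension of size 4. A minimal sketch of the effect follows; the template lines are abbreviated from the diff above, not the full LAYOUTLM_INPUTS_DOCSTRING:

# Minimal sketch of the placeholder change; the two template lines are
# abbreviated from the diff above, not the full LAYOUTLM_INPUTS_DOCSTRING.
OLD_BBOX = "bbox (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):"
NEW_BBOX = "bbox (:obj:`torch.LongTensor` of shape :obj:`({0}, 4)`, `optional`):"

# Before: the caller supplied the parentheses, so bbox could only render the
# generic shape and had no way to show its trailing coordinate dimension.
print(OLD_BBOX.format("(batch_size, sequence_length)"))
# -> ... of shape :obj:`(batch_size, sequence_length)` ...

# After: the template owns the parentheses, so bbox renders its true shape.
print(NEW_BBOX.format("batch_size, sequence_length"))
# -> ... of shape :obj:`(batch_size, sequence_length, 4)` ...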
@@ -643,7 +641,7 @@ class LayoutLMModel(LayoutLMPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="layoutlm-base-uncased",
...
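This and the remaining hunks make the matching one-line change at each call site: since the template now adds the parentheses, the decorators pass the bare dimension names. For readers unfamiliar with the pattern, here is a sketch of what a docstring-injecting decorator like `add_start_docstrings_to_model_forward` does; this is an illustrative simplification, not the actual transformers implementation (the real one also interpolates the model class into a usage note):

# Illustrative simplification, not the transformers implementation: prepend
# the formatted argument documentation to the decorated method's docstring.
def add_start_docstrings_to_model_forward(*docstr):
    def decorator(fn):
        fn.__doc__ = "".join(docstr) + (fn.__doc__ or "")
        return fn
    return decorator

# Hypothetical abbreviated template standing in for LAYOUTLM_INPUTS_DOCSTRING.
INPUTS_DOCSTRING = "Args:\n    input_ids of shape :obj:`({0})`\n"

class TinyModel:
    @add_start_docstrings_to_model_forward(INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    def forward(self, input_ids):
        """Runs the forward pass."""
        return input_ids

print(TinyModel.forward.__doc__)
# Args:
#     input_ids of shape :obj:`(batch_size, sequence_length)`
# Runs the forward pass.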
@@ -784,7 +782,7 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel):
     def set_output_embeddings(self, new_embeddings):
         self.cls.predictions.decoder = new_embeddings

-    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="layoutlm-base-uncased",
...
@@ -872,7 +870,7 @@ class LayoutLMForTokenClassification(LayoutLMPreTrainedModel):
     def get_input_embeddings(self):
         return self.layoutlm.embeddings.word_embeddings

-    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="layoutlm-base-uncased",
...
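After these changes, the rendered docstrings can be spot-checked: each shape placeholder should come out wrapped in exactly one pair of parentheses. A hypothetical sanity check, not part of the repository; the template below abbreviates LAYOUTLM_INPUTS_DOCSTRING to its two shape-bearing lines:

# Hypothetical sanity check (not from the repository): verify the placeholder
# renders with single parentheses and the bbox shape carries its trailing 4.
TEMPLATE = (
    "input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):\n"
    "bbox (:obj:`torch.LongTensor` of shape :obj:`({0}, 4)`, `optional`):\n"
)

rendered = TEMPLATE.format("batch_size, sequence_length")
assert "(batch_size, sequence_length)" in rendered
assert "(batch_size, sequence_length, 4)" in rendered
assert "((" not in rendered  # doubled parentheses would mean the old bug is back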