Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
c4a997cd
Unverified
Commit
c4a997cd
authored
Oct 21, 2022
by
Sylvain Gugger
Committed by
GitHub
Oct 21, 2022
Browse files
Use None to detect if truncation was unset (#19794)
* Use None to detect if truncation was unset * Fix repo consistency
parent
2e5c6f59
Changes
16
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
57 additions
and
55 deletions
+57
-55
src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py
...transformers/models/layoutlmv2/tokenization_layoutlmv2.py
+5
-5
src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py
...formers/models/layoutlmv2/tokenization_layoutlmv2_fast.py
+3
-3
src/transformers/models/layoutlmv3/processing_layoutlmv3.py
src/transformers/models/layoutlmv3/processing_layoutlmv3.py
+1
-1
src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py
...transformers/models/layoutlmv3/tokenization_layoutlmv3.py
+5
-5
src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py
...formers/models/layoutlmv3/tokenization_layoutlmv3_fast.py
+3
-3
src/transformers/models/layoutxlm/processing_layoutxlm.py
src/transformers/models/layoutxlm/processing_layoutxlm.py
+1
-1
src/transformers/models/layoutxlm/tokenization_layoutxlm.py
src/transformers/models/layoutxlm/tokenization_layoutxlm.py
+2
-2
src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py
...nsformers/models/layoutxlm/tokenization_layoutxlm_fast.py
+1
-1
src/transformers/models/luke/tokenization_luke.py
src/transformers/models/luke/tokenization_luke.py
+2
-2
src/transformers/models/markuplm/processing_markuplm.py
src/transformers/models/markuplm/processing_markuplm.py
+1
-1
src/transformers/models/markuplm/tokenization_markuplm.py
src/transformers/models/markuplm/tokenization_markuplm.py
+5
-5
src/transformers/models/markuplm/tokenization_markuplm_fast.py
...ransformers/models/markuplm/tokenization_markuplm_fast.py
+3
-3
src/transformers/models/mluke/tokenization_mluke.py
src/transformers/models/mluke/tokenization_mluke.py
+2
-2
src/transformers/models/tapex/tokenization_tapex.py
src/transformers/models/tapex/tokenization_tapex.py
+9
-9
src/transformers/models/vilt/processing_vilt.py
src/transformers/models/vilt/processing_vilt.py
+1
-1
src/transformers/tokenization_utils_base.py
src/transformers/tokenization_utils_base.py
+13
-11
No files found.
src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py
View file @
c4a997cd
...
@@ -432,7 +432,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
...
@@ -432,7 +432,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -584,7 +584,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
...
@@ -584,7 +584,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -767,7 +767,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
...
@@ -767,7 +767,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
List
[
int
]]
=
None
,
word_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -814,7 +814,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
...
@@ -814,7 +814,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
List
[
int
]]
=
None
,
word_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -933,7 +933,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
...
@@ -933,7 +933,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
List
[
int
]]
=
None
,
word_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py
View file @
c4a997cd
...
@@ -185,7 +185,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
...
@@ -185,7 +185,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -337,7 +337,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
...
@@ -337,7 +337,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -401,7 +401,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
...
@@ -401,7 +401,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
word_labels
:
Optional
[
List
[
int
]]
=
None
,
word_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/models/layoutlmv3/processing_layoutlmv3.py
View file @
c4a997cd
...
@@ -53,7 +53,7 @@ class LayoutLMv3Processor(ProcessorMixin):
...
@@ -53,7 +53,7 @@ class LayoutLMv3Processor(ProcessorMixin):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py
View file @
c4a997cd
...
@@ -554,7 +554,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
...
@@ -554,7 +554,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -707,7 +707,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
...
@@ -707,7 +707,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -893,7 +893,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
...
@@ -893,7 +893,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
List
[
int
]]
=
None
,
word_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -941,7 +941,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
...
@@ -941,7 +941,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
List
[
int
]]
=
None
,
word_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -1061,7 +1061,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
...
@@ -1061,7 +1061,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
List
[
int
]]
=
None
,
word_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py
View file @
c4a997cd
...
@@ -231,7 +231,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast):
...
@@ -231,7 +231,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -384,7 +384,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast):
...
@@ -384,7 +384,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -450,7 +450,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast):
...
@@ -450,7 +450,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast):
word_labels
:
Optional
[
List
[
int
]]
=
None
,
word_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/models/layoutxlm/processing_layoutxlm.py
View file @
c4a997cd
...
@@ -53,7 +53,7 @@ class LayoutXLMProcessor(ProcessorMixin):
...
@@ -53,7 +53,7 @@ class LayoutXLMProcessor(ProcessorMixin):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/models/layoutxlm/tokenization_layoutxlm.py
View file @
c4a997cd
...
@@ -450,7 +450,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
...
@@ -450,7 +450,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -777,7 +777,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
...
@@ -777,7 +777,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
word_labels
:
Optional
[
List
[
int
]]
=
None
,
word_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py
View file @
c4a997cd
...
@@ -275,7 +275,7 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast):
...
@@ -275,7 +275,7 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast):
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
word_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/models/luke/tokenization_luke.py
View file @
c4a997cd
...
@@ -268,7 +268,7 @@ class LukeTokenizer(RobertaTokenizer):
...
@@ -268,7 +268,7 @@ class LukeTokenizer(RobertaTokenizer):
entities_pair
:
Optional
[
Union
[
EntityInput
,
List
[
EntityInput
]]]
=
None
,
entities_pair
:
Optional
[
Union
[
EntityInput
,
List
[
EntityInput
]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
max_entity_length
:
Optional
[
int
]
=
None
,
max_entity_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
...
@@ -858,7 +858,7 @@ class LukeTokenizer(RobertaTokenizer):
...
@@ -858,7 +858,7 @@ class LukeTokenizer(RobertaTokenizer):
pair_entity_token_spans
:
Optional
[
List
[
Tuple
[
int
,
int
]]]
=
None
,
pair_entity_token_spans
:
Optional
[
List
[
Tuple
[
int
,
int
]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
max_entity_length
:
Optional
[
int
]
=
None
,
max_entity_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
...
...
src/transformers/models/markuplm/processing_markuplm.py
View file @
c4a997cd
...
@@ -54,7 +54,7 @@ class MarkupLMProcessor(ProcessorMixin):
...
@@ -54,7 +54,7 @@ class MarkupLMProcessor(ProcessorMixin):
questions
=
None
,
questions
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/models/markuplm/tokenization_markuplm.py
View file @
c4a997cd
...
@@ -514,7 +514,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
...
@@ -514,7 +514,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
node_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
node_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -663,7 +663,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
...
@@ -663,7 +663,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
node_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
node_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -844,7 +844,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
...
@@ -844,7 +844,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
node_labels
:
Optional
[
List
[
int
]]
=
None
,
node_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -891,7 +891,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
...
@@ -891,7 +891,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
node_labels
:
Optional
[
List
[
int
]]
=
None
,
node_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -1010,7 +1010,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
...
@@ -1010,7 +1010,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
node_labels
:
Optional
[
List
[
int
]]
=
None
,
node_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/models/markuplm/tokenization_markuplm_fast.py
View file @
c4a997cd
...
@@ -290,7 +290,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast):
...
@@ -290,7 +290,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast):
node_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
node_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -438,7 +438,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast):
...
@@ -438,7 +438,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast):
node_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
node_labels
:
Optional
[
Union
[
List
[
int
],
List
[
List
[
int
]]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -501,7 +501,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast):
...
@@ -501,7 +501,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast):
node_labels
:
Optional
[
List
[
int
]]
=
None
,
node_labels
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/models/mluke/tokenization_mluke.py
View file @
c4a997cd
...
@@ -374,7 +374,7 @@ class MLukeTokenizer(PreTrainedTokenizer):
...
@@ -374,7 +374,7 @@ class MLukeTokenizer(PreTrainedTokenizer):
entities_pair
:
Optional
[
Union
[
EntityInput
,
List
[
EntityInput
]]]
=
None
,
entities_pair
:
Optional
[
Union
[
EntityInput
,
List
[
EntityInput
]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
max_entity_length
:
Optional
[
int
]
=
None
,
max_entity_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
...
@@ -970,7 +970,7 @@ class MLukeTokenizer(PreTrainedTokenizer):
...
@@ -970,7 +970,7 @@ class MLukeTokenizer(PreTrainedTokenizer):
pair_entity_token_spans
:
Optional
[
List
[
Tuple
[
int
,
int
]]]
=
None
,
pair_entity_token_spans
:
Optional
[
List
[
Tuple
[
int
,
int
]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
max_entity_length
:
Optional
[
int
]
=
None
,
max_entity_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
...
...
src/transformers/models/tapex/tokenization_tapex.py
View file @
c4a997cd
...
@@ -518,7 +518,7 @@ class TapexTokenizer(PreTrainedTokenizer):
...
@@ -518,7 +518,7 @@ class TapexTokenizer(PreTrainedTokenizer):
answer
:
Union
[
str
,
List
[
str
]]
=
None
,
answer
:
Union
[
str
,
List
[
str
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -595,7 +595,7 @@ class TapexTokenizer(PreTrainedTokenizer):
...
@@ -595,7 +595,7 @@ class TapexTokenizer(PreTrainedTokenizer):
answer
:
Union
[
str
,
List
[
str
]]
=
None
,
answer
:
Union
[
str
,
List
[
str
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -683,7 +683,7 @@ class TapexTokenizer(PreTrainedTokenizer):
...
@@ -683,7 +683,7 @@ class TapexTokenizer(PreTrainedTokenizer):
answer
:
List
[
str
]
=
None
,
answer
:
List
[
str
]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
...
@@ -871,7 +871,7 @@ class TapexTokenizer(PreTrainedTokenizer):
...
@@ -871,7 +871,7 @@ class TapexTokenizer(PreTrainedTokenizer):
answer
:
Optional
[
str
]
=
None
,
answer
:
Optional
[
str
]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
,
TapexTruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
,
TapexTruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
**
kwargs
**
kwargs
...
@@ -903,7 +903,7 @@ class TapexTokenizer(PreTrainedTokenizer):
...
@@ -903,7 +903,7 @@ class TapexTokenizer(PreTrainedTokenizer):
answer
:
Optional
[
str
]
=
None
,
answer
:
Optional
[
str
]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
...
@@ -1007,7 +1007,7 @@ class TapexTokenizer(PreTrainedTokenizer):
...
@@ -1007,7 +1007,7 @@ class TapexTokenizer(PreTrainedTokenizer):
answer
:
Union
[
str
,
List
[
str
]],
answer
:
Union
[
str
,
List
[
str
]],
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
@@ -1072,7 +1072,7 @@ class TapexTokenizer(PreTrainedTokenizer):
...
@@ -1072,7 +1072,7 @@ class TapexTokenizer(PreTrainedTokenizer):
answer
:
List
[
str
],
answer
:
List
[
str
],
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
...
@@ -1187,7 +1187,7 @@ class TapexTokenizer(PreTrainedTokenizer):
...
@@ -1187,7 +1187,7 @@ class TapexTokenizer(PreTrainedTokenizer):
answer
:
str
,
answer
:
str
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
,
TapexTruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
,
TapexTruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
**
kwargs
**
kwargs
...
@@ -1218,7 +1218,7 @@ class TapexTokenizer(PreTrainedTokenizer):
...
@@ -1218,7 +1218,7 @@ class TapexTokenizer(PreTrainedTokenizer):
answer
:
str
,
answer
:
str
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
...
...
src/transformers/models/vilt/processing_vilt.py
View file @
c4a997cd
...
@@ -49,7 +49,7 @@ class ViltProcessor(ProcessorMixin):
...
@@ -49,7 +49,7 @@ class ViltProcessor(ProcessorMixin):
text
:
Union
[
TextInput
,
PreTokenizedInput
,
List
[
TextInput
],
List
[
PreTokenizedInput
]]
=
None
,
text
:
Union
[
TextInput
,
PreTokenizedInput
,
List
[
TextInput
],
List
[
PreTokenizedInput
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
src/transformers/tokenization_utils_base.py
View file @
c4a997cd
...
@@ -2235,7 +2235,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -2235,7 +2235,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
text_pair
:
Optional
[
Union
[
TextInput
,
PreTokenizedInput
,
EncodedInput
]]
=
None
,
text_pair
:
Optional
[
Union
[
TextInput
,
PreTokenizedInput
,
EncodedInput
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
return_tensors
:
Optional
[
Union
[
str
,
TensorType
]]
=
None
,
...
@@ -2274,7 +2274,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -2274,7 +2274,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
raise
NotImplementedError
raise
NotImplementedError
def
_get_padding_truncation_strategies
(
def
_get_padding_truncation_strategies
(
self
,
padding
=
False
,
truncation
=
Fals
e
,
max_length
=
None
,
pad_to_multiple_of
=
None
,
verbose
=
True
,
**
kwargs
self
,
padding
=
False
,
truncation
=
Non
e
,
max_length
=
None
,
pad_to_multiple_of
=
None
,
verbose
=
True
,
**
kwargs
):
):
"""
"""
Find the correct padding/truncation strategy with backward compatibility for old arguments (truncation_strategy
Find the correct padding/truncation strategy with backward compatibility for old arguments (truncation_strategy
...
@@ -2285,7 +2285,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -2285,7 +2285,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
# Backward compatibility for previous behavior, maybe we should deprecate it:
# Backward compatibility for previous behavior, maybe we should deprecate it:
# If you only set max_length, it activates truncation for max_length
# If you only set max_length, it activates truncation for max_length
if
max_length
is
not
None
and
padding
is
False
and
truncation
is
Fals
e
:
if
max_length
is
not
None
and
padding
is
False
and
truncation
is
Non
e
:
if
verbose
:
if
verbose
:
if
not
self
.
deprecation_warnings
.
get
(
"Truncation-not-explicitly-activated"
,
False
):
if
not
self
.
deprecation_warnings
.
get
(
"Truncation-not-explicitly-activated"
,
False
):
logger
.
warning
(
logger
.
warning
(
...
@@ -2316,7 +2316,9 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -2316,7 +2316,9 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
elif
padding
is
not
False
:
elif
padding
is
not
False
:
if
padding
is
True
:
if
padding
is
True
:
if
verbose
:
if
verbose
:
if
max_length
is
not
None
and
(
truncation
is
False
or
truncation
==
"do_not_truncate"
):
if
max_length
is
not
None
and
(
truncation
is
None
or
truncation
is
False
or
truncation
==
"do_not_truncate"
):
warnings
.
warn
(
warnings
.
warn
(
"`max_length` is ignored when `padding`=`True` and there is no truncation strategy. "
"`max_length` is ignored when `padding`=`True` and there is no truncation strategy. "
"To pad to max length, use `padding='max_length'`."
"To pad to max length, use `padding='max_length'`."
...
@@ -2332,7 +2334,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -2332,7 +2334,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
padding_strategy
=
PaddingStrategy
.
DO_NOT_PAD
padding_strategy
=
PaddingStrategy
.
DO_NOT_PAD
# Get truncation strategy
# Get truncation strategy
if
truncation
is
Fals
e
and
old_truncation_strategy
!=
"do_not_truncate"
:
if
truncation
is
Non
e
and
old_truncation_strategy
!=
"do_not_truncate"
:
if
verbose
:
if
verbose
:
warnings
.
warn
(
warnings
.
warn
(
"The `truncation_strategy` argument is deprecated and will be removed in a future version, use"
"The `truncation_strategy` argument is deprecated and will be removed in a future version, use"
...
@@ -2346,7 +2348,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -2346,7 +2348,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
FutureWarning
,
FutureWarning
,
)
)
truncation_strategy
=
TruncationStrategy
(
old_truncation_strategy
)
truncation_strategy
=
TruncationStrategy
(
old_truncation_strategy
)
elif
truncation
is
not
False
:
elif
truncation
is
not
False
and
truncation
is
not
None
:
if
truncation
is
True
:
if
truncation
is
True
:
truncation_strategy
=
(
truncation_strategy
=
(
TruncationStrategy
.
LONGEST_FIRST
TruncationStrategy
.
LONGEST_FIRST
...
@@ -2420,7 +2422,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -2420,7 +2422,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
]
=
None
,
]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
is_split_into_words
:
bool
=
False
,
is_split_into_words
:
bool
=
False
,
...
@@ -2504,7 +2506,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -2504,7 +2506,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
text_pair
:
Optional
[
Union
[
TextInput
,
PreTokenizedInput
,
List
[
TextInput
],
List
[
PreTokenizedInput
]]]
=
None
,
text_pair
:
Optional
[
Union
[
TextInput
,
PreTokenizedInput
,
List
[
TextInput
],
List
[
PreTokenizedInput
]]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
is_split_into_words
:
bool
=
False
,
is_split_into_words
:
bool
=
False
,
...
@@ -2617,7 +2619,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -2617,7 +2619,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
text_pair
:
Optional
[
Union
[
TextInput
,
PreTokenizedInput
,
EncodedInput
]]
=
None
,
text_pair
:
Optional
[
Union
[
TextInput
,
PreTokenizedInput
,
EncodedInput
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
is_split_into_words
:
bool
=
False
,
is_split_into_words
:
bool
=
False
,
...
@@ -2719,7 +2721,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -2719,7 +2721,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
],
],
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
is_split_into_words
:
bool
=
False
,
is_split_into_words
:
bool
=
False
,
...
@@ -3029,7 +3031,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -3029,7 +3031,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
pair_ids
:
Optional
[
List
[
int
]]
=
None
,
pair_ids
:
Optional
[
List
[
int
]]
=
None
,
add_special_tokens
:
bool
=
True
,
add_special_tokens
:
bool
=
True
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
padding
:
Union
[
bool
,
str
,
PaddingStrategy
]
=
False
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Fals
e
,
truncation
:
Union
[
bool
,
str
,
TruncationStrategy
]
=
Non
e
,
max_length
:
Optional
[
int
]
=
None
,
max_length
:
Optional
[
int
]
=
None
,
stride
:
int
=
0
,
stride
:
int
=
0
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
pad_to_multiple_of
:
Optional
[
int
]
=
None
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment