Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
b2fdbacc
Unverified
Commit
b2fdbacc
authored
Jun 23, 2022
by
SaulLu
Committed by
GitHub
Jun 23, 2022
Browse files
change message (#17836)
parent
d37a68e6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
4 deletions
+16
-4
src/transformers/tokenization_utils_base.py
src/transformers/tokenization_utils_base.py
+16
-4
No files found.
src/transformers/tokenization_utils_base.py
View file @
b2fdbacc
...
...
@@ -291,7 +291,10 @@ class BatchEncoding(UserDict):
`List[str]`: The list of tokens at that index.
"""
if
not
self
.
_encodings
:
raise
ValueError
(
"tokens() is not available when using Python-based tokenizers"
)
raise
ValueError
(
"tokens() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
return
self
.
_encodings
[
batch_index
].
tokens
def
sequence_ids
(
self
,
batch_index
:
int
=
0
)
->
List
[
Optional
[
int
]]:
...
...
@@ -312,7 +315,10 @@ class BatchEncoding(UserDict):
sequence.
"""
if
not
self
.
_encodings
:
raise
ValueError
(
"sequence_ids() is not available when using Python-based tokenizers"
)
raise
ValueError
(
"sequence_ids() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
return
self
.
_encodings
[
batch_index
].
sequence_ids
def
words
(
self
,
batch_index
:
int
=
0
)
->
List
[
Optional
[
int
]]:
...
...
@@ -328,7 +334,10 @@ class BatchEncoding(UserDict):
(several tokens will be mapped to the same word index if they are parts of that word).
"""
if
not
self
.
_encodings
:
raise
ValueError
(
"words() is not available when using Python-based tokenizers"
)
raise
ValueError
(
"words() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
warnings
.
warn
(
"`BatchEncoding.words()` property is deprecated and should be replaced with the identical, "
"but more self-explanatory `BatchEncoding.word_ids()` property."
,
...
...
@@ -349,7 +358,10 @@ class BatchEncoding(UserDict):
(several tokens will be mapped to the same word index if they are parts of that word).
"""
if
not
self
.
_encodings
:
raise
ValueError
(
"word_ids() is not available when using Python-based tokenizers"
)
raise
ValueError
(
"word_ids() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
return
self
.
_encodings
[
batch_index
].
word_ids
def
token_to_sequence
(
self
,
batch_or_token_index
:
int
,
token_index
:
Optional
[
int
]
=
None
)
->
int
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment