Unverified Commit b2fdbacc authored by SaulLu, committed by GitHub
Browse files

change message (#17836)

parent d37a68e6
...@@ -291,7 +291,10 @@ class BatchEncoding(UserDict): ...@@ -291,7 +291,10 @@ class BatchEncoding(UserDict):
`List[str]`: The list of tokens at that index. `List[str]`: The list of tokens at that index.
""" """
if not self._encodings: if not self._encodings:
raise ValueError("tokens() is not available when using Python-based tokenizers") raise ValueError(
"tokens() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
return self._encodings[batch_index].tokens return self._encodings[batch_index].tokens
def sequence_ids(self, batch_index: int = 0) -> List[Optional[int]]: def sequence_ids(self, batch_index: int = 0) -> List[Optional[int]]:
...@@ -312,7 +315,10 @@ class BatchEncoding(UserDict): ...@@ -312,7 +315,10 @@ class BatchEncoding(UserDict):
sequence. sequence.
""" """
if not self._encodings: if not self._encodings:
raise ValueError("sequence_ids() is not available when using Python-based tokenizers") raise ValueError(
"sequence_ids() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
return self._encodings[batch_index].sequence_ids return self._encodings[batch_index].sequence_ids
def words(self, batch_index: int = 0) -> List[Optional[int]]: def words(self, batch_index: int = 0) -> List[Optional[int]]:
...@@ -328,7 +334,10 @@ class BatchEncoding(UserDict): ...@@ -328,7 +334,10 @@ class BatchEncoding(UserDict):
(several tokens will be mapped to the same word index if they are parts of that word). (several tokens will be mapped to the same word index if they are parts of that word).
""" """
if not self._encodings: if not self._encodings:
raise ValueError("words() is not available when using Python-based tokenizers") raise ValueError(
"words() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
warnings.warn( warnings.warn(
"`BatchEncoding.words()` property is deprecated and should be replaced with the identical, " "`BatchEncoding.words()` property is deprecated and should be replaced with the identical, "
"but more self-explanatory `BatchEncoding.word_ids()` property.", "but more self-explanatory `BatchEncoding.word_ids()` property.",
...@@ -349,7 +358,10 @@ class BatchEncoding(UserDict): ...@@ -349,7 +358,10 @@ class BatchEncoding(UserDict):
(several tokens will be mapped to the same word index if they are parts of that word). (several tokens will be mapped to the same word index if they are parts of that word).
""" """
if not self._encodings: if not self._encodings:
raise ValueError("word_ids() is not available when using Python-based tokenizers") raise ValueError(
"word_ids() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
return self._encodings[batch_index].word_ids return self._encodings[batch_index].word_ids
def token_to_sequence(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int: def token_to_sequence(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment