"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "47b9165109dc19ef285179ced9ef856053ebcbc9"
Unverified commit ad1f7bef, authored by Sylvain Gugger and committed by GitHub
Browse files

Reformat to make code clearer in tokenizer call (#11497)

* Reformat to make code clearer

* Reformat to make code clearer
parent f748bd42
...@@ -2236,47 +2236,42 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): ...@@ -2236,47 +2236,42 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
:obj:`is_split_into_words=True` (to lift the ambiguity with a batch of sequences). :obj:`is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
""" """
# Input type checking for clearer error # Input type checking for clearer error
assert isinstance(text, str) or ( def _is_valid_text_input(t):
isinstance(text, (list, tuple)) if isinstance(t, str):
and ( # Strings are fine
len(text) == 0 return True
or ( elif isinstance(t, (list, tuple)):
isinstance(text[0], str) # List are fine as long as they are...
or (isinstance(text[0], (list, tuple)) and (len(text[0]) == 0 or isinstance(text[0][0], str))) if len(t) == 0:
) # ... empty
) return True
), ( elif isinstance(t[0], str):
"text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) " # ... list of strings
"or `List[List[str]]` (batch of pretokenized examples)." return True
) elif isinstance(t[0], (list, tuple)):
# ... list with an empty list or with a list of strings
return len(t[0]) == 0 or isinstance(t[0][0], str)
else:
return False
else:
return False
assert ( if not _is_valid_text_input(text):
text_pair is None raise ValueError(
or isinstance(text_pair, str) "text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) "
or ( "or `List[List[str]]` (batch of pretokenized examples)."
isinstance(text_pair, (list, tuple))
and (
len(text_pair) == 0
or (
isinstance(text_pair[0], str)
or (
isinstance(text_pair[0], (list, tuple))
and (len(text_pair[0]) == 0 or isinstance(text_pair[0][0], str))
)
)
)
) )
), (
"text_pair input must of type `str` (single example), `List[str]` (batch or single pretokenized example) "
"or `List[List[str]]` (batch of pretokenized examples)."
)
is_batched = bool( if text_pair is not None and not _is_valid_text_input(text_pair):
(not is_split_into_words and isinstance(text, (list, tuple))) raise ValueError(
or ( "text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) "
is_split_into_words and isinstance(text, (list, tuple)) and text and isinstance(text[0], (list, tuple)) "or `List[List[str]]` (batch of pretokenized examples)."
) )
)
if is_split_into_words:
is_batched = isinstance(text, (list, tuple)) and text and isinstance(text[0], (list, tuple))
else:
is_batched = isinstance(text, (list, tuple))
if is_batched: if is_batched:
if isinstance(text_pair, str): if isinstance(text_pair, str):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment