Unverified commit 70527ba6, authored by Sylvain Gugger, committed by GitHub
Browse files

Fix PreTrainedTokenizer.pad when first inputs are empty (#9018)

* Fix PreTrainedTokenizer.pad when first inputs are empty

* Handle empty inputs case
parent 783d7d26
...@@ -2664,10 +2664,17 @@ class PreTrainedTokenizerBase(SpecialTokensMixin): ...@@ -2664,10 +2664,17 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
# If we have PyTorch/TF/NumPy tensors/arrays as inputs, we cast them as python objects # If we have PyTorch/TF/NumPy tensors/arrays as inputs, we cast them as python objects
# and rebuild them afterwards if no return_tensors is specified # and rebuild them afterwards if no return_tensors is specified
# Note that we lose the specific device the tensor may be on for PyTorch # Note that we lose the specific device the tensor may be on for PyTorch
first_element = encoded_inputs["input_ids"][0] first_element = encoded_inputs["input_ids"][0]
if isinstance(first_element, (list, tuple)) and first_element: if isinstance(first_element, (list, tuple)):
first_element = first_element[0] # first_element might be an empty list/tuple in some edge cases so we grab the first non empty element.
if not isinstance(first_element, int): index = 0
while len(encoded_inputs["input_ids"][index]) == 0:
index += 1
if index < len(encoded_inputs["input_ids"]):
first_element = encoded_inputs["input_ids"][index][0]
# At this stage, if `first_element` is still a list/tuple, it's an empty one so there is nothing to do.
if not isinstance(first_element, (int, list, tuple)):
if is_tf_available() and isinstance(first_element, tf.Tensor): if is_tf_available() and isinstance(first_element, tf.Tensor):
return_tensors = "tf" if return_tensors is None else return_tensors return_tensors = "tf" if return_tensors is None else return_tensors
elif is_torch_available() and isinstance(first_element, torch.Tensor): elif is_torch_available() and isinstance(first_element, torch.Tensor):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment