"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "dcb183f4bdcd9491efb68b3c28d51614a11e59dc"
Unverified Commit 280db79a authored by Lysandre Debut, committed by GitHub

BatchEncoding.to with device with tests (#9584)

parent 8bf27075
src/transformers/tokenization_utils_base.py
@@ -65,6 +65,12 @@ def _is_torch(x):
     return isinstance(x, torch.Tensor)
 
 
+def _is_torch_device(x):
+    import torch
+
+    return isinstance(x, torch.device)
+
+
 def _is_tensorflow(x):
     import tensorflow as tf
@@ -801,7 +807,7 @@ class BatchEncoding(UserDict):
         # This check catches things like APEX blindly calling "to" on all inputs to a module
         # Otherwise it passes the casts down and casts the LongTensor containing the token idxs
         # into a HalfTensor
-        if isinstance(device, str) or isinstance(device, torch.device) or isinstance(device, int):
+        if isinstance(device, str) or _is_torch_device(device) or isinstance(device, int):
             self.data = {k: v.to(device=device) for k, v in self.data.items()}
         else:
             logger.warning(
...
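Why the one-line change in BatchEncoding.to matters, on my reading of the diff: this module imports torch only inside helper functions (see _is_tensorflow above), so the old check's bare torch.device reference would raise a NameError as soon as .to() was called with anything other than a str. A minimal standalone sketch of that short-circuit behavior, assuming PyTorch is installed:

# torch is deliberately not imported at module scope here, mirroring the
# lazy-import style of the file being patched.

def old_style_check(device):
    # Fine for strings: `or` short-circuits before torch.device is evaluated.
    # For any other argument, evaluating the second operand raises NameError
    # because the name `torch` is unbound in this module.
    return isinstance(device, str) or isinstance(device, torch.device) or isinstance(device, int)


print(old_style_check("cpu"))  # True -- the torch.device operand is never reached

import torch as _torch  # bound only as _torch, so the name `torch` stays undefined

try:
    old_style_check(_torch.device("cpu"))
except NameError as err:
    print("fails as expected:", err)  # name 'torch' is not defined

Routing the check through _is_torch_device, which imports torch locally, keeps the module importable without PyTorch while still allowing the isinstance test.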
tests/test_tokenization_common.py
@@ -1704,6 +1704,10 @@ class TokenizerTesterMixin:
             first_ten_tokens = list(tokenizer.get_vocab().keys())[:10]
             sequence = " ".join(first_ten_tokens)
             encoded_sequence = tokenizer.encode_plus(sequence, return_tensors="pt")
+
+            # Ensure that the BatchEncoding.to() method works.
+            encoded_sequence.to(model.device)
+
             batch_encoded_sequence = tokenizer.batch_encode_plus([sequence, sequence], return_tensors="pt")
             # This should not fail
...
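A short usage sketch of the fixed behavior (the checkpoint name is illustrative; requires PyTorch):

import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

encoded = tokenizer("hello world", return_tensors="pt")

# All three accepted device specifications:
encoded = encoded.to("cpu")                # str
encoded = encoded.to(torch.device("cpu"))  # torch.device -- the case exercised by the new test
if torch.cuda.is_available():
    encoded = encoded.to(0)                # int CUDA device index

outputs = model(**encoded.to(model.device))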