Unverified Commit acc3bd9d authored by Sylvain Gugger, committed by GitHub

Enforce string-formatting with f-strings (#10980)



* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
parent d0b3797a
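
The change applied throughout the diff below is mechanical: every percent-style interpolation and str.format() call in the touched test files is rewritten as an f-string, which names the interpolated values directly inside the literal. A minimal sketch of the three equivalent forms (the variable names here are illustrative, not taken from the patch); f-strings require Python 3.6 or later, which the library's supported Python versions already satisfy:

    name, count = "input_ids", 3

    # Before: percent-style and str.format() interpolation
    msg_percent = "BatchEncoding.%s has %d values" % (name, count)
    msg_format = "BatchEncoding.{} has {} values".format(name, count)

    # After: the f-string evaluates the expressions in place
    msg_fstring = f"BatchEncoding.{name} has {count} values"

    assert msg_percent == msg_format == msg_fstring
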
......@@ -195,8 +195,6 @@ class TFAutoModelTest(unittest.TestCase):
mapping = tuple(mapping.items())
for index, (child_config, child_model) in enumerate(mapping[1:]):
for parent_config, parent_model in mapping[: index + 1]:
- with self.subTest(
-     msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
- ):
+ with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"):
self.assertFalse(issubclass(child_config, parent_config))
self.assertFalse(issubclass(child_model, parent_model))
......@@ -289,10 +289,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
return True
raise
except Exception:
msg = "{} != {}".format(a, b)
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
if len(prefix) > 0:
prefix = f"{prefix}: "
raise AssertionError(f"{prefix}{a} != {b}")
def _long_tensor(tok_lst):
......
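
The hunk above recurs verbatim in several other model test files below, since each one carries its own copy of the _assert_tensors_equal helper. As a quick check that the rewritten error-message construction behaves the same as before, here is a minimal, self-contained sketch of just that fragment (the helper name _format_mismatch and the sample values are hypothetical; the surrounding try/except and tensor comparison are elided):

    def _format_mismatch(a, b, prefix=""):
        # Same logic as the added lines: build the optional prefix with an
        # f-string, then interpolate both operands into the final message.
        if len(prefix) > 0:
            prefix = f"{prefix}: "
        return f"{prefix}{a} != {b}"

    assert _format_mismatch(1.0, 2.0) == "1.0 != 2.0"
    assert _format_mismatch(1.0, 2.0, prefix="logits") == "logits: 1.0 != 2.0"
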
......@@ -287,10 +287,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
return True
raise
except Exception:
msg = "{} != {}".format(a, b)
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
if len(prefix) > 0:
prefix = f"{prefix}: "
raise AssertionError(f"{prefix}{a} != {b}")
def _long_tensor(tok_lst):
......
......@@ -289,10 +289,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
return True
raise
except Exception:
msg = "{} != {}".format(a, b)
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
if len(prefix) > 0:
prefix = f"{prefix}: "
raise AssertionError(f"{prefix}{a} != {b}")
def _long_tensor(tok_lst):
......
......@@ -380,10 +380,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
return True
raise
except Exception:
msg = "{} != {}".format(a, b)
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
if len(prefix) > 0:
prefix = f"{prefix}: "
raise AssertionError(f"{prefix}{a} != {b}")
def _long_tensor(tok_lst):
......
......@@ -320,10 +320,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
return True
raise
except Exception:
msg = "{} != {}".format(a, b)
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
if len(prefix) > 0:
prefix = f"{prefix}: "
raise AssertionError(f"{prefix}{a} != {b}")
def _long_tensor(tok_lst):
......
......@@ -291,10 +291,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
return True
raise
except Exception:
msg = "{} != {}".format(a, b)
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
if len(prefix) > 0:
prefix = f"{prefix}: "
raise AssertionError(f"{prefix}{a} != {b}")
def _long_tensor(tok_lst):
......
......@@ -318,10 +318,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
return True
raise
except Exception:
msg = "{} != {}".format(a, b)
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
if len(prefix) > 0:
prefix = f"{prefix}: "
raise AssertionError(f"{prefix}{a} != {b}")
def _long_tensor(tok_lst):
......
......@@ -320,13 +320,13 @@ class Wav2Vec2ModelTest(ModelTesterMixin, unittest.TestCase):
if "conv.weight" in name or "masked_spec_embed" in name:
self.assertTrue(
-1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
@slow
......@@ -437,13 +437,13 @@ class Wav2Vec2RobustModelTest(ModelTesterMixin, unittest.TestCase):
if "conv.weight" in name or "masked_spec_embed" in name:
self.assertTrue(
-1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
else:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
@slow
......
......@@ -101,9 +101,7 @@ class AutoTokenizerTest(unittest.TestCase):
mapping = tuple(mapping.items())
for index, (child_config, _) in enumerate(mapping[1:]):
for parent_config, _ in mapping[: index + 1]:
- with self.subTest(
-     msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
- ):
+ with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"):
self.assertFalse(issubclass(child_config, parent_config))
@require_tokenizers
......
......@@ -154,7 +154,7 @@ class TestTokenizationBart(TokenizerTesterMixin, unittest.TestCase):
def test_embeded_special_tokens(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
sentence = "A, <mask> AllenNLP sentence."
......
......@@ -250,7 +250,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_offsets_with_special_characters(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
sentence = f"A, naïve {tokenizer_r.mask_token} AllenNLP sentence."
......
......@@ -38,7 +38,7 @@ class BertweetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.merges_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["merges_file"])
with open(self.vocab_file, "w", encoding="utf-8") as fp:
for token in vocab_tokens:
fp.write("{} {}".format(token, vocab_tokens[token]) + "\n")
fp.write(f"{token} {vocab_tokens[token]}\n")
with open(self.merges_file, "w", encoding="utf-8") as fp:
fp.write("\n".join(merges))
......
......@@ -1216,18 +1216,18 @@ class TokenizerTesterMixin:
empty_tokens = tokenizer("", padding=True, pad_to_multiple_of=8)
normal_tokens = tokenizer("This is a sample input", padding=True, pad_to_multiple_of=8)
for key, value in empty_tokens.items():
self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
for key, value in normal_tokens.items():
self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
normal_tokens = tokenizer("This", pad_to_multiple_of=8)
for key, value in normal_tokens.items():
- self.assertNotEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+ self.assertNotEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
# Should also work with truncation
normal_tokens = tokenizer("This", padding=True, truncation=True, pad_to_multiple_of=8)
for key, value in normal_tokens.items():
self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
# truncation to something which is not a multiple of pad_to_multiple_of raises an error
self.assertRaises(
......@@ -1897,7 +1897,7 @@ class TokenizerTesterMixin:
def test_is_fast(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
......@@ -1907,7 +1907,7 @@ class TokenizerTesterMixin:
def test_fast_only_inputs(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
# Ensure None raise an error
......@@ -1918,7 +1918,7 @@ class TokenizerTesterMixin:
def test_alignement_methods(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
words = ["Wonderful", "no", "inspiration", "example", "with", "subtoken"]
......@@ -2144,7 +2144,7 @@ class TokenizerTesterMixin:
def test_tokenization_python_rust_equals(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
......@@ -2181,7 +2181,7 @@ class TokenizerTesterMixin:
def test_num_special_tokens_to_add_equal(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
......@@ -2195,7 +2195,7 @@ class TokenizerTesterMixin:
def test_max_length_equal(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
......@@ -2205,7 +2205,7 @@ class TokenizerTesterMixin:
def test_special_tokens_map_equal(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
......@@ -2217,7 +2217,7 @@ class TokenizerTesterMixin:
def test_add_tokens(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
vocab_size = len(tokenizer_r)
......@@ -2239,7 +2239,7 @@ class TokenizerTesterMixin:
def test_offsets_mapping(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
text = "Wonderful no inspiration example with subtoken"
......@@ -2285,9 +2285,7 @@ class TokenizerTesterMixin:
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
tokenizer = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
- with self.subTest(
-     "{} ({}, {})".format(tokenizer.__class__.__name__, pretrained_name, tokenizer.__class__.__name__)
- ):
+ with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name}, {tokenizer.__class__.__name__})"):
if is_torch_available():
returned_tensor = "pt"
......@@ -2341,7 +2339,7 @@ class TokenizerTesterMixin:
def test_compare_pretokenized_inputs(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
......@@ -2419,7 +2417,7 @@ class TokenizerTesterMixin:
def test_create_token_type_ids(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
input_simple = [1, 2, 3]
......@@ -2437,7 +2435,7 @@ class TokenizerTesterMixin:
def test_build_inputs_with_special_tokens(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
# # Input string
......@@ -2470,7 +2468,7 @@ class TokenizerTesterMixin:
def test_padding(self, max_length=50):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
......@@ -2688,7 +2686,7 @@ class TokenizerTesterMixin:
def test_padding_different_model_input_name(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
self.assertEqual(tokenizer_p.pad_token_id, tokenizer_r.pad_token_id)
......@@ -2722,7 +2720,7 @@ class TokenizerTesterMixin:
def test_save_pretrained(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
......@@ -2747,7 +2745,7 @@ class TokenizerTesterMixin:
def test_embeded_special_tokens(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
sentence = "A, <mask> AllenNLP sentence."
......@@ -2772,7 +2770,7 @@ class TokenizerTesterMixin:
def test_compare_add_special_tokens(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
simple_num_special_tokens_to_add = tokenizer_r.num_special_tokens_to_add(pair=False)
......@@ -2811,7 +2809,7 @@ class TokenizerTesterMixin:
def test_compare_prepare_for_model(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
string_sequence = "Asserting that both tokenizers are equal"
......
......@@ -133,7 +133,7 @@ class GPT2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_padding(self, max_length=15):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
# Simple input
......
......@@ -87,7 +87,7 @@ class OpenAIGPTTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_padding(self, max_length=15):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
# Simple input
......
......@@ -39,7 +39,7 @@ class PhobertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
with open(self.vocab_file, "w", encoding="utf-8") as fp:
for token in vocab_tokens:
fp.write("{} {}".format(token, vocab_tokens[token]) + "\n")
fp.write(f"{token} {vocab_tokens[token]}\n")
with open(self.merges_file, "w", encoding="utf-8") as fp:
fp.write("\n".join(merges))
......
......@@ -65,7 +65,7 @@ class ReformerTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_padding(self, max_length=15):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
# Simple input
......
......@@ -167,7 +167,7 @@ class RobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_embeded_special_tokens(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
sentence = "A, <mask> AllenNLP sentence."
......
......@@ -312,7 +312,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_offsets_with_special_characters(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
sentence = f"A, naïve {tokenizer_r.mask_token} AllenNLP sentence."
......@@ -807,18 +807,18 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
empty_tokens = tokenizer(table, padding=True, pad_to_multiple_of=8)
normal_tokens = tokenizer(table, "This is a sample input", padding=True, pad_to_multiple_of=8)
for key, value in empty_tokens.items():
- self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+ self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
for key, value in normal_tokens.items():
- self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+ self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
normal_tokens = tokenizer(table, "This", pad_to_multiple_of=8)
for key, value in normal_tokens.items():
- self.assertNotEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+ self.assertNotEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
# Should also work with truncation
normal_tokens = tokenizer(table, "This", padding=True, truncation=True, pad_to_multiple_of=8)
for key, value in normal_tokens.items():
- self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+ self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
@unittest.skip("TAPAS cannot handle `prepare_for_model` without passing by `encode_plus` or `batch_encode_plus`")
def test_prepare_for_model(self):
......