"vscode:/vscode.git/clone" did not exist on "5c186003b830ed71dde9897a2acd5fb06fcc53aa"
Unverified commit acc3bd9d authored by Sylvain Gugger, committed by GitHub

Enforce string-formatting with f-strings (#10980)



* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
parent d0b3797a
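The hunks below apply the same mechanical rewrite across the test suite: old-style `%`-formatting and `str.format()` calls become f-strings. As a minimal stand-alone sketch of that pattern (the variable names here are made up for illustration and are not taken from the diff):

name, count = "input_ids", 3

# Old styles removed by this commit:
old_format = "Parameter {} occurs {} times".format(name, count)
old_percent = "Parameter %s occurs %d times" % (name, count)

# New style enforced everywhere:
new_fstring = f"Parameter {name} occurs {count} times"

assert old_format == old_percent == new_fstring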
@@ -195,8 +195,6 @@ class TFAutoModelTest(unittest.TestCase):
             mapping = tuple(mapping.items())
             for index, (child_config, child_model) in enumerate(mapping[1:]):
                 for parent_config, parent_model in mapping[: index + 1]:
-                    with self.subTest(
-                        msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
-                    ):
+                    with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"):
                         self.assertFalse(issubclass(child_config, parent_config))
                         self.assertFalse(issubclass(child_model, parent_model))
@@ -289,10 +289,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
             return True
         raise
     except Exception:
-        msg = "{} != {}".format(a, b)
-        if prefix:
-            msg = prefix + ": " + msg
-        raise AssertionError(msg)
+        if len(prefix) > 0:
+            prefix = f"{prefix}: "
+        raise AssertionError(f"{prefix}{a} != {b}")


 def _long_tensor(tok_lst):
...
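Only the tail of the `_assert_tensors_equal` helper appears in the hunk above (and in the six identical hunks that follow, one per model test file). For context, a plausible reconstruction of the whole helper after this change, assuming the comparison is done with `torch.allclose`; the lines above the `except` are not part of this diff:

import torch


def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
    """Raise an AssertionError with a readable message if two tensors differ."""
    if a is None and b is None:
        return True
    try:
        if torch.allclose(a, b, atol=atol):
            return True
        raise  # no active exception: raises RuntimeError, caught just below
    except Exception:
        if len(prefix) > 0:
            prefix = f"{prefix}: "
        raise AssertionError(f"{prefix}{a} != {b}")

Building the message with a single f-string avoids the intermediate `msg` variable that the old code threaded through string concatenation.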
@@ -287,10 +287,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
             return True
         raise
     except Exception:
-        msg = "{} != {}".format(a, b)
-        if prefix:
-            msg = prefix + ": " + msg
-        raise AssertionError(msg)
+        if len(prefix) > 0:
+            prefix = f"{prefix}: "
+        raise AssertionError(f"{prefix}{a} != {b}")


 def _long_tensor(tok_lst):
...
@@ -289,10 +289,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
             return True
         raise
     except Exception:
-        msg = "{} != {}".format(a, b)
-        if prefix:
-            msg = prefix + ": " + msg
-        raise AssertionError(msg)
+        if len(prefix) > 0:
+            prefix = f"{prefix}: "
+        raise AssertionError(f"{prefix}{a} != {b}")


 def _long_tensor(tok_lst):
...
@@ -380,10 +380,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
             return True
         raise
     except Exception:
-        msg = "{} != {}".format(a, b)
-        if prefix:
-            msg = prefix + ": " + msg
-        raise AssertionError(msg)
+        if len(prefix) > 0:
+            prefix = f"{prefix}: "
+        raise AssertionError(f"{prefix}{a} != {b}")


 def _long_tensor(tok_lst):
...
@@ -320,10 +320,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
             return True
         raise
     except Exception:
-        msg = "{} != {}".format(a, b)
-        if prefix:
-            msg = prefix + ": " + msg
-        raise AssertionError(msg)
+        if len(prefix) > 0:
+            prefix = f"{prefix}: "
+        raise AssertionError(f"{prefix}{a} != {b}")


 def _long_tensor(tok_lst):
...
@@ -291,10 +291,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
             return True
         raise
     except Exception:
-        msg = "{} != {}".format(a, b)
-        if prefix:
-            msg = prefix + ": " + msg
-        raise AssertionError(msg)
+        if len(prefix) > 0:
+            prefix = f"{prefix}: "
+        raise AssertionError(f"{prefix}{a} != {b}")


 def _long_tensor(tok_lst):
...
@@ -318,10 +318,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
             return True
         raise
     except Exception:
-        msg = "{} != {}".format(a, b)
-        if prefix:
-            msg = prefix + ": " + msg
-        raise AssertionError(msg)
+        if len(prefix) > 0:
+            prefix = f"{prefix}: "
+        raise AssertionError(f"{prefix}{a} != {b}")


 def _long_tensor(tok_lst):
...
@@ -320,13 +320,13 @@ class Wav2Vec2ModelTest(ModelTesterMixin, unittest.TestCase):
                     if "conv.weight" in name or "masked_spec_embed" in name:
                         self.assertTrue(
                             -1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
-                            msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
+                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                         )
                     else:
                         self.assertIn(
                             ((param.data.mean() * 1e9).round() / 1e9).item(),
                             [0.0, 1.0],
-                            msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
+                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                         )

     @slow
@@ -437,13 +437,13 @@ class Wav2Vec2RobustModelTest(ModelTesterMixin, unittest.TestCase):
                     if "conv.weight" in name or "masked_spec_embed" in name:
                         self.assertTrue(
                             -1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
-                            msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
+                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                         )
                     else:
                         self.assertIn(
                             ((param.data.mean() * 1e9).round() / 1e9).item(),
                             [0.0, 1.0],
-                            msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
+                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                         )

     @slow
...
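The assertion rewritten in the two hunks above rounds the parameter mean to nine decimal places before comparing it against 0.0 or 1.0. A standalone sketch of that check with a made-up tensor and names (torch availability assumed; this is not code from the tests):

import torch

name, model_class = "demo.conv.weight", "DemoModel"  # hypothetical identifiers
param = torch.full((4, 4), 1.0)

# Round the mean to 9 decimals so tiny float noise does not fail the check.
rounded_mean = ((param.data.mean() * 1e9).round() / 1e9).item()
assert rounded_mean in [0.0, 1.0], f"Parameter {name} of model {model_class} seems not properly initialized"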
@@ -101,9 +101,7 @@ class AutoTokenizerTest(unittest.TestCase):
             mapping = tuple(mapping.items())
             for index, (child_config, _) in enumerate(mapping[1:]):
                 for parent_config, _ in mapping[: index + 1]:
-                    with self.subTest(
-                        msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
-                    ):
+                    with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"):
                         self.assertFalse(issubclass(child_config, parent_config))

     @require_tokenizers
...
@@ -154,7 +154,7 @@ class TestTokenizationBart(TokenizerTesterMixin, unittest.TestCase):
     def test_embeded_special_tokens(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 sentence = "A, <mask> AllenNLP sentence."
...
@@ -250,7 +250,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_offsets_with_special_characters(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 sentence = f"A, naïve {tokenizer_r.mask_token} AllenNLP sentence."
...
@@ -38,7 +38,7 @@ class BertweetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         self.merges_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["merges_file"])
         with open(self.vocab_file, "w", encoding="utf-8") as fp:
             for token in vocab_tokens:
-                fp.write("{} {}".format(token, vocab_tokens[token]) + "\n")
+                fp.write(f"{token} {vocab_tokens[token]}\n")
         with open(self.merges_file, "w", encoding="utf-8") as fp:
             fp.write("\n".join(merges))
...
@@ -1216,18 +1216,18 @@ class TokenizerTesterMixin:
                     empty_tokens = tokenizer("", padding=True, pad_to_multiple_of=8)
                     normal_tokens = tokenizer("This is a sample input", padding=True, pad_to_multiple_of=8)
                     for key, value in empty_tokens.items():
-                        self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+                        self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
                     for key, value in normal_tokens.items():
-                        self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+                        self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")

                     normal_tokens = tokenizer("This", pad_to_multiple_of=8)
                     for key, value in normal_tokens.items():
-                        self.assertNotEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+                        self.assertNotEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")

                     # Should also work with truncation
                     normal_tokens = tokenizer("This", padding=True, truncation=True, pad_to_multiple_of=8)
                     for key, value in normal_tokens.items():
-                        self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+                        self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")

                     # truncation to something which is not a multiple of pad_to_multiple_of raises an error
                     self.assertRaises(
@@ -1897,7 +1897,7 @@ class TokenizerTesterMixin:
     def test_is_fast(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
@@ -1907,7 +1907,7 @@ class TokenizerTesterMixin:
     def test_fast_only_inputs(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 # Ensure None raise an error
@@ -1918,7 +1918,7 @@ class TokenizerTesterMixin:
     def test_alignement_methods(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 words = ["Wonderful", "no", "inspiration", "example", "with", "subtoken"]
@@ -2144,7 +2144,7 @@ class TokenizerTesterMixin:
     def test_tokenization_python_rust_equals(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
@@ -2181,7 +2181,7 @@ class TokenizerTesterMixin:
     def test_num_special_tokens_to_add_equal(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
@@ -2195,7 +2195,7 @@ class TokenizerTesterMixin:
     def test_max_length_equal(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
@@ -2205,7 +2205,7 @@ class TokenizerTesterMixin:
     def test_special_tokens_map_equal(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
@@ -2217,7 +2217,7 @@ class TokenizerTesterMixin:
     def test_add_tokens(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 vocab_size = len(tokenizer_r)
@@ -2239,7 +2239,7 @@ class TokenizerTesterMixin:
     def test_offsets_mapping(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 text = "Wonderful no inspiration example with subtoken"
@@ -2285,9 +2285,7 @@ class TokenizerTesterMixin:
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             tokenizer = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
-            with self.subTest(
-                "{} ({}, {})".format(tokenizer.__class__.__name__, pretrained_name, tokenizer.__class__.__name__)
-            ):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name}, {tokenizer.__class__.__name__})"):
                 if is_torch_available():
                     returned_tensor = "pt"
@@ -2341,7 +2339,7 @@ class TokenizerTesterMixin:
     def test_compare_pretokenized_inputs(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
@@ -2419,7 +2417,7 @@ class TokenizerTesterMixin:
     def test_create_token_type_ids(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 input_simple = [1, 2, 3]
@@ -2437,7 +2435,7 @@ class TokenizerTesterMixin:
     def test_build_inputs_with_special_tokens(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 # # Input string
@@ -2470,7 +2468,7 @@ class TokenizerTesterMixin:
     def test_padding(self, max_length=50):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
@@ -2688,7 +2686,7 @@ class TokenizerTesterMixin:
     def test_padding_different_model_input_name(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 self.assertEqual(tokenizer_p.pad_token_id, tokenizer_r.pad_token_id)
@@ -2722,7 +2720,7 @@ class TokenizerTesterMixin:
     def test_save_pretrained(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
@@ -2747,7 +2745,7 @@ class TokenizerTesterMixin:
     def test_embeded_special_tokens(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 sentence = "A, <mask> AllenNLP sentence."
@@ -2772,7 +2770,7 @@ class TokenizerTesterMixin:
     def test_compare_add_special_tokens(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 simple_num_special_tokens_to_add = tokenizer_r.num_special_tokens_to_add(pair=False)
@@ -2811,7 +2809,7 @@ class TokenizerTesterMixin:
     def test_compare_prepare_for_model(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 string_sequence = "Asserting that both tokenizers are equal"
...
@@ -133,7 +133,7 @@ class GPT2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_padding(self, max_length=15):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 # Simple input
...
@@ -87,7 +87,7 @@ class OpenAIGPTTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_padding(self, max_length=15):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 # Simple input
...
@@ -39,7 +39,7 @@ class PhobertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         with open(self.vocab_file, "w", encoding="utf-8") as fp:
             for token in vocab_tokens:
-                fp.write("{} {}".format(token, vocab_tokens[token]) + "\n")
+                fp.write(f"{token} {vocab_tokens[token]}\n")
         with open(self.merges_file, "w", encoding="utf-8") as fp:
             fp.write("\n".join(merges))
...
@@ -65,7 +65,7 @@ class ReformerTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_padding(self, max_length=15):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 # Simple input
...
@@ -167,7 +167,7 @@ class RobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_embeded_special_tokens(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 sentence = "A, <mask> AllenNLP sentence."
...
@@ -312,7 +312,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_offsets_with_special_characters(self):
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
-            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
+            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
                 sentence = f"A, naïve {tokenizer_r.mask_token} AllenNLP sentence."
@@ -807,18 +807,18 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                     empty_tokens = tokenizer(table, padding=True, pad_to_multiple_of=8)
                     normal_tokens = tokenizer(table, "This is a sample input", padding=True, pad_to_multiple_of=8)
                     for key, value in empty_tokens.items():
-                        self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+                        self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
                     for key, value in normal_tokens.items():
-                        self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+                        self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")

                     normal_tokens = tokenizer(table, "This", pad_to_multiple_of=8)
                     for key, value in normal_tokens.items():
-                        self.assertNotEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+                        self.assertNotEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")

                     # Should also work with truncation
                     normal_tokens = tokenizer(table, "This", padding=True, truncation=True, pad_to_multiple_of=8)
                     for key, value in normal_tokens.items():
-                        self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
+                        self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")

     @unittest.skip("TAPAS cannot handle `prepare_for_model` without passing by `encode_plus` or `batch_encode_plus`")
     def test_prepare_for_model(self):
...