Unverified commit acc3bd9d authored by Sylvain Gugger, committed by GitHub

Enforce string-formatting with f-strings (#10980)



* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
parent d0b3797a
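For context, the diff below converts both legacy styles, `str.format()` and `%`-interpolation, to f-strings. A minimal sketch of the pattern, with hypothetical values not taken from the diff:

```python
name, count = "bert-base", 3

# The two legacy styles this commit removes:
old_format = "Loaded {} ({} shards)".format(name, count)  # str.format()
old_percent = "Loaded %s (%d shards)" % (name, count)     # %-interpolation

# The f-string replacement, interpolated directly at the call site:
new = f"Loaded {name} ({count} shards)"

assert old_format == old_percent == new
```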
......@@ -129,9 +129,8 @@ class FillMaskPipeline(Pipeline):
target_enc = self.tokenizer.tokenize(target)
if len(target_enc) > 1 or target_enc[0] == self.tokenizer.unk_token:
logger.warning(
"The specified target token `{}` does not exist in the model vocabulary. Replacing with `{}`.".format(
target, target_enc[0]
)
f"The specified target token `{target}` does not exist in the model vocabulary. "
f"Replacing with `{target_enc[0]}`."
)
targets_proc.append(target_enc[0])
target_inds = np.array(self.tokenizer.convert_tokens_to_ids(targets_proc))
......
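A recurring pattern in hunks like the one above: a long message split across adjacent string literals, which Python concatenates at compile time. A small self-contained sketch, with hypothetical values:

```python
target = "[MASK]"     # hypothetical values, just to make the sketch runnable
replacement = "mask"

# Python joins adjacent string literals at compile time, so a long message
# can be split across lines; each fragment that interpolates a value needs
# its own f prefix.
warning = (
    f"The specified target token `{target}` does not exist in the model vocabulary. "
    f"Replacing with `{replacement}`."
)
print(warning)
```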
......@@ -42,12 +42,12 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler):
if k not in item:
raise KeyError("You need to provide a dictionary with keys {question:..., context:...}")
elif item[k] is None:
raise ValueError("`{}` cannot be None".format(k))
raise ValueError(f"`{k}` cannot be None")
elif isinstance(item[k], str) and len(item[k]) == 0:
raise ValueError("`{}` cannot be empty".format(k))
raise ValueError(f"`{k}` cannot be empty")
return QuestionAnsweringPipeline.create_sample(**item)
raise ValueError("{} argument needs to be of type (SquadExample, dict)".format(item))
raise ValueError(f"{item} argument needs to be of type (SquadExample, dict)")
def __call__(self, *args, **kwargs):
# Detect where the actual inputs are
......@@ -77,7 +77,7 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler):
else:
raise ValueError("Arguments can't be understood")
else:
raise ValueError("Unknown arguments {}".format(kwargs))
raise ValueError(f"Unknown arguments {kwargs}")
# Normalize inputs
if isinstance(inputs, dict):
......@@ -86,7 +86,7 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler):
# Copy to avoid overriding arguments
inputs = [i for i in inputs]
else:
raise ValueError("Invalid arguments {}".format(inputs))
raise ValueError(f"Invalid arguments {kwargs}")
for i, item in enumerate(inputs):
inputs[i] = self.normalize(item)
......@@ -210,10 +210,10 @@ class QuestionAnsweringPipeline(Pipeline):
kwargs.setdefault("handle_impossible_answer", False)
if kwargs["topk"] < 1:
raise ValueError("topk parameter should be >= 1 (got {})".format(kwargs["topk"]))
raise ValueError(f"topk parameter should be >= 1 (got {kwargs['topk']})")
if kwargs["max_answer_len"] < 1:
raise ValueError("max_answer_len parameter should be >= 1 (got {})".format(kwargs["max_answer_len"]))
raise ValueError(f"max_answer_len parameter should be >= 1 (got {(kwargs['max_answer_len'])}")
# Convert inputs to features
examples = self._args_parser(*args, **kwargs)
......
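Note the quoting in the converted lines above: a dictionary subscript inside an f-string must use a different quote character from the string's own delimiter (only Python 3.12+ allows reusing the same quotes). A runnable sketch with a hypothetical `kwargs`:

```python
kwargs = {"topk": 0}  # hypothetical value that triggers the check

try:
    if kwargs["topk"] < 1:
        # The subscript uses single quotes because, before Python 3.12,
        # reusing the f-string's double quotes inside the braces is a
        # syntax error.
        raise ValueError(f"topk parameter should be >= 1 (got {kwargs['topk']})")
except ValueError as err:
    print(err)  # -> topk parameter should be >= 1 (got 0)
```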
......@@ -101,9 +101,7 @@ class Text2TextGenerationPipeline(Pipeline):
padding = False
else:
raise ValueError(
" `args[0]`: {} have the wrong format. The should be either of type `str` or type `list`".format(
args[0]
)
f" `args[0]`: {args[0]} have the wrong format. The should be either of type `str` or type `list`"
)
with self.device_placement():
......@@ -198,16 +196,14 @@ class SummarizationPipeline(Text2TextGenerationPipeline):
"""
if input_length < min_length // 2:
logger.warning(
"Your min_length is set to {}, but you input_length is only {}. You might consider decreasing min_length manually, e.g. summarizer('...', min_length=10)".format(
min_length, input_length
)
f"Your min_length is set to {min_length}, but you input_length is only {input_length}. You might "
"consider decreasing min_length manually, e.g. summarizer('...', min_length=10)"
)
if input_length < max_length:
logger.warning(
"Your max_length is set to {}, but you input_length is only {}. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)".format(
max_length, input_length
)
f"Your max_length is set to {max_length}, but you input_length is only {input_length}. You might "
"consider decreasing max_length manually, e.g. summarizer('...', max_length=50)"
)
......@@ -234,9 +230,8 @@ class TranslationPipeline(Text2TextGenerationPipeline):
def check_inputs(self, input_length: int, min_length: int, max_length: int):
if input_length > 0.9 * max_length:
logger.warning(
"Your input_length: {} is bigger than 0.9 * max_length: {}. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)".format(
input_length, max_length
)
f"Your input_length: {input_length} is bigger than 0.9 * max_length: {max_length}. You might consider "
"increasing your max_length manually, e.g. translator('...', max_length=400)"
)
def __call__(self, *args, **kwargs):
......
......@@ -176,7 +176,7 @@ class SageMakerTrainer(Trainer):
return
output_dir = output_dir if output_dir is not None else self.args.output_dir
os.makedirs(output_dir, exist_ok=True)
logger.info("Saving model checkpoint to %s", output_dir)
logger.info(f"Saving model checkpoint to {output_dir}")
# Calling the state_dict needs to be done on the wrapped model
state_dict = self.model_wrapped.state_dict()
......
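One trade-off the commit accepts here and in the trainer hunks below, worth spelling out: `logger.info("... %s", value)` defers interpolation until a handler actually emits the record, while an f-string is built eagerly even when the message is filtered out. A minimal sketch:

```python
import logging

logging.basicConfig(level=logging.WARNING)  # INFO records are filtered out
logger = logging.getLogger(__name__)

output_dir = "/tmp/checkpoints"  # hypothetical path

# Lazy %-style: the message is only interpolated if a handler emits it.
logger.info("Saving model checkpoint to %s", output_dir)

# Eager f-string: interpolation happens before the level check, even though
# the record is dropped. Harmless for a path; only relevant for costly reprs.
logger.info(f"Saving model checkpoint to {output_dir}")
```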
......@@ -62,7 +62,7 @@ def parse_flag_from_env(key, default=False):
_value = strtobool(value)
except ValueError:
# More values are supported, but let's keep the message simple.
raise ValueError("If set, {} must be yes or no.".format(key))
raise ValueError(f"If set, {key} must be yes or no.")
return _value
......@@ -75,7 +75,7 @@ def parse_int_from_env(key, default=None):
try:
_value = int(value)
except ValueError:
raise ValueError("If set, {} must be a int.".format(key))
raise ValueError(f"If set, {key} must be a int.")
return _value
......
......@@ -190,7 +190,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
):
tokens_to_add.append(token)
if self.verbose:
logger.info("Adding %s to the vocabulary", token)
logger.info(f"Adding {token} to the vocabulary")
added_tok_encoder = dict((tok, len(self) + i) for i, tok in enumerate(tokens_to_add))
added_tok_decoder = {v: k for k, v in added_tok_encoder.items()}
......
......@@ -685,7 +685,7 @@ class BatchEncoding(UserDict):
# (mfuntowicz: This code is unreachable)
# else:
# raise ImportError(
# "Unable to convert output to tensors format {}".format(tensor_type)
# f"Unable to convert output to tensors format {tensor_type}"
# )
# Do the tensor conversion in batch
......@@ -805,9 +805,7 @@ class SpecialTokensMixin:
elif isinstance(value, (str, AddedToken)):
setattr(self, key, value)
else:
raise TypeError(
"special token {} has to be either str or AddedToken but got: {}".format(key, type(value))
)
raise TypeError(f"special token {key} has to be either str or AddedToken but got: {type(value)}")
def sanitize_special_tokens(self) -> int:
"""
......@@ -872,7 +870,7 @@ class SpecialTokensMixin:
assert key in self.SPECIAL_TOKENS_ATTRIBUTES, f"Key {key} is not a special token"
if self.verbose:
logger.info("Assigning %s to the %s key of the tokenizer", value, key)
logger.info(f"Assigning {value} to the {key} key of the tokenizer")
setattr(self, key, value)
if key == "additional_special_tokens":
......@@ -1866,7 +1864,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
A tuple of :obj:`str`: The files saved.
"""
if os.path.isfile(save_directory):
logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
return
os.makedirs(save_directory, exist_ok=True)
......@@ -3137,8 +3135,8 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
if not self.deprecation_warnings.get("sequence-length-is-longer-than-the-specified-maximum", False):
logger.warning(
"Token indices sequence length is longer than the specified maximum sequence length "
"for this model ({} > {}). Running this sequence through the model will result in "
"indexing errors".format(len(ids), self.model_max_length)
f"for this model ({len(ids)} > {self.model_max_length}). Running this sequence through the model "
"will result in indexing errors"
)
self.deprecation_warnings["sequence-length-is-longer-than-the-specified-maximum"] = True
......
......@@ -362,9 +362,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
) -> BatchEncoding:
if not isinstance(batch_text_or_text_pairs, list):
raise TypeError(
"batch_text_or_text_pairs has to be a list (got {})".format(type(batch_text_or_text_pairs))
)
raise TypeError(f"batch_text_or_text_pairs has to be a list (got {type(batch_text_or_text_pairs)})")
# Set the truncation and padding strategy and restore the initial configuration
self.set_truncation_and_padding(
......
......@@ -1567,7 +1567,7 @@ class Trainer:
def _save_tpu(self, output_dir: Optional[str] = None):
output_dir = output_dir if output_dir is not None else self.args.output_dir
logger.info("Saving model checkpoint to %s", output_dir)
logger.info(f"Saving model checkpoint to {output_dir}")
if xm.is_master_ordinal():
os.makedirs(output_dir, exist_ok=True)
......@@ -1597,7 +1597,7 @@ class Trainer:
# If we are executing this function, we are the process zero, so we don't check for that.
output_dir = output_dir if output_dir is not None else self.args.output_dir
os.makedirs(output_dir, exist_ok=True)
logger.info("Saving model checkpoint to %s", output_dir)
logger.info(f"Saving model checkpoint to {output_dir}")
# Save a trained model and configuration using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
if not isinstance(self.model, PreTrainedModel):
......@@ -1664,7 +1664,7 @@ class Trainer:
number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - self.args.save_total_limit)
checkpoints_to_be_deleted = checkpoints_sorted[:number_of_checkpoints_to_delete]
for checkpoint in checkpoints_to_be_deleted:
logger.info("Deleting older checkpoint [{}] due to args.save_total_limit".format(checkpoint))
logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit")
shutil.rmtree(checkpoint)
def evaluate(
......@@ -1814,9 +1814,9 @@ class Trainer:
batch_size = dataloader.batch_size
num_examples = self.num_examples(dataloader)
logger.info("***** Running %s *****", description)
logger.info(" Num examples = %d", num_examples)
logger.info(" Batch size = %d", batch_size)
logger.info(f"***** Running {description} *****")
logger.info(f" Num examples = {num_examples}")
logger.info(f" Batch size = {batch_size}")
losses_host: torch.Tensor = None
preds_host: Union[torch.Tensor, List[torch.Tensor]] = None
labels_host: Union[torch.Tensor, List[torch.Tensor]] = None
......
......@@ -303,11 +303,11 @@ class TFTrainer:
prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
)
logger.info("***** Running %s *****", description)
logger.info(" Num examples in dataset = %d", num_examples)
logger.info(f"***** Running {description} *****")
logger.info(f" Num examples in dataset = {num_examples}")
if description == "Evaluation":
logger.info(" Num examples in used in evaluation = %d", self.args.eval_batch_size * steps)
logger.info(" Batch size = %d", self.args.eval_batch_size)
logger.info(f" Num examples in used in evaluation = {self.args.eval_batch_size * steps}")
logger.info(f" Batch size = {self.args.eval_batch_size}")
label_ids: np.ndarray = None
preds: np.ndarray = None
......@@ -504,7 +504,7 @@ class TFTrainer:
if self.model.ckpt_manager.latest_checkpoint:
logger.info(
"Checkpoint file %s found and restoring from checkpoint", self.model.ckpt_manager.latest_checkpoint
f"Checkpoint file {self.model.ckpt_manager.latest_checkpoint} found and restoring from checkpoint"
)
ckpt.restore(self.model.ckpt_manager.latest_checkpoint).expect_partial()
......@@ -514,9 +514,9 @@ class TFTrainer:
steps_trained_in_current_epoch = self.global_step % self.steps_per_epoch
logger.info(" Continuing training from checkpoint, will skip to saved global_step")
logger.info(" Continuing training from epoch %d", epochs_trained)
logger.info(" Continuing training from global step %d", self.global_step)
logger.info(" Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch)
logger.info(f" Continuing training from epoch {epochs_trained}")
logger.info(f" Continuing training from global step {self.global_step}")
logger.info(f" Will skip the first {steps_trained_in_current_epoch} steps in the first epoch")
tf.summary.experimental.set_step(self.global_step)
......@@ -526,16 +526,16 @@ class TFTrainer:
self.tb_writer.flush()
logger.info("***** Running training *****")
logger.info(" Num examples = %d", self.num_train_examples)
logger.info(f" Num examples = {self.num_train_examples}")
# TODO: We might want to print a more precise ``epochs`` if self.args.max_steps > 0 ?
logger.info(" Num Epochs = %d", epochs)
logger.info(" Instantaneous batch size per device = %d", self.args.per_device_train_batch_size)
logger.info(f" Num Epochs = {epochs}")
logger.info(f" Instantaneous batch size per device = {self.args.per_device_train_batch_size}")
logger.info(
" Total train batch size (w. parallel, distributed & accumulation) = %d", self.total_train_batch_size
f" Total train batch size (w. parallel, distributed & accumulation) = {self.total_train_batch_size}"
)
logger.info(" Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
logger.info(" Steps per epoch = %d", self.steps_per_epoch)
logger.info(" Total optimization steps = %d", t_total)
logger.info(f" Gradient Accumulation steps = {self.args.gradient_accumulation_steps}")
logger.info(f" Steps per epoch = {self.steps_per_epoch}")
logger.info(f" Total optimization steps = {t_total}")
self.train_loss = tf.keras.metrics.Sum()
start_time = datetime.datetime.now()
......@@ -592,7 +592,7 @@ class TFTrainer:
if self.args.save_steps > 0 and self.global_step % self.args.save_steps == 0:
ckpt_save_path = self.model.ckpt_manager.save()
logger.info("Saving checkpoint for step {} at {}".format(self.global_step, ckpt_save_path))
logger.info(f"Saving checkpoint for step {self.global_step} at {ckpt_save_path}")
if self.args.max_steps > 0 and self.global_step >= t_total:
break
......@@ -607,7 +607,7 @@ class TFTrainer:
end_time = datetime.datetime.now()
logger.info("Training took: {}".format(str(end_time - start_time)))
logger.info(f"Training took: {str(end_time - start_time)}")
if self.args.past_index and hasattr(self, "_past"):
# Clean the state at the end of training
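A small nit visible in the converted line above: the explicit `str()` inside `{str(end_time - start_time)}` is redundant, since f-string interpolation already formats the value. The commit keeps it; both spellings produce the same text:

```python
import datetime

delta = datetime.timedelta(minutes=3, seconds=7)  # hypothetical duration

# f-string interpolation applies format(), which for timedelta falls back
# to str(), so the explicit str() call changes nothing.
assert f"Training took: {str(delta)}" == f"Training took: {delta}"
```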
......@@ -782,7 +782,7 @@ class TFTrainer:
"""
output_dir = output_dir if output_dir is not None else self.args.output_dir
logger.info("Saving model in {}".format(output_dir))
logger.info(f"Saving model in {output_dir}")
if not isinstance(self.model, TFPreTrainedModel):
raise ValueError("Trainer.model appears to not be a PreTrainedModel")
......
......@@ -236,7 +236,7 @@ def main():
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
transformers.utils.logging.set_verbosity_info()
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")
# Set seed before initializing model.
set_seed(training_args.seed)
......
......@@ -357,7 +357,7 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
self.layer = [TF{{cookiecutter.camelcase_modelname}}Layer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)]
self.layer = [TF{{cookiecutter.camelcase_modelname}}Layer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]
def call(
self,
......
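The template hunk above shows the same conversion inside a list comprehension; since f-strings are ordinary expressions, nothing else changes. A standalone sketch with a hypothetical layer count:

```python
num_hidden_layers = 4  # hypothetical; the template reads this from config

# f-strings work anywhere .format() did, including inside the comprehension
# that builds the per-layer names.
layer_names = [f"layer_._{i}" for i in range(num_hidden_layers)]
assert layer_names == ["layer_._0", "layer_._1", "layer_._2", "layer_._3"]
```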
......@@ -78,13 +78,13 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
arrays = []
for name, shape in init_vars:
logger.info("Loading TF weight {} with shape {}".format(name, shape))
logger.info(f"Loading TF weight {name} with shape {shape}")
array = tf.train.load_variable(tf_path, name)
names.append(name)
arrays.append(array)
......@@ -97,7 +97,7 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch
n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"]
for n in name
):
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
pointer = model
for m_name in name:
......@@ -117,7 +117,7 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch
try:
pointer = getattr(pointer, scope_names[0])
except AttributeError:
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
if len(scope_names) >= 2:
num = int(scope_names[1])
......@@ -133,7 +133,7 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
logger.info("Initialize PyTorch weight {}".format(name))
logger.info(f"Initialize PyTorch weight {name}")
pointer.data = torch.from_numpy(array)
return model
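As the weight-loading hunks above show, the braces accept arbitrary expressions, not just variable names; `{'/'.join(name)}` calls a method inline. A tiny sketch with hypothetical scope parts:

```python
name = ["bert", "encoder", "layer_0"]  # hypothetical TF variable scope parts

# Any expression is allowed inside the braces, including method calls; the
# single-quoted '/' keeps the inner string from closing the f-string.
print(f"Skipping {'/'.join(name)}")  # -> Skipping bert/encoder/layer_0
```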
......@@ -196,8 +196,8 @@ class {{cookiecutter.camelcase_modelname}}SelfAttention(nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
self.num_attention_heads = config.num_attention_heads
......
......@@ -585,10 +585,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
return True
raise
except Exception:
msg = "{} != {}".format(a, b)
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
if len(prefix) > 0:
prefix = f"{prefix}: "
raise AssertionError(f"{prefix}{a} != {b}")
def _long_tensor(tok_lst):
......
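The refactor above folds the optional prefix into the f-string instead of building the message imperatively. A hypothetical standalone copy of just the message logic, for illustration:

```python
def format_mismatch(a, b, prefix=""):
    # Standalone sketch of the refactored message logic, without the tensor
    # comparison that surrounds it in the test helper.
    if len(prefix) > 0:
        prefix = f"{prefix}: "
    return f"{prefix}{a} != {b}"

assert format_mismatch(1, 2) == "1 != 2"
assert format_mismatch(1, 2, prefix="logits") == "logits: 1 != 2"
```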
......@@ -86,6 +86,6 @@ if __name__ == "__main__":
end_train_time = time.time() - start_train_time
logger.info("*** Train ***")
logger.info("train_runtime = %s", end_train_time)
logger.info(f"train_runtime = {end_train_time}")
for key, value in train_results.history.items():
logger.info(" %s = %s", key, value)
logger.info(f" {key} = {value}")
......@@ -157,7 +157,7 @@ if __name__ == "__main__":
)
end_train_time = time.time() - start_train_time
logger.info("*** Train ***")
logger.info("train_runtime = %s", end_train_time)
logger.info(f"train_runtime = {end_train_time}")
output_eval_file = os.path.join(args.output_dir, "train_results.txt")
......@@ -166,8 +166,8 @@ if __name__ == "__main__":
logger.info("***** Train results *****")
logger.info(train_results)
for key, value in train_results.items():
logger.info(" %s = %s", key, value)
writer.write("%s = %s\n" % (key, value))
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")
# Evaluation
if args.do_eval and (not SDP_ENABLED or sdp.rank() == 0):
......@@ -181,8 +181,8 @@ if __name__ == "__main__":
logger.info("***** Eval results *****")
logger.info(result)
for key, value in result.items():
logger.info(" %s = %s", key, value)
writer.write("%s = %s\n" % (key, value))
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")
# Save result
if SDP_ENABLED:
......
......@@ -31,8 +31,8 @@ PASS = "__DUMMY_TRANSFORMERS_PASS__"
ENDPOINT_STAGING = "https://moon-staging.huggingface.co"
ENDPOINT_STAGING_BASIC_AUTH = f"https://{USER}:{PASS}@moon-staging.huggingface.co"
REPO_NAME = "my-model-{}".format(int(time.time()))
REPO_NAME_LARGE_FILE = "my-model-largefiles-{}".format(int(time.time()))
REPO_NAME = f"my-model-{int(time.time())}"
REPO_NAME_LARGE_FILE = f"my-model-largefiles-{int(time.time())}"
WORKING_REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/working_repo")
LARGE_FILE_14MB = "https://cdn-media.huggingface.co/lfs-largefiles/progit.epub"
LARGE_FILE_18MB = "https://cdn-media.huggingface.co/lfs-largefiles/progit.pdf"
......@@ -95,7 +95,7 @@ class HfFolderTest(unittest.TestCase):
Test the whole token save/get/delete workflow,
with the desired behavior with respect to non-existent tokens.
"""
token = "token-{}".format(int(time.time()))
token = f"token-{int(time.time())}"
HfFolder.save_token(token)
self.assertEqual(HfFolder.get_token(), token)
HfFolder.delete_token()
......
......@@ -172,7 +172,7 @@ class ModelTesterMixin:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def test_determinism(self):
......@@ -928,7 +928,7 @@ class ModelTesterMixin:
model.base_model.save_pretrained(temp_dir_name)
model, loading_info = model_class.from_pretrained(temp_dir_name, output_loading_info=True)
with self.subTest(msg="Missing keys for {}".format(model.__class__.__name__)):
with self.subTest(msg=f"Missing keys for {model.__class__.__name__}"):
self.assertGreater(len(loading_info["missing_keys"]), 0)
def test_tie_model_weights(self):
......
......@@ -365,10 +365,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
return True
raise
except Exception:
msg = "{} != {}".format(a, b)
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
if len(prefix) > 0:
prefix = f"{prefix}: "
raise AssertionError(f"{prefix}{a} != {b}")
def _long_tensor(tok_lst):
......
......@@ -74,7 +74,7 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
return True
raise
except Exception:
msg = "{} != {}".format(a, b)
msg = f"{a} != {b}"
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
......