Unverified Commit acc3bd9d, authored by Sylvain Gugger and committed by GitHub

Enforce string-formatting with f-strings (#10980)



* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
parent d0b3797a
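
The diff below is large but mechanical: every hunk applies one of three rewrites. A minimal sketch of the patterns, with illustrative names (`output_dir` here is a hypothetical stand-in, not taken from any one file):

import logging

logger = logging.getLogger(__name__)
output_dir = "checkpoints/run1"  # hypothetical value for demonstration

# Pattern 1: str.format() -> f-string
assert "Saving model in {}".format(output_dir) == f"Saving model in {output_dir}"

# Pattern 2: %-interpolation -> f-string ("Treat %s and %d" above)
assert "Saving model in %s" % output_dir == f"Saving model in {output_dir}"

# Pattern 3: lazy logger arguments -> f-string
logger.info("Saving model checkpoint to %s", output_dir)  # before
logger.info(f"Saving model checkpoint to {output_dir}")   # after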
@@ -129,9 +129,8 @@ class FillMaskPipeline(Pipeline):
             target_enc = self.tokenizer.tokenize(target)
             if len(target_enc) > 1 or target_enc[0] == self.tokenizer.unk_token:
                 logger.warning(
-                    "The specified target token `{}` does not exist in the model vocabulary. Replacing with `{}`.".format(
-                        target, target_enc[0]
-                    )
+                    f"The specified target token `{target}` does not exist in the model vocabulary. "
+                    f"Replacing with `{target_enc[0]}`."
                 )
             targets_proc.append(target_enc[0])
         target_inds = np.array(self.tokenizer.convert_tokens_to_ids(targets_proc))
...
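
A detail worth flagging in hunks like the one above (a general Python note, not from the commit message): splitting a long message across adjacent string literals concatenates them at compile time, but the `f` prefix applies per literal, so each piece that contains a placeholder needs its own `f`. A tiny sketch with hypothetical values:

target = "<mask>"        # illustrative
replacement = "[MASK]"   # illustrative

msg = (
    f"The specified target token `{target}` does not exist in the model vocabulary. "  # f needed here
    f"Replacing with `{replacement}`."  # and here
)
mixed = (
    "Braces in a non-f piece are literal: {target} "  # stays as-is
    f"but interpolate in an f piece: {target}"
)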
@@ -42,12 +42,12 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler):
                 if k not in item:
                     raise KeyError("You need to provide a dictionary with keys {question:..., context:...}")
                 elif item[k] is None:
-                    raise ValueError("`{}` cannot be None".format(k))
+                    raise ValueError(f"`{k}` cannot be None")
                 elif isinstance(item[k], str) and len(item[k]) == 0:
-                    raise ValueError("`{}` cannot be empty".format(k))
+                    raise ValueError(f"`{k}` cannot be empty")

             return QuestionAnsweringPipeline.create_sample(**item)
-        raise ValueError("{} argument needs to be of type (SquadExample, dict)".format(item))
+        raise ValueError(f"{item} argument needs to be of type (SquadExample, dict)")

     def __call__(self, *args, **kwargs):
         # Detect where the actual inputs are
@@ -77,7 +77,7 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler):
             else:
                 raise ValueError("Arguments can't be understood")
         else:
-            raise ValueError("Unknown arguments {}".format(kwargs))
+            raise ValueError(f"Unknown arguments {kwargs}")

         # Normalize inputs
         if isinstance(inputs, dict):
@@ -86,7 +86,7 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler):
             # Copy to avoid overriding arguments
             inputs = [i for i in inputs]
         else:
-            raise ValueError("Invalid arguments {}".format(inputs))
+            raise ValueError(f"Invalid arguments {inputs}")

         for i, item in enumerate(inputs):
             inputs[i] = self.normalize(item)
@@ -210,10 +210,10 @@ class QuestionAnsweringPipeline(Pipeline):
         kwargs.setdefault("handle_impossible_answer", False)

         if kwargs["topk"] < 1:
-            raise ValueError("topk parameter should be >= 1 (got {})".format(kwargs["topk"]))
+            raise ValueError(f"topk parameter should be >= 1 (got {kwargs['topk']})")

         if kwargs["max_answer_len"] < 1:
-            raise ValueError("max_answer_len parameter should be >= 1 (got {})".format(kwargs["max_answer_len"]))
+            raise ValueError(f"max_answer_len parameter should be >= 1 (got {kwargs['max_answer_len']})")

         # Convert inputs to features
         examples = self._args_parser(*args, **kwargs)
...
@@ -101,9 +101,7 @@ class Text2TextGenerationPipeline(Pipeline):
             padding = False
         else:
             raise ValueError(
-                " `args[0]`: {} have the wrong format. The should be either of type `str` or type `list`".format(
-                    args[0]
-                )
+                f" `args[0]`: {args[0]} has the wrong format. It should be either of type `str` or type `list`"
             )

         with self.device_placement():
@@ -198,16 +196,14 @@ class SummarizationPipeline(Text2TextGenerationPipeline):
         """
         if input_length < min_length // 2:
             logger.warning(
-                "Your min_length is set to {}, but you input_length is only {}. You might consider decreasing min_length manually, e.g. summarizer('...', min_length=10)".format(
-                    min_length, input_length
-                )
+                f"Your min_length is set to {min_length}, but your input_length is only {input_length}. You might "
+                "consider decreasing min_length manually, e.g. summarizer('...', min_length=10)"
             )

         if input_length < max_length:
             logger.warning(
-                "Your max_length is set to {}, but you input_length is only {}. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)".format(
-                    max_length, input_length
-                )
+                f"Your max_length is set to {max_length}, but your input_length is only {input_length}. You might "
+                "consider decreasing max_length manually, e.g. summarizer('...', max_length=50)"
             )
@@ -234,9 +230,8 @@ class TranslationPipeline(Text2TextGenerationPipeline):
     def check_inputs(self, input_length: int, min_length: int, max_length: int):
         if input_length > 0.9 * max_length:
             logger.warning(
-                "Your input_length: {} is bigger than 0.9 * max_length: {}. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)".format(
-                    input_length, max_length
-                )
+                f"Your input_length: {input_length} is bigger than 0.9 * max_length: {max_length}. You might consider "
+                "increasing your max_length manually, e.g. translator('...', max_length=400)"
             )

     def __call__(self, *args, **kwargs):
...
@@ -176,7 +176,7 @@ class SageMakerTrainer(Trainer):
             return
         output_dir = output_dir if output_dir is not None else self.args.output_dir
         os.makedirs(output_dir, exist_ok=True)
-        logger.info("Saving model checkpoint to %s", output_dir)
+        logger.info(f"Saving model checkpoint to {output_dir}")

         # Calling the state_dict needs to be done on the wrapped model
         state_dict = self.model_wrapped.state_dict()
...
@@ -62,7 +62,7 @@ def parse_flag_from_env(key, default=False):
            _value = strtobool(value)
        except ValueError:
            # More values are supported, but let's keep the message simple.
-           raise ValueError("If set, {} must be yes or no.".format(key))
+           raise ValueError(f"If set, {key} must be yes or no.")
    return _value
@@ -75,7 +75,7 @@ def parse_int_from_env(key, default=None):
        try:
            _value = int(value)
        except ValueError:
-           raise ValueError("If set, {} must be a int.".format(key))
+           raise ValueError(f"If set, {key} must be an int.")
    return _value
...
@@ -190,7 +190,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
             ):
                 tokens_to_add.append(token)
                 if self.verbose:
-                    logger.info("Adding %s to the vocabulary", token)
+                    logger.info(f"Adding {token} to the vocabulary")

         added_tok_encoder = dict((tok, len(self) + i) for i, tok in enumerate(tokens_to_add))
         added_tok_decoder = {v: k for k, v in added_tok_encoder.items()}
...
@@ -685,7 +685,7 @@ class BatchEncoding(UserDict):
             # (mfuntowicz: This code is unreachable)
             # else:
             #     raise ImportError(
-            #         "Unable to convert output to tensors format {}".format(tensor_type)
+            #         f"Unable to convert output to tensors format {tensor_type}"
             #     )

         # Do the tensor conversion in batch
@@ -805,9 +805,7 @@ class SpecialTokensMixin:
                 elif isinstance(value, (str, AddedToken)):
                     setattr(self, key, value)
                 else:
-                    raise TypeError(
-                        "special token {} has to be either str or AddedToken but got: {}".format(key, type(value))
-                    )
+                    raise TypeError(f"special token {key} has to be either str or AddedToken but got: {type(value)}")

     def sanitize_special_tokens(self) -> int:
         """
@@ -872,7 +870,7 @@ class SpecialTokensMixin:
             assert key in self.SPECIAL_TOKENS_ATTRIBUTES, f"Key {key} is not a special token"

             if self.verbose:
-                logger.info("Assigning %s to the %s key of the tokenizer", value, key)
+                logger.info(f"Assigning {value} to the {key} key of the tokenizer")
             setattr(self, key, value)

             if key == "additional_special_tokens":
@@ -1866,7 +1864,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
             A tuple of :obj:`str`: The files saved.
         """
         if os.path.isfile(save_directory):
-            logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
+            logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
             return
         os.makedirs(save_directory, exist_ok=True)
@@ -3137,8 +3135,8 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
             if not self.deprecation_warnings.get("sequence-length-is-longer-than-the-specified-maximum", False):
                 logger.warning(
                     "Token indices sequence length is longer than the specified maximum sequence length "
-                    "for this model ({} > {}). Running this sequence through the model will result in "
-                    "indexing errors".format(len(ids), self.model_max_length)
+                    f"for this model ({len(ids)} > {self.model_max_length}). Running this sequence through the model "
+                    "will result in indexing errors"
                 )
             self.deprecation_warnings["sequence-length-is-longer-than-the-specified-maximum"] = True
...
@@ -362,9 +362,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
     ) -> BatchEncoding:

         if not isinstance(batch_text_or_text_pairs, list):
-            raise TypeError(
-                "batch_text_or_text_pairs has to be a list (got {})".format(type(batch_text_or_text_pairs))
-            )
+            raise TypeError(f"batch_text_or_text_pairs has to be a list (got {type(batch_text_or_text_pairs)})")

         # Set the truncation and padding strategy and restore the initial configuration
         self.set_truncation_and_padding(
...
@@ -1567,7 +1567,7 @@ class Trainer:
     def _save_tpu(self, output_dir: Optional[str] = None):
         output_dir = output_dir if output_dir is not None else self.args.output_dir
-        logger.info("Saving model checkpoint to %s", output_dir)
+        logger.info(f"Saving model checkpoint to {output_dir}")

         if xm.is_master_ordinal():
             os.makedirs(output_dir, exist_ok=True)
@@ -1597,7 +1597,7 @@ class Trainer:
         # If we are executing this function, we are the process zero, so we don't check for that.
         output_dir = output_dir if output_dir is not None else self.args.output_dir
         os.makedirs(output_dir, exist_ok=True)
-        logger.info("Saving model checkpoint to %s", output_dir)
+        logger.info(f"Saving model checkpoint to {output_dir}")

         # Save a trained model and configuration using `save_pretrained()`.
         # They can then be reloaded using `from_pretrained()`
         if not isinstance(self.model, PreTrainedModel):
@@ -1664,7 +1664,7 @@ class Trainer:
         number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - self.args.save_total_limit)
         checkpoints_to_be_deleted = checkpoints_sorted[:number_of_checkpoints_to_delete]
         for checkpoint in checkpoints_to_be_deleted:
-            logger.info("Deleting older checkpoint [{}] due to args.save_total_limit".format(checkpoint))
+            logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit")
             shutil.rmtree(checkpoint)

     def evaluate(
@@ -1814,9 +1814,9 @@ class Trainer:
         batch_size = dataloader.batch_size
         num_examples = self.num_examples(dataloader)
-        logger.info("***** Running %s *****", description)
-        logger.info(" Num examples = %d", num_examples)
-        logger.info(" Batch size = %d", batch_size)
+        logger.info(f"***** Running {description} *****")
+        logger.info(f" Num examples = {num_examples}")
+        logger.info(f" Batch size = {batch_size}")
         losses_host: torch.Tensor = None
         preds_host: Union[torch.Tensor, List[torch.Tensor]] = None
         labels_host: Union[torch.Tensor, List[torch.Tensor]] = None
...
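
One side effect of the logger conversions above, noted here as an observation rather than anything from the commit: %-style logger calls defer formatting until a handler actually emits the record, while an f-string is evaluated at call time regardless of level. A sketch, assuming a logger configured above INFO:

import logging

logger = logging.getLogger("demo")
logger.setLevel(logging.WARNING)  # INFO records are discarded

class Expensive:
    def __str__(self):
        print("str() was called")
        return "value"

logger.info("lazy: %s", Expensive())   # record is dropped before formatting; str() never runs
logger.info(f"eager: {Expensive()}")   # the f-string is built first, so str() runs anyway

For cheap progress strings like Trainer's, the extra work is negligible, which is presumably why readability won out.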
@@ -303,11 +303,11 @@ class TFTrainer:
             prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
         )

-        logger.info("***** Running %s *****", description)
-        logger.info(" Num examples in dataset = %d", num_examples)
+        logger.info(f"***** Running {description} *****")
+        logger.info(f" Num examples in dataset = {num_examples}")
         if description == "Evaluation":
-            logger.info(" Num examples in used in evaluation = %d", self.args.eval_batch_size * steps)
-        logger.info(" Batch size = %d", self.args.eval_batch_size)
+            logger.info(f" Num examples used in evaluation = {self.args.eval_batch_size * steps}")
+        logger.info(f" Batch size = {self.args.eval_batch_size}")

         label_ids: np.ndarray = None
         preds: np.ndarray = None
@@ -504,7 +504,7 @@ class TFTrainer:
         if self.model.ckpt_manager.latest_checkpoint:
             logger.info(
-                "Checkpoint file %s found and restoring from checkpoint", self.model.ckpt_manager.latest_checkpoint
+                f"Checkpoint file {self.model.ckpt_manager.latest_checkpoint} found and restoring from checkpoint"
             )
             ckpt.restore(self.model.ckpt_manager.latest_checkpoint).expect_partial()
@@ -514,9 +514,9 @@ class TFTrainer:
                 steps_trained_in_current_epoch = self.global_step % self.steps_per_epoch

                 logger.info(" Continuing training from checkpoint, will skip to saved global_step")
-                logger.info(" Continuing training from epoch %d", epochs_trained)
-                logger.info(" Continuing training from global step %d", self.global_step)
-                logger.info(" Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch)
+                logger.info(f" Continuing training from epoch {epochs_trained}")
+                logger.info(f" Continuing training from global step {self.global_step}")
+                logger.info(f" Will skip the first {steps_trained_in_current_epoch} steps in the first epoch")

         tf.summary.experimental.set_step(self.global_step)
@@ -526,16 +526,16 @@ class TFTrainer:
             self.tb_writer.flush()

         logger.info("***** Running training *****")
-        logger.info(" Num examples = %d", self.num_train_examples)
+        logger.info(f" Num examples = {self.num_train_examples}")
         # TODO: We might want to print a more precise ``epochs`` if self.args.max_steps > 0 ?
-        logger.info(" Num Epochs = %d", epochs)
-        logger.info(" Instantaneous batch size per device = %d", self.args.per_device_train_batch_size)
+        logger.info(f" Num Epochs = {epochs}")
+        logger.info(f" Instantaneous batch size per device = {self.args.per_device_train_batch_size}")
         logger.info(
-            " Total train batch size (w. parallel, distributed & accumulation) = %d", self.total_train_batch_size
+            f" Total train batch size (w. parallel, distributed & accumulation) = {self.total_train_batch_size}"
         )
-        logger.info(" Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
-        logger.info(" Steps per epoch = %d", self.steps_per_epoch)
-        logger.info(" Total optimization steps = %d", t_total)
+        logger.info(f" Gradient Accumulation steps = {self.args.gradient_accumulation_steps}")
+        logger.info(f" Steps per epoch = {self.steps_per_epoch}")
+        logger.info(f" Total optimization steps = {t_total}")

         self.train_loss = tf.keras.metrics.Sum()
         start_time = datetime.datetime.now()
@@ -592,7 +592,7 @@ class TFTrainer:
                 if self.args.save_steps > 0 and self.global_step % self.args.save_steps == 0:
                     ckpt_save_path = self.model.ckpt_manager.save()
-                    logger.info("Saving checkpoint for step {} at {}".format(self.global_step, ckpt_save_path))
+                    logger.info(f"Saving checkpoint for step {self.global_step} at {ckpt_save_path}")

                 if self.args.max_steps > 0 and self.global_step >= t_total:
                     break
@@ -607,7 +607,7 @@ class TFTrainer:
         end_time = datetime.datetime.now()
-        logger.info("Training took: {}".format(str(end_time - start_time)))
+        logger.info(f"Training took: {str(end_time - start_time)}")

         if self.args.past_index and hasattr(self, "_past"):
             # Clean the state at the end of training
@@ -782,7 +782,7 @@ class TFTrainer:
         """
         output_dir = output_dir if output_dir is not None else self.args.output_dir
-        logger.info("Saving model in {}".format(output_dir))
+        logger.info(f"Saving model in {output_dir}")

         if not isinstance(self.model, TFPreTrainedModel):
             raise ValueError("Trainer.model appears to not be a PreTrainedModel")
...
@@ -236,7 +236,7 @@ def main():
     # Set the verbosity to info of the Transformers logger (on main process only):
     if is_main_process(training_args.local_rank):
         transformers.utils.logging.set_verbosity_info()
-    logger.info("Training/evaluation parameters %s", training_args)
+    logger.info(f"Training/evaluation parameters {training_args}")

     # Set seed before initializing model.
     set_seed(training_args.seed)
...
@@ -357,7 +357,7 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
     def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
         super().__init__(**kwargs)

-        self.layer = [TF{{cookiecutter.camelcase_modelname}}Layer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)]
+        self.layer = [TF{{cookiecutter.camelcase_modelname}}Layer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]

     def call(
         self,
...
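
A templating aside on the cookiecutter hunk above (my observation, not from the commit): the f-string placeholder `{i}` uses single braces while the template variables use Jinja's double braces, and Jinja only consumes `{{ ... }}` / `{% ... %}` expressions, so the two coexist safely. A quick check, assuming `jinja2` is installed:

from jinja2 import Template

line = 'TF{{cookiecutter.camelcase_modelname}}Layer(config, name=f"layer_._{i}")'
print(Template(line).render(cookiecutter={"camelcase_modelname": "Bert"}))
# TFBertLayer(config, name=f"layer_._{i}")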
@@ -78,13 +78,13 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch
         )
         raise
     tf_path = os.path.abspath(tf_checkpoint_path)
-    logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
+    logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
     # Load weights from TF model
     init_vars = tf.train.list_variables(tf_path)
     names = []
     arrays = []
     for name, shape in init_vars:
-        logger.info("Loading TF weight {} with shape {}".format(name, shape))
+        logger.info(f"Loading TF weight {name} with shape {shape}")
         array = tf.train.load_variable(tf_path, name)
         names.append(name)
         arrays.append(array)
@@ -97,7 +97,7 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch
             n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"]
             for n in name
         ):
-            logger.info("Skipping {}".format("/".join(name)))
+            logger.info(f"Skipping {'/'.join(name)}")
             continue
         pointer = model
         for m_name in name:
@@ -117,7 +117,7 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch
             try:
                 pointer = getattr(pointer, scope_names[0])
             except AttributeError:
-                logger.info("Skipping {}".format("/".join(name)))
+                logger.info(f"Skipping {'/'.join(name)}")
                 continue
             if len(scope_names) >= 2:
                 num = int(scope_names[1])
@@ -133,7 +133,7 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
-        logger.info("Initialize PyTorch weight {}".format(name))
+        logger.info(f"Initialize PyTorch weight {name}")
         pointer.data = torch.from_numpy(array)
     return model
@@ -196,8 +196,8 @@ class {{cookiecutter.camelcase_modelname}}SelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                "The hidden size (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
+                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
+                f"heads ({config.num_attention_heads})"
             )

         self.num_attention_heads = config.num_attention_heads
...
@@ -585,10 +585,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
             return True
         raise
     except Exception:
-        msg = "{} != {}".format(a, b)
-        if prefix:
-            msg = prefix + ": " + msg
-        raise AssertionError(msg)
+        if len(prefix) > 0:
+            prefix = f"{prefix}: "
+        raise AssertionError(f"{prefix}{a} != {b}")

 def _long_tensor(tok_lst):
...
@@ -86,6 +86,6 @@ if __name__ == "__main__":
     end_train_time = time.time() - start_train_time

     logger.info("*** Train ***")
-    logger.info("train_runtime = %s", end_train_time)
+    logger.info(f"train_runtime = {end_train_time}")
     for key, value in train_results.history.items():
-        logger.info(" %s = %s", key, value)
+        logger.info(f" {key} = {value}")
@@ -157,7 +157,7 @@ if __name__ == "__main__":
     )
     end_train_time = time.time() - start_train_time

     logger.info("*** Train ***")
-    logger.info("train_runtime = %s", end_train_time)
+    logger.info(f"train_runtime = {end_train_time}")

     output_eval_file = os.path.join(args.output_dir, "train_results.txt")
@@ -166,8 +166,8 @@ if __name__ == "__main__":
         logger.info("***** Train results *****")
         logger.info(train_results)
         for key, value in train_results.items():
-            logger.info(" %s = %s", key, value)
-            writer.write("%s = %s\n" % (key, value))
+            logger.info(f" {key} = {value}")
+            writer.write(f"{key} = {value}\n")

     # Evaluation
     if args.do_eval and (not SDP_ENABLED or sdp.rank() == 0):
@@ -181,8 +181,8 @@ if __name__ == "__main__":
         logger.info("***** Eval results *****")
         logger.info(result)
         for key, value in result.items():
-            logger.info(" %s = %s", key, value)
-            writer.write("%s = %s\n" % (key, value))
+            logger.info(f" {key} = {value}")
+            writer.write(f"{key} = {value}\n")

     # Save result
     if SDP_ENABLED:
...
@@ -31,8 +31,8 @@ PASS = "__DUMMY_TRANSFORMERS_PASS__"
 ENDPOINT_STAGING = "https://moon-staging.huggingface.co"
 ENDPOINT_STAGING_BASIC_AUTH = f"https://{USER}:{PASS}@moon-staging.huggingface.co"

-REPO_NAME = "my-model-{}".format(int(time.time()))
-REPO_NAME_LARGE_FILE = "my-model-largefiles-{}".format(int(time.time()))
+REPO_NAME = f"my-model-{int(time.time())}"
+REPO_NAME_LARGE_FILE = f"my-model-largefiles-{int(time.time())}"
 WORKING_REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/working_repo")
 LARGE_FILE_14MB = "https://cdn-media.huggingface.co/lfs-largefiles/progit.epub"
 LARGE_FILE_18MB = "https://cdn-media.huggingface.co/lfs-largefiles/progit.pdf"
@@ -95,7 +95,7 @@ class HfFolderTest(unittest.TestCase):
         Test the whole token save/get/delete workflow,
         with the desired behavior with respect to non-existent tokens.
         """
-        token = "token-{}".format(int(time.time()))
+        token = f"token-{int(time.time())}"
         HfFolder.save_token(token)
         self.assertEqual(HfFolder.get_token(), token)
         HfFolder.delete_token()
...
@@ -172,7 +172,7 @@ class ModelTesterMixin:
                 self.assertIn(
                     ((param.data.mean() * 1e9).round() / 1e9).item(),
                     [0.0, 1.0],
-                    msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
+                    msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                 )

     def test_determinism(self):
@@ -928,7 +928,7 @@ class ModelTesterMixin:
                 model.base_model.save_pretrained(temp_dir_name)
                 model, loading_info = model_class.from_pretrained(temp_dir_name, output_loading_info=True)
-                with self.subTest(msg="Missing keys for {}".format(model.__class__.__name__)):
+                with self.subTest(msg=f"Missing keys for {model.__class__.__name__}"):
                     self.assertGreater(len(loading_info["missing_keys"]), 0)

     def test_tie_model_weights(self):
...
@@ -365,10 +365,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
             return True
         raise
     except Exception:
-        msg = "{} != {}".format(a, b)
-        if prefix:
-            msg = prefix + ": " + msg
-        raise AssertionError(msg)
+        if len(prefix) > 0:
+            prefix = f"{prefix}: "
+        raise AssertionError(f"{prefix}{a} != {b}")

 def _long_tensor(tok_lst):
...
@@ -74,7 +74,7 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
             return True
         raise
     except Exception:
-        msg = "{} != {}".format(a, b)
+        msg = f"{a} != {b}"
         if prefix:
             msg = prefix + ": " + msg
         raise AssertionError(msg)
...