Enforce string-formatting with f-strings (#10980)

* First third
* Styling and fix mistake
* Quality
* All the rest
* Treat %s and %d
* typo
* Missing )
* Apply suggestions from code review

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>

Enforce string-formatting with f-strings (#10980)
* First third
* Styling and fix mistake
* Quality
* All the rest
* Treat %s and %d
* typo
* Missing )
* Apply suggestions from code review

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
acc3bd9d · Sylvain Gugger · GitHub · d0b3797a · acc3bd9d · acc3bd9d
Unverified Commit acc3bd9d authored Mar 31, 2021 by Sylvain Gugger Committed by GitHub Mar 31, 2021
20 changed files
--- a/src/transformers/models/transfo_xl/modeling_transfo_xl.py
+++ b/src/transformers/models/transfo_xl/modeling_transfo_xl.py
@@ -67,7 +67,7 @@ def build_tf_to_pytorch_map(model, config):
        for i, (out_l, proj_l, tie_proj) in enumerate(
            zip(model.crit.out_layers, model.crit.out_projs, config.tie_projs)
        ):
-            layer_str = "transformer/adaptive_softmax/cutoff_%d/" % i
+            layer_str = f"transformer/adaptive_softmax/cutoff_{i}/"
            if config.tie_word_embeddings:
                tf_to_pt_map.update({layer_str + "b": out_l.bias})
            else:
@@ -81,12 +81,12 @@ def build_tf_to_pytorch_map(model, config):

    # Embeddings
    for i, (embed_l, proj_l) in enumerate(zip(model.word_emb.emb_layers, model.word_emb.emb_projs)):
-        layer_str = "transformer/adaptive_embed/cutoff_%d/" % i
+        layer_str = f"transformer/adaptive_embed/cutoff_{i}/"
        tf_to_pt_map.update({layer_str + "lookup_table": embed_l.weight, layer_str + "proj_W": proj_l})

    # Transformer blocks
    for i, b in enumerate(model.layers):
-        layer_str = "transformer/layer_%d/" % i
+        layer_str = f"transformer/layer_{i}/"
        tf_to_pt_map.update(
            {
                layer_str + "rel_attn/LayerNorm/gamma": b.dec_attn.layer_norm.weight,
@@ -135,7 +135,7 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path):
    init_vars = tf.train.list_variables(tf_path)
    tf_weights = {}
    for name, shape in init_vars:
-        logger.info("Loading TF weight {} with shape {}".format(name, shape))
+        logger.info(f"Loading TF weight {name} with shape {shape}")
        array = tf.train.load_variable(tf_path, name)
        tf_weights[name] = array

@@ -156,7 +156,7 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path):
                except AssertionError as e:
                    e.args += (p_i.shape, arr_i.shape)
                    raise
-                logger.info("Initialize PyTorch weight {} for layer {}".format(name, i))
+                logger.info(f"Initialize PyTorch weight {name} for layer {i}")
                p_i.data = torch.from_numpy(arr_i)
        else:
            try:
@@ -166,13 +166,13 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path):
            except AssertionError as e:
                e.args += (pointer.shape, array.shape)
                raise
-            logger.info("Initialize PyTorch weight {}".format(name))
+            logger.info(f"Initialize PyTorch weight {name}")
            pointer.data = torch.from_numpy(array)
        tf_weights.pop(name, None)
        tf_weights.pop(name + "/Adam", None)
        tf_weights.pop(name + "/Adam_1", None)

-    logger.info("Weights not copied to PyTorch model: {}".format(", ".join(tf_weights.keys())))
+    logger.info(f"Weights not copied to PyTorch model: {', '.join(tf_weights.keys())}")
    return model



--- a/src/transformers/models/transfo_xl/tokenization_transfo_xl.py
+++ b/src/transformers/models/transfo_xl/tokenization_transfo_xl.py
@@ -198,7 +198,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
        self.vocab_file = vocab_file
        self.never_split = never_split
        self.punctuation_symbols = '!"#$%&()*+,-./\\:;<=>?@[\\]^_`{|}~'
-        self.punction_without_space_before_pattern = re.compile(r"[^\s][{}]".format(self.punctuation_symbols))
+        self.punction_without_space_before_pattern = re.compile(rf"[^\s][{self.punctuation_symbols}]")
        self.punctuation_with_space_around_pattern = self._compile_space_around_punctuation_pattern()
        self.language = language
        self.moses_punct_normalizer = sm.MosesPunctNormalizer(language)
@@ -235,9 +235,9 @@ class TransfoXLTokenizer(PreTrainedTokenizer):

        except Exception as e:
            raise ValueError(
-                "Unable to parse file {}. Unknown format. "
+                f"Unable to parse file {pretrained_vocab_file}. Unknown format. "
                "If you tried to load a model saved through TransfoXLTokenizerFast,"
-                "please note they are not compatible.".format(pretrained_vocab_file)
+                "please note they are not compatible."
            ) from e

        if vocab_file is not None:
@@ -248,20 +248,20 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
        return self.lower_case

    def _compile_space_around_punctuation_pattern(self):
-        look_ahead_for_special_token = "(?=[{}])".format(self.punctuation_symbols)
+        look_ahead_for_special_token = f"(?=[{self.punctuation_symbols}])"
        look_ahead_to_match_all_except_space = r"(?=[^\s])"
        return re.compile(r"" + look_ahead_for_special_token + look_ahead_to_match_all_except_space)

    def count_file(self, path, verbose=False, add_eos=False):
        if verbose:
-            logger.info("counting file {} ...".format(path))
+            logger.info(f"counting file {path} ...")
        assert os.path.exists(path), f"Input file {path} not found"

        sents = []
        with open(path, "r", encoding="utf-8") as f:
            for idx, line in enumerate(f):
                if verbose and idx > 0 and idx % 500000 == 0:
-                    logger.info("    line {}".format(idx))
+                    logger.info(f"    line {idx}")
                symbols = self.tokenize(line, add_eos=add_eos)
                self.counter.update(symbols)
                sents.append(symbols)
@@ -273,10 +273,10 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
        sents : a list of sentences, each a list of tokenized symbols
        """
        if verbose:
-            logger.info("counting {} sents ...".format(len(sents)))
+            logger.info(f"counting {len(sents)} sents ...")
        for idx, symbols in enumerate(sents):
            if verbose and idx > 0 and idx % 500000 == 0:
-                logger.info("    line {}".format(idx))
+                logger.info(f"    line {idx}")
            self.counter.update(symbols)

    def _build_from_file(self, vocab_file):
@@ -308,11 +308,11 @@ class TransfoXLTokenizer(PreTrainedTokenizer):

    def build_vocab(self):
        if self.vocab_file:
-            logger.info("building vocab from {}".format(self.vocab_file))
+            logger.info(f"building vocab from {self.vocab_file}")
            self._build_from_file(self.vocab_file)
-            logger.info("final vocab size {}".format(len(self)))
+            logger.info(f"final vocab size {len(self)}")
        else:
-            logger.info("building vocab with min_freq={}, max_size={}".format(self.min_freq, self.max_size))
+            logger.info(f"building vocab with min_freq={self.min_freq}, max_size={self.max_size}")
            self.idx2sym = []
            self.sym2idx = OrderedDict()

@@ -324,18 +324,18 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
                    break
                self.add_symbol(sym)

-            logger.info("final vocab size {} from {} unique tokens".format(len(self), len(self.counter)))
+            logger.info(f"final vocab size {len(self)} from {len(self.counter)} unique tokens")

    @torch_only_method
    def encode_file(self, path, ordered=False, verbose=False, add_eos=True, add_double_eos=False):
        if verbose:
-            logger.info("encoding file {} ...".format(path))
+            logger.info(f"encoding file {path} ...")
        assert os.path.exists(path), f"Output file {path} not found"
        encoded = []
        with open(path, "r", encoding="utf-8") as f:
            for idx, line in enumerate(f):
                if verbose and idx > 0 and idx % 500000 == 0:
-                    logger.info("    line {}".format(idx))
+                    logger.info(f"    line {idx}")
                symbols = self.tokenize(line, add_eos=add_eos, add_double_eos=add_double_eos)
                encoded.append(self.convert_to_tensor(symbols))

@@ -347,11 +347,11 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
    @torch_only_method
    def encode_sents(self, sents, ordered=False, verbose=False):
        if verbose:
-            logger.info("encoding {} sents ...".format(len(sents)))
+            logger.info(f"encoding {len(sents)} sents ...")
        encoded = []
        for idx, symbols in enumerate(sents):
            if verbose and idx > 0 and idx % 500000 == 0:
-                logger.info("    line {}".format(idx))
+                logger.info(f"    line {idx}")
            encoded.append(self.convert_to_tensor(symbols))

        if ordered:
@@ -363,7 +363,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
        if sym not in self.sym2idx:
            self.idx2sym.append(sym)
            self.sym2idx[sym] = len(self.idx2sym) - 1
-            setattr(self, "{}_idx".format(sym.strip("<>")), self.sym2idx[sym])
+            setattr(self, f"{sym.strip('<>')}_idx", self.sym2idx[sym])

    def add_symbol(self, sym):
        if sym not in self.sym2idx:
@@ -430,7 +430,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):

    def _convert_id_to_token(self, idx):
        """Converts an id in a token (BPE) using the vocab."""
-        assert 0 <= idx < len(self), "Index {} out of vocabulary range".format(idx)
+        assert 0 <= idx < len(self), f"Index {idx} out of vocabulary range"
        return self.idx2sym[idx]

    def _convert_token_to_id(self, sym):
@@ -438,7 +438,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
        if sym in self.sym2idx:
            return self.sym2idx[sym]
        else:
-            # logger.info('encounter unk {}'.format(sym))
+            # logger.info(f'encounter unk {sym}')
            # assert '<eos>' not in sym
            if hasattr(self, "unk_idx"):
                return self.sym2idx.get(sym, self.unk_idx)
@@ -675,20 +675,16 @@ class TransfoXLCorpus(object):
            resolved_corpus_file = cached_path(corpus_file, cache_dir=cache_dir)
        except EnvironmentError:
            logger.error(
-                "Corpus '{}' was not found in corpus list ({}). "
-                "We assumed '{}' was a path or url but couldn't find files {} "
-                "at this path or url.".format(
-                    pretrained_model_name_or_path,
-                    ", ".join(PRETRAINED_CORPUS_ARCHIVE_MAP.keys()),
-                    pretrained_model_name_or_path,
-                    corpus_file,
-                )
+                f"Corpus '{pretrained_model_name_or_path}' was not found in corpus list "
+                f"({', '.join(PRETRAINED_CORPUS_ARCHIVE_MAP.keys())}). "
+                f"We assumed '{pretrained_model_name_or_path}' was a path or url but couldn't find files {corpus_file} "
+                "at this path or url."
            )
            return None
        if resolved_corpus_file == corpus_file:
-            logger.info("loading corpus file {}".format(corpus_file))
+            logger.info(f"loading corpus file {corpus_file}")
        else:
-            logger.info("loading corpus file {} from cache at {}".format(corpus_file, resolved_corpus_file))
+            logger.info(f"loading corpus file {corpus_file} from cache at {resolved_corpus_file}")

        # Instantiate tokenizer.
        corpus = cls(*inputs, **kwargs)
@@ -777,7 +773,7 @@ def get_lm_corpus(datadir, dataset):
        with open(fn, "rb") as fp:
            corpus = pickle.load(fp)
    else:
-        logger.info("Producing dataset {}...".format(dataset))
+        logger.info(f"Producing dataset {dataset}...")
        kwargs = {}
        if dataset in ["wt103", "wt2"]:
            kwargs["special"] = ["<eos>"]

--- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py
@@ -260,7 +260,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer):

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
@@ -514,7 +514,7 @@ class Wav2Vec2Tokenizer(PreTrainedTokenizer):

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]

--- a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py
@@ -54,14 +54,14 @@ def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_p
    pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME
    pytorch_vocab_dump_path = pytorch_dump_folder_path + "/" + VOCAB_FILES_NAMES["vocab_file"]

-    print("Save PyTorch model to {}".format(pytorch_weights_dump_path))
+    print(f"Save PyTorch model to {pytorch_weights_dump_path}")
    torch.save(two_levels_state_dict, pytorch_weights_dump_path)

-    print("Save configuration file to {}".format(pytorch_config_dump_path))
+    print(f"Save configuration file to {pytorch_config_dump_path}")
    with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(config, indent=2) + "\n")

-    print("Save vocab file to {}".format(pytorch_config_dump_path))
+    print(f"Save vocab file to {pytorch_vocab_dump_path}")
    with open(pytorch_vocab_dump_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(vocab, indent=2) + "\n")


--- a/src/transformers/models/xlm/modeling_tf_xlm.py
+++ b/src/transformers/models/xlm/modeling_tf_xlm.py
@@ -146,7 +146,7 @@ class TFXLMMultiHeadAttention(tf.keras.layers.Layer):
        else:
            klen = shape_list(kv)[1]

-        # assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim)
+        # assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured'
        dim_per_head = self.dim // self.n_heads
        mask_reshape = (bs, 1, qlen, klen) if len(shape_list(mask)) == 3 else (bs, 1, 1, klen)

@@ -289,19 +289,19 @@ class TFXLMMainLayer(tf.keras.layers.Layer):

        for i in range(self.n_layers):
            self.attentions.append(
-                TFXLMMultiHeadAttention(self.n_heads, self.dim, config=config, name="attentions_._{}".format(i))
+                TFXLMMultiHeadAttention(self.n_heads, self.dim, config=config, name=f"attentions_._{i}")
            )
            self.layer_norm1.append(
-                tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm1_._{}".format(i))
+                tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name=f"layer_norm1_._{i}")
            )
            # if self.is_decoder:
            #     self.layer_norm15.append(nn.LayerNorm(self.dim, eps=config.layer_norm_eps))
            #     self.encoder_attn.append(MultiHeadAttention(self.n_heads, self.dim, dropout=self.attention_dropout))
            self.ffns.append(
-                TFXLMTransformerFFN(self.dim, self.hidden_dim, self.dim, config=config, name="ffns_._{}".format(i))
+                TFXLMTransformerFFN(self.dim, self.hidden_dim, self.dim, config=config, name=f"ffns_._{i}")
            )
            self.layer_norm2.append(
-                tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm2_._{}".format(i))
+                tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name=f"layer_norm2_._{i}")
            )

        if hasattr(config, "pruned_heads"):

--- a/src/transformers/models/xlm/modeling_xlm.py
+++ b/src/transformers/models/xlm/modeling_xlm.py
@@ -153,7 +153,7 @@ class MultiHeadAttention(nn.Module):
            klen = qlen if cache is None else cache["slen"] + qlen
        else:
            klen = kv.size(1)
-        # assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim)
+        # assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured'
        n_heads = self.n_heads
        dim_per_head = self.dim // n_heads
        mask_reshape = (bs, 1, qlen, klen) if mask.dim() == 3 else (bs, 1, 1, klen)

--- a/src/transformers/models/xlm/tokenization_xlm.py
+++ b/src/transformers/models/xlm/tokenization_xlm.py
@@ -682,7 +682,7 @@ class XLMTokenizer(PreTrainedTokenizer):
                import Mykytea

                self.ja_word_tokenizer = Mykytea.Mykytea(
-                    "-model %s/local/share/kytea/model.bin" % os.path.expanduser("~")
+                    f"-model {os.path.expanduser('~')}/local/share/kytea/model.bin"
                )
            except (AttributeError, ImportError):
                logger.error(
@@ -954,7 +954,7 @@ class XLMTokenizer(PreTrainedTokenizer):

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
@@ -971,8 +971,8 @@ class XLMTokenizer(PreTrainedTokenizer):
            for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]):
                if index != token_index:
                    logger.warning(
-                        "Saving vocabulary to {}: BPE merge indices are not consecutive."
-                        " Please check that the tokenizer is not corrupted!".format(merge_file)
+                        f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive."
+                        " Please check that the tokenizer is not corrupted!"
                    )
                    index = token_index
                writer.write(" ".join(bpe_tokens) + "\n")

--- a/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py
+++ b/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py
@@ -153,7 +153,7 @@ class XLMProphetNetTokenizer(PreTrainedTokenizer):
        self.fairseq_tokens_to_ids = {"[PAD]": 0, "[CLS]": 1, "[SEP]": 2, "[UNK]": 3, "[MASK]": 4}

        for i in range(10):
-            tok = "[unused{}]".format(i)
+            tok = f"[unused{i}]"
            self.fairseq_tokens_to_ids[tok] = 5 + i

        # The first "real" token "," has position 15 in the embedding vocab and position 3 in the spm vocab
@@ -269,7 +269,7 @@ class XLMProphetNetTokenizer(PreTrainedTokenizer):

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]

--- a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py
+++ b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py
@@ -276,7 +276,7 @@ class XLMRobertaTokenizer(PreTrainedTokenizer):

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]

--- a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py
+++ b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py
@@ -230,7 +230,7 @@ class XLMRobertaTokenizerFast(PreTrainedTokenizerFast):

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory.")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]

--- a/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py
+++ b/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py
@@ -55,7 +55,7 @@ def convert_xlnet_checkpoint_to_pytorch(

    finetuning_task = finetuning_task.lower() if finetuning_task is not None else ""
    if finetuning_task in GLUE_TASKS_NUM_LABELS:
-        print("Building PyTorch XLNetForSequenceClassification model from configuration: {}".format(str(config)))
+        print(f"Building PyTorch XLNetForSequenceClassification model from configuration: {config}")
        config.finetuning_task = finetuning_task
        config.num_labels = GLUE_TASKS_NUM_LABELS[finetuning_task]
        model = XLNetForSequenceClassification(config)
@@ -71,9 +71,9 @@ def convert_xlnet_checkpoint_to_pytorch(
    # Save pytorch-model
    pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)
    pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME)
-    print("Save PyTorch model to {}".format(os.path.abspath(pytorch_weights_dump_path)))
+    print(f"Save PyTorch model to {os.path.abspath(pytorch_weights_dump_path)}")
    torch.save(model.state_dict(), pytorch_weights_dump_path)
-    print("Save configuration file to {}".format(os.path.abspath(pytorch_config_dump_path)))
+    print(f"Save configuration file to {os.path.abspath(pytorch_config_dump_path)}")
    with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
        f.write(config.to_json_string())


--- a/src/transformers/models/xlnet/modeling_tf_xlnet.py
+++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py
@@ -69,8 +69,8 @@ class TFXLNetRelativeAttention(tf.keras.layers.Layer):

        if config.d_model % config.n_head != 0:
            raise ValueError(
-                "The hidden size (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (config.d_model, config.n_head)
+                f"The hidden size ({config.d_model}) is not a multiple of the number of attention "
+                f"heads ({config.n_head})"
            )

        self.n_head = config.n_head
@@ -455,7 +455,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
        self.word_embedding = TFSharedEmbeddings(
            config.vocab_size, config.d_model, initializer_range=config.initializer_range, name="word_embedding"
        )
-        self.layer = [TFXLNetLayer(config, name="layer_._{}".format(i)) for i in range(config.n_layer)]
+        self.layer = [TFXLNetLayer(config, name=f"layer_._{i}") for i in range(config.n_layer)]
        self.dropout = tf.keras.layers.Dropout(config.dropout)

        self.use_mems_eval = config.use_mems_eval
@@ -550,7 +550,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
            # beg, end = klen - 1, -1
            beg, end = klen, -1
        else:
-            raise ValueError("Unknown `attn_type` {}.".format(self.attn_type))
+            raise ValueError(f"Unknown `attn_type` {self.attn_type}.")

        if self.bi_data:
            fwd_pos_seq = tf.range(beg, end, -1.0)
@@ -662,7 +662,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
        elif self.attn_type == "bi":
            attn_mask = None
        else:
-            raise ValueError("Unsupported attention type: {}".format(self.attn_type))
+            raise ValueError(f"Unsupported attention type: {self.attn_type}")

        # data mask: input mask & perm mask
        assert inputs["input_mask"] is None or inputs["attention_mask"] is None, (

--- a/src/transformers/models/xlnet/modeling_xlnet.py
+++ b/src/transformers/models/xlnet/modeling_xlnet.py
@@ -77,10 +77,10 @@ def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None):
        if (
            hasattr(model, "logits_proj")
            and config.finetuning_task is not None
-            and "model/regression_{}/logit/kernel".format(config.finetuning_task) in tf_weights
+            and f"model/regression_{config.finetuning_task}/logit/kernel" in tf_weights
        ):
-            tf_to_pt_map["model/regression_{}/logit/kernel".format(config.finetuning_task)] = model.logits_proj.weight
-            tf_to_pt_map["model/regression_{}/logit/bias".format(config.finetuning_task)] = model.logits_proj.bias
+            tf_to_pt_map[f"model/regression_{config.finetuning_task}/logit/kernel"] = model.logits_proj.weight
+            tf_to_pt_map[f"model/regression_{config.finetuning_task}/logit/bias"] = model.logits_proj.bias

        # Now load the rest of the transformer
        model = model.transformer
@@ -95,7 +95,7 @@ def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None):

    # Transformer blocks
    for i, b in enumerate(model.layer):
-        layer_str = "model/transformer/layer_%d/" % i
+        layer_str = f"model/transformer/layer_{i}/"
        tf_to_pt_map.update(
            {
                layer_str + "rel_attn/LayerNorm/gamma": b.rel_attn.layer_norm.weight,
@@ -156,7 +156,7 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
    init_vars = tf.train.list_variables(tf_path)
    tf_weights = {}
    for name, shape in init_vars:
-        logger.info("Loading TF weight {} with shape {}".format(name, shape))
+        logger.info(f"Loading TF weight {name} with shape {shape}")
        array = tf.train.load_variable(tf_path, name)
        tf_weights[name] = array

@@ -164,9 +164,9 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
    tf_to_pt_map = build_tf_xlnet_to_pytorch_map(model, config, tf_weights)

    for name, pointer in tf_to_pt_map.items():
-        logger.info("Importing {}".format(name))
+        logger.info(f"Importing {name}")
        if name not in tf_weights:
-            logger.info("{} not in tf pre-trained weights, skipping".format(name))
+            logger.info(f"{name} not in tf pre-trained weights, skipping")
            continue
        array = tf_weights[name]
        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v
@@ -188,7 +188,7 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
                except AssertionError as e:
                    e.args += (p_i.shape, arr_i.shape)
                    raise
-                logger.info("Initialize PyTorch weight {} for layer {}".format(name, i))
+                logger.info(f"Initialize PyTorch weight {name} for layer {i}")
                p_i.data = torch.from_numpy(arr_i)
        else:
            try:
@@ -198,13 +198,13 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
            except AssertionError as e:
                e.args += (pointer.shape, array.shape)
                raise
-            logger.info("Initialize PyTorch weight {}".format(name))
+            logger.info(f"Initialize PyTorch weight {name}")
            pointer.data = torch.from_numpy(array)
        tf_weights.pop(name, None)
        tf_weights.pop(name + "/Adam", None)
        tf_weights.pop(name + "/Adam_1", None)

-    logger.info("Weights not copied to PyTorch model: {}".format(", ".join(tf_weights.keys())))
+    logger.info(f"Weights not copied to PyTorch model: {', '.join(tf_weights.keys())}")
    return model


@@ -214,8 +214,8 @@ class XLNetRelativeAttention(nn.Module):

        if config.d_model % config.n_head != 0:
            raise ValueError(
-                "The hidden size (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (config.d_model, config.n_head)
+                f"The hidden size ({config.d_model}) is not a multiple of the number of attention "
+                f"heads ({config.n_head})"
            )

        self.n_head = config.n_head
@@ -1041,7 +1041,7 @@ class XLNetModel(XLNetPreTrainedModel):
            # beg, end = klen - 1, -1
            beg, end = klen, -1
        else:
-            raise ValueError("Unknown `attn_type` {}.".format(self.attn_type))
+            raise ValueError(f"Unknown `attn_type` {self.attn_type}.")

        if self.bi_data:
            fwd_pos_seq = torch.arange(beg, end, -1.0, dtype=torch.float)
@@ -1145,7 +1145,7 @@ class XLNetModel(XLNetPreTrainedModel):
        elif self.attn_type == "bi":
            attn_mask = None
        else:
-            raise ValueError("Unsupported attention type: {}".format(self.attn_type))
+            raise ValueError(f"Unsupported attention type: {self.attn_type}")

        # data mask: input mask & perm mask
        assert input_mask is None or attention_mask is None, "You can only use one of input_mask (uses 1 for padding) "

--- a/src/transformers/models/xlnet/tokenization_xlnet.py
+++ b/src/transformers/models/xlnet/tokenization_xlnet.py
@@ -314,7 +314,7 @@ class XLNetTokenizer(PreTrainedTokenizer):

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]

--- a/src/transformers/models/xlnet/tokenization_xlnet_fast.py
+++ b/src/transformers/models/xlnet/tokenization_xlnet_fast.py
@@ -254,7 +254,7 @@ class XLNetTokenizerFast(PreTrainedTokenizerFast):

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]

--- a/src/transformers/optimization.py
+++ b/src/transformers/optimization.py
@@ -296,13 +296,13 @@ class AdamW(Optimizer):
        correct_bias: bool = True,
    ):
        if lr < 0.0:
-            raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr))
+            raise ValueError(f"Invalid learning rate: {lr} - should be >= 0.0")
        if not 0.0 <= betas[0] < 1.0:
-            raise ValueError("Invalid beta parameter: {} - should be in [0.0, 1.0[".format(betas[0]))
+            raise ValueError(f"Invalid beta parameter: {betas[0]} - should be in [0.0, 1.0[")
        if not 0.0 <= betas[1] < 1.0:
-            raise ValueError("Invalid beta parameter: {} - should be in [0.0, 1.0[".format(betas[1]))
+            raise ValueError(f"Invalid beta parameter: {betas[1]} - should be in [0.0, 1.0[")
        if not 0.0 <= eps:
-            raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(eps))
+            raise ValueError(f"Invalid epsilon value: {eps} - should be >= 0.0")
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, correct_bias=correct_bias)
        super().__init__(params, defaults)


--- a/src/transformers/optimization_tf.py
+++ b/src/transformers/optimization_tf.py
@@ -333,7 +333,7 @@ class GradientAccumulator(object):
                ]
            )
        if len(gradients) != len(self._gradients):
-            raise ValueError("Expected %s gradients, but got %d" % (len(self._gradients), len(gradients)))
+            raise ValueError(f"Expected {len(self._gradients)} gradients, but got {len(gradients)}")

        for accum_gradient, gradient in zip(self._gradients, gradients):
            if accum_gradient is not None and gradient is not None:

--- a/src/transformers/pipelines/__init__.py
+++ b/src/transformers/pipelines/__init__.py
@@ -231,10 +231,10 @@ def check_task(task: str) -> Tuple[Dict, Any]:
        if len(tokens) == 4 and tokens[0] == "translation" and tokens[2] == "to":
            targeted_task = SUPPORTED_TASKS["translation"]
            return targeted_task, (tokens[1], tokens[3])
-        raise KeyError("Invalid translation task {}, use 'translation_XX_to_YY' format".format(task))
+        raise KeyError(f"Invalid translation task {task}, use 'translation_XX_to_YY' format")

    raise KeyError(
-        "Unknown task {}, available tasks are {}".format(task, list(SUPPORTED_TASKS.keys()) + ["translation_XX_to_YY"])
+        f"Unknown task {task}, available tasks are {list(SUPPORTED_TASKS.keys()) + ['translation_XX_to_YY']}"
    )



--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -159,7 +159,7 @@ def get_default_model(targeted_task: Dict, framework: Optional[str], task_option
    defaults = targeted_task["default"]
    if task_options:
        if task_options not in defaults:
-            raise ValueError("The task does not provide any default models for options {}".format(task_options))
+            raise ValueError(f"The task does not provide any default models for options {task_options}")
        default_models = defaults[task_options]["model"]
    elif "model" in defaults:
        default_models = targeted_task["default"]["model"]
@@ -240,11 +240,11 @@ class PipelineDataFormat:

        if output_path is not None and not overwrite:
            if exists(abspath(self.output_path)):
-                raise OSError("{} already exists on disk".format(self.output_path))
+                raise OSError(f"{self.output_path} already exists on disk")

        if input_path is not None:
            if not exists(abspath(self.input_path)):
-                raise OSError("{} doesnt exist on disk".format(self.input_path))
+                raise OSError(f"{self.input_path} doesnt exist on disk")

    @abstractmethod
    def __iter__(self):
@@ -313,7 +313,7 @@ class PipelineDataFormat:
        elif format == "pipe":
            return PipedPipelineDataFormat(output_path, input_path, column, overwrite=overwrite)
        else:
-            raise KeyError("Unknown reader {} (Available reader are json/csv/pipe)".format(format))
+            raise KeyError(f"Unknown reader {format} (Available reader are json/csv/pipe)")


 class CsvPipelineDataFormat(PipelineDataFormat):
@@ -537,7 +537,7 @@ class Pipeline(_ScikitCompat):
        self.tokenizer = tokenizer
        self.modelcard = modelcard
        self.framework = framework
-        self.device = device if framework == "tf" else torch.device("cpu" if device < 0 else "cuda:{}".format(device))
+        self.device = device if framework == "tf" else torch.device("cpu" if device < 0 else f"cuda:{device}")
        self.binary_output = binary_output

        # Special handling
@@ -558,7 +558,7 @@ class Pipeline(_ScikitCompat):
                A path to the directory where to saved. It will be created if it doesn't exist.
        """
        if os.path.isfile(save_directory):
-            logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
+            logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
            return
        os.makedirs(save_directory, exist_ok=True)

@@ -596,7 +596,7 @@ class Pipeline(_ScikitCompat):
                output = pipe(...)
        """
        if self.framework == "tf":
-            with tf.device("/CPU:0" if self.device == -1 else "/device:GPU:{}".format(self.device)):
+            with tf.device("/CPU:0" if self.device == -1 else f"/device:GPU:{self.device}"):
                yield
        else:
            if self.device.type == "cuda":

--- a/src/transformers/pipelines/conversational.py
+++ b/src/transformers/pipelines/conversational.py
@@ -94,15 +94,14 @@ class Conversation:
        if self.new_user_input:
            if overwrite:
                logger.warning(
-                    'User input added while unprocessed input was existing: "{}" was overwritten with: "{}".'.format(
-                        self.new_user_input, text
-                    )
+                    f'User input added while unprocessed input was existing: "{self.new_user_input}" was overwritten '
+                    f'with: "{text}".'
                )
                self.new_user_input = text
            else:
                logger.warning(
-                    'User input added while unprocessed input was existing: "{}" new input ignored: "{}". '
-                    "Set `overwrite` to True to overwrite unprocessed user input".format(self.new_user_input, text)
+                    f'User input added while unprocessed input was existing: "{self.new_user_input}" new input '
+                    f'ignored: "{text}". Set `overwrite` to True to overwrite unprocessed user input'
                )
        else:
            self.new_user_input = text
@@ -148,10 +147,10 @@ class Conversation:
            Example: Conversation id: 7d15686b-dc94-49f2-9c4b-c9eac6a1f114 user >> Going to the movies tonight - any
            suggestions? bot >> The Big Lebowski
        """
-        output = "Conversation id: {} \n".format(self.uuid)
+        output = f"Conversation id: {self.uuid} \n"
        for is_user, text in self.iter_texts():
            name = "user" if is_user else "bot"
-            output += "{} >> {} \n".format(name, text)
+            output += f"{name} >> {text} \n"
        return output


@@ -232,10 +231,8 @@ class ConversationalPipeline(Pipeline):
                ), "ConversationalPipeline expects a Conversation or list of Conversations as an input"
                if conversation.new_user_input is None:
                    raise ValueError(
-                        "Conversation with UUID {} does not contain new user input to process. "
-                        "Add user inputs with the conversation's `add_user_input` method".format(
-                            type(conversation.uuid)
-                        )
+                        f"Conversation with UUID {type(conversation.uuid)} does not contain new user input to process. "
+                        "Add user inputs with the conversation's `add_user_input` method"
                    )
            assert (
                self.tokenizer.pad_token_id is not None or self.tokenizer.eos_token_id is not None