"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "c7faf2ccc05a095870c3c905d232179fb323797d"
Unverified Commit acc3bd9d authored by Sylvain Gugger, committed by GitHub

Enforce string-formatting with f-strings (#10980)



* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
parent d0b3797a
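The change is mechanical and identical throughout the diff below: `str.format()` calls and `%`-style interpolation are rewritten as f-strings. A minimal before/after sketch of the pattern, using lines taken from the hunks that follow:

    # before: %-interpolation and .format()
    guid = "%s-%s" % ("train", i)
    path = "{}/api/login".format(self.endpoint)

    # after: the equivalent f-strings
    guid = f"train-{i}"
    path = f"{self.endpoint}/api/login"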
@@ -38,12 +38,12 @@ class XnliProcessor(DataProcessor):
     def get_train_examples(self, data_dir):
         """See base class."""
         lg = self.language if self.train_language is None else self.train_language
-        lines = self._read_tsv(os.path.join(data_dir, "XNLI-MT-1.0/multinli/multinli.train.{}.tsv".format(lg)))
+        lines = self._read_tsv(os.path.join(data_dir, f"XNLI-MT-1.0/multinli/multinli.train.{lg}.tsv"))
         examples = []
         for (i, line) in enumerate(lines):
             if i == 0:
                 continue
-            guid = "%s-%s" % ("train", i)
+            guid = f"train-{i}"
             text_a = line[0]
             text_b = line[1]
             label = "contradiction" if line[2] == "contradictory" else line[2]
@@ -63,7 +63,7 @@ class XnliProcessor(DataProcessor):
             language = line[0]
             if language != self.language:
                 continue
-            guid = "%s-%s" % ("test", i)
+            guid = f"test-{i}"
             text_a = line[6]
             text_b = line[7]
             label = line[1]
...
@@ -583,8 +583,8 @@ def add_start_docstrings(*docstr):
 def add_start_docstrings_to_model_forward(*docstr):
     def docstring_decorator(fn):
-        class_name = ":class:`~transformers.{}`".format(fn.__qualname__.split(".")[0])
-        intro = " The {} forward method, overrides the :func:`__call__` special method.".format(class_name)
+        class_name = f":class:`~transformers.{fn.__qualname__.split('.')[0]}`"
+        intro = f" The {class_name} forward method, overrides the :func:`__call__` special method."
         note = r"""
     .. note::
@@ -1048,11 +1048,11 @@ def filename_to_url(filename, cache_dir=None):
     cache_path = os.path.join(cache_dir, filename)
     if not os.path.exists(cache_path):
-        raise EnvironmentError("file {} not found".format(cache_path))
+        raise EnvironmentError(f"file {cache_path} not found")
     meta_path = cache_path + ".json"
     if not os.path.exists(meta_path):
-        raise EnvironmentError("file {} not found".format(meta_path))
+        raise EnvironmentError(f"file {meta_path} not found")
     with open(meta_path, encoding="utf-8") as meta_file:
         metadata = json.load(meta_file)
@@ -1158,10 +1158,10 @@ def cached_path(
         output_path = url_or_filename
     elif urlparse(url_or_filename).scheme == "":
         # File, but it doesn't exist.
-        raise EnvironmentError("file {} not found".format(url_or_filename))
+        raise EnvironmentError(f"file {url_or_filename} not found")
     else:
         # Something unknown
-        raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename))
+        raise ValueError(f"unable to parse {url_or_filename} as a URL or as a local path")
     if extract_compressed_file:
         if not is_zipfile(output_path) and not tarfile.is_tarfile(output_path):
@@ -1190,7 +1190,7 @@ def cached_path(
             tar_file.extractall(output_path_extracted)
             tar_file.close()
         else:
-            raise EnvironmentError("Archive format of {} could not be identified".format(output_path))
+            raise EnvironmentError(f"Archive format of {output_path} could not be identified")
         return output_path_extracted
@@ -1252,7 +1252,7 @@ def http_get(url: str, temp_file: BinaryIO, proxies=None, resume_size=0, headers
     """
     headers = copy.deepcopy(headers)
     if resume_size > 0:
-        headers["Range"] = "bytes=%d-" % (resume_size,)
+        headers["Range"] = f"bytes={resume_size}-"
     r = requests.get(url, stream=True, proxies=proxies, headers=headers)
     r.raise_for_status()
     content_length = r.headers.get("Content-Length")
@@ -1302,12 +1302,12 @@ def get_from_cache(
     headers = {"user-agent": http_user_agent(user_agent)}
     if isinstance(use_auth_token, str):
-        headers["authorization"] = "Bearer {}".format(use_auth_token)
+        headers["authorization"] = f"Bearer {use_auth_token}"
     elif use_auth_token:
         token = HfFolder.get_token()
         if token is None:
             raise EnvironmentError("You specified use_auth_token=True, but a huggingface token was not found.")
-        headers["authorization"] = "Bearer {}".format(token)
+        headers["authorization"] = f"Bearer {token}"
     url_to_download = url
     etag = None
@@ -1404,14 +1404,14 @@ def get_from_cache(
         # Download to temporary file, then copy to cache dir once finished.
         # Otherwise you get corrupt cache entries if the download gets interrupted.
         with temp_file_manager() as temp_file:
-            logger.info("%s not found in cache or force_download set to True, downloading to %s", url, temp_file.name)
+            logger.info(f"{url} not found in cache or force_download set to True, downloading to {temp_file.name}")
             http_get(url_to_download, temp_file, proxies=proxies, resume_size=resume_size, headers=headers)
-        logger.info("storing %s in cache at %s", url, cache_path)
+        logger.info(f"storing {url} in cache at {cache_path}")
         os.replace(temp_file.name, cache_path)
-        logger.info("creating metadata file for %s", cache_path)
+        logger.info(f"creating metadata file for {cache_path}")
         meta = {"url": url, "etag": etag}
         meta_path = cache_path + ".json"
         with open(meta_path, "w") as meta_file:
@@ -1625,8 +1625,7 @@ class ExplicitEnum(Enum):
     @classmethod
     def _missing_(cls, value):
         raise ValueError(
-            "%r is not a valid %s, please select one of %s"
-            % (value, cls.__name__, str(list(cls._value2member_map_.keys())))
+            f"{value} is not a valid {cls.__name__}, please select one of {list(cls._value2member_map_.keys())}"
         )
...
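Several of the later hunks (for example the `PretrainedConfig` checks and the beam-search errors) split a long message across adjacent string literals, where only the pieces that interpolate a value carry the `f` prefix. A minimal, self-contained sketch of that pattern with illustrative names (`mode` and `allowed_modes` are not from the diff):

    def check_mode(mode, allowed_modes):
        # adjacent literals are concatenated at compile time; only the parts
        # that interpolate values need to be f-strings
        if mode not in allowed_modes:
            raise ValueError(
                f"Unsupported value {mode!r} for `mode`. "
                "Expected one of: "
                f"{', '.join(allowed_modes)}"
            )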
@@ -218,7 +218,7 @@ class BeamSearchScorer(BeamScorer):
             if self._done[batch_idx]:
                 assert (
                     len(beam_hyp) >= self.num_beams
-                ), "Batch can only be done if at least {} beams have been generated".format(self.num_beams)
+                ), f"Batch can only be done if at least {self.num_beams} beams have been generated"
                 assert (
                     eos_token_id is not None and pad_token_id is not None
                 ), "generated beams >= num_beams -> eos_token_id and pad_token have to be defined"
...
@@ -371,9 +371,7 @@ class NoBadWordsLogitsProcessor(LogitsProcessor):
         self.bad_words_ids = list(filter(lambda bad_token_seq: bad_token_seq != [eos_token_id], bad_words_ids))
         for banned_token_seq in self.bad_words_ids:
-            assert len(banned_token_seq) > 0, "Banned words token sequences {} cannot have an empty list".format(
-                bad_words_ids
-            )
+            assert len(banned_token_seq) > 0, f"Banned words token sequences {bad_words_ids} cannot have an empty list"
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
         banned_tokens = self._calc_banned_bad_words_ids(input_ids)
...
@@ -159,7 +159,7 @@ class TFGenerationMixin:
             tokenizer = AutoTokenizer.from_pretrained('distilgpt2')    # Initialize tokenizer
             model = TFAutoModelWithLMHead.from_pretrained('distilgpt2')    # Download model and configuration from huggingface.co and cache.
             outputs = model.generate(max_length=40)  # do greedy decoding
-            print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))
+            print(f'Generated: {tokenizer.decode(outputs[0], skip_special_tokens=True)}')
             tokenizer = AutoTokenizer.from_pretrained('openai-gpt')    # Initialize tokenizer
             model = TFAutoModelWithLMHead.from_pretrained('openai-gpt')    # Download model and configuration from huggingface.co and cache.
@@ -167,7 +167,7 @@ class TFGenerationMixin:
             input_ids = tokenizer.encode(input_context, return_tensors='tf')  # encode input context
             outputs = model.generate(input_ids=input_ids, num_beams=5, num_return_sequences=3, temperature=1.5)  # generate 3 independent sequences using beam search decoding (5 beams) with sampling from initial context 'The dog'
             for i in range(3):  # 3 output sequences were generated
-                print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True)))
+                print(f'Generated {i}: {tokenizer.decode(outputs[i], skip_special_tokens=True)}')
             tokenizer = AutoTokenizer.from_pretrained('distilgpt2')    # Initialize tokenizer
             model = TFAutoModelWithLMHead.from_pretrained('distilgpt2')    # Download model and configuration from huggingface.co and cache.
@@ -175,14 +175,14 @@ class TFGenerationMixin:
             input_ids = tokenizer.encode(input_context, return_tensors='tf')  # encode input context
             outputs = model.generate(input_ids=input_ids, max_length=40, temperature=0.7, num_return_sequences=3, do_sample=True)  # generate 3 candidates using sampling
             for i in range(3):  # 3 output sequences were generated
-                print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True)))
+                print(f'Generated {i}: {tokenizer.decode(outputs[i], skip_special_tokens=True)}')
             tokenizer = AutoTokenizer.from_pretrained('ctrl')    # Initialize tokenizer
             model = TFAutoModelWithLMHead.from_pretrained('ctrl')    # Download model and configuration from huggingface.co and cache.
             input_context = 'Legal My neighbor is'  # "Legal" is one of the control codes for ctrl
             input_ids = tokenizer.encode(input_context, return_tensors='tf')  # encode input context
             outputs = model.generate(input_ids=input_ids, max_length=50, temperature=0.7, repetition_penalty=1.2)  # generate sequences
-            print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))
+            print(f'Generated: {tokenizer.decode(outputs[0], skip_special_tokens=True)}')
             tokenizer = AutoTokenizer.from_pretrained('gpt2')    # Initialize tokenizer
             model = TFAutoModelWithLMHead.from_pretrained('gpt2')    # Download model and configuration from huggingface.co and cache.
@@ -291,9 +291,7 @@ class TFGenerationMixin:
             attention_mask = tf.ones_like(input_ids)
         if pad_token_id is None and eos_token_id is not None:
-            logger.warning(
-                "Setting `pad_token_id` to {} (first `eos_token_id`) to generate sequence".format(eos_token_id)
-            )
+            logger.warning(f"Setting `pad_token_id` to {eos_token_id} (first `eos_token_id`) to generate sequence")
             pad_token_id = eos_token_id
         # current position and vocab size
@@ -315,8 +313,8 @@ class TFGenerationMixin:
             assert (
                 decoder_start_token_id is not None
             ), "decoder_start_token_id or bos_token_id has to be defined for encoder-decoder generation"
-            assert hasattr(self, "get_encoder"), "{} should have a 'get_encoder' function defined".format(self)
-            assert callable(self.get_encoder), "{} should be a method".format(self.get_encoder)
+            assert hasattr(self, "get_encoder"), f"{self} should have a 'get_encoder' function defined"
+            assert callable(self.get_encoder), f"{self.get_encoder} should be a method"
             # get encoder and store encoder outputs
             encoder = self.get_encoder()
@@ -763,7 +761,7 @@ class TFGenerationMixin:
             if done[batch_idx]:
                 assert (
                     len(generated_hyps[batch_idx]) >= num_beams
-                ), "Batch can only be done if at least {} beams have been generated".format(num_beams)
+                ), f"Batch can only be done if at least {num_beams} beams have been generated."
                 assert (
                     eos_token_id is not None and pad_token_id is not None
                 ), "generated beams >= num_beams -> eos_token_id and pad_token have to be defined"
@@ -843,12 +841,14 @@ class TFGenerationMixin:
                 if eos_token_id is not None and all(
                     (token_id % vocab_size).numpy().item() != eos_token_id for token_id in next_tokens[batch_idx]
                 ):
-                    assert tf.reduce_all(
+                    if not tf.reduce_all(
                         next_scores[batch_idx, :num_beams] == tf.reshape(beam_scores, (batch_size, num_beams))[batch_idx]
-                    ), "If batch_idx is not done, final next scores: {} have to equal to accumulated beam_scores: {}".format(
-                        next_scores[:, :num_beams][batch_idx], tf.reshape(beam_scores, (batch_size, num_beams))[batch_idx]
-                    )
+                    ):
+                        raise ValueError(
+                            f"If batch_idx is not done, final next scores: {next_scores[:, :num_beams][batch_idx]} have "
+                            "to equal to accumulated beam_scores: "
+                            f"{tf.reshape(beam_scores, (batch_size, num_beams))[batch_idx]}"
+                        )
                 # need to add best num_beams hypotheses to generated hyps
                 for beam_id in range(num_beams):
                     effective_beam_id = batch_idx * num_beams + beam_id
@@ -871,9 +871,9 @@ class TFGenerationMixin:
             best_hyp = sorted_hyps.pop()[1]
             sent_lengths_list.append(len(best_hyp))
             best.append(best_hyp)
-        assert output_batch_size == len(best), "Output batch size {} must match output beam hypotheses {}".format(
-            output_batch_size, len(best)
-        )
+        assert output_batch_size == len(
+            best
+        ), f"Output batch size {output_batch_size} must match output beam hypotheses {len(best)}"
         sent_lengths = tf.convert_to_tensor(sent_lengths_list, dtype=tf.int32)
@@ -992,9 +992,9 @@ def calc_banned_bad_words_ids(prev_input_ids, bad_words_ids):
         banned_tokens_slice = []
         for banned_token_seq in bad_words_ids:
-            assert len(banned_token_seq) > 0, "Banned words token sequences {} cannot have an empty list".format(
-                bad_words_ids
-            )
+            assert (
+                len(banned_token_seq) > 0
+            ), f"Banned words token sequences { bad_words_ids} cannot have an empty list"
             if _tokens_match(prev_input_ids_slice.numpy().tolist(), banned_token_seq[:-1]) is False:
                 # if tokens do not match continue
...
@@ -83,7 +83,7 @@ class HfApi:
         Throws: requests.exceptions.HTTPError if credentials are invalid
         """
-        path = "{}/api/login".format(self.endpoint)
+        path = f"{self.endpoint}/api/login"
         r = requests.post(path, json={"username": username, "password": password})
         r.raise_for_status()
         d = r.json()
@@ -93,8 +93,8 @@ class HfApi:
         """
         Call HF API to know "whoami"
         """
-        path = "{}/api/whoami".format(self.endpoint)
-        r = requests.get(path, headers={"authorization": "Bearer {}".format(token)})
+        path = f"{self.endpoint}/api/whoami"
+        r = requests.get(path, headers={"authorization": f"Bearer {token}"})
         r.raise_for_status()
         d = r.json()
         return d["user"], d["orgs"]
@@ -103,15 +103,15 @@ class HfApi:
         """
         Call HF API to log out.
         """
-        path = "{}/api/logout".format(self.endpoint)
-        r = requests.post(path, headers={"authorization": "Bearer {}".format(token)})
+        path = f"{self.endpoint}/api/logout"
+        r = requests.post(path, headers={"authorization": f"Bearer {token}"})
         r.raise_for_status()
     def model_list(self) -> List[ModelInfo]:
         """
         Get the public list of all the models on huggingface.co
         """
-        path = "{}/api/models".format(self.endpoint)
+        path = f"{self.endpoint}/api/models"
         r = requests.get(path)
         r.raise_for_status()
         d = r.json()
@@ -123,9 +123,9 @@ class HfApi:
         Call HF API to list all stored files for user (or one of their organizations).
         """
-        path = "{}/api/repos/ls".format(self.endpoint)
+        path = f"{self.endpoint}/api/repos/ls"
         params = {"organization": organization} if organization is not None else None
-        r = requests.get(path, params=params, headers={"authorization": "Bearer {}".format(token)})
+        r = requests.get(path, params=params, headers={"authorization": f"Bearer {token}"})
         r.raise_for_status()
         d = r.json()
         return [RepoObj(**x) for x in d]
@@ -151,13 +151,13 @@ class HfApi:
             lfsmultipartthresh: Optional: internal param for testing purposes.
         """
-        path = "{}/api/repos/create".format(self.endpoint)
+        path = f"{self.endpoint}/api/repos/create"
         json = {"name": name, "organization": organization, "private": private}
         if lfsmultipartthresh is not None:
             json["lfsmultipartthresh"] = lfsmultipartthresh
         r = requests.post(
             path,
-            headers={"authorization": "Bearer {}".format(token)},
+            headers={"authorization": f"Bearer {token}"},
             json=json,
         )
         if exist_ok and r.status_code == 409:
@@ -174,10 +174,10 @@ class HfApi:
         CAUTION(this is irreversible).
         """
-        path = "{}/api/repos/delete".format(self.endpoint)
+        path = f"{self.endpoint}/api/repos/delete"
         r = requests.delete(
             path,
-            headers={"authorization": "Bearer {}".format(token)},
+            headers={"authorization": f"Bearer {token}"},
             json={"name": name, "organization": organization},
         )
         r.raise_for_status()
...
@@ -123,7 +123,7 @@ class HfArgumentParser(ArgumentParser):
             kwargs["type"] = field.type.__args__[0]
             assert all(
                 x == kwargs["type"] for x in field.type.__args__
-            ), "{} cannot be a List of mixed types".format(field.name)
+            ), f"{field.name} cannot be a List of mixed types"
             if field.default_factory is not dataclasses.MISSING:
                 kwargs["default"] = field.default_factory()
             elif field.default is dataclasses.MISSING:
...
@@ -533,12 +533,9 @@ class TensorBoardCallback(TrainerCallback):
                 else:
                     logger.warning(
                         "Trainer is attempting to log a value of "
-                        '"%s" of type %s for key "%s" as a scalar. '
+                        f'"{v}" of type {type(v)} for key "{k}" as a scalar. '
                         "This invocation of Tensorboard's writer.add_scalar() "
-                        "is incorrect so we dropped this attribute.",
-                        v,
-                        type(v),
-                        k,
+                        "is incorrect so we dropped this attribute."
                     )
             self.tb_writer.flush()
...
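The hunk above also changes the logging style: the old call passed `v`, `type(v)` and `k` as extra arguments, so `%`-formatting only ran if the record was actually emitted, while the f-string builds the message eagerly before `logger.warning` is called. A minimal sketch of the two spellings (illustrative values, not taken from the diff):

    import logging

    logger = logging.getLogger(__name__)
    key, value = "loss", 0.25

    # lazy %-style: formatting is deferred to the logging framework
    logger.warning("Dropping key %s with value %s", key, value)

    # f-string: the message is formatted eagerly at the call site
    logger.warning(f"Dropping key {key} with value {value}")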
@@ -65,7 +65,7 @@ class ModelCard:
             try:
                 setattr(self, key, value)
             except AttributeError as err:
-                logger.error("Can't set {} with value {} for {}".format(key, value, self))
+                logger.error(f"Can't set {key} with value {value} for {self}")
                 raise err
     def save_pretrained(self, save_directory_or_file):
@@ -77,7 +77,7 @@ class ModelCard:
             output_model_card_file = save_directory_or_file
         self.to_json_file(output_model_card_file)
-        logger.info("Model card saved in {}".format(output_model_card_file))
+        logger.info(f"Model card saved in {output_model_card_file}")
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
@@ -161,11 +161,9 @@ class ModelCard:
                 model_card_file, cache_dir=cache_dir, proxies=proxies, user_agent=user_agent
             )
             if resolved_model_card_file == model_card_file:
-                logger.info("loading model card file {}".format(model_card_file))
+                logger.info(f"loading model card file {model_card_file}")
             else:
-                logger.info(
-                    "loading model card file {} from cache at {}".format(model_card_file, resolved_model_card_file)
-                )
+                logger.info(f"loading model card file {model_card_file} from cache at {resolved_model_card_file}")
             # Load model card
             modelcard = cls.from_json_file(resolved_model_card_file)
@@ -182,7 +180,7 @@ class ModelCard:
         for key in to_remove:
             kwargs.pop(key, None)
-        logger.info("Model card: %s", str(modelcard))
+        logger.info(f"Model card: {modelcard}")
         if return_unused_kwargs:
             return modelcard, kwargs
         else:
...
@@ -43,10 +43,10 @@ def load_pytorch_checkpoint_in_flax_state_dict(flax_model, pytorch_checkpoint_pa
         raise
     pt_path = os.path.abspath(pytorch_checkpoint_path)
-    logger.info("Loading PyTorch weights from {}".format(pt_path))
+    logger.info(f"Loading PyTorch weights from {pt_path}")
     pt_state_dict = torch.load(pt_path, map_location="cpu")
-    logger.info("PyTorch checkpoint contains {sum(t.numel() for t in pt_state_dict.values())} parameters.")
+    logger.info(f"PyTorch checkpoint contains {sum(t.numel() for t in pt_state_dict.values()):,} parameters.")
     flax_state_dict = convert_pytorch_state_dict_to_flax(pt_state_dict, flax_model)
...
@@ -270,10 +270,8 @@ class FlaxPreTrainedModel(ABC):
                 archive_file = os.path.join(pretrained_model_name_or_path, FLAX_WEIGHTS_NAME)
             else:
                 raise EnvironmentError(
-                    "Error no file named {} found in directory {} or `from_pt` set to False".format(
-                        [FLAX_WEIGHTS_NAME, WEIGHTS_NAME],
-                        pretrained_model_name_or_path,
-                    )
+                    f"Error no file named {[FLAX_WEIGHTS_NAME, WEIGHTS_NAME]} found in directory "
+                    f"{pretrained_model_name_or_path} or `from_pt` set to False"
                 )
         elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
             archive_file = pretrained_model_name_or_path
@@ -382,7 +380,7 @@ class FlaxPreTrainedModel(ABC):
                 Directory to which to save. Will be created if it doesn't exist.
         """
         if os.path.isfile(save_directory):
-            logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
+            logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
             return
         os.makedirs(save_directory, exist_ok=True)
...
@@ -98,10 +98,10 @@ def load_pytorch_checkpoint_in_tf2_model(tf_model, pytorch_checkpoint_path, tf_i
         raise
     pt_path = os.path.abspath(pytorch_checkpoint_path)
-    logger.info("Loading PyTorch weights from {}".format(pt_path))
+    logger.info(f"Loading PyTorch weights from {pt_path}")
     pt_state_dict = torch.load(pt_path, map_location="cpu")
-    logger.info("PyTorch checkpoint contains {:,} parameters".format(sum(t.numel() for t in pt_state_dict.values())))
+    logger.info(f"PyTorch checkpoint contains {sum(t.numel() for t in pt_state_dict.values()):,} parameters")
     return load_pytorch_weights_in_tf2_model(
         tf_model, pt_state_dict, tf_inputs=tf_inputs, allow_missing_keys=allow_missing_keys
@@ -178,7 +178,7 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a
             if any(re.search(pat, name) is not None for pat in tf_model._keys_to_ignore_on_load_missing):
                 continue
-            raise AttributeError("{} not found in PyTorch model".format(name))
+            raise AttributeError(f"{name} not found in PyTorch model")
         array = pt_state_dict[name].numpy()
@@ -204,7 +204,7 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a
                 raise e
         tf_loaded_numel += array.size
-        # logger.warning("Initialize TF weight {}".format(symbolic_weight.name))
+        # logger.warning(f"Initialize TF weight {symbolic_weight.name}")
         weight_value_tuples.append((symbolic_weight, array))
         all_pytorch_weights.discard(name)
@@ -214,7 +214,7 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a
     if tf_inputs is not None:
         tf_model(tf_inputs, training=False)  # Make sure restore ops are run
-    logger.info("Loaded {:,} parameters in the TF 2.0 model.".format(tf_loaded_numel))
+    logger.info(f"Loaded {tf_loaded_numel:,} parameters in the TF 2.0 model.")
     unexpected_keys = list(all_pytorch_weights)
@@ -276,7 +276,7 @@ def load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path, tf_inputs
     from .modeling_tf_utils import load_tf_weights
-    logger.info("Loading TensorFlow weights from {}".format(tf_checkpoint_path))
+    logger.info(f"Loading TensorFlow weights from {tf_checkpoint_path}")
     # Instantiate and load the associated TF 2.0 model
     tf_model_class_name = "TF" + pt_model.__class__.__name__  # Add "TF" at the beginning
@@ -346,7 +346,7 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=F
                 missing_keys_pt.append(pt_weight_name)
                 continue
-            raise AttributeError("{} not found in TF 2.0 model".format(pt_weight_name))
+            raise AttributeError(f"{pt_weight_name} not found in TF 2.0 model")
         array, transpose = tf_weights_map[pt_weight_name]
@@ -371,7 +371,7 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=F
                 e.args += (pt_weight.shape, array.shape)
                 raise e
-        # logger.warning("Initialize PyTorch weight {}".format(pt_weight_name))
+        # logger.warning(f"Initialize PyTorch weight {pt_weight_name}")
         new_pt_params_dict[pt_weight_name] = torch.from_numpy(array)
         loaded_pt_weights_data_ptr[pt_weight.data_ptr()] = torch.from_numpy(array)
@@ -404,6 +404,6 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=F
             f"you can already use {pt_model.__class__.__name__} for predictions without further training."
         )
-    logger.info("Weights or buffers not loaded from TF 2.0 model: {}".format(all_tf_weights))
+    logger.info(f"Weights or buffers not loaded from TF 2.0 model: {all_tf_weights}")
     return pt_model
@@ -632,11 +632,9 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
         super().__init__(*inputs, **kwargs)
         if not isinstance(config, PretrainedConfig):
             raise ValueError(
-                "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. "
-                "To create a model from a pretrained model use "
-                "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format(
-                    self.__class__.__name__, self.__class__.__name__
-                )
+                f"Parameter config in `{self.__class__.__name__}(config)` should be an instance of class "
+                "`PretrainedConfig`. To create a model from a pretrained model use "
+                f"`model = {self.__class__.__name__}.from_pretrained(PRETRAINED_MODEL_NAME)`"
             )
         # Save config and origin of the pretrained weights if given in model
         self.config = config
@@ -1027,7 +1025,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
         https://www.tensorflow.org/tfx/serving/serving_basic
         """
         if os.path.isfile(save_directory):
-            logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
+            logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
             return
         os.makedirs(save_directory, exist_ok=True)
@@ -1042,7 +1040,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
         # If we save using the predefined names, we can load using `from_pretrained`
         output_model_file = os.path.join(save_directory, TF2_WEIGHTS_NAME)
         self.save_weights(output_model_file)
-        logger.info("Model weights saved in {}".format(output_model_file))
+        logger.info(f"Model weights saved in {output_model_file}")
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
@@ -1207,9 +1205,8 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
                 archive_file = os.path.join(pretrained_model_name_or_path, TF2_WEIGHTS_NAME)
             else:
                 raise EnvironmentError(
-                    "Error no file named {} found in directory {} or `from_pt` set to False".format(
-                        [WEIGHTS_NAME, TF2_WEIGHTS_NAME], pretrained_model_name_or_path
-                    )
+                    f"Error no file named {[WEIGHTS_NAME, TF2_WEIGHTS_NAME]} found in directory "
+                    f"{pretrained_model_name_or_path} or `from_pt` set to False"
                 )
         elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
             archive_file = pretrained_model_name_or_path
@@ -1244,9 +1241,9 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
                 )
                 raise EnvironmentError(msg)
             if resolved_archive_file == archive_file:
-                logger.info("loading weights file {}".format(archive_file))
+                logger.info(f"loading weights file {archive_file}")
             else:
-                logger.info("loading weights file {} from cache at {}".format(archive_file, resolved_archive_file))
+                logger.info(f"loading weights file {archive_file} from cache at {resolved_archive_file}")
         else:
             resolved_archive_file = None
@@ -1273,7 +1270,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
         else:
             model(model.dummy_inputs)  # build the network with dummy inputs
-        assert os.path.isfile(resolved_archive_file), "Error retrieving file {}".format(resolved_archive_file)
+        assert os.path.isfile(resolved_archive_file), f"Error retrieving file {resolved_archive_file}"
         # 'by_name' allow us to do transfer learning by skipping/adding layers
         # see https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1339-L1357
         try:
@@ -1442,7 +1439,7 @@ class TFSharedEmbeddings(tf.keras.layers.Layer):
         elif mode == "linear":
             return self._linear(inputs)
         else:
-            raise ValueError("mode {} is not valid.".format(mode))
+            raise ValueError(f"mode {mode} is not valid.")
     def _embedding(self, input_ids):
         """Applies embedding based on inputs tensor."""
...
@@ -211,9 +211,7 @@ class ModuleUtilsMixin:
             encoder_extended_attention_mask = (1.0 - encoder_extended_attention_mask) * -1e9
         else:
             raise ValueError(
-                "{} not recognized. `dtype` should be set to either `torch.float32` or `torch.float16`".format(
-                    self.dtype
-                )
+                f"{self.dtype} not recognized. `dtype` should be set to either `torch.float32` or `torch.float16`"
             )
         return encoder_extended_attention_mask
@@ -266,9 +264,7 @@ class ModuleUtilsMixin:
             extended_attention_mask = attention_mask[:, None, None, :]
         else:
             raise ValueError(
-                "Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format(
-                    input_shape, attention_mask.shape
-                )
+                f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})"
             )
         # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
@@ -439,11 +435,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
         super().__init__()
         if not isinstance(config, PretrainedConfig):
             raise ValueError(
-                "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. "
-                "To create a model from a pretrained model use "
-                "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format(
-                    self.__class__.__name__, self.__class__.__name__
-                )
+                f"Parameter config in `{self.__class__.__name__}(config)` should be an instance of class "
+                "`PretrainedConfig`. To create a model from a pretrained model use "
+                f"`model = {self.__class__.__name__}.from_pretrained(PRETRAINED_MODEL_NAME)`"
             )
         # Save config and origin of the pretrained weights if given in model
         self.config = config
@@ -834,7 +828,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
             output_model_file = os.path.join(save_directory, WEIGHTS_NAME)
             save_function(state_dict, output_model_file)
-        logger.info("Model weights saved in {}".format(output_model_file))
+        logger.info(f"Model weights saved in {output_model_file}")
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):
@@ -1053,9 +1047,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
                 raise EnvironmentError(msg)
             if resolved_archive_file == archive_file:
-                logger.info("loading weights file {}".format(archive_file))
+                logger.info(f"loading weights file {archive_file}")
             else:
-                logger.info("loading weights file {} from cache at {}".format(archive_file, resolved_archive_file))
+                logger.info(f"loading weights file {archive_file} from cache at {resolved_archive_file}")
         else:
             resolved_archive_file = None
@@ -1185,11 +1179,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
                 f"you can already use {model.__class__.__name__} for predictions without further training."
             )
         if len(error_msgs) > 0:
-            raise RuntimeError(
-                "Error(s) in loading state_dict for {}:\n\t{}".format(
-                    model.__class__.__name__, "\n\t".join(error_msgs)
-                )
-            )
+            error_msg = "\n\t".join(error_msgs)
+            raise RuntimeError(f"Error(s) in loading state_dict for {model.__class__.__name__}:\n\t{error_msg}")
         # make sure token embedding weights are still tied if needed
         model.tie_weights()
@@ -1754,7 +1745,7 @@ def prune_layer(
     elif isinstance(layer, Conv1D):
         return prune_conv1d_layer(layer, index, dim=1 if dim is None else dim)
     else:
-        raise ValueError("Can't prune layer of class {}".format(layer.__class__))
+        raise ValueError(f"Can't prune layer of class {layer.__class__}")
 def apply_chunking_to_forward(
@@ -1793,7 +1784,7 @@ def apply_chunking_to_forward(
         return apply_chunking_to_forward(self.forward_chunk, self.chunk_size_lm_head, self.seq_len_dim, hidden_states)
     """
-    assert len(input_tensors) > 0, "{} has to be a tuple/list of tensors".format(input_tensors)
+    assert len(input_tensors) > 0, f"{input_tensors} has to be a tuple/list of tensors"
     tensor_shape = input_tensors[0].shape[chunk_dim]
     assert all(
         input_tensor.shape[chunk_dim] == tensor_shape for input_tensor in input_tensors
@@ -1801,18 +1792,18 @@ def apply_chunking_to_forward(
     # inspect.signature exist since python 3.5 and is a python method -> no problem with backward compatibility
    num_args_in_forward_chunk_fn = len(inspect.signature(forward_fn).parameters)
-    assert num_args_in_forward_chunk_fn == len(
-        input_tensors
-    ), "forward_chunk_fn expects {} arguments, but only {} input tensors are given".format(
-        num_args_in_forward_chunk_fn, len(input_tensors)
-    )
+    if num_args_in_forward_chunk_fn != len(input_tensors):
+        raise ValueError(
+            f"forward_chunk_fn expects {num_args_in_forward_chunk_fn} arguments, but only {len(input_tensors)} input "
+            "tensors are given"
+        )
     if chunk_size > 0:
-        assert (
-            input_tensors[0].shape[chunk_dim] % chunk_size == 0
-        ), "The dimension to be chunked {} has to be a multiple of the chunk size {}".format(
-            input_tensors[0].shape[chunk_dim], chunk_size
-        )
+        if input_tensors[0].shape[chunk_dim] % chunk_size != 0:
+            raise ValueError(
+                f"The dimension to be chunked {input_tensors[0].shape[chunk_dim]} has to be a multiple of the chunk "
+                f"size {chunk_size}"
+            )
         num_chunks = input_tensors[0].shape[chunk_dim] // chunk_size
...
@@ -29,14 +29,14 @@ logging.set_verbosity_info()
 def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_file, pytorch_dump_path):
     # Initialise PyTorch model
     config = AlbertConfig.from_json_file(albert_config_file)
-    print("Building PyTorch model from configuration: {}".format(str(config)))
+    print(f"Building PyTorch model from configuration: {config}")
     model = AlbertForPreTraining(config)
     # Load weights from tf checkpoint
     load_tf_weights_in_albert(model, config, tf_checkpoint_path)
     # Save pytorch-model
-    print("Save PyTorch model to {}".format(pytorch_dump_path))
+    print(f"Save PyTorch model to {pytorch_dump_path}")
     torch.save(model.state_dict(), pytorch_dump_path)
...
@@ -84,13 +84,13 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
         )
         raise
     tf_path = os.path.abspath(tf_checkpoint_path)
-    logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
+    logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
     # Load weights from TF model
     init_vars = tf.train.list_variables(tf_path)
     names = []
     arrays = []
     for name, shape in init_vars:
-        logger.info("Loading TF weight {} with shape {}".format(name, shape))
+        logger.info(f"Loading TF weight {name} with shape {shape}")
         array = tf.train.load_variable(tf_path, name)
         names.append(name)
         arrays.append(array)
@@ -152,7 +152,7 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
             or "AdamWeightDecayOptimizer_1" in name
             or "global_step" in name
         ):
-            logger.info("Skipping {}".format("/".join(name)))
+            logger.info(f"Skipping {'/'.join(name)}")
             continue
         pointer = model
@@ -174,7 +174,7 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
             try:
                 pointer = getattr(pointer, scope_names[0])
             except AttributeError:
-                logger.info("Skipping {}".format("/".join(name)))
+                logger.info(f"Skipping {'/'.join(name)}")
                 continue
             if len(scope_names) >= 2:
                 num = int(scope_names[1])
@@ -191,7 +191,7 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
-        print("Initialize PyTorch weight {} from {}".format(name, original_name))
+        print(f"Initialize PyTorch weight {name} from {original_name}")
         pointer.data = torch.from_numpy(array)
     return model
@@ -252,8 +252,8 @@ class AlbertAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                "The hidden size (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
+                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
+                f"heads ({config.num_attention_heads}"
             )
         self.num_attention_heads = config.num_attention_heads
...
@@ -338,7 +338,7 @@ class TFAlbertLayerGroup(tf.keras.layers.Layer):
         super().__init__(**kwargs)
         self.albert_layers = [
-            TFAlbertLayer(config, name="albert_layers_._{}".format(i)) for i in range(config.inner_group_num)
+            TFAlbertLayer(config, name=f"albert_layers_._{i}") for i in range(config.inner_group_num)
         ]
     def call(
@@ -390,8 +390,7 @@ class TFAlbertTransformer(tf.keras.layers.Layer):
             name="embedding_hidden_mapping_in",
         )
         self.albert_layer_groups = [
-            TFAlbertLayerGroup(config, name="albert_layer_groups_._{}".format(i))
-            for i in range(config.num_hidden_groups)
+            TFAlbertLayerGroup(config, name=f"albert_layer_groups_._{i}") for i in range(config.num_hidden_groups)
         ]
     def call(
...
@@ -311,7 +311,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
         if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
             return
         out_vocab_file = os.path.join(
             save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
...
@@ -248,7 +248,7 @@ class AlbertTokenizerFast(PreTrainedTokenizerFast):
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
         if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
             return
         out_vocab_file = os.path.join(
             save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
...
@@ -310,9 +310,7 @@ class AutoConfig:
             config_class = CONFIG_MAPPING[model_type]
             return config_class(*args, **kwargs)
         raise ValueError(
-            "Unrecognized model identifier: {}. Should contain one of {}".format(
-                model_type, ", ".join(CONFIG_MAPPING.keys())
-            )
+            f"Unrecognized model identifier: {model_type}. Should contain one of {', '.join(CONFIG_MAPPING.keys())}"
         )
     @classmethod
@@ -404,7 +402,7 @@ class AutoConfig:
             return config_class.from_dict(config_dict, **kwargs)
         raise ValueError(
-            "Unrecognized model in {}. "
+            f"Unrecognized model in {pretrained_model_name_or_path}. "
             "Should have a `model_type` key in its config.json, or contain one of the following strings "
-            "in its name: {}".format(pretrained_model_name_or_path, ", ".join(CONFIG_MAPPING.keys()))
+            f"in its name: {', '.join(CONFIG_MAPPING.keys())}"
         )