Unverified Commit acc3bd9d authored by Sylvain Gugger, committed by GitHub

Enforce string-formatting with f-strings (#10980)



* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
parent d0b3797a
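The change is mechanical: each str.format() call and each %-style interpolation in the touched files becomes an f-string with the same arguments inlined into the literal. A minimal standalone sketch of the pattern (the variable names here are illustrative; the message itself is taken from the first hunk below):

    save_directory = "/tmp/vocab"
    # Before: positional str.format() and printf-style interpolation
    old_format = "Vocabulary path ({}) should be a directory".format(save_directory)
    old_percent = "Vocabulary path (%s) should be a directory" % save_directory
    # After: the expression moves inside the literal, which is prefixed with f
    new_fstring = f"Vocabulary path ({save_directory}) should be a directory"
    assert old_format == old_percent == new_fstring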
......@@ -217,7 +217,7 @@ class CamembertTokenizerFast(PreTrainedTokenizerFast):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
......
......@@ -70,12 +70,12 @@ def load_tf_weights_in_convbert(model, config, tf_checkpoint_path):
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
tf_data = {}
for name, shape in init_vars:
logger.info("Loading TF weight {} with shape {}".format(name, shape))
logger.info(f"Loading TF weight {name} with shape {shape}")
array = tf.train.load_variable(tf_path, name)
tf_data[name] = array
......@@ -285,8 +285,8 @@ class ConvBertSelfAttention(nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
new_num_attention_heads = config.num_attention_heads // config.head_ratio
......
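When a message spans adjacent string literals, as in the hidden-size check above, every literal that contains a placeholder needs its own f prefix; the literals are still concatenated into one string at compile time. A standalone sketch with stand-in values:

    hidden_size, num_attention_heads = 768, 10
    # Both literals carry placeholders, so both take the f prefix;
    # adjacent string literals are joined into a single message.
    message = (
        f"The hidden size ({hidden_size}) is not a multiple of the number of attention "
        f"heads ({num_attention_heads})"
    )
    print(message)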
......@@ -147,8 +147,8 @@ class TFConvBertSelfAttention(tf.keras.layers.Layer):
if config.hidden_size % config.num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
new_num_attention_heads = int(config.num_attention_heads / config.head_ratio)
......@@ -442,7 +442,7 @@ class TFConvBertEncoder(tf.keras.layers.Layer):
def __init__(self, config, **kwargs):
super().__init__(**kwargs)
-self.layer = [TFConvBertLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)]
+self.layer = [TFConvBertLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]
def call(
self,
......
......@@ -234,7 +234,7 @@ class TFCTRLMainLayer(tf.keras.layers.Layer):
config.resid_pdrop,
config.layer_norm_epsilon,
self.output_attentions,
name="h_._{}".format(i),
name=f"h_._{i}",
)
for i in range(config.n_layer)
]
......
......@@ -226,7 +226,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
......@@ -244,8 +244,8 @@ class CTRLTokenizer(PreTrainedTokenizer):
for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]):
if index != token_index:
logger.warning(
"Saving vocabulary to {}: BPE merge indices are not consecutive."
" Please check that the tokenizer is not corrupted!".format(merge_file)
f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive."
" Please check that the tokenizer is not corrupted!"
)
index = token_index
writer.write(" ".join(bpe_tokens) + "\n")
......
......@@ -492,8 +492,8 @@ class DisentangledSelfAttention(torch.nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
self.num_attention_heads = config.num_attention_heads
self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
......
......@@ -549,8 +549,8 @@ class DebertaTokenizer(PreTrainedTokenizer):
if not os.path.isfile(vocab_file):
raise ValueError(
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = XxxTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)
f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = XxxTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
)
self.do_lower_case = do_lower_case
self.gpt2_tokenizer = GPT2Tokenizer(vocab_file)
......
......@@ -561,8 +561,8 @@ class DisentangledSelfAttention(torch.nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
self.num_attention_heads = config.num_attention_heads
_attention_head_size = config.hidden_size // config.num_attention_heads
......
......@@ -107,8 +107,8 @@ class DebertaV2Tokenizer(PreTrainedTokenizer):
if not os.path.isfile(vocab_file):
raise ValueError(
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = DebertaV2Tokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)
f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = DebertaV2Tokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
)
self.do_lower_case = do_lower_case
self.split_by_punct = split_by_punct
......@@ -481,11 +481,11 @@ def convert_to_unicode(text):
elif isinstance(text, bytes):
return text.decode("utf-8", "ignore")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
raise ValueError(f"Unsupported string type: {type(text)}")
elif six.PY2:
if isinstance(text, str):
return text.decode("utf-8", "ignore")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
raise ValueError(f"Unsupported string type: {type(text)}")
else:
raise ValueError("Not running on Python2 or Python 3?")
......@@ -159,7 +159,7 @@ class MultiHeadSelfAttention(nn.Module):
"""
bs, q_length, dim = query.size()
k_length = key.size(1)
-# assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim)
+# assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured'
# assert key.size() == value.size()
dim_per_head = self.dim // self.n_heads
......@@ -208,9 +208,7 @@ class FFN(nn.Module):
self.seq_len_dim = 1
self.lin1 = nn.Linear(in_features=config.dim, out_features=config.hidden_dim)
self.lin2 = nn.Linear(in_features=config.hidden_dim, out_features=config.dim)
-assert config.activation in ["relu", "gelu"], "activation ({}) must be in ['relu', 'gelu']".format(
-config.activation
-)
+assert config.activation in ["relu", "gelu"], f"activation ({config.activation}) must be in ['relu', 'gelu']"
self.activation = gelu if config.activation == "gelu" else nn.ReLU()
def forward(self, input):
......
......@@ -168,7 +168,7 @@ class TFMultiHeadSelfAttention(tf.keras.layers.Layer):
"""
bs, q_length, dim = shape_list(query)
k_length = shape_list(key)[1]
-# assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim)
+# assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured'
# assert key.size() == value.size()
dim_per_head = tf.math.divide(self.dim, self.n_heads)
dim_per_head = tf.cast(dim_per_head, dtype=tf.int32)
......@@ -221,9 +221,7 @@ class TFFFN(tf.keras.layers.Layer):
self.lin2 = tf.keras.layers.Dense(
config.dim, kernel_initializer=get_initializer(config.initializer_range), name="lin2"
)
-assert config.activation in ["relu", "gelu"], "activation ({}) must be in ['relu', 'gelu']".format(
-config.activation
-)
+assert config.activation in ["relu", "gelu"], f"activation ({config.activation}) must be in ['relu', 'gelu']"
self.activation = get_tf_activation(config.activation)
def call(self, input, training=False):
......@@ -290,7 +288,7 @@ class TFTransformer(tf.keras.layers.Layer):
self.output_hidden_states = config.output_hidden_states
self.output_attentions = config.output_attentions
-self.layer = [TFTransformerBlock(config, name="layer_._{}".format(i)) for i in range(config.n_layers)]
+self.layer = [TFTransformerBlock(config, name=f"layer_._{i}") for i in range(config.n_layers)]
def call(self, x, attn_mask, head_mask, output_attentions, output_hidden_states, return_dict, training=False):
# docstyle-ignore
......
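The layer-naming comprehensions above follow the same rule: an f-string is evaluated each time the expression runs, so it can replace "layer_._{}".format(i) inside a comprehension directly. A standalone sketch with a stand-in layer count:

    n_layers = 3
    # The f-string is re-evaluated on every iteration of the comprehension.
    layer_names = [f"layer_._{i}" for i in range(n_layers)]
    print(layer_names)  # ['layer_._0', 'layer_._1', 'layer_._2']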
......@@ -28,7 +28,7 @@ CheckpointState = collections.namedtuple(
def load_states_from_checkpoint(model_file: str) -> CheckpointState:
print("Reading saved model from %s", model_file)
print(f"Reading saved model from {model_file}")
state_dict = torch.load(model_file, map_location=lambda s, l: default_restore_location(s, "cpu"))
return CheckpointState(**state_dict)
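The old line in this hunk was not %-formatting at all: print() received two arguments, so the literal %s was printed followed by the path. The f-string version interpolates the path into the message. A standalone sketch with a stand-in file name:

    model_file = "checkpoint.bin"
    # Old call: two arguments to print(), the placeholder is never substituted.
    print("Reading saved model from %s", model_file)   # Reading saved model from %s checkpoint.bin
    # New call: the path is interpolated into the string.
    print(f"Reading saved model from {model_file}")    # Reading saved model from checkpoint.bin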
......@@ -55,7 +55,7 @@ class DPRState:
class DPRContextEncoderState(DPRState):
def load_dpr_model(self):
model = DPRContextEncoder(DPRConfig(**BertConfig.get_config_dict("bert-base-uncased")[0]))
print("Loading DPR biencoder from {}".format(self.src_file))
print(f"Loading DPR biencoder from {self.src_file}")
saved_state = load_states_from_checkpoint(self.src_file)
encoder, prefix = model.ctx_encoder, "ctx_model."
# Fix changes from https://github.com/huggingface/transformers/commit/614fef1691edb806de976756d4948ecbcd0c0ca3
......@@ -73,7 +73,7 @@ class DPRContextEncoderState(DPRState):
class DPRQuestionEncoderState(DPRState):
def load_dpr_model(self):
model = DPRQuestionEncoder(DPRConfig(**BertConfig.get_config_dict("bert-base-uncased")[0]))
print("Loading DPR biencoder from {}".format(self.src_file))
print(f"Loading DPR biencoder from {self.src_file}")
saved_state = load_states_from_checkpoint(self.src_file)
encoder, prefix = model.question_encoder, "question_model."
# Fix changes from https://github.com/huggingface/transformers/commit/614fef1691edb806de976756d4948ecbcd0c0ca3
......@@ -91,7 +91,7 @@ class DPRQuestionEncoderState(DPRState):
class DPRReaderState(DPRState):
def load_dpr_model(self):
model = DPRReader(DPRConfig(**BertConfig.get_config_dict("bert-base-uncased")[0]))
print("Loading DPR reader from {}".format(self.src_file))
print(f"Loading DPR reader from {self.src_file}")
saved_state = load_states_from_checkpoint(self.src_file)
# Fix changes from https://github.com/huggingface/transformers/commit/614fef1691edb806de976756d4948ecbcd0c0ca3
state_dict = {
......
......@@ -239,7 +239,7 @@ class CustomDPRReaderTokenizerMixin:
questions = questions if not isinstance(questions, str) else [questions] * n_passages
assert len(titles) == len(
texts
), "There should be as many titles than texts but got {} titles and {} texts.".format(len(titles), len(texts))
), f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts."
encoded_question_and_titles = super().__call__(questions, titles, padding=False, truncation=False)["input_ids"]
encoded_texts = super().__call__(texts, add_special_tokens=False, padding=False, truncation=False)["input_ids"]
encoded_inputs = {
......@@ -350,9 +350,9 @@ class CustomDPRReaderTokenizerMixin:
scores = sorted(scores, key=lambda x: x[1], reverse=True)
chosen_span_intervals = []
for (start_index, end_index), score in scores:
-assert start_index <= end_index, "Wrong span indices: [{}:{}]".format(start_index, end_index)
+assert start_index <= end_index, f"Wrong span indices: [{start_index}:{end_index}]"
length = end_index - start_index + 1
-assert length <= max_answer_length, "Span is too long: {} > {}".format(length, max_answer_length)
+assert length <= max_answer_length, f"Span is too long: {length} > {max_answer_length}"
if any(
[
start_index <= prev_start_index <= prev_end_index <= end_index
......
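f-string placeholders accept arbitrary expressions, so calls such as .format(len(titles), len(texts)) in the hunk above move inline without a temporary variable. A standalone sketch with stand-in lists:

    titles = ["Title A", "Title B"]
    texts = ["text a", "text b"]
    # The len() calls that were .format() arguments now sit inside the braces.
    message = f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts."
    print(message)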
......@@ -240,7 +240,7 @@ class CustomDPRReaderTokenizerMixin:
questions = questions if not isinstance(questions, str) else [questions] * n_passages
assert len(titles) == len(
texts
), "There should be as many titles than texts but got {} titles and {} texts.".format(len(titles), len(texts))
), f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts."
encoded_question_and_titles = super().__call__(questions, titles, padding=False, truncation=False)["input_ids"]
encoded_texts = super().__call__(texts, add_special_tokens=False, padding=False, truncation=False)["input_ids"]
encoded_inputs = {
......@@ -351,9 +351,9 @@ class CustomDPRReaderTokenizerMixin:
scores = sorted(scores, key=lambda x: x[1], reverse=True)
chosen_span_intervals = []
for (start_index, end_index), score in scores:
-assert start_index <= end_index, "Wrong span indices: [{}:{}]".format(start_index, end_index)
+assert start_index <= end_index, f"Wrong span indices: [{start_index}:{end_index}]"
length = end_index - start_index + 1
-assert length <= max_answer_length, "Span is too long: {} > {}".format(length, max_answer_length)
+assert length <= max_answer_length, f"Span is too long: {length} > {max_answer_length}"
if any(
[
start_index <= prev_start_index <= prev_end_index <= end_index
......
......@@ -29,7 +29,7 @@ logging.set_verbosity_info()
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path, discriminator_or_generator):
# Initialise PyTorch model
config = ElectraConfig.from_json_file(config_file)
print("Building PyTorch model from configuration: {}".format(str(config)))
print(f"Building PyTorch model from configuration: {config}")
if discriminator_or_generator == "discriminator":
model = ElectraForPreTraining(config)
......@@ -44,7 +44,7 @@ def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_du
)
# Save pytorch-model
print("Save PyTorch model to {}".format(pytorch_dump_path))
print(f"Save PyTorch model to {pytorch_dump_path}")
torch.save(model.state_dict(), pytorch_dump_path)
......
......@@ -83,13 +83,13 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
arrays = []
for name, shape in init_vars:
logger.info("Loading TF weight {} with shape {}".format(name, shape))
logger.info(f"Loading TF weight {name} with shape {shape}")
array = tf.train.load_variable(tf_path, name)
names.append(name)
arrays.append(array)
......@@ -112,7 +112,7 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_
# adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v
# which are not required for using pretrained model
if any(n in ["global_step", "temperature"] for n in name):
logger.info("Skipping {}".format(original_name))
logger.info(f"Skipping {original_name}")
continue
pointer = model
for m_name in name:
......@@ -144,10 +144,10 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
print("Initialize PyTorch weight {}".format(name), original_name)
print(f"Initialize PyTorch weight {name}", original_name)
pointer.data = torch.from_numpy(array)
except AttributeError as e:
print("Skipping {}".format(original_name), name, e)
print(f"Skipping {original_name}", name, e)
continue
return model
......@@ -206,8 +206,8 @@ class ElectraSelfAttention(nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
self.num_attention_heads = config.num_attention_heads
......
......@@ -285,7 +285,7 @@ class TFElectraEncoder(tf.keras.layers.Layer):
def __init__(self, config: ElectraConfig, **kwargs):
super().__init__(**kwargs)
-self.layer = [TFElectraLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)]
+self.layer = [TFElectraLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]
def call(
self,
......
......@@ -159,9 +159,7 @@ class EncoderDecoderModel(PreTrainedModel):
if config is None:
config = EncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config)
else:
-assert isinstance(config, self.config_class), "config: {} has to be of type {}".format(
-config, self.config_class
-)
+assert isinstance(config, self.config_class), f"config: {config} has to be of type {self.config_class}"
# initialize with config
super().__init__(config)
......
......@@ -337,7 +337,7 @@ class TFFlaubertMultiHeadAttention(tf.keras.layers.Layer):
else:
klen = shape_list(kv)[1]
-# assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim)
+# assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured'
dim_per_head = self.dim // self.n_heads
mask_reshape = (bs, 1, qlen, klen) if len(shape_list(mask)) == 3 else (bs, 1, 1, klen)
......@@ -450,21 +450,19 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer):
for i in range(self.n_layers):
self.attentions.append(
-TFFlaubertMultiHeadAttention(self.n_heads, self.dim, config=config, name="attentions_._{}".format(i))
+TFFlaubertMultiHeadAttention(self.n_heads, self.dim, config=config, name=f"attentions_._{i}")
)
self.layer_norm1.append(
tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm1_._{}".format(i))
tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name=f"layer_norm1_._{i}")
)
# if self.is_decoder:
# self.layer_norm15.append(nn.LayerNorm(self.dim, eps=config.layer_norm_eps))
# self.encoder_attn.append(MultiHeadAttention(self.n_heads, self.dim, dropout=self.attention_dropout))
self.ffns.append(
-TFFlaubertTransformerFFN(
-self.dim, self.hidden_dim, self.dim, config=config, name="ffns_._{}".format(i)
-)
+TFFlaubertTransformerFFN(self.dim, self.hidden_dim, self.dim, config=config, name=f"ffns_._{i}")
)
self.layer_norm2.append(
tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm2_._{}".format(i))
tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name=f"layer_norm2_._{i}")
)
def build(self, input_shape):
......
......@@ -71,7 +71,7 @@ def convert_to_unicode(text):
elif isinstance(s, six.text_type):
return s
else:
raise TypeError("not expecting type '%s'" % type(s))
raise TypeError(f"not expecting type '{type(s)}'")
return six_ensure_text(text, encoding="utf-8", errors="ignore")
......