Unverified Commit acc3bd9d authored by Sylvain Gugger, committed by GitHub

Enforce string-formatting with f-strings (#10980)



* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
parent d0b3797a
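The hunks below all apply the same mechanical rewrite: `str.format()` calls and `%`-style interpolation become f-strings that embed the expressions directly. A minimal sketch of the pattern, using an illustrative helper that is not part of this diff (the `tf_path`/`name`/`shape` names mirror the first hunk; the argument values are made up):

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def log_checkpoint_info(tf_path, name, shape):
    # Before: positional str.format() placeholders.
    logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
    logger.info("Loading TF weight {} with shape {}".format(name, shape))
    # After: f-strings embed the expressions at the point of use.
    logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
    logger.info(f"Loading TF weight {name} with shape {shape}")


# Hypothetical values, for illustration only.
log_checkpoint_info("/tmp/model.ckpt", "bert/embeddings/word_embeddings", [30522, 768])
```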
@@ -205,13 +205,13 @@ def load_tf_weights_in_lxmert(model, config, tf_checkpoint_path):
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
arrays = []
for name, shape in init_vars:
logger.info("Loading TF weight {} with shape {}".format(name, shape))
logger.info(f"Loading TF weight {name} with shape {shape}")
array = tf.train.load_variable(tf_path, name)
names.append(name)
arrays.append(array)
@@ -231,7 +231,7 @@ def load_tf_weights_in_lxmert(model, config, tf_checkpoint_path):
]
for n in name
):
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
pointer = model
for m_name in name:
@@ -251,7 +251,7 @@ def load_tf_weights_in_lxmert(model, config, tf_checkpoint_path):
try:
pointer = getattr(pointer, scope_names[0])
except AttributeError:
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
if len(scope_names) >= 2:
num = int(scope_names[1])
@@ -265,7 +265,7 @@ def load_tf_weights_in_lxmert(model, config, tf_checkpoint_path):
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
logger.info("Initialize PyTorch weight {}".format(name))
logger.info(f"Initialize PyTorch weight {name}")
pointer.data = torch.from_numpy(array)
return model
@@ -315,8 +315,8 @@ class LxmertAttention(nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
self.num_attention_heads = config.num_attention_heads
self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
......
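Two variants of the rewrite show up in this file: `%`-style messages in the `ValueError` and `.format()` calls whose argument is itself an expression. When the expression moves inside the replacement field, any string literal it contains has to use the other quote character, since reusing the surrounding quote inside an f-string is only allowed from Python 3.12 onward. A small sketch under that assumption, with a made-up `name` list standing in for a split TF variable name:

```python
name = ["bert", "encoder", "layer_0", "adam_v"]  # made-up split variable name

# .format() with an expression as its argument ...
old = "Skipping {}".format("/".join(name))
# ... becomes an f-string; the inner literal switches to single quotes because
# reusing the surrounding double quote in a replacement field is a syntax error
# before Python 3.12.
new = f"Skipping {'/'.join(name)}"
assert old == new == "Skipping bert/encoder/layer_0/adam_v"

# %-style interpolation converts the same way; only the fragments that
# interpolate values need the f prefix.
hidden_size, num_attention_heads = 768, 13
message = (
    f"The hidden size ({hidden_size}) is not a multiple of the number of attention "
    f"heads ({num_attention_heads})"
)
```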
@@ -249,8 +249,8 @@ class TFLxmertAttention(tf.keras.layers.Layer):
super().__init__(**kwargs)
if config.hidden_size % config.num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads}"
)
self.num_attention_heads = config.num_attention_heads
@@ -547,9 +547,9 @@ class TFLxmertEncoder(tf.keras.layers.Layer):
# Layers
# Using self.layer instead of self.l_layer to support loading BERT weights.
self.layer = [TFLxmertLayer(config, name="layer_._{}".format(i)) for i in range(self.num_l_layers)]
self.x_layers = [TFLxmertXLayer(config, name="x_layers_._{}".format(i)) for i in range(self.num_x_layers)]
self.r_layers = [TFLxmertLayer(config, name="r_layers_._{}".format(i)) for i in range(self.num_r_layers)]
self.layer = [TFLxmertLayer(config, name=f"layer_._{i}") for i in range(self.num_l_layers)]
self.x_layers = [TFLxmertXLayer(config, name=f"x_layers_._{i}") for i in range(self.num_x_layers)]
self.r_layers = [TFLxmertLayer(config, name=f"r_layers_._{i}") for i in range(self.num_r_layers)]
self.config = config
def call(
......
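The TF encoders build their Keras sub-layer names inside list comprehensions, where the f-string keeps the index interpolation inline. A stand-alone sketch (plain strings only; the real code passes them as `name=` to layer constructors such as `TFLxmertLayer`):

```python
num_l_layers = 3

# Before: index interpolated with str.format()
names_old = ["layer_._{}".format(i) for i in range(num_l_layers)]
# After: the f-string keeps the interpolation inline
names_new = [f"layer_._{i}" for i in range(num_l_layers)]

assert names_old == names_new == ["layer_._0", "layer_._1", "layer_._2"]
```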
@@ -210,7 +210,7 @@ class MBart50Tokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
......
@@ -275,7 +275,7 @@ class MBart50TokenizerFast(PreTrainedTokenizerFast):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
......
@@ -26,12 +26,12 @@ logging.set_verbosity_info()
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, mobilebert_config_file, pytorch_dump_path):
# Initialise PyTorch model
config = MobileBertConfig.from_json_file(mobilebert_config_file)
print("Building PyTorch model from configuration: {}".format(str(config)))
print(f"Building PyTorch model from configuration: {config}")
model = MobileBertForPreTraining(config)
# Load weights from tf checkpoint
model = load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path)
# Save pytorch-model
print("Save PyTorch model to {}".format(pytorch_dump_path))
print(f"Save PyTorch model to {pytorch_dump_path}")
torch.save(model.state_dict(), pytorch_dump_path)
......
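In the conversion script above, the explicit `str(config)` wrapper disappears: a replacement field already formats the object, so `f"{config}"` prints the same text. A sketch with a tiny stand-in object rather than the real `MobileBertConfig`:

```python
class TinyConfig:
    """Stand-in object for the sketch; the real code uses MobileBertConfig."""

    def __repr__(self):
        return "TinyConfig(hidden_size=512)"


config = TinyConfig()

# Before: an explicit str() call fed into .format()
print("Building PyTorch model from configuration: {}".format(str(config)))
# After: the replacement field formats the object itself, so str() is dropped
print(f"Building PyTorch model from configuration: {config}")
```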
@@ -77,13 +77,13 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path):
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
arrays = []
for name, shape in init_vars:
logger.info("Loading TF weight {} with shape {}".format(name, shape))
logger.info(f"Loading TF weight {name} with shape {shape}")
array = tf.train.load_variable(tf_path, name)
names.append(name)
arrays.append(array)
@@ -100,7 +100,7 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path):
n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"]
for n in name
):
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
pointer = model
for m_name in name:
@@ -120,7 +120,7 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path):
try:
pointer = getattr(pointer, scope_names[0])
except AttributeError:
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
if len(scope_names) >= 2:
num = int(scope_names[1])
@@ -136,7 +136,7 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path):
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
logger.info("Initialize PyTorch weight {}".format(name))
logger.info(f"Initialize PyTorch weight {name}")
pointer.data = torch.from_numpy(array)
return model
......
@@ -210,8 +210,8 @@ class TFMobileBertSelfAttention(tf.keras.layers.Layer):
super().__init__(**kwargs)
if config.hidden_size % config.num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads}"
)
self.num_attention_heads = config.num_attention_heads
@@ -463,9 +463,7 @@ class TFMobileBertLayer(tf.keras.layers.Layer):
if self.use_bottleneck:
self.bottleneck = TFBottleneck(config, name="bottleneck")
if config.num_feedforward_networks > 1:
self.ffn = [
TFFFNLayer(config, name="ffn.{}".format(i)) for i in range(config.num_feedforward_networks - 1)
]
self.ffn = [TFFFNLayer(config, name=f"ffn.{i}") for i in range(config.num_feedforward_networks - 1)]
def call(self, hidden_states, attention_mask, head_mask, output_attentions, training=False):
if self.use_bottleneck:
@@ -518,7 +516,7 @@ class TFMobileBertEncoder(tf.keras.layers.Layer):
super().__init__(**kwargs)
self.output_attentions = config.output_attentions
self.output_hidden_states = config.output_hidden_states
self.layer = [TFMobileBertLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)]
self.layer = [TFMobileBertLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]
def call(
self,
......
@@ -134,8 +134,8 @@ class MPNetSelfAttention(nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
self.num_attention_heads = config.num_attention_heads
......
@@ -192,8 +192,8 @@ class TFMPNetSelfAttention(tf.keras.layers.Layer):
if config.hidden_size % config.num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads}"
)
self.num_attention_heads = config.num_attention_heads
@@ -352,7 +352,7 @@ class TFMPNetEncoder(tf.keras.layers.Layer):
self.relative_attention_num_buckets = config.relative_attention_num_buckets
self.initializer_range = config.initializer_range
self.layer = [TFMPNetLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)]
self.layer = [TFMPNetLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]
self.relative_attention_num_buckets = config.relative_attention_num_buckets
def build(self, input_shape):
......
@@ -169,8 +169,8 @@ class MPNetTokenizer(PreTrainedTokenizer):
if not os.path.isfile(vocab_file):
raise ValueError(
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)
f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
)
self.vocab = load_vocab(vocab_file)
self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
@@ -312,8 +312,8 @@ class MPNetTokenizer(PreTrainedTokenizer):
for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
if index != token_index:
logger.warning(
"Saving vocabulary to {}: vocabulary indices are not consecutive."
" Please check that the vocabulary is not corrupted!".format(vocab_file)
f"Saving vocabulary to {vocab_file}: vocabulary indices are not consecutive."
" Please check that the vocabulary is not corrupted!"
)
index = token_index
writer.write(token + "\n")
......
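Several tokenizer messages, like the warning above, are built from adjacent string literals that Python concatenates at compile time; after the rewrite only the fragment that actually interpolates a value carries the `f` prefix. A minimal sketch with a hypothetical `vocab_file` path:

```python
vocab_file = "/tmp/vocab.txt"  # hypothetical path, for illustration only

# Adjacent string literals are concatenated at compile time, so only the piece
# that interpolates a value needs the f prefix.
warning = (
    f"Saving vocabulary to {vocab_file}: vocabulary indices are not consecutive."
    " Please check that the vocabulary is not corrupted!"
)
assert warning.startswith("Saving vocabulary to /tmp/vocab.txt:")
```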
@@ -41,9 +41,9 @@ def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_c
# Save pytorch-model
pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME
print("Save PyTorch model to {}".format(pytorch_weights_dump_path))
print(f"Save PyTorch model to {pytorch_weights_dump_path}")
torch.save(model.state_dict(), pytorch_weights_dump_path)
print("Save configuration file to {}".format(pytorch_config_dump_path))
print(f"Save configuration file to {pytorch_config_dump_path}")
with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
f.write(config.to_json_string())
......
@@ -67,14 +67,14 @@ def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path):
if ".ckpt" in openai_checkpoint_folder_path:
openai_checkpoint_folder_path = os.path.dirname(openai_checkpoint_folder_path)
logger.info("Loading weights from {}".format(openai_checkpoint_folder_path))
logger.info(f"Loading weights from {openai_checkpoint_folder_path}")
with open(openai_checkpoint_folder_path + "/parameters_names.json", "r", encoding="utf-8") as names_handle:
names = json.load(names_handle)
with open(openai_checkpoint_folder_path + "/params_shapes.json", "r", encoding="utf-8") as shapes_handle:
shapes = json.load(shapes_handle)
offsets = np.cumsum([np.prod(shape) for shape in shapes])
init_params = [np.load(openai_checkpoint_folder_path + "/params_{}.npy".format(n)) for n in range(10)]
init_params = [np.load(openai_checkpoint_folder_path + f"/params_{n}.npy") for n in range(10)]
init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
init_params = [param.reshape(shape) for param, shape in zip(init_params, shapes)]
@@ -134,7 +134,7 @@ def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path):
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
logger.info("Initialize PyTorch weight {}".format(name))
logger.info(f"Initialize PyTorch weight {name}")
pointer.data = torch.from_numpy(array)
return model
......
@@ -210,7 +210,7 @@ class TFOpenAIGPTMainLayer(tf.keras.layers.Layer):
config.vocab_size, config.n_embd, initializer_range=config.initializer_range, name="tokens_embed"
)
self.drop = tf.keras.layers.Dropout(config.embd_pdrop)
self.h = [TFBlock(config.n_ctx, config, scale=True, name="h_._{}".format(i)) for i in range(config.n_layer)]
self.h = [TFBlock(config.n_ctx, config, scale=True, name=f"h_._{i}") for i in range(config.n_layer)]
def build(self, input_shape):
with tf.name_scope("positions_embed"):
......
@@ -205,7 +205,7 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
@@ -223,8 +223,8 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]):
if index != token_index:
logger.warning(
"Saving vocabulary to {}: BPE merge indices are not consecutive."
" Please check that the tokenizer is not corrupted!".format(merge_file)
f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive."
" Please check that the tokenizer is not corrupted!"
)
index = token_index
writer.write(" ".join(bpe_tokens) + "\n")
......
@@ -250,7 +250,7 @@ class PegasusTokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
......
@@ -191,7 +191,7 @@ class PegasusTokenizerFast(PreTrainedTokenizerFast):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
......
@@ -312,7 +312,7 @@ class PhobertTokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
@@ -346,7 +346,7 @@ class PhobertTokenizer(PreTrainedTokenizer):
except FileNotFoundError as fnfe:
raise fnfe
except UnicodeError:
raise Exception("Incorrect encoding detected in {}, please " "rebuild the dataset".format(f))
raise Exception(f"Incorrect encoding detected in {f}, please rebuild the dataset")
return
lines = f.readlines()
......
@@ -135,8 +135,8 @@ class ProphetNetTokenizer(PreTrainedTokenizer):
if not os.path.isfile(vocab_file):
raise ValueError(
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = ProphetNetTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)
f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = ProphetNetTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
)
self.vocab = load_vocab(vocab_file)
self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
@@ -255,8 +255,8 @@ class ProphetNetTokenizer(PreTrainedTokenizer):
for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
if index != token_index:
logger.warning(
"Saving vocabulary to {}: vocabulary indices are not consecutive."
" Please check that the vocabulary is not corrupted!".format(vocab_file)
f"Saving vocabulary to {vocab_file}: vocabulary indices are not consecutive."
" Please check that the vocabulary is not corrupted!"
)
index = token_index
writer.write(token + "\n")
......
@@ -494,9 +494,7 @@ class RagModel(RagPreTrainedModel):
question_encoder.config, generator.config, **kwargs
)
else:
assert isinstance(config, self.config_class), "config: {} has to be of type {}".format(
config, self.config_class
)
assert isinstance(config, self.config_class), f"config: {config} has to be of type {self.config_class}"
super().__init__(config)
if question_encoder is None:
from ..auto.modeling_auto import AutoModel
......
@@ -496,9 +496,7 @@ class TFRagModel(TFRagPreTrainedModel):
question_encoder.config, generator.config, **kwargs
)
else:
assert isinstance(config, self.config_class), "config: {} has to be of type {}".format(
config, self.config_class
)
assert isinstance(config, self.config_class), f"config: {config} has to be of type {self.config_class}"
super().__init__(config, **kwargs)
if question_encoder is None:
......
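In both RAG models the multi-line `.format()` assert message collapses into a single f-string, which also lets the assert fit on one line. A sketch with a stand-in config class rather than the real `RagConfig`:

```python
class DummyConfig:
    """Stand-in for the RAG config class; purely illustrative."""


config = DummyConfig()
config_class = DummyConfig

# The multi-line .format() message becomes one f-string, so the assert
# fits on a single line again.
assert isinstance(config, config_class), f"config: {config} has to be of type {config_class}"
```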