Unverified Commit acc3bd9d authored by Sylvain Gugger, committed by GitHub

Enforce string-formatting with f-strings (#10980)



* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
parent d0b3797a
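
The diff below applies a single mechanical rewrite across the code base: every `str.format()` call and every `%`-style interpolation becomes an f-string. As a minimal sketch of the patterns involved (the values and names here are invented for illustration, not taken from the diff):

```python
# Illustrative sketch, not part of this commit: the formatting styles this PR
# rewrites into f-strings, with their f-string equivalents.

name, shape = "wte/kernel", (50257, 768)

# str.format() with positional braces
old_1 = "Loading TF weight {} with shape {}".format(name, shape)
new_1 = f"Loading TF weight {name} with shape {shape}"

# %-style formatting (the "Treat %s and %d" commit above)
hidden_size, num_heads = 768, 12
old_2 = "The hidden size (%d) is not a multiple of the number of attention heads (%d)" % (hidden_size, num_heads)
new_2 = f"The hidden size ({hidden_size}) is not a multiple of the number of attention heads ({num_heads})"

# str.format() with a format spec: the spec carries over unchanged
x_min = 0.1234
old_3 = "Act_min: {:.2f}".format(x_min)
new_3 = f"Act_min: {x_min:.2f}"

assert old_1 == new_1 and old_2 == new_2 and old_3 == new_3
```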
......@@ -357,7 +357,7 @@ def _make_linear_from_emb(emb):
# Helper Functions, mostly for making masks
def _check_shapes(shape_1, shape2):
if shape_1 != shape2:
raise AssertionError("shape mismatch: {} != {}".format(shape_1, shape2))
raise AssertionError(f"shape mismatch: {shape_1} != {shape2}")
def shift_tokens_right(input_ids, pad_token_id):
......
......@@ -489,7 +489,7 @@ class FSMTTokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
src_vocab_file = os.path.join(
......@@ -514,8 +514,8 @@ class FSMTTokenizer(PreTrainedTokenizer):
for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]):
if index != token_index:
logger.warning(
"Saving vocabulary to {}: BPE merge indices are not consecutive."
" Please check that the tokenizer is not corrupted!".format(merges_file)
f"Saving vocabulary to {merges_file}: BPE merge indices are not consecutive."
" Please check that the tokenizer is not corrupted!"
)
index = token_index
writer.write(" ".join(bpe_tokens) + "\n")
......
......@@ -29,14 +29,14 @@ logging.set_verbosity_info()
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path, base_model):
# Initialise PyTorch model
config = FunnelConfig.from_json_file(config_file)
print("Building PyTorch model from configuration: {}".format(str(config)))
print(f"Building PyTorch model from configuration: {config}")
model = FunnelBaseModel(config) if base_model else FunnelModel(config)
# Load weights from tf checkpoint
load_tf_weights_in_funnel(model, config, tf_checkpoint_path)
# Save pytorch-model
print("Save PyTorch model to {}".format(pytorch_dump_path))
print(f"Save PyTorch model to {pytorch_dump_path}")
torch.save(model.state_dict(), pytorch_dump_path)
......
......@@ -80,13 +80,13 @@ def load_tf_weights_in_funnel(model, config, tf_checkpoint_path):
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
arrays = []
for name, shape in init_vars:
logger.info("Loading TF weight {} with shape {}".format(name, shape))
logger.info(f"Loading TF weight {name} with shape {shape}")
array = tf.train.load_variable(tf_path, name)
names.append(name)
arrays.append(array)
......@@ -116,7 +116,7 @@ def load_tf_weights_in_funnel(model, config, tf_checkpoint_path):
n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"]
for n in name
):
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
if name[0] == "generator":
continue
......@@ -143,7 +143,7 @@ def load_tf_weights_in_funnel(model, config, tf_checkpoint_path):
try:
pointer = getattr(pointer, m_name)
except AttributeError:
print("Skipping {}".format("/".join(name)), array.shape)
print(f"Skipping {'/'.join(name)}", array.shape)
skipped = True
break
if not skipped:
......
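
The converted logging lines above embed a full expression, `'/'.join(name)`, directly in the replacement field. A small hedged example of that pattern (the variable contents are made up; on Python versions before 3.12 the inner quotes must differ from the outer ones, hence the single quotes around `'/'`):

```python
# Illustrative only: embedding an expression in an f-string, as the converted
# logging lines above do.
name = ["model", "transformer", "adam_m"]  # hypothetical TF variable scopes
print(f"Skipping {'/'.join(name)}")  # -> Skipping model/transformer/adam_m
```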
......@@ -41,9 +41,9 @@ def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, p
# Save pytorch-model
pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME
print("Save PyTorch model to {}".format(pytorch_weights_dump_path))
print(f"Save PyTorch model to {pytorch_weights_dump_path}")
torch.save(model.state_dict(), pytorch_weights_dump_path)
print("Save configuration file to {}".format(pytorch_config_dump_path))
print(f"Save configuration file to {pytorch_config_dump_path}")
with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
f.write(config.to_json_string())
......
......@@ -78,13 +78,13 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
)
raise
tf_path = os.path.abspath(gpt2_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
arrays = []
for name, shape in init_vars:
logger.info("Loading TF weight {} with shape {}".format(name, shape))
logger.info(f"Loading TF weight {name} with shape {shape}")
array = tf.train.load_variable(tf_path, name)
names.append(name)
arrays.append(array.squeeze())
......@@ -117,7 +117,7 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
logger.info("Initialize PyTorch weight {}".format(name))
logger.info(f"Initialize PyTorch weight {name}")
pointer.data = torch.from_numpy(array)
return model
......
......@@ -233,7 +233,7 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
config.vocab_size, config.hidden_size, initializer_range=config.initializer_range, name="wte"
)
self.drop = tf.keras.layers.Dropout(config.embd_pdrop)
self.h = [TFBlock(config.n_ctx, config, scale=True, name="h_._{}".format(i)) for i in range(config.n_layer)]
self.h = [TFBlock(config.n_ctx, config, scale=True, name=f"h_._{i}") for i in range(config.n_layer)]
self.ln_f = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_epsilon, name="ln_f")
def build(self, input_shape):
......
......@@ -267,7 +267,7 @@ class GPT2Tokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
......@@ -285,8 +285,8 @@ class GPT2Tokenizer(PreTrainedTokenizer):
for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]):
if index != token_index:
logger.warning(
"Saving vocabulary to {}: BPE merge indices are not consecutive."
" Please check that the tokenizer is not corrupted!".format(merge_file)
f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive."
" Please check that the tokenizer is not corrupted!"
)
index = token_index
writer.write(" ".join(bpe_tokens) + "\n")
......
......@@ -38,14 +38,14 @@ def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_du
embed_dropout=config_json["embed_dropout"],
attention_dropout=config_json["attn_dropout"],
)
print("Building PyTorch model from configuration: {}".format(str(config)))
print(f"Building PyTorch model from configuration: {config}")
model = GPTNeoForCausalLM(config)
# Load weights from tf checkpoint
load_tf_weights_in_gpt_neo(model, config, tf_checkpoint_path)
# Save pytorch-model
print("Save PyTorch model to {}".format(pytorch_dump_path))
print(f"Save PyTorch model to {pytorch_dump_path}")
model.save_pretrained(pytorch_dump_path)
......
......@@ -63,7 +63,7 @@ def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path):
)
raise
tf_path = os.path.abspath(gpt_neo_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
......@@ -119,7 +119,7 @@ def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path):
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
print("Initialize PyTorch weight {}".format(name))
print(f"Initialize PyTorch weight {name}")
pointer.data = torch.from_numpy(array)
# init the final linear layer using word embeddings
......@@ -431,9 +431,8 @@ class GPTNeoAttention(nn.Module):
self.attention = GPTNeoLocalSelfAttention(config)
else:
raise NotImplementedError(
"Only attn layer types 'global' and 'local' exist, but got `config.attention_layers`: {}. Select attn layer types from ['global', 'local'] only.".format(
self.attention_layers
)
"Only attn layer types 'global' and 'local' exist, but got `config.attention_layers`: "
f"{config.attention_layers}. Select attn layer types from ['global', 'local'] only."
)
def forward(
......
......@@ -179,8 +179,8 @@ class IBertSelfAttention(nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
self.quant_mode = config.quant_mode
self.weight_bit = 8
......
......@@ -151,11 +151,9 @@ class QuantAct(nn.Module):
def __repr__(self):
return (
"{0}(activation_bit={1}, "
"quant_mode: {2}, Act_min: {3:.2f}, "
"Act_max: {4:.2f})".format(
self.__class__.__name__, self.activation_bit, self.quant_mode, self.x_min.item(), self.x_max.item()
)
f"{self.__class__.__name__}(activation_bit={self.activation_bit}, "
f"quant_mode: {self.activation_bit}, Act_min: {self.x_min.item():.2f}, "
f"Act_max: {self.x_max.item():.2f})"
)
def forward(
......@@ -261,7 +259,7 @@ class QuantLinear(nn.Module):
def __repr__(self):
s = super().__repr__()
s = "(" + s + " weight_bit={}, quant_mode={})".format(self.weight_bit, self.quant_mode)
s = f"({s} weight_bit={self.weight_bit}, quant_mode={self.quant_mode})"
return s
def forward(self, x, prev_act_scaling_factor=None):
......@@ -471,7 +469,7 @@ class IntLayerNorm(nn.Module):
shift = (torch.log2(torch.sqrt(var_int / 2 ** self.max_bit)).ceil()).max()
shift_old = self.shift
self.shift = torch.max(self.shift, shift)
logger.info("Dynamic shift adjustment: {} -> {}".format(int(shift_old), int(self.shift)))
logger.info(f"Dynamic shift adjustment: {int(shift_old)} -> {int(self.shift)}")
def overflow_fallback(self, y_int):
"""
......
......@@ -135,8 +135,8 @@ class LayoutLMSelfAttention(nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
self.num_attention_heads = config.num_attention_heads
......
......@@ -402,7 +402,7 @@ class TFLayoutLMEncoder(tf.keras.layers.Layer):
def __init__(self, config: LayoutLMConfig, **kwargs):
super().__init__(**kwargs)
self.layer = [TFLayoutLMLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)]
self.layer = [TFLayoutLMLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]
def call(
self,
......
......@@ -131,8 +131,8 @@ class LEDEncoderSelfAttention(nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
self.num_heads = config.num_attention_heads
self.head_dim = int(config.hidden_size / config.num_attention_heads)
......@@ -1673,9 +1673,8 @@ class LEDEncoder(LEDPreTrainedModel):
padding_len = (attention_window - seq_len % attention_window) % attention_window
if padding_len > 0:
logger.info(
"Input ids are automatically padded from {} to {} to be a multiple of `config.attention_window`: {}".format(
seq_len, seq_len + padding_len, attention_window
)
f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of "
f"`config.attention_window`: {attention_window}"
)
if input_ids is not None:
input_ids = F.pad(input_ids, (0, padding_len), value=pad_token_id)
......
......@@ -127,8 +127,8 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer):
if config.hidden_size % config.num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads}"
)
self.num_heads = config.num_attention_heads
......@@ -1824,9 +1824,8 @@ class TFLEDEncoder(tf.keras.layers.Layer):
if padding_len > 0:
logger.info(
"Input ids are automatically padded from {} to {} to be a multiple of `config.attention_window`: {}".format(
seq_len, seq_len + padding_len, attention_window
)
f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of "
f"`config.attention_window`: {attention_window}"
)
paddings = tf.convert_to_tensor([[0, 0], [0, padding_len]])
......
......@@ -57,7 +57,7 @@ def convert_longformer_qa_checkpoint_to_pytorch(
# save model
longformer_for_qa.save_pretrained(pytorch_dump_folder_path)
print("Conversion successful. Model saved under {}".format(pytorch_dump_folder_path))
print(f"Conversion successful. Model saved under {pytorch_dump_folder_path}")
if __name__ == "__main__":
......
......@@ -521,8 +521,8 @@ class LongformerSelfAttention(nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
self.num_heads = config.num_attention_heads
self.head_dim = int(config.hidden_size / config.num_attention_heads)
......@@ -1542,9 +1542,8 @@ class LongformerModel(LongformerPreTrainedModel):
padding_len = (attention_window - seq_len % attention_window) % attention_window
if padding_len > 0:
logger.info(
"Input ids are automatically padded from {} to {} to be a multiple of `config.attention_window`: {}".format(
seq_len, seq_len + padding_len, attention_window
)
f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of "
f"`config.attention_window`: {attention_window}"
)
if input_ids is not None:
input_ids = F.pad(input_ids, (0, padding_len), value=pad_token_id)
......
......@@ -646,8 +646,8 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer):
if config.hidden_size % config.num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads}"
)
self.num_heads = config.num_attention_heads
......@@ -1518,9 +1518,7 @@ class TFLongformerEncoder(tf.keras.layers.Layer):
self.output_hidden_states = config.output_hidden_states
self.output_attentions = config.output_attentions
self.layer = [
TFLongformerLayer(config, i, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)
]
self.layer = [TFLongformerLayer(config, i, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]
def call(
self,
......@@ -1780,9 +1778,8 @@ class TFLongformerMainLayer(tf.keras.layers.Layer):
if padding_len > 0:
logger.info(
"Input ids are automatically padded from {} to {} to be a multiple of `config.attention_window`: {}".format(
seq_len, seq_len + padding_len, attention_window
)
f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of "
f"`config.attention_window`: {attention_window}"
)
paddings = tf.convert_to_tensor([[0, 0], [0, padding_len]])
......
......@@ -29,14 +29,14 @@ logging.set_verbosity_info()
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path):
# Initialise PyTorch model
config = LxmertConfig.from_json_file(config_file)
print("Building PyTorch model from configuration: {}".format(str(config)))
print(f"Building PyTorch model from configuration: {config}")
model = LxmertForPreTraining(config)
# Load weights from tf checkpoint
load_tf_weights_in_lxmert(model, config, tf_checkpoint_path)
# Save pytorch-model
print("Save PyTorch model to {}".format(pytorch_dump_path))
print(f"Save PyTorch model to {pytorch_dump_path}")
torch.save(model.state_dict(), pytorch_dump_path)
......