Unverified Commit acc3bd9d authored by Sylvain Gugger, committed by GitHub

Enforce string-formatting with f-strings (#10980)



* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
parent d0b3797a
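For reference, the three rewrite patterns this commit applies (`str.format`, `%s`/`%d` interpolation, and expressions moved directly into placeholders) are illustrated by the standalone sketch below; the variable names and values are made up for illustration and do not come from any file in the diff.

```python
# Hypothetical values, used only to illustrate the rewrite patterns in this commit.
archive_file = "pytorch_model.bin"
num_attention_heads = 12
name = ["bert", "encoder", "layer_0"]

# 1) str.format -> f-string
old = "loading file {}".format(archive_file)
new = f"loading file {archive_file}"
assert old == new

# 2) %-style formatting (%s / %d) -> f-string
old = "The hidden size is not a multiple of the number of attention heads (%d)" % num_attention_heads
new = f"The hidden size is not a multiple of the number of attention heads ({num_attention_heads})"
assert old == new

# 3) expressions previously passed to .format() move directly into the placeholder
old = "Skipping {}".format("/".join(name))
new = f"Skipping {'/'.join(name)}"
assert old == new
```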
......@@ -133,20 +133,20 @@ class LegacyIndex(Index):
)
raise EnvironmentError(msg)
if resolved_archive_file == archive_file:
logger.info("loading file {}".format(archive_file))
logger.info(f"loading file {archive_file}")
else:
logger.info("loading file {} from cache at {}".format(archive_file, resolved_archive_file))
logger.info(f"loading file {archive_file} from cache at {resolved_archive_file}")
return resolved_archive_file
def _load_passages(self):
logger.info("Loading passages from {}".format(self.index_path))
logger.info(f"Loading passages from {self.index_path}")
passages_path = self._resolve_path(self.index_path, self.PASSAGE_FILENAME)
with open(passages_path, "rb") as passages_file:
passages = pickle.load(passages_file)
return passages
def _deserialize_index(self):
logger.info("Loading index from {}".format(self.index_path))
logger.info(f"Loading index from {self.index_path}")
resolved_index_path = self._resolve_path(self.index_path, self.INDEX_FILENAME + ".index.dpr")
self.index = faiss.read_index(resolved_index_path)
resolved_meta_path = self._resolve_path(self.index_path, self.INDEX_FILENAME + ".index_meta.dpr")
......@@ -200,12 +200,12 @@ class HFIndexBase(Index):
def _check_dataset_format(self, with_index: bool):
if not isinstance(self.dataset, Dataset):
raise ValueError("Dataset should be a datasets.Dataset object, but got {}".format(type(self.dataset)))
raise ValueError(f"Dataset should be a datasets.Dataset object, but got {type(self.dataset)}")
if len({"title", "text", "embeddings"} - set(self.dataset.column_names)) > 0:
raise ValueError(
"Dataset should be a dataset with the following columns: "
"title (str), text (str) and embeddings (arrays of dimension vector_size), "
"but got columns {}".format(self.dataset.column_names)
f"but got columns {self.dataset.column_names}"
)
if with_index and "embeddings" not in self.dataset.list_indexes():
raise ValueError(
......@@ -269,7 +269,7 @@ class CanonicalHFIndex(HFIndexBase):
self.index_name = index_name
self.index_path = index_path
self.use_dummy_dataset = use_dummy_dataset
logger.info("Loading passages from {}".format(self.dataset_name))
logger.info(f"Loading passages from {self.dataset_name}")
dataset = load_dataset(
self.dataset_name, with_index=False, split=self.dataset_split, dummy=self.use_dummy_dataset
)
......@@ -277,10 +277,10 @@ class CanonicalHFIndex(HFIndexBase):
def init_index(self):
if self.index_path is not None:
logger.info("Loading index from {}".format(self.index_path))
logger.info(f"Loading index from {self.index_path}")
self.dataset.load_faiss_index("embeddings", file=self.index_path)
else:
logger.info("Loading index from {}".format(self.dataset_name + " with index name " + self.index_name))
logger.info(f"Loading index from {self.dataset_name} with index name {self.index_name}")
self.dataset = load_dataset(
self.dataset_name,
with_embeddings=True,
......@@ -313,7 +313,7 @@ class CustomHFIndex(HFIndexBase):
@classmethod
def load_from_disk(cls, vector_size, dataset_path, index_path):
logger.info("Loading passages from {}".format(dataset_path))
logger.info(f"Loading passages from {dataset_path}")
if dataset_path is None or index_path is None:
raise ValueError(
"Please provide ``dataset_path`` and ``index_path`` after calling ``dataset.save_to_disk(dataset_path)`` "
......@@ -324,7 +324,7 @@ class CustomHFIndex(HFIndexBase):
def init_index(self):
if not self.is_initialized():
logger.info("Loading index from {}".format(self.index_path))
logger.info(f"Loading index from {self.index_path}")
self.dataset.load_faiss_index("embeddings", file=self.index_path)
self._index_initialized = True
......@@ -520,9 +520,7 @@ class RagRetriever:
start_time = time.time()
ids, vectors = self.index.get_top_docs(question_hidden_states, n_docs)
logger.debug(
"index search time: {} sec, batch size {}".format(
time.time() - start_time, question_hidden_states.shape
)
f"index search time: {time.time() - start_time} sec, batch size {question_hidden_states.shape}"
)
ids_batched.extend(ids)
vectors_batched.extend(vectors)
......
......@@ -34,7 +34,7 @@ class RagTokenizer:
def save_pretrained(self, save_directory):
if os.path.isfile(save_directory):
raise ValueError("Provided path ({}) should be a directory, not a file".format(save_directory))
raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")
os.makedirs(save_directory, exist_ok=True)
question_encoder_path = os.path.join(save_directory, "question_encoder_tokenizer")
generator_path = os.path.join(save_directory, "generator_tokenizer")
......
......@@ -30,10 +30,10 @@ logging.set_verbosity_info()
def set_param(torch_layer, weight, bias=None):
# set parameter of one layer
assert torch_layer.weight.shape == weight.shape, "{} layer.weight does not match".format(torch_layer)
assert torch_layer.weight.shape == weight.shape, f"{torch_layer} layer.weight does not match"
torch_layer.weight = torch.nn.Parameter(weight)
if bias is not None:
assert torch_layer.bias.shape == bias.shape, "{} layer.bias does not match".format(torch_layer)
assert torch_layer.bias.shape == bias.shape, f"{torch_layer} layer.bias does not match"
torch_layer.bias = torch.nn.Parameter(bias)
......@@ -150,9 +150,9 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size):
position_embeddings = torch_model_reformer.embeddings.position_embeddings
for emb_idx in range(len(position_embeddings.weights)):
emb_weights = np.asarray(weights[3][emb_idx][0])
assert position_embeddings.weights[emb_idx].shape == emb_weights.shape, "{} emb does not match".format(
position_embeddings[emb_idx]
)
assert (
position_embeddings.weights[emb_idx].shape == emb_weights.shape
), f"{position_embeddings[emb_idx]} emb does not match"
position_embeddings.weights[emb_idx] = torch.nn.Parameter(torch.tensor(emb_weights))
trax_layer_weights = weights[5]
......@@ -185,7 +185,7 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size):
def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file, pytorch_dump_path):
# Initialise PyTorch model
config = ReformerConfig.from_json_file(config_file)
print("Building PyTorch model from configuration: {}".format(str(config)))
print(f"Building PyTorch model from configuration: {config}")
model = ReformerModelWithLMHead(config)
with open(trax_model_pkl_path, "rb") as f:
......@@ -194,7 +194,7 @@ def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file, pytorch
set_model_weights_in_torch(model_weights, model, config.hidden_size)
# Save pytorch-model
print("Save PyTorch model to {}".format(pytorch_dump_path))
print(f"Save PyTorch model to {pytorch_dump_path}")
torch.save(model.state_dict(), pytorch_dump_path)
......
......@@ -90,9 +90,8 @@ def _get_least_common_mult_chunk_len(config):
return np.lcm(config.lsh_attn_chunk_length, config.local_attn_chunk_length)
else:
raise NotImplementedError(
"Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {}. Select attn layer types from ['lsh', 'local'] only.".format(
config.attn_layers
)
f"Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {config.attn_layers}. Select "
"attn layer types from ['lsh', 'local'] only."
)
......@@ -107,9 +106,8 @@ def _get_min_chunk_len(config):
return min(config.lsh_attn_chunk_length, config.local_attn_chunk_length)
else:
raise NotImplementedError(
"Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {}. Select attn layer types from ['lsh', 'local'] only.".format(
config.attn_layers
)
f"Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {config.attn_layers}. Select "
"attn layer types from ['lsh', 'local'] only."
)
......@@ -127,11 +125,11 @@ class AxialPositionEmbeddings(nn.Module):
self.least_common_mult_chunk_length = _get_least_common_mult_chunk_len(config)
self.weights = nn.ParameterList()
assert (
sum(self.axial_pos_embds_dim) == config.hidden_size
), "Make sure that config.axial_pos_embds factors: {} sum to config.hidden_size: {}".format(
self.axial_pos_embds_dim, config.hidden_size
)
if sum(self.axial_pos_embds_dim) != config.hidden_size:
raise ValueError(
f"Make sure that config.axial_pos_embds factors: {self.axial_pos_embds_dim} sum to "
f"config.hidden_size: {config.hidden_size}"
)
# create weights
for axis, axial_pos_embd_dim in enumerate(self.axial_pos_embds_dim):
......@@ -153,11 +151,14 @@ class AxialPositionEmbeddings(nn.Module):
]
if self.training is True:
assert (
reduce(mul, self.axial_pos_shape) == sequence_length
), "If training, make sure that config.axial_pos_shape factors: {} multiply to sequence length. Got prod({}) != sequence_length: {}. You might want to consider padding your sequence length to {} or changing config.axial_pos_shape.".format(
self.axial_pos_shape, self.axial_pos_shape, sequence_length, reduce(mul, self.axial_pos_shape)
)
if reduce(mul, self.axial_pos_shape) != sequence_length:
raise ValueError(
f"If training, make sure that config.axial_pos_shape factors: {self.axial_pos_shape} multiply to "
f"sequence length. Got prod({self.axial_pos_shape}) != sequence_length: {sequence_length}. "
f"You might want to consider padding your sequence length to {reduce(mul, self.axial_pos_shape)} "
"or changing config.axial_pos_shape."
)
if self.dropout > 0:
weights = torch.cat(broadcasted_weights, dim=-1)
# permute weights so that 2D correctly drops dims 1 and 2
......@@ -177,13 +178,12 @@ class AxialPositionEmbeddings(nn.Module):
)
else:
assert (
reduce(mul, self.axial_pos_shape) >= sequence_length
), "Make sure that config.axial_pos_shape factors: {} multiply at least to max(sequence_length, least_common_mult_chunk_length): max({}, {})".format(
self.axial_pos_shape,
sequence_length,
self.least_common_mult_chunk_length,
)
if reduce(mul, self.axial_pos_shape) < sequence_length:
raise ValueError(
f"Make sure that config.axial_pos_shape factors: {self.axial_pos_shape} multiply at least to "
f"max(sequence_length, least_common_mult_chunk_length): max({sequence_length}, "
f"{self.least_common_mult_chunk_length})."
)
# compute how many columns are needed
max_position_id = position_ids.max().item()
......@@ -252,11 +252,11 @@ class ReformerEmbeddings(nn.Module):
if inputs_embeds is None:
inputs_embeds = self.word_embeddings(input_ids)
assert (
position_ids.shape[-1] <= self.max_position_embeddings
), "Sequence Length: {} has to be larger equal than config.max_position_embeddings: {}".format(
position_ids.shape[-1], self.max_position_embeddings
)
if position_ids.shape[-1] > self.max_position_embeddings:
raise ValueError(
f"Sequence Length: {position_ids.shape[-1]} has to be larger equal than "
f"config.max_position_embeddings {self.max_position_embeddings}."
)
# dropout
embeddings = nn.functional.dropout(inputs_embeds, p=self.dropout, training=self.training)
......@@ -322,7 +322,7 @@ class EfficientAttentionMixin:
elif len(vectors.shape) == 3:
return torch.reshape(vectors, split_dim_shape)
else:
raise ValueError("Input vector rank should be one of [3, 4], but is: {}".format(len(vectors.shape)))
raise ValueError(f"Input vector rank should be one of [3, 4], but is: {len(vectors.shape)}")
class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
......@@ -451,14 +451,10 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
assert (
query_key_vectors.shape[-1] == self.attention_head_size
), "last dim of query_key_vectors is {} but should be {}.".format(
query_key_vectors.shape[-1], self.attention_head_size
)
), f"last dim of query_key_vectors is {query_key_vectors.shape[-1]} but should be {self.attention_head_size}."
assert (
value_vectors.shape[-1] == self.attention_head_size
), "last dim of value_vectors is {} but should be {}.".format(
value_vectors.shape[-1], self.attention_head_size
)
), f"last dim of value_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}."
do_standard_self_attention = (sequence_length <= self.chunk_length) or (
use_cache and past_buckets_states[1] is not None
......@@ -479,7 +475,7 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
assert (
int(buckets.shape[-1]) == num_hashes * sequence_length
), "last dim of buckets is {}, but should be {}".format(buckets.shape[-1], num_hashes * sequence_length)
), f"last dim of buckets is {buckets.shape[-1]}, but should be {num_hashes * sequence_length}"
sorted_bucket_idx, undo_sorted_bucket_idx = self._get_sorted_bucket_idx_and_undo_sorted_bucket_idx(
sequence_length, buckets, num_hashes
......@@ -616,16 +612,16 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
if isinstance(self.num_buckets, int):
assert (
self.num_buckets % 2 == 0
), "There should be an even number of bucktes, but `self.num_bucktes`: {}".format(self.num_buckets)
), f"There should be an even number of bucktes, but `self.num_bucktes`: {self.num_buckets}"
rotation_size = self.num_buckets
num_buckets = self.num_buckets
else:
# Factorize the hash if self.num_buckets is a list or tuple
rotation_size, num_buckets = 0, 1
for bucket_factor in self.num_buckets:
assert bucket_factor % 2 == 0, "The number of buckets should be even, but `num_bucket`: {}".format(
bucket_factor
)
assert (
bucket_factor % 2 == 0
), f"The number of buckets should be even, but `num_bucket`: {bucket_factor}"
rotation_size = rotation_size + bucket_factor
num_buckets = num_buckets * bucket_factor
......@@ -714,7 +710,7 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
if num_buckets > num_buckets_limit:
num_buckets = [2 ** (num_buckets_pow_2 // 2), 2 ** (num_buckets_pow_2 - num_buckets_pow_2 // 2)]
logger.warning("config.num_buckets is not set. Setting config.num_buckets to {}...".format(num_buckets))
logger.warning(f"config.num_buckets is not set. Setting config.num_buckets to {num_buckets}...")
# set num buckets in config to be properly saved
self.config.num_buckets = num_buckets
......@@ -1085,19 +1081,13 @@ class LocalSelfAttention(nn.Module, EfficientAttentionMixin):
assert (
query_vectors.shape[-1] == self.attention_head_size
), "last dim of query_key_vectors is {} but should be {}.".format(
query_vectors.shape[-1], self.attention_head_size
)
), f"last dim of query_key_vectors is {query_vectors.shape[-1]} but should be {self.attention_head_size}."
assert (
key_vectors.shape[-1] == self.attention_head_size
), "last dim of query_key_vectors is {} but should be {}.".format(
key_vectors.shape[-1], self.attention_head_size
)
), f"last dim of query_key_vectors is {key_vectors.shape[-1]} but should be {self.attention_head_size}."
assert (
value_vectors.shape[-1] == self.attention_head_size
), "last dim of query_key_vectors is {} but should be {}.".format(
value_vectors.shape[-1], self.attention_head_size
)
), f"last dim of query_key_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}."
if self.chunk_length is None:
assert (
......@@ -1280,9 +1270,8 @@ class ReformerAttention(nn.Module):
self.self_attention = LocalSelfAttention(config)
else:
raise NotImplementedError(
"Only attn layer types 'lsh' and 'local' exist, but got `config.attn_layers`: {}. Select attn layer types from ['lsh', 'local'] only.".format(
self.attn_layers
)
f"Only attn layer types 'lsh' and 'local' exist, but got `config.attn_layers`: {self.attn_layers}. "
"Select attn layer types from ['lsh', 'local'] only."
)
self.output = ReformerSelfOutput(config)
......@@ -2036,7 +2025,7 @@ class ReformerModel(ReformerPreTrainedModel):
assert (
len(input_shape) == 2
), "`input_ids` have be of shape `[batch_size, sequence_length]`, but got shape: {}".format(input_shape)
), f"`input_ids` have be of shape `[batch_size, sequence_length]`, but got shape: {input_shape}"
if past_buckets_states is not None:
assert not self.training, "`past_buckets_states` can only be used for inference, not for training`."
......@@ -2062,9 +2051,9 @@ class ReformerModel(ReformerPreTrainedModel):
if self.training is True:
raise ValueError(
"If training, sequence Length {} has to be a multiple of least common multiple chunk_length {}. Please consider padding the input to a length of {}.".format(
input_shape[-1], least_common_mult_chunk_length, input_shape[-1] + padding_length
)
f"If training, sequence length {input_shape[-1]} has to be a multiple of least common multiple "
f"chunk_length {least_common_mult_chunk_length}. Please consider padding the input to a length "
f"of {input_shape[-1] + padding_length}."
)
# pad input
......@@ -2134,9 +2123,8 @@ class ReformerModel(ReformerPreTrainedModel):
device=None,
):
logger.info(
"Input ids are automatically padded from {} to {} to be a multiple of `config.chunk_length`: {}".format(
input_shape[-1], input_shape[-1] + padding_length, padded_seq_length
)
f"Input ids are automatically padded from {input_shape[-1]} to {input_shape[-1] + padding_length} to be a "
f"multiple of `config.chunk_length`: {padded_seq_length}"
)
padded_input_ids = torch.full(
......
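Several of the Reformer hunks above also replace `assert`-with-message checks by explicit `if`/`raise ValueError` blocks while converting the message to an f-string. A minimal sketch of that pattern, using a hypothetical config object rather than a real `ReformerConfig`:

```python
class DummyConfig:
    """Hypothetical stand-in for a model config, only for this sketch."""
    axial_pos_embds_dim = (64, 192)
    hidden_size = 256

config = DummyConfig()

# Before: assert statements are stripped under `python -O`, and the message is built with str.format.
assert sum(config.axial_pos_embds_dim) == config.hidden_size, (
    "Make sure that config.axial_pos_embds factors: {} sum to config.hidden_size: {}".format(
        config.axial_pos_embds_dim, config.hidden_size
    )
)

# After: an explicit exception that survives -O, with an f-string message.
if sum(config.axial_pos_embds_dim) != config.hidden_size:
    raise ValueError(
        f"Make sure that config.axial_pos_embds factors: {config.axial_pos_embds_dim} sum to "
        f"config.hidden_size: {config.hidden_size}"
    )
```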
......@@ -131,7 +131,7 @@ class ReformerTokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
......
......@@ -107,7 +107,7 @@ class ReformerTokenizerFast(PreTrainedTokenizerFast):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
......
......@@ -149,8 +149,8 @@ class RobertaSelfAttention(nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
self.num_attention_heads = config.num_attention_heads
......
......@@ -396,7 +396,7 @@ class TFRobertaEncoder(tf.keras.layers.Layer):
def __init__(self, config: RobertaConfig, **kwargs):
super().__init__(**kwargs)
self.layer = [TFRobertaLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)]
self.layer = [TFRobertaLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]
def call(
self,
......
......@@ -172,8 +172,7 @@ class SqueezeBertSelfAttention(nn.Module):
super().__init__()
if cin % config.num_attention_heads != 0:
raise ValueError(
"cin (%d) is not a multiple of the number of attention "
"heads (%d)" % (cin, config.num_attention_heads)
f"cin ({cin}) is not a multiple of the number of attention heads ({config.num_attention_heads})"
)
self.num_attention_heads = config.num_attention_heads
self.attention_head_size = int(cin / config.num_attention_heads)
......
......@@ -27,14 +27,14 @@ logging.set_verbosity_info()
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path):
# Initialise PyTorch model
config = T5Config.from_json_file(config_file)
print("Building PyTorch model from configuration: {}".format(str(config)))
print(f"Building PyTorch model from configuration: {config}")
model = T5ForConditionalGeneration(config)
# Load weights from tf checkpoint
load_tf_weights_in_t5(model, config, tf_checkpoint_path)
# Save pytorch-model
print("Save PyTorch model to {}".format(pytorch_dump_path))
print(f"Save PyTorch model to {pytorch_dump_path}")
model.save_pretrained(pytorch_dump_path)
......
......@@ -82,13 +82,13 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
tf_weights = {}
for name, shape in init_vars:
logger.info("Loading TF weight {} with shape {}".format(name, shape))
logger.info(f"Loading TF weight {name} with shape {shape}")
array = tf.train.load_variable(tf_path, name)
names.append(name)
tf_weights[name] = array
......@@ -101,11 +101,11 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"]
for n in name
):
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
tf_weights.pop(txt_name, None)
continue
if "_slot_" in name[-1]:
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
tf_weights.pop(txt_name, None)
continue
pointer = model
......@@ -149,7 +149,7 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
try:
pointer = getattr(pointer, scope_names[0])
except AttributeError:
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
if len(scope_names) >= 2:
num = int(scope_names[1])
......@@ -157,7 +157,7 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
if scope_names[0] not in ["kernel", "scale", "embedding"]:
pointer = getattr(pointer, "weight")
if scope_names[0] != "embedding":
logger.info("Transposing numpy weight of shape {} for {}".format(array.shape, name))
logger.info(f"Transposing numpy weight of shape {array.shape} for {name}")
array = np.transpose(array)
try:
assert (
......@@ -166,11 +166,11 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
logger.info("Initialize PyTorch weight {}".format(name))
logger.info(f"Initialize PyTorch weight {name}")
pointer.data = torch.from_numpy(array.astype(np.float32))
tf_weights.pop(txt_name, None)
logger.info("Weights not copied to PyTorch model: {}".format(", ".join(tf_weights.keys())))
logger.info(f"Weights not copied to PyTorch model: {', '.join(tf_weights.keys())}.")
return model
......@@ -428,9 +428,7 @@ class T5Attention(nn.Module):
if past_key_value is not None:
assert (
len(past_key_value) == 2
), "past_key_value should have 2 past states: keys and values. Got {} past states".format(
len(past_key_value)
)
), f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states"
real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length
key_length = real_seq_length if key_value_states is None else key_value_states.shape[1]
......@@ -618,12 +616,12 @@ class T5Block(nn.Module):
assert self.is_decoder, "Only decoder can use `past_key_values`"
expected_num_past_key_values = 2 if encoder_hidden_states is None else 4
error_message = "There should be {} past states. 2 (past / key) for self attention.{} Got {} past key / value states".format(
expected_num_past_key_values,
"2 (past / key) for cross attention" if expected_num_past_key_values == 4 else "",
len(past_key_value),
)
assert len(past_key_value) == expected_num_past_key_values, error_message
if len(past_key_value) != expected_num_past_key_values:
raise ValueError(
f"There should be {expected_num_past_key_values} past states. "
f"{'2 (past / key) for cross attention' if expected_num_past_key_values == 4 else ''}."
f"Got {len(past_key_value)} past key / value states"
)
self_attn_past_key_value = past_key_value[:2]
cross_attn_past_key_value = past_key_value[2:]
......@@ -888,9 +886,7 @@ class T5Stack(T5PreTrainedModel):
mask_seq_length = past_key_values[0][0].shape[2] + seq_length if past_key_values is not None else seq_length
if use_cache is True:
assert self.is_decoder, ":obj:`use_cache` can only be set to `True` if {} is used as a decoder".format(
self
)
assert self.is_decoder, f":obj:`use_cache` can only be set to `True` if {self} is used as a decoder"
if attention_mask is None:
attention_mask = torch.ones(batch_size, mask_seq_length).to(inputs_embeds.device)
......
......@@ -273,9 +273,7 @@ class TFT5Attention(tf.keras.layers.Layer):
if past_key_value is not None:
assert (
len(past_key_value) == 2
), "past_key_value should have 2 past states: keys and values. Got {} past states".format(
len(past_key_value)
)
), f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states"
real_seq_length += shape_list(past_key_value[0])[2] if query_length is None else query_length
key_length = real_seq_length if key_value_states is None else shape_list(key_value_states)[1]
......@@ -472,7 +470,7 @@ class TFT5Block(tf.keras.layers.Layer):
)
)
self.layer.append(TFT5LayerFF(config, name="layer_._{}".format(len(self.layer))))
self.layer.append(TFT5LayerFF(config, name=f"layer_._{len(self.layer)}"))
def call(
self,
......@@ -494,12 +492,12 @@ class TFT5Block(tf.keras.layers.Layer):
assert self.is_decoder, "Only decoder can use `past_key_values`"
expected_num_past_key_values = 2 if encoder_hidden_states is None else 4
error_message = "There should be {} past states. 2 (past / key) for self attention.{} Got {} past key / value states".format(
expected_num_past_key_values,
"2 (past / key) for cross attention" if expected_num_past_key_values == 4 else "",
len(past_key_value),
)
assert len(past_key_value) == expected_num_past_key_values, error_message
if len(past_key_value) != expected_num_past_key_values:
raise ValueError(
f"There should be {expected_num_past_key_values} past states. "
f"{'2 (past / key) for cross attention' if expected_num_past_key_values == 4 else ''}."
f"Got {len(past_key_value)} past key / value states"
)
self_attn_past_key_value = past_key_value[:2]
cross_attn_past_key_value = past_key_value[2:]
......@@ -579,11 +577,7 @@ class TFT5MainLayer(tf.keras.layers.Layer):
self.num_hidden_layers = config.num_layers
self.block = [
TFT5Block(
config,
has_relative_attention_bias=bool(i == 0),
name="block_._{}".format(i),
)
TFT5Block(config, has_relative_attention_bias=bool(i == 0), name=f"block_._{i}")
for i in range(config.num_layers)
]
self.final_layer_norm = TFT5LayerNorm(epsilon=config.layer_norm_epsilon, name="final_layer_norm")
......
......@@ -104,7 +104,7 @@ class T5Tokenizer(PreTrainedTokenizer):
):
# Add extra_ids to the special token list
if extra_ids > 0 and additional_special_tokens is None:
additional_special_tokens = ["<extra_id_{}>".format(i) for i in range(extra_ids)]
additional_special_tokens = [f"<extra_id_{i}>" for i in range(extra_ids)]
elif extra_ids > 0 and additional_special_tokens is not None:
# Check that we have the right number of extra_id special tokens
extra_tokens = len(set(filter(lambda x: bool("extra_id" in x), additional_special_tokens)))
......@@ -257,7 +257,7 @@ class T5Tokenizer(PreTrainedTokenizer):
if index < self.sp_model.get_piece_size():
token = self.sp_model.IdToPiece(index)
else:
token = "<extra_id_{}>".format(self.vocab_size - 1 - index)
token = f"<extra_id_{self.vocab_size - 1 - index}>"
return token
def convert_tokens_to_string(self, tokens):
......@@ -276,7 +276,7 @@ class T5Tokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
......
......@@ -115,7 +115,7 @@ class T5TokenizerFast(PreTrainedTokenizerFast):
):
# Add extra_ids to the special token list
if extra_ids > 0 and additional_special_tokens is None:
additional_special_tokens = ["<extra_id_{}>".format(i) for i in range(extra_ids)]
additional_special_tokens = [f"<extra_id_{i}>" for i in range(extra_ids)]
elif extra_ids > 0 and additional_special_tokens is not None:
# Check that we have the right number of extra special tokens
extra_tokens = len(set(filter(lambda x: bool("extra_id_" in x), additional_special_tokens)))
......@@ -141,7 +141,7 @@ class T5TokenizerFast(PreTrainedTokenizerFast):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
out_vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
......
......@@ -82,20 +82,20 @@ def convert_tf_checkpoint_to_pytorch(
elif task == "INTERMEDIATE_PRETRAINING":
model = TapasModel(config=config)
print("Building PyTorch model from configuration: {}".format(str(config)))
print(f"Building PyTorch model from configuration: {config}")
# Load weights from tf checkpoint
load_tf_weights_in_tapas(model, config, tf_checkpoint_path)
# Save pytorch-model (weights and configuration)
print("Save PyTorch model to {}".format(pytorch_dump_path))
print(f"Save PyTorch model to {pytorch_dump_path}")
model.save_pretrained(pytorch_dump_path[:-17])
# Save tokenizer files
dir_name = r"C:\Users\niels.rogge\Documents\Python projecten\tensorflow\Tensorflow models\SQA\Base\tapas_sqa_inter_masklm_base_reset"
tokenizer = TapasTokenizer(vocab_file=dir_name + r"\vocab.txt", model_max_length=512)
print("Save tokenizer files to {}".format(pytorch_dump_path))
print(f"Save tokenizer files to {pytorch_dump_path}")
tokenizer.save_pretrained(pytorch_dump_path[:-17])
print("Used relative position embeddings:", model.config.reset_position_index_per_cell)
......
......@@ -142,13 +142,13 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path):
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
arrays = []
for name, shape in init_vars:
logger.info("Loading TF weight {} with shape {}".format(name, shape))
logger.info(f"Loading TF weight {name} with shape {shape}")
array = tf.train.load_variable(tf_path, name)
names.append(name)
arrays.append(array)
......@@ -169,19 +169,19 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path):
]
for n in name
):
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
# in case the model is TapasForSequenceClassification, we skip output_bias and output_weights
# since these are not used for classification
if isinstance(model, TapasForSequenceClassification):
if any(n in ["output_bias", "output_weights"] for n in name):
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
# in case the model is TapasModel, we skip output_bias, output_weights, output_bias_cls and output_weights_cls
# since this model does not have MLM and NSP heads
if isinstance(model, TapasModel):
if any(n in ["output_bias", "output_weights", "output_bias_cls", "output_weights_cls"] for n in name):
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
# if first scope name starts with "bert", change it to "tapas"
if name[0] == "bert":
......@@ -223,7 +223,7 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path):
try:
pointer = getattr(pointer, scope_names[0])
except AttributeError:
logger.info("Skipping {}".format("/".join(name)))
logger.info(f"Skipping {'/'.join(name)}")
continue
if len(scope_names) >= 2:
num = int(scope_names[1])
......@@ -241,7 +241,7 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path):
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
logger.info("Initialize PyTorch weight {}".format(name))
logger.info(f"Initialize PyTorch weight {name}")
# Added a check to see whether the array is a scalar (because bias terms in Tapas checkpoints can be
# scalar => should first be converted to numpy arrays)
if np.isscalar(array):
......
......@@ -324,8 +324,8 @@ class TapasTokenizer(PreTrainedTokenizer):
if not os.path.isfile(vocab_file):
raise ValueError(
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)
f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
)
self.vocab = load_vocab(vocab_file)
self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
......@@ -1208,9 +1208,9 @@ class TapasTokenizer(PreTrainedTokenizer):
if max_length is None and len(encoded_inputs["input_ids"]) > self.model_max_length and verbose:
if not self.deprecation_warnings.get("sequence-length-is-longer-than-the-specified-maximum", False):
logger.warning(
"Token indices sequence length is longer than the specified maximum sequence length "
"for this model ({} > {}). Running this sequence through the model will result in "
"indexing errors".format(len(encoded_inputs["input_ids"]), self.model_max_length)
f"Token indices sequence length is longer than the specified maximum sequence length "
f"for this model ({len(encoded_inputs['input_ids'])} > {self.model_max_length}). Running this "
"sequence through the model will result in indexing errors."
)
self.deprecation_warnings["sequence-length-is-longer-than-the-specified-maximum"] = True
......@@ -1670,7 +1670,7 @@ class TapasTokenizer(PreTrainedTokenizer):
def _find_tokens(self, text, segment):
"""Return start index of segment in text or None."""
logging.info("text: %s %s", text, segment)
logging.info(f"text: {text} {segment}")
for index in range(1 + len(text) - len(segment)):
for seg_index, seg_token in enumerate(segment):
if text[index + seg_index].piece != seg_token.piece:
......@@ -1685,7 +1685,7 @@ class TapasTokenizer(PreTrainedTokenizer):
answer_text,
):
"""Returns all occurrences of answer_text in the table."""
logging.info("answer text: %s", answer_text)
logging.info(f"answer text: {answer_text}")
for row_index, row in enumerate(tokenized_table.rows):
if row_index == 0:
# We don't search for answers in the header.
......@@ -2347,7 +2347,7 @@ _INF = float("INF")
def _get_numeric_value_from_date(date, mask):
"""Converts date (datetime Python object) to a NumericValue object with a Date object value."""
if date.year < _MIN_YEAR or date.year > _MAX_YEAR:
raise ValueError("Invalid year: %d" % date.year)
raise ValueError(f"Invalid year: {date.year}")
new_date = Date()
if mask.year:
......@@ -2523,7 +2523,7 @@ def _get_value_type(numeric_value):
return NUMBER_TYPE
elif numeric_value.date is not None:
return DATE_TYPE
raise ValueError("Unknown type: %s" % numeric_value)
raise ValueError(f"Unknown type: {numeric_value}")
def _get_value_as_primitive_value(numeric_value):
......@@ -2541,7 +2541,7 @@ def _get_value_as_primitive_value(numeric_value):
if date.day is not None:
value_tuple[2] = float(date.day)
return tuple(value_tuple)
raise ValueError("Unknown type: %s" % numeric_value)
raise ValueError(f"Unknown type: {numeric_value}")
def _get_all_types(numeric_values):
......@@ -2567,7 +2567,7 @@ def get_numeric_sort_key_fn(numeric_values):
"""
value_types = _get_all_types(numeric_values)
if len(value_types) != 1:
raise ValueError("No common value type in %s" % numeric_values)
raise ValueError(f"No common value type in {numeric_values}")
value_type = next(iter(value_types))
if value_type == NUMBER_TYPE:
......@@ -2586,7 +2586,7 @@ def get_numeric_sort_key_fn(numeric_values):
valid_indexes.discard(tuple_index)
if not valid_indexes:
raise ValueError("No common value in %s" % numeric_values)
raise ValueError(f"No common value in {numeric_values}")
def _sort_key_fn(numeric_value):
value = _get_value_as_primitive_value(numeric_value)
......@@ -2618,8 +2618,7 @@ def _consolidate_numeric_values(row_index_to_values, min_consolidation_fraction,
return {}
max_count = max(type_counts.values())
if max_count < len(row_index_to_values) * min_consolidation_fraction:
# logging.log_every_n(logging.INFO, 'Can\'t consolidate types: %s %s %d', 100,
# debug_info, row_index_to_values, max_count)
# logging.log_every_n(logging.INFO, f'Can\'t consolidate types: {debug_info} {row_index_to_values} {max_count}', 100)
return {}
valid_types = set()
......@@ -2708,15 +2707,13 @@ def filter_invalid_unicode_from_table(table):
cell, is_invalid = filter_invalid_unicode(cell)
if is_invalid:
logging.warning(
"Scrub an invalid table body @ table_id: %s, row_index: %d, " "col_index: %d",
table.table_id,
row_index,
col_index,
f"Scrub an invalid table body @ table_id: {table.table_id}, row_index: {row_index}, "
f"col_index: {col_index}",
)
for col_index, column in enumerate(table.columns):
column, is_invalid = filter_invalid_unicode(column)
if is_invalid:
logging.warning("Scrub an invalid table header @ table_id: %s, col_index: %d", table.table_id, col_index)
logging.warning(f"Scrub an invalid table header @ table_id: {table.table_id}, col_index: {col_index}")
def add_numeric_table_values(table, min_consolidation_fraction=0.7, debug_info=None):
......
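Placeholders such as `{'/'.join(name)}` and `{len(encoded_inputs['input_ids'])}` in the hunks above rely on the quotes inside the braces differing from the quotes delimiting the f-string; before Python 3.12 reusing the same quote character is a syntax error. A short sketch with hypothetical data:

```python
# Hypothetical data, only for illustration.
name = ["adam_v", "encoder", "kernel"]
encoded_inputs = {"input_ids": list(range(600))}
model_max_length = 512

# Single quotes inside a double-quoted f-string are fine on any Python 3.6+.
print(f"Skipping {'/'.join(name)}")

# Dictionary access inside a placeholder follows the same quoting rule.
print(
    "Token indices sequence length is longer than the specified maximum sequence length "
    f"for this model ({len(encoded_inputs['input_ids'])} > {model_max_length})."
)
```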
......@@ -48,14 +48,14 @@ def convert_transfo_xl_checkpoint_to_pytorch(
corpus = pickle.load(fp, encoding="latin1")
# Save vocabulary and dataset cache as Dictionaries (should be better than pickles for the long-term)
pytorch_vocab_dump_path = pytorch_dump_folder_path + "/" + VOCAB_FILES_NAMES["pretrained_vocab_file"]
print("Save vocabulary to {}".format(pytorch_vocab_dump_path))
print(f"Save vocabulary to {pytorch_vocab_dump_path}")
corpus_vocab_dict = corpus.vocab.__dict__
torch.save(corpus_vocab_dict, pytorch_vocab_dump_path)
corpus_dict_no_vocab = corpus.__dict__
corpus_dict_no_vocab.pop("vocab", None)
pytorch_dataset_dump_path = pytorch_dump_folder_path + "/" + CORPUS_NAME
print("Save dataset to {}".format(pytorch_dataset_dump_path))
print(f"Save dataset to {pytorch_dataset_dump_path}")
torch.save(corpus_dict_no_vocab, pytorch_dataset_dump_path)
if tf_checkpoint_path:
......@@ -63,22 +63,22 @@ def convert_transfo_xl_checkpoint_to_pytorch(
config_path = os.path.abspath(transfo_xl_config_file)
tf_path = os.path.abspath(tf_checkpoint_path)
print("Converting Transformer XL checkpoint from {} with config at {}".format(tf_path, config_path))
print(f"Converting Transformer XL checkpoint from {tf_path} with config at {config_path}.")
# Initialise PyTorch model
if transfo_xl_config_file == "":
config = TransfoXLConfig()
else:
config = TransfoXLConfig.from_json_file(transfo_xl_config_file)
print("Building PyTorch model from configuration: {}".format(str(config)))
print(f"Building PyTorch model from configuration: {config}")
model = TransfoXLLMHeadModel(config)
model = load_tf_weights_in_transfo_xl(model, config, tf_path)
# Save pytorch-model
pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)
pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME)
print("Save PyTorch model to {}".format(os.path.abspath(pytorch_weights_dump_path)))
print(f"Save PyTorch model to {os.path.abspath(pytorch_weights_dump_path)}")
torch.save(model.state_dict(), pytorch_weights_dump_path)
print("Save configuration file to {}".format(os.path.abspath(pytorch_config_dump_path)))
print(f"Save configuration file to {os.path.abspath(pytorch_config_dump_path)}")
with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
f.write(config.to_json_string())
......
......@@ -368,7 +368,7 @@ class TFAdaptiveEmbedding(tf.keras.layers.Layer):
r_idx - l_idx,
d_emb_i,
init_std,
name="emb_layers_._{}".format(i),
name=f"emb_layers_._{i}",
)
)
......@@ -380,7 +380,7 @@ class TFAdaptiveEmbedding(tf.keras.layers.Layer):
shape=(d_emb_i, self.d_proj),
initializer=get_initializer(self.init_std),
trainable=True,
name="emb_projs_._{}".format(i),
name=f"emb_projs_._{i}",
)
)
......@@ -467,7 +467,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
layer_norm_epsilon=config.layer_norm_epsilon,
init_std=config.init_std,
output_attentions=self.output_attentions,
name="layers_._{}".format(i),
name=f"layers_._{i}",
)
)
else: # learnable embeddings and absolute embeddings
......
......@@ -59,25 +59,22 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
shape=(self.d_embed, self.d_proj),
initializer="zeros",
trainable=True,
name="out_projs_._{}".format(i),
name=f"out_projs_._{i}",
)
self.out_projs.append(weight)
else:
self.out_projs.append(None)
weight = self.add_weight(
shape=(
self.vocab_size,
self.d_embed,
),
shape=(self.vocab_size, self.d_embed),
initializer="zeros",
trainable=True,
name="out_layers_._{}_._weight".format(i),
name=f"out_layers_._{i}_._weight",
)
bias = self.add_weight(
shape=(self.vocab_size,),
initializer="zeros",
trainable=True,
name="out_layers_._{}_._bias".format(i),
name=f"out_layers_._{i}_._bias",
)
self.out_layers.append((weight, bias))
else:
......@@ -86,23 +83,20 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
d_emb_i = self.d_embed // (self.div_val ** i)
weight = self.add_weight(
shape=(d_emb_i, self.d_proj), initializer="zeros", trainable=True, name="out_projs_._{}".format(i)
shape=(d_emb_i, self.d_proj), initializer="zeros", trainable=True, name=f"out_projs_._{i}"
)
self.out_projs.append(weight)
weight = self.add_weight(
shape=(
r_idx - l_idx,
d_emb_i,
),
shape=(r_idx - l_idx, d_emb_i),
initializer="zeros",
trainable=True,
name="out_layers_._{}_._weight".format(i),
name=f"out_layers_._{i}_._weight",
)
bias = self.add_weight(
shape=(r_idx - l_idx,),
initializer="zeros",
trainable=True,
name="out_layers_._{}_._bias".format(i),
name=f"out_layers_._{i}_._bias",
)
self.out_layers.append((weight, bias))
super().build(input_shape)
......