"docs/vscode:/vscode.git/clone" did not exist on "7ca46335553609e4852dcb018c73cd5215e6e25a"
Unverified commit acc3bd9d, authored by Sylvain Gugger and committed by GitHub

Enforce string-formatting with f-strings (#10980)



* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
parent d0b3797a
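For readers skimming the diff below: the pattern enforced throughout is the replacement of `str.format()` and `%`-style interpolation with f-strings. A minimal before/after sketch (the values and variable names here are illustrative only, not taken from the changed files):

    # Hypothetical values, for illustration only.
    archive_file = "pytorch_model.bin"
    num_buckets = 64

    # Before: str.format() and %-style interpolation.
    old_info = "loading file {}".format(archive_file)
    old_warn = "config.num_buckets is not set. Setting config.num_buckets to %d..." % num_buckets

    # After: f-strings evaluate the expression inline at the call site.
    new_info = f"loading file {archive_file}"
    new_warn = f"config.num_buckets is not set. Setting config.num_buckets to {num_buckets}..."

    # Both styles produce the same strings.
    assert old_info == new_info
    assert old_warn == new_warn

The same rewrite applies to `%s`/`%d` placeholders, which is what the "Treat %s and %d" step in the commit message covers.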
@@ -133,20 +133,20 @@ class LegacyIndex(Index):
            )
            raise EnvironmentError(msg)
        if resolved_archive_file == archive_file:
-            logger.info("loading file {}".format(archive_file))
+            logger.info(f"loading file {archive_file}")
        else:
-            logger.info("loading file {} from cache at {}".format(archive_file, resolved_archive_file))
+            logger.info(f"loading file {archive_file} from cache at {resolved_archive_file}")
        return resolved_archive_file

    def _load_passages(self):
-        logger.info("Loading passages from {}".format(self.index_path))
+        logger.info(f"Loading passages from {self.index_path}")
        passages_path = self._resolve_path(self.index_path, self.PASSAGE_FILENAME)
        with open(passages_path, "rb") as passages_file:
            passages = pickle.load(passages_file)
        return passages

    def _deserialize_index(self):
-        logger.info("Loading index from {}".format(self.index_path))
+        logger.info(f"Loading index from {self.index_path}")
        resolved_index_path = self._resolve_path(self.index_path, self.INDEX_FILENAME + ".index.dpr")
        self.index = faiss.read_index(resolved_index_path)
        resolved_meta_path = self._resolve_path(self.index_path, self.INDEX_FILENAME + ".index_meta.dpr")
@@ -200,12 +200,12 @@ class HFIndexBase(Index):

    def _check_dataset_format(self, with_index: bool):
        if not isinstance(self.dataset, Dataset):
-            raise ValueError("Dataset should be a datasets.Dataset object, but got {}".format(type(self.dataset)))
+            raise ValueError(f"Dataset should be a datasets.Dataset object, but got {type(self.dataset)}")
        if len({"title", "text", "embeddings"} - set(self.dataset.column_names)) > 0:
            raise ValueError(
                "Dataset should be a dataset with the following columns: "
                "title (str), text (str) and embeddings (arrays of dimension vector_size), "
-                "but got columns {}".format(self.dataset.column_names)
+                f"but got columns {self.dataset.column_names}"
            )
        if with_index and "embeddings" not in self.dataset.list_indexes():
            raise ValueError(
@@ -269,7 +269,7 @@ class CanonicalHFIndex(HFIndexBase):
        self.index_name = index_name
        self.index_path = index_path
        self.use_dummy_dataset = use_dummy_dataset
-        logger.info("Loading passages from {}".format(self.dataset_name))
+        logger.info(f"Loading passages from {self.dataset_name}")
        dataset = load_dataset(
            self.dataset_name, with_index=False, split=self.dataset_split, dummy=self.use_dummy_dataset
        )
@@ -277,10 +277,10 @@ class CanonicalHFIndex(HFIndexBase):

    def init_index(self):
        if self.index_path is not None:
-            logger.info("Loading index from {}".format(self.index_path))
+            logger.info(f"Loading index from {self.index_path}")
            self.dataset.load_faiss_index("embeddings", file=self.index_path)
        else:
-            logger.info("Loading index from {}".format(self.dataset_name + " with index name " + self.index_name))
+            logger.info(f"Loading index from {self.dataset_name} with index name {self.index_name}")
            self.dataset = load_dataset(
                self.dataset_name,
                with_embeddings=True,
@@ -313,7 +313,7 @@ class CustomHFIndex(HFIndexBase):

    @classmethod
    def load_from_disk(cls, vector_size, dataset_path, index_path):
-        logger.info("Loading passages from {}".format(dataset_path))
+        logger.info(f"Loading passages from {dataset_path}")
        if dataset_path is None or index_path is None:
            raise ValueError(
                "Please provide ``dataset_path`` and ``index_path`` after calling ``dataset.save_to_disk(dataset_path)`` "
@@ -324,7 +324,7 @@ class CustomHFIndex(HFIndexBase):

    def init_index(self):
        if not self.is_initialized():
-            logger.info("Loading index from {}".format(self.index_path))
+            logger.info(f"Loading index from {self.index_path}")
            self.dataset.load_faiss_index("embeddings", file=self.index_path)
            self._index_initialized = True
@@ -520,9 +520,7 @@ class RagRetriever:
            start_time = time.time()
            ids, vectors = self.index.get_top_docs(question_hidden_states, n_docs)
            logger.debug(
-                "index search time: {} sec, batch size {}".format(
-                    time.time() - start_time, question_hidden_states.shape
-                )
+                f"index search time: {time.time() - start_time} sec, batch size {question_hidden_states.shape}"
            )
            ids_batched.extend(ids)
            vectors_batched.extend(vectors)
...
@@ -34,7 +34,7 @@ class RagTokenizer:

    def save_pretrained(self, save_directory):
        if os.path.isfile(save_directory):
-            raise ValueError("Provided path ({}) should be a directory, not a file".format(save_directory))
+            raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")
        os.makedirs(save_directory, exist_ok=True)
        question_encoder_path = os.path.join(save_directory, "question_encoder_tokenizer")
        generator_path = os.path.join(save_directory, "generator_tokenizer")
...
@@ -30,10 +30,10 @@ logging.set_verbosity_info()

def set_param(torch_layer, weight, bias=None):
    # set parameter of one layer
-    assert torch_layer.weight.shape == weight.shape, "{} layer.weight does not match".format(torch_layer)
+    assert torch_layer.weight.shape == weight.shape, f"{torch_layer} layer.weight does not match"
    torch_layer.weight = torch.nn.Parameter(weight)
    if bias is not None:
-        assert torch_layer.bias.shape == bias.shape, "{} layer.bias does not match".format(torch_layer)
+        assert torch_layer.bias.shape == bias.shape, f"{torch_layer} layer.bias does not match"
        torch_layer.bias = torch.nn.Parameter(bias)
@@ -150,9 +150,9 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size):
        position_embeddings = torch_model_reformer.embeddings.position_embeddings
        for emb_idx in range(len(position_embeddings.weights)):
            emb_weights = np.asarray(weights[3][emb_idx][0])
-            assert position_embeddings.weights[emb_idx].shape == emb_weights.shape, "{} emb does not match".format(
-                position_embeddings[emb_idx]
-            )
+            assert (
+                position_embeddings.weights[emb_idx].shape == emb_weights.shape
+            ), f"{position_embeddings[emb_idx]} emb does not match"
            position_embeddings.weights[emb_idx] = torch.nn.Parameter(torch.tensor(emb_weights))

    trax_layer_weights = weights[5]
@@ -185,7 +185,7 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size):

def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file, pytorch_dump_path):
    # Initialise PyTorch model
    config = ReformerConfig.from_json_file(config_file)
-    print("Building PyTorch model from configuration: {}".format(str(config)))
+    print(f"Building PyTorch model from configuration: {config}")
    model = ReformerModelWithLMHead(config)

    with open(trax_model_pkl_path, "rb") as f:
@@ -194,7 +194,7 @@ def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file, pytorch_dump_path):
    set_model_weights_in_torch(model_weights, model, config.hidden_size)

    # Save pytorch-model
-    print("Save PyTorch model to {}".format(pytorch_dump_path))
+    print(f"Save PyTorch model to {pytorch_dump_path}")
    torch.save(model.state_dict(), pytorch_dump_path)
...
@@ -90,9 +90,8 @@ def _get_least_common_mult_chunk_len(config):
        return np.lcm(config.lsh_attn_chunk_length, config.local_attn_chunk_length)
    else:
        raise NotImplementedError(
-            "Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {}. Select attn layer types from ['lsh', 'local'] only.".format(
-                config.attn_layers
-            )
+            f"Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {config.attn_layers}. Select "
+            "attn layer types from ['lsh', 'local'] only."
        )
@@ -107,9 +106,8 @@ def _get_min_chunk_len(config):
        return min(config.lsh_attn_chunk_length, config.local_attn_chunk_length)
    else:
        raise NotImplementedError(
-            "Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {}. Select attn layer types from ['lsh', 'local'] only.".format(
-                config.attn_layers
-            )
+            f"Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {config.attn_layers}. Select "
+            "attn layer types from ['lsh', 'local'] only."
        )
@@ -127,11 +125,11 @@ class AxialPositionEmbeddings(nn.Module):
        self.least_common_mult_chunk_length = _get_least_common_mult_chunk_len(config)
        self.weights = nn.ParameterList()

-        assert (
-            sum(self.axial_pos_embds_dim) == config.hidden_size
-        ), "Make sure that config.axial_pos_embds factors: {} sum to config.hidden_size: {}".format(
-            self.axial_pos_embds_dim, config.hidden_size
-        )
+        if sum(self.axial_pos_embds_dim) != config.hidden_size:
+            raise ValueError(
+                f"Make sure that config.axial_pos_embds factors: {self.axial_pos_embds_dim} sum to "
+                f"config.hidden_size: {config.hidden_size}"
+            )

        # create weights
        for axis, axial_pos_embd_dim in enumerate(self.axial_pos_embds_dim):
@@ -153,11 +151,14 @@ class AxialPositionEmbeddings(nn.Module):
            ]

        if self.training is True:
-            assert (
-                reduce(mul, self.axial_pos_shape) == sequence_length
-            ), "If training, make sure that config.axial_pos_shape factors: {} multiply to sequence length. Got prod({}) != sequence_length: {}. You might want to consider padding your sequence length to {} or changing config.axial_pos_shape.".format(
-                self.axial_pos_shape, self.axial_pos_shape, sequence_length, reduce(mul, self.axial_pos_shape)
-            )
+            if reduce(mul, self.axial_pos_shape) != sequence_length:
+                raise ValueError(
+                    f"If training, make sure that config.axial_pos_shape factors: {self.axial_pos_shape} multiply to "
+                    f"sequence length. Got prod({self.axial_pos_shape}) != sequence_length: {sequence_length}. "
+                    f"You might want to consider padding your sequence length to {reduce(mul, self.axial_pos_shape)} "
+                    "or changing config.axial_pos_shape."
+                )

            if self.dropout > 0:
                weights = torch.cat(broadcasted_weights, dim=-1)
                # permute weights so that 2D correctly drops dims 1 and 2
@@ -177,13 +178,12 @@ class AxialPositionEmbeddings(nn.Module):
                )

        else:
-            assert (
-                reduce(mul, self.axial_pos_shape) >= sequence_length
-            ), "Make sure that config.axial_pos_shape factors: {} multiply at least to max(sequence_length, least_common_mult_chunk_length): max({}, {})".format(
-                self.axial_pos_shape,
-                sequence_length,
-                self.least_common_mult_chunk_length,
-            )
+            if reduce(mul, self.axial_pos_shape) < sequence_length:
+                raise ValueError(
+                    f"Make sure that config.axial_pos_shape factors: {self.axial_pos_shape} multiply at least to "
+                    f"max(sequence_length, least_common_mult_chunk_length): max({sequence_length}, "
+                    f"{self.least_common_mult_chunk_length})."
+                )

            # compute how many columns are needed
            max_position_id = position_ids.max().item()
@@ -252,11 +252,11 @@ class ReformerEmbeddings(nn.Module):
        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)

-        assert (
-            position_ids.shape[-1] <= self.max_position_embeddings
-        ), "Sequence Length: {} has to be larger equal than config.max_position_embeddings: {}".format(
-            position_ids.shape[-1], self.max_position_embeddings
-        )
+        if position_ids.shape[-1] > self.max_position_embeddings:
+            raise ValueError(
+                f"Sequence Length: {position_ids.shape[-1]} has to be larger equal than "
+                f"config.max_position_embeddings {self.max_position_embeddings}."
+            )

        # dropout
        embeddings = nn.functional.dropout(inputs_embeds, p=self.dropout, training=self.training)
@@ -322,7 +322,7 @@ class EfficientAttentionMixin:
        elif len(vectors.shape) == 3:
            return torch.reshape(vectors, split_dim_shape)
        else:
-            raise ValueError("Input vector rank should be one of [3, 4], but is: {}".format(len(vectors.shape)))
+            raise ValueError(f"Input vector rank should be one of [3, 4], but is: {len(vectors.shape)}")


class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
@@ -451,14 +451,10 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
        assert (
            query_key_vectors.shape[-1] == self.attention_head_size
-        ), "last dim of query_key_vectors is {} but should be {}.".format(
-            query_key_vectors.shape[-1], self.attention_head_size
-        )
+        ), f"last dim of query_key_vectors is {query_key_vectors.shape[-1]} but should be {self.attention_head_size}."
        assert (
            value_vectors.shape[-1] == self.attention_head_size
-        ), "last dim of value_vectors is {} but should be {}.".format(
-            value_vectors.shape[-1], self.attention_head_size
-        )
+        ), f"last dim of value_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}."

        do_standard_self_attention = (sequence_length <= self.chunk_length) or (
            use_cache and past_buckets_states[1] is not None
@@ -479,7 +475,7 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
            assert (
                int(buckets.shape[-1]) == num_hashes * sequence_length
-            ), "last dim of buckets is {}, but should be {}".format(buckets.shape[-1], num_hashes * sequence_length)
+            ), f"last dim of buckets is {buckets.shape[-1]}, but should be {num_hashes * sequence_length}"

            sorted_bucket_idx, undo_sorted_bucket_idx = self._get_sorted_bucket_idx_and_undo_sorted_bucket_idx(
                sequence_length, buckets, num_hashes
@@ -616,16 +612,16 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
        if isinstance(self.num_buckets, int):
            assert (
                self.num_buckets % 2 == 0
-            ), "There should be an even number of bucktes, but `self.num_bucktes`: {}".format(self.num_buckets)
+            ), f"There should be an even number of bucktes, but `self.num_bucktes`: {self.num_buckets}"
            rotation_size = self.num_buckets
            num_buckets = self.num_buckets
        else:
            # Factorize the hash if self.num_buckets is a list or tuple
            rotation_size, num_buckets = 0, 1
            for bucket_factor in self.num_buckets:
-                assert bucket_factor % 2 == 0, "The number of buckets should be even, but `num_bucket`: {}".format(
-                    bucket_factor
-                )
+                assert (
+                    bucket_factor % 2 == 0
+                ), f"The number of buckets should be even, but `num_bucket`: {bucket_factor}"
                rotation_size = rotation_size + bucket_factor
                num_buckets = num_buckets * bucket_factor
@@ -714,7 +710,7 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
        if num_buckets > num_buckets_limit:
            num_buckets = [2 ** (num_buckets_pow_2 // 2), 2 ** (num_buckets_pow_2 - num_buckets_pow_2 // 2)]

-        logger.warning("config.num_buckets is not set. Setting config.num_buckets to {}...".format(num_buckets))
+        logger.warning(f"config.num_buckets is not set. Setting config.num_buckets to {num_buckets}...")
        # set num buckets in config to be properly saved
        self.config.num_buckets = num_buckets
@@ -1085,19 +1081,13 @@ class LocalSelfAttention(nn.Module, EfficientAttentionMixin):
        assert (
            query_vectors.shape[-1] == self.attention_head_size
-        ), "last dim of query_key_vectors is {} but should be {}.".format(
-            query_vectors.shape[-1], self.attention_head_size
-        )
+        ), f"last dim of query_key_vectors is {query_vectors.shape[-1]} but should be {self.attention_head_size}."
        assert (
            key_vectors.shape[-1] == self.attention_head_size
-        ), "last dim of query_key_vectors is {} but should be {}.".format(
-            key_vectors.shape[-1], self.attention_head_size
-        )
+        ), f"last dim of query_key_vectors is {key_vectors.shape[-1]} but should be {self.attention_head_size}."
        assert (
            value_vectors.shape[-1] == self.attention_head_size
-        ), "last dim of query_key_vectors is {} but should be {}.".format(
-            value_vectors.shape[-1], self.attention_head_size
-        )
+        ), f"last dim of query_key_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}."

        if self.chunk_length is None:
            assert (
@@ -1280,9 +1270,8 @@ class ReformerAttention(nn.Module):
            self.self_attention = LocalSelfAttention(config)
        else:
            raise NotImplementedError(
-                "Only attn layer types 'lsh' and 'local' exist, but got `config.attn_layers`: {}. Select attn layer types from ['lsh', 'local'] only.".format(
-                    self.attn_layers
-                )
+                f"Only attn layer types 'lsh' and 'local' exist, but got `config.attn_layers`: {self.attn_layers}. "
+                "Select attn layer types from ['lsh', 'local'] only."
            )
        self.output = ReformerSelfOutput(config)
@@ -2036,7 +2025,7 @@ class ReformerModel(ReformerPreTrainedModel):
        assert (
            len(input_shape) == 2
-        ), "`input_ids` have be of shape `[batch_size, sequence_length]`, but got shape: {}".format(input_shape)
+        ), f"`input_ids` have be of shape `[batch_size, sequence_length]`, but got shape: {input_shape}"

        if past_buckets_states is not None:
            assert not self.training, "`past_buckets_states` can only be used for inference, not for training`."
@@ -2062,9 +2051,9 @@ class ReformerModel(ReformerPreTrainedModel):
            if self.training is True:
                raise ValueError(
-                    "If training, sequence Length {} has to be a multiple of least common multiple chunk_length {}. Please consider padding the input to a length of {}.".format(
-                        input_shape[-1], least_common_mult_chunk_length, input_shape[-1] + padding_length
-                    )
+                    f"If training, sequence length {input_shape[-1]} has to be a multiple of least common multiple "
+                    f"chunk_length {least_common_mult_chunk_length}. Please consider padding the input to a length "
+                    f"of {input_shape[-1] + padding_length}."
                )

            # pad input
@@ -2134,9 +2123,8 @@ class ReformerModel(ReformerPreTrainedModel):
        device=None,
    ):
        logger.info(
-            "Input ids are automatically padded from {} to {} to be a multiple of `config.chunk_length`: {}".format(
-                input_shape[-1], input_shape[-1] + padding_length, padded_seq_length
-            )
+            f"Input ids are automatically padded from {input_shape[-1]} to {input_shape[-1] + padding_length} to be a "
+            f"multiple of `config.chunk_length`: {padded_seq_length}"
        )

        padded_input_ids = torch.full(
...
@@ -131,7 +131,7 @@ class ReformerTokenizer(PreTrainedTokenizer):
    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
...
@@ -107,7 +107,7 @@ class ReformerTokenizerFast(PreTrainedTokenizerFast):
    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
...
@@ -149,8 +149,8 @@ class RobertaSelfAttention(nn.Module):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
            raise ValueError(
-                "The hidden size (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
+                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
+                f"heads ({config.num_attention_heads})"
            )

        self.num_attention_heads = config.num_attention_heads
...
@@ -396,7 +396,7 @@ class TFRobertaEncoder(tf.keras.layers.Layer):
    def __init__(self, config: RobertaConfig, **kwargs):
        super().__init__(**kwargs)

-        self.layer = [TFRobertaLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)]
+        self.layer = [TFRobertaLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]

    def call(
        self,
...
@@ -172,8 +172,7 @@ class SqueezeBertSelfAttention(nn.Module):
        super().__init__()
        if cin % config.num_attention_heads != 0:
            raise ValueError(
-                "cin (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (cin, config.num_attention_heads)
+                f"cin ({cin}) is not a multiple of the number of attention heads ({config.num_attention_heads})"
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(cin / config.num_attention_heads)
...
@@ -27,14 +27,14 @@ logging.set_verbosity_info()

def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path):
    # Initialise PyTorch model
    config = T5Config.from_json_file(config_file)
-    print("Building PyTorch model from configuration: {}".format(str(config)))
+    print(f"Building PyTorch model from configuration: {config}")
    model = T5ForConditionalGeneration(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_t5(model, config, tf_checkpoint_path)

    # Save pytorch-model
-    print("Save PyTorch model to {}".format(pytorch_dump_path))
+    print(f"Save PyTorch model to {pytorch_dump_path}")
    model.save_pretrained(pytorch_dump_path)
...
@@ -82,13 +82,13 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
        )
        raise
    tf_path = os.path.abspath(tf_checkpoint_path)
-    logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
+    logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_path)
    names = []
    tf_weights = {}
    for name, shape in init_vars:
-        logger.info("Loading TF weight {} with shape {}".format(name, shape))
+        logger.info(f"Loading TF weight {name} with shape {shape}")
        array = tf.train.load_variable(tf_path, name)
        names.append(name)
        tf_weights[name] = array
@@ -101,11 +101,11 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
            n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"]
            for n in name
        ):
-            logger.info("Skipping {}".format("/".join(name)))
+            logger.info(f"Skipping {'/'.join(name)}")
            tf_weights.pop(txt_name, None)
            continue
        if "_slot_" in name[-1]:
-            logger.info("Skipping {}".format("/".join(name)))
+            logger.info(f"Skipping {'/'.join(name)}")
            tf_weights.pop(txt_name, None)
            continue
        pointer = model
@@ -149,7 +149,7 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
            try:
                pointer = getattr(pointer, scope_names[0])
            except AttributeError:
-                logger.info("Skipping {}".format("/".join(name)))
+                logger.info(f"Skipping {'/'.join(name)}")
                continue
            if len(scope_names) >= 2:
                num = int(scope_names[1])
@@ -157,7 +157,7 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
        if scope_names[0] not in ["kernel", "scale", "embedding"]:
            pointer = getattr(pointer, "weight")
        if scope_names[0] != "embedding":
-            logger.info("Transposing numpy weight of shape {} for {}".format(array.shape, name))
+            logger.info(f"Transposing numpy weight of shape {array.shape} for {name}")
            array = np.transpose(array)
        try:
            assert (
@@ -166,11 +166,11 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
-        logger.info("Initialize PyTorch weight {}".format(name))
+        logger.info(f"Initialize PyTorch weight {name}")
        pointer.data = torch.from_numpy(array.astype(np.float32))
        tf_weights.pop(txt_name, None)

-    logger.info("Weights not copied to PyTorch model: {}".format(", ".join(tf_weights.keys())))
+    logger.info(f"Weights not copied to PyTorch model: {', '.join(tf_weights.keys())}.")
    return model
@@ -428,9 +428,7 @@ class T5Attention(nn.Module):
        if past_key_value is not None:
            assert (
                len(past_key_value) == 2
-            ), "past_key_value should have 2 past states: keys and values. Got {} past states".format(
-                len(past_key_value)
-            )
+            ), f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states"
            real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length

        key_length = real_seq_length if key_value_states is None else key_value_states.shape[1]
@@ -618,12 +616,12 @@ class T5Block(nn.Module):
            assert self.is_decoder, "Only decoder can use `past_key_values`"
            expected_num_past_key_values = 2 if encoder_hidden_states is None else 4

-            error_message = "There should be {} past states. 2 (past / key) for self attention.{} Got {} past key / value states".format(
-                expected_num_past_key_values,
-                "2 (past / key) for cross attention" if expected_num_past_key_values == 4 else "",
-                len(past_key_value),
-            )
-            assert len(past_key_value) == expected_num_past_key_values, error_message
+            if len(past_key_value) != expected_num_past_key_values:
+                raise ValueError(
+                    f"There should be {expected_num_past_key_values} past states. "
+                    f"{'2 (past / key) for cross attention' if expected_num_past_key_values == 4 else ''}."
+                    f"Got {len(past_key_value)} past key / value states"
+                )

            self_attn_past_key_value = past_key_value[:2]
            cross_attn_past_key_value = past_key_value[2:]
@@ -888,9 +886,7 @@ class T5Stack(T5PreTrainedModel):
        mask_seq_length = past_key_values[0][0].shape[2] + seq_length if past_key_values is not None else seq_length

        if use_cache is True:
-            assert self.is_decoder, ":obj:`use_cache` can only be set to `True` if {} is used as a decoder".format(
-                self
-            )
+            assert self.is_decoder, f":obj:`use_cache` can only be set to `True` if {self} is used as a decoder"

        if attention_mask is None:
            attention_mask = torch.ones(batch_size, mask_seq_length).to(inputs_embeds.device)
...
@@ -273,9 +273,7 @@ class TFT5Attention(tf.keras.layers.Layer):
        if past_key_value is not None:
            assert (
                len(past_key_value) == 2
-            ), "past_key_value should have 2 past states: keys and values. Got {} past states".format(
-                len(past_key_value)
-            )
+            ), f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states"
            real_seq_length += shape_list(past_key_value[0])[2] if query_length is None else query_length

        key_length = real_seq_length if key_value_states is None else shape_list(key_value_states)[1]
@@ -472,7 +470,7 @@ class TFT5Block(tf.keras.layers.Layer):
                )
            )
-        self.layer.append(TFT5LayerFF(config, name="layer_._{}".format(len(self.layer))))
+        self.layer.append(TFT5LayerFF(config, name=f"layer_._{len(self.layer)}"))

    def call(
        self,
@@ -494,12 +492,12 @@ class TFT5Block(tf.keras.layers.Layer):
            assert self.is_decoder, "Only decoder can use `past_key_values`"
            expected_num_past_key_values = 2 if encoder_hidden_states is None else 4

-            error_message = "There should be {} past states. 2 (past / key) for self attention.{} Got {} past key / value states".format(
-                expected_num_past_key_values,
-                "2 (past / key) for cross attention" if expected_num_past_key_values == 4 else "",
-                len(past_key_value),
-            )
-            assert len(past_key_value) == expected_num_past_key_values, error_message
+            if len(past_key_value) != expected_num_past_key_values:
+                raise ValueError(
+                    f"There should be {expected_num_past_key_values} past states. "
+                    f"{'2 (past / key) for cross attention' if expected_num_past_key_values == 4 else ''}."
+                    f"Got {len(past_key_value)} past key / value states"
+                )

            self_attn_past_key_value = past_key_value[:2]
            cross_attn_past_key_value = past_key_value[2:]
@@ -579,11 +577,7 @@ class TFT5MainLayer(tf.keras.layers.Layer):
        self.num_hidden_layers = config.num_layers

        self.block = [
-            TFT5Block(
-                config,
-                has_relative_attention_bias=bool(i == 0),
-                name="block_._{}".format(i),
-            )
+            TFT5Block(config, has_relative_attention_bias=bool(i == 0), name=f"block_._{i}")
            for i in range(config.num_layers)
        ]
        self.final_layer_norm = TFT5LayerNorm(epsilon=config.layer_norm_epsilon, name="final_layer_norm")
...
@@ -104,7 +104,7 @@ class T5Tokenizer(PreTrainedTokenizer):
    ):
        # Add extra_ids to the special token list
        if extra_ids > 0 and additional_special_tokens is None:
-            additional_special_tokens = ["<extra_id_{}>".format(i) for i in range(extra_ids)]
+            additional_special_tokens = [f"<extra_id_{i}>" for i in range(extra_ids)]
        elif extra_ids > 0 and additional_special_tokens is not None:
            # Check that we have the right number of extra_id special tokens
            extra_tokens = len(set(filter(lambda x: bool("extra_id" in x), additional_special_tokens)))
@@ -257,7 +257,7 @@ class T5Tokenizer(PreTrainedTokenizer):
        if index < self.sp_model.get_piece_size():
            token = self.sp_model.IdToPiece(index)
        else:
-            token = "<extra_id_{}>".format(self.vocab_size - 1 - index)
+            token = f"<extra_id_{self.vocab_size - 1 - index}>"
        return token

    def convert_tokens_to_string(self, tokens):
@@ -276,7 +276,7 @@ class T5Tokenizer(PreTrainedTokenizer):
    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
...
@@ -115,7 +115,7 @@ class T5TokenizerFast(PreTrainedTokenizerFast):
    ):
        # Add extra_ids to the special token list
        if extra_ids > 0 and additional_special_tokens is None:
-            additional_special_tokens = ["<extra_id_{}>".format(i) for i in range(extra_ids)]
+            additional_special_tokens = [f"<extra_id_{i}>" for i in range(extra_ids)]
        elif extra_ids > 0 and additional_special_tokens is not None:
            # Check that we have the right number of extra special tokens
            extra_tokens = len(set(filter(lambda x: bool("extra_id_" in x), additional_special_tokens)))
@@ -141,7 +141,7 @@ class T5TokenizerFast(PreTrainedTokenizerFast):
    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
...
@@ -82,20 +82,20 @@ def convert_tf_checkpoint_to_pytorch(
    elif task == "INTERMEDIATE_PRETRAINING":
        model = TapasModel(config=config)

-    print("Building PyTorch model from configuration: {}".format(str(config)))
+    print(f"Building PyTorch model from configuration: {config}")

    # Load weights from tf checkpoint
    load_tf_weights_in_tapas(model, config, tf_checkpoint_path)

    # Save pytorch-model (weights and configuration)
-    print("Save PyTorch model to {}".format(pytorch_dump_path))
+    print(f"Save PyTorch model to {pytorch_dump_path}")
    model.save_pretrained(pytorch_dump_path[:-17])

    # Save tokenizer files
    dir_name = r"C:\Users\niels.rogge\Documents\Python projecten\tensorflow\Tensorflow models\SQA\Base\tapas_sqa_inter_masklm_base_reset"
    tokenizer = TapasTokenizer(vocab_file=dir_name + r"\vocab.txt", model_max_length=512)
-    print("Save tokenizer files to {}".format(pytorch_dump_path))
+    print(f"Save tokenizer files to {pytorch_dump_path}")
    tokenizer.save_pretrained(pytorch_dump_path[:-17])

    print("Used relative position embeddings:", model.config.reset_position_index_per_cell)
...
@@ -142,13 +142,13 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path):
        )
        raise
    tf_path = os.path.abspath(tf_checkpoint_path)
-    logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
+    logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_path)
    names = []
    arrays = []
    for name, shape in init_vars:
-        logger.info("Loading TF weight {} with shape {}".format(name, shape))
+        logger.info(f"Loading TF weight {name} with shape {shape}")
        array = tf.train.load_variable(tf_path, name)
        names.append(name)
        arrays.append(array)
@@ -169,19 +169,19 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path):
            ]
            for n in name
        ):
-            logger.info("Skipping {}".format("/".join(name)))
+            logger.info(f"Skipping {'/'.join(name)}")
            continue
        # in case the model is TapasForSequenceClassification, we skip output_bias and output_weights
        # since these are not used for classification
        if isinstance(model, TapasForSequenceClassification):
            if any(n in ["output_bias", "output_weights"] for n in name):
-                logger.info("Skipping {}".format("/".join(name)))
+                logger.info(f"Skipping {'/'.join(name)}")
                continue
        # in case the model is TapasModel, we skip output_bias, output_weights, output_bias_cls and output_weights_cls
        # since this model does not have MLM and NSP heads
        if isinstance(model, TapasModel):
            if any(n in ["output_bias", "output_weights", "output_bias_cls", "output_weights_cls"] for n in name):
-                logger.info("Skipping {}".format("/".join(name)))
+                logger.info(f"Skipping {'/'.join(name)}")
                continue
        # if first scope name starts with "bert", change it to "tapas"
        if name[0] == "bert":
@@ -223,7 +223,7 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path):
            try:
                pointer = getattr(pointer, scope_names[0])
            except AttributeError:
-                logger.info("Skipping {}".format("/".join(name)))
+                logger.info(f"Skipping {'/'.join(name)}")
                continue
            if len(scope_names) >= 2:
                num = int(scope_names[1])
@@ -241,7 +241,7 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path):
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
-        logger.info("Initialize PyTorch weight {}".format(name))
+        logger.info(f"Initialize PyTorch weight {name}")
        # Added a check to see whether the array is a scalar (because bias terms in Tapas checkpoints can be
        # scalar => should first be converted to numpy arrays)
        if np.isscalar(array):
...
...@@ -324,8 +324,8 @@ class TapasTokenizer(PreTrainedTokenizer): ...@@ -324,8 +324,8 @@ class TapasTokenizer(PreTrainedTokenizer):
if not os.path.isfile(vocab_file): if not os.path.isfile(vocab_file):
raise ValueError( raise ValueError(
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file) "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
) )
self.vocab = load_vocab(vocab_file) self.vocab = load_vocab(vocab_file)
self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
...@@ -1208,9 +1208,9 @@ class TapasTokenizer(PreTrainedTokenizer): ...@@ -1208,9 +1208,9 @@ class TapasTokenizer(PreTrainedTokenizer):
if max_length is None and len(encoded_inputs["input_ids"]) > self.model_max_length and verbose: if max_length is None and len(encoded_inputs["input_ids"]) > self.model_max_length and verbose:
if not self.deprecation_warnings.get("sequence-length-is-longer-than-the-specified-maximum", False): if not self.deprecation_warnings.get("sequence-length-is-longer-than-the-specified-maximum", False):
logger.warning( logger.warning(
"Token indices sequence length is longer than the specified maximum sequence length " f"Token indices sequence length is longer than the specified maximum sequence length "
"for this model ({} > {}). Running this sequence through the model will result in " f"for this model ({len(encoded_inputs['input_ids'])} > {self.model_max_length}). Running this "
"indexing errors".format(len(encoded_inputs["input_ids"]), self.model_max_length) "sequence through the model will result in indexing errors."
) )
self.deprecation_warnings["sequence-length-is-longer-than-the-specified-maximum"] = True self.deprecation_warnings["sequence-length-is-longer-than-the-specified-maximum"] = True
...@@ -1670,7 +1670,7 @@ class TapasTokenizer(PreTrainedTokenizer): ...@@ -1670,7 +1670,7 @@ class TapasTokenizer(PreTrainedTokenizer):
def _find_tokens(self, text, segment): def _find_tokens(self, text, segment):
"""Return start index of segment in text or None.""" """Return start index of segment in text or None."""
logging.info("text: %s %s", text, segment) logging.info(f"text: {text} {segment}")
for index in range(1 + len(text) - len(segment)): for index in range(1 + len(text) - len(segment)):
for seg_index, seg_token in enumerate(segment): for seg_index, seg_token in enumerate(segment):
if text[index + seg_index].piece != seg_token.piece: if text[index + seg_index].piece != seg_token.piece:
...@@ -1685,7 +1685,7 @@ class TapasTokenizer(PreTrainedTokenizer): ...@@ -1685,7 +1685,7 @@ class TapasTokenizer(PreTrainedTokenizer):
answer_text, answer_text,
): ):
"""Returns all occurrences of answer_text in the table.""" """Returns all occurrences of answer_text in the table."""
logging.info("answer text: %s", answer_text) logging.info(f"answer text: {answer_text}")
for row_index, row in enumerate(tokenized_table.rows): for row_index, row in enumerate(tokenized_table.rows):
if row_index == 0: if row_index == 0:
# We don't search for answers in the header. # We don't search for answers in the header.
...@@ -2347,7 +2347,7 @@ _INF = float("INF") ...@@ -2347,7 +2347,7 @@ _INF = float("INF")
def _get_numeric_value_from_date(date, mask): def _get_numeric_value_from_date(date, mask):
"""Converts date (datetime Python object) to a NumericValue object with a Date object value.""" """Converts date (datetime Python object) to a NumericValue object with a Date object value."""
if date.year < _MIN_YEAR or date.year > _MAX_YEAR: if date.year < _MIN_YEAR or date.year > _MAX_YEAR:
raise ValueError("Invalid year: %d" % date.year) raise ValueError(f"Invalid year: {date.year}")
new_date = Date() new_date = Date()
if mask.year: if mask.year:
...@@ -2523,7 +2523,7 @@ def _get_value_type(numeric_value): ...@@ -2523,7 +2523,7 @@ def _get_value_type(numeric_value):
return NUMBER_TYPE return NUMBER_TYPE
elif numeric_value.date is not None: elif numeric_value.date is not None:
return DATE_TYPE return DATE_TYPE
raise ValueError("Unknown type: %s" % numeric_value) raise ValueError(f"Unknown type: {numeric_value}")
def _get_value_as_primitive_value(numeric_value): def _get_value_as_primitive_value(numeric_value):
...@@ -2541,7 +2541,7 @@ def _get_value_as_primitive_value(numeric_value): ...@@ -2541,7 +2541,7 @@ def _get_value_as_primitive_value(numeric_value):
if date.day is not None: if date.day is not None:
value_tuple[2] = float(date.day) value_tuple[2] = float(date.day)
return tuple(value_tuple) return tuple(value_tuple)
raise ValueError("Unknown type: %s" % numeric_value) raise ValueError(f"Unknown type: {numeric_value}")
def _get_all_types(numeric_values): def _get_all_types(numeric_values):
...@@ -2567,7 +2567,7 @@ def get_numeric_sort_key_fn(numeric_values): ...@@ -2567,7 +2567,7 @@ def get_numeric_sort_key_fn(numeric_values):
""" """
value_types = _get_all_types(numeric_values) value_types = _get_all_types(numeric_values)
if len(value_types) != 1: if len(value_types) != 1:
raise ValueError("No common value type in %s" % numeric_values) raise ValueError(f"No common value type in {numeric_values}")
value_type = next(iter(value_types)) value_type = next(iter(value_types))
if value_type == NUMBER_TYPE: if value_type == NUMBER_TYPE:
...@@ -2586,7 +2586,7 @@ def get_numeric_sort_key_fn(numeric_values): ...@@ -2586,7 +2586,7 @@ def get_numeric_sort_key_fn(numeric_values):
valid_indexes.discard(tuple_index) valid_indexes.discard(tuple_index)
if not valid_indexes: if not valid_indexes:
raise ValueError("No common value in %s" % numeric_values) raise ValueError(f"No common value in {numeric_values}")
def _sort_key_fn(numeric_value): def _sort_key_fn(numeric_value):
value = _get_value_as_primitive_value(numeric_value) value = _get_value_as_primitive_value(numeric_value)
...@@ -2618,8 +2618,7 @@ def _consolidate_numeric_values(row_index_to_values, min_consolidation_fraction, ...@@ -2618,8 +2618,7 @@ def _consolidate_numeric_values(row_index_to_values, min_consolidation_fraction,
return {} return {}
max_count = max(type_counts.values()) max_count = max(type_counts.values())
if max_count < len(row_index_to_values) * min_consolidation_fraction: if max_count < len(row_index_to_values) * min_consolidation_fraction:
# logging.log_every_n(logging.INFO, 'Can\'t consolidate types: %s %s %d', 100, # logging.log_every_n(logging.INFO, f'Can\'t consolidate types: {debug_info} {row_index_to_values} {max_count}', 100)
# debug_info, row_index_to_values, max_count)
return {} return {}
valid_types = set() valid_types = set()
...@@ -2708,15 +2707,13 @@ def filter_invalid_unicode_from_table(table): ...@@ -2708,15 +2707,13 @@ def filter_invalid_unicode_from_table(table):
cell, is_invalid = filter_invalid_unicode(cell) cell, is_invalid = filter_invalid_unicode(cell)
if is_invalid: if is_invalid:
logging.warning( logging.warning(
"Scrub an invalid table body @ table_id: %s, row_index: %d, " "col_index: %d", f"Scrub an invalid table body @ table_id: {table.table_id}, row_index: {row_index}, "
table.table_id, f"col_index: {col_index}",
row_index,
col_index,
) )
for col_index, column in enumerate(table.columns): for col_index, column in enumerate(table.columns):
column, is_invalid = filter_invalid_unicode(column) column, is_invalid = filter_invalid_unicode(column)
if is_invalid: if is_invalid:
logging.warning("Scrub an invalid table header @ table_id: %s, col_index: %d", table.table_id, col_index) logging.warning(f"Scrub an invalid table header @ table_id: {table.table_id}, col_index: {col_index}")
def add_numeric_table_values(table, min_consolidation_fraction=0.7, debug_info=None): def add_numeric_table_values(table, min_consolidation_fraction=0.7, debug_info=None):
......
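The hunks above replace %-style error and logging messages in the TAPAS numeric-value helpers with f-strings. A minimal, self-contained sketch of that rewrite pattern, using illustrative stand-in names rather than the actual transformers helpers:
NUMBER_TYPE = "number"
DATE_TYPE = "date"
def _get_value_type(numeric_value: dict) -> str:
    # Old: raise ValueError("Unknown type: %s" % numeric_value)
    # New, as enforced by this PR: the f-string renders the same message.
    if numeric_value.get("float_value") is not None:
        return NUMBER_TYPE
    if numeric_value.get("date") is not None:
        return DATE_TYPE
    raise ValueError(f"Unknown type: {numeric_value}")
print(_get_value_type({"float_value": 3.5}))  # -> number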
...@@ -48,14 +48,14 @@ def convert_transfo_xl_checkpoint_to_pytorch( ...@@ -48,14 +48,14 @@ def convert_transfo_xl_checkpoint_to_pytorch(
corpus = pickle.load(fp, encoding="latin1") corpus = pickle.load(fp, encoding="latin1")
# Save vocabulary and dataset cache as Dictionaries (should be better than pickles for the long-term) # Save vocabulary and dataset cache as Dictionaries (should be better than pickles for the long-term)
pytorch_vocab_dump_path = pytorch_dump_folder_path + "/" + VOCAB_FILES_NAMES["pretrained_vocab_file"] pytorch_vocab_dump_path = pytorch_dump_folder_path + "/" + VOCAB_FILES_NAMES["pretrained_vocab_file"]
print("Save vocabulary to {}".format(pytorch_vocab_dump_path)) print(f"Save vocabulary to {pytorch_vocab_dump_path}")
corpus_vocab_dict = corpus.vocab.__dict__ corpus_vocab_dict = corpus.vocab.__dict__
torch.save(corpus_vocab_dict, pytorch_vocab_dump_path) torch.save(corpus_vocab_dict, pytorch_vocab_dump_path)
corpus_dict_no_vocab = corpus.__dict__ corpus_dict_no_vocab = corpus.__dict__
corpus_dict_no_vocab.pop("vocab", None) corpus_dict_no_vocab.pop("vocab", None)
pytorch_dataset_dump_path = pytorch_dump_folder_path + "/" + CORPUS_NAME pytorch_dataset_dump_path = pytorch_dump_folder_path + "/" + CORPUS_NAME
print("Save dataset to {}".format(pytorch_dataset_dump_path)) print(f"Save dataset to {pytorch_dataset_dump_path}")
torch.save(corpus_dict_no_vocab, pytorch_dataset_dump_path) torch.save(corpus_dict_no_vocab, pytorch_dataset_dump_path)
if tf_checkpoint_path: if tf_checkpoint_path:
...@@ -63,22 +63,22 @@ def convert_transfo_xl_checkpoint_to_pytorch( ...@@ -63,22 +63,22 @@ def convert_transfo_xl_checkpoint_to_pytorch(
config_path = os.path.abspath(transfo_xl_config_file) config_path = os.path.abspath(transfo_xl_config_file)
tf_path = os.path.abspath(tf_checkpoint_path) tf_path = os.path.abspath(tf_checkpoint_path)
print("Converting Transformer XL checkpoint from {} with config at {}".format(tf_path, config_path)) print(f"Converting Transformer XL checkpoint from {tf_path} with config at {config_path}.")
# Initialise PyTorch model # Initialise PyTorch model
if transfo_xl_config_file == "": if transfo_xl_config_file == "":
config = TransfoXLConfig() config = TransfoXLConfig()
else: else:
config = TransfoXLConfig.from_json_file(transfo_xl_config_file) config = TransfoXLConfig.from_json_file(transfo_xl_config_file)
print("Building PyTorch model from configuration: {}".format(str(config))) print(f"Building PyTorch model from configuration: {config}")
model = TransfoXLLMHeadModel(config) model = TransfoXLLMHeadModel(config)
model = load_tf_weights_in_transfo_xl(model, config, tf_path) model = load_tf_weights_in_transfo_xl(model, config, tf_path)
# Save pytorch-model # Save pytorch-model
pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME) pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)
pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME) pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME)
print("Save PyTorch model to {}".format(os.path.abspath(pytorch_weights_dump_path))) print(f"Save PyTorch model to {os.path.abspath(pytorch_weights_dump_path)}")
torch.save(model.state_dict(), pytorch_weights_dump_path) torch.save(model.state_dict(), pytorch_weights_dump_path)
print("Save configuration file to {}".format(os.path.abspath(pytorch_config_dump_path))) print(f"Save configuration file to {os.path.abspath(pytorch_config_dump_path)}")
with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
f.write(config.to_json_string()) f.write(config.to_json_string())
......
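The conversion-script hunk above swaps str.format() calls for f-strings; because f-strings accept arbitrary expressions inside the braces, calls such as os.path.abspath() move directly into the message. A short standalone sketch under that assumption, with a hypothetical dump path rather than a real checkpoint location:
import os
pytorch_dump_folder_path = "/tmp/transfo-xl-dump"  # hypothetical path for illustration
weights_path = os.path.join(pytorch_dump_folder_path, "pytorch_model.bin")
# Old: print("Save PyTorch model to {}".format(os.path.abspath(weights_path)))
print(f"Save PyTorch model to {os.path.abspath(weights_path)}")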
...@@ -368,7 +368,7 @@ class TFAdaptiveEmbedding(tf.keras.layers.Layer): ...@@ -368,7 +368,7 @@ class TFAdaptiveEmbedding(tf.keras.layers.Layer):
r_idx - l_idx, r_idx - l_idx,
d_emb_i, d_emb_i,
init_std, init_std,
name="emb_layers_._{}".format(i), name=f"emb_layers_._{i}",
) )
) )
...@@ -380,7 +380,7 @@ class TFAdaptiveEmbedding(tf.keras.layers.Layer): ...@@ -380,7 +380,7 @@ class TFAdaptiveEmbedding(tf.keras.layers.Layer):
shape=(d_emb_i, self.d_proj), shape=(d_emb_i, self.d_proj),
initializer=get_initializer(self.init_std), initializer=get_initializer(self.init_std),
trainable=True, trainable=True,
name="emb_projs_._{}".format(i), name=f"emb_projs_._{i}",
) )
) )
...@@ -467,7 +467,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer): ...@@ -467,7 +467,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
layer_norm_epsilon=config.layer_norm_epsilon, layer_norm_epsilon=config.layer_norm_epsilon,
init_std=config.init_std, init_std=config.init_std,
output_attentions=self.output_attentions, output_attentions=self.output_attentions,
name="layers_._{}".format(i), name=f"layers_._{i}",
) )
) )
else: # learnable embeddings and absolute embeddings else: # learnable embeddings and absolute embeddings
......
...@@ -59,25 +59,22 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer): ...@@ -59,25 +59,22 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
shape=(self.d_embed, self.d_proj), shape=(self.d_embed, self.d_proj),
initializer="zeros", initializer="zeros",
trainable=True, trainable=True,
name="out_projs_._{}".format(i), name=f"out_projs_._{i}",
) )
self.out_projs.append(weight) self.out_projs.append(weight)
else: else:
self.out_projs.append(None) self.out_projs.append(None)
weight = self.add_weight( weight = self.add_weight(
shape=( shape=(self.vocab_size, self.d_embed),
self.vocab_size,
self.d_embed,
),
initializer="zeros", initializer="zeros",
trainable=True, trainable=True,
name="out_layers_._{}_._weight".format(i), name=f"out_layers_._{i}_._weight",
) )
bias = self.add_weight( bias = self.add_weight(
shape=(self.vocab_size,), shape=(self.vocab_size,),
initializer="zeros", initializer="zeros",
trainable=True, trainable=True,
name="out_layers_._{}_._bias".format(i), name=f"out_layers_._{i}_._bias",
) )
self.out_layers.append((weight, bias)) self.out_layers.append((weight, bias))
else: else:
...@@ -86,23 +83,20 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer): ...@@ -86,23 +83,20 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
d_emb_i = self.d_embed // (self.div_val ** i) d_emb_i = self.d_embed // (self.div_val ** i)
weight = self.add_weight( weight = self.add_weight(
shape=(d_emb_i, self.d_proj), initializer="zeros", trainable=True, name="out_projs_._{}".format(i) shape=(d_emb_i, self.d_proj), initializer="zeros", trainable=True, name=f"out_projs_._{i}"
) )
self.out_projs.append(weight) self.out_projs.append(weight)
weight = self.add_weight( weight = self.add_weight(
shape=( shape=(r_idx - l_idx, d_emb_i),
r_idx - l_idx,
d_emb_i,
),
initializer="zeros", initializer="zeros",
trainable=True, trainable=True,
name="out_layers_._{}_._weight".format(i), name=f"out_layers_._{i}_._weight",
) )
bias = self.add_weight( bias = self.add_weight(
shape=(r_idx - l_idx,), shape=(r_idx - l_idx,),
initializer="zeros", initializer="zeros",
trainable=True, trainable=True,
name="out_layers_._{}_._bias".format(i), name=f"out_layers_._{i}_._bias",
) )
self.out_layers.append((weight, bias)) self.out_layers.append((weight, bias))
super().build(input_shape) super().build(input_shape)
......