"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "186c0775132fecdc571f3996f75c7e1377d5fb9b"
Unverified commit acc3bd9d, authored by Sylvain Gugger and committed by GitHub

Enforce string-formatting with f-strings (#10980)



* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
parent d0b3797a
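
All of the hunks below apply the same mechanical pattern: messages built with `str.format()` or `%`-style interpolation are rewritten as f-strings that render identical text. As a quick illustration only (this snippet is not part of the diff; the variable names and values are invented placeholders), the three styles are interchangeable:

# Illustration of the pattern enforced by this commit (example values are made up).
save_directory = "/tmp/vocab"
hidden_size, num_attention_heads = 768, 12

# Old style 1: str.format()
old_format = "Vocabulary path ({}) should be a directory".format(save_directory)

# Old style 2: %-interpolation over implicitly concatenated literals
old_percent = (
    "The hidden size (%d) is not a multiple of the number of attention "
    "heads (%d)" % (hidden_size, num_attention_heads)
)

# New style: f-strings producing the same rendered messages
new_fstring = f"Vocabulary path ({save_directory}) should be a directory"
new_fstring_2 = (
    f"The hidden size ({hidden_size}) is not a multiple of the number of attention "
    f"heads ({num_attention_heads})"
)

assert new_fstring == old_format
assert new_fstring_2 == old_percent
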
@@ -217,7 +217,7 @@ class CamembertTokenizerFast(PreTrainedTokenizerFast):
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
         if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
             return
         out_vocab_file = os.path.join(
             save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
...
@@ -70,12 +70,12 @@ def load_tf_weights_in_convbert(model, config, tf_checkpoint_path):
         )
         raise
     tf_path = os.path.abspath(tf_checkpoint_path)
-    logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
+    logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
     # Load weights from TF model
     init_vars = tf.train.list_variables(tf_path)
     tf_data = {}
     for name, shape in init_vars:
-        logger.info("Loading TF weight {} with shape {}".format(name, shape))
+        logger.info(f"Loading TF weight {name} with shape {shape}")
         array = tf.train.load_variable(tf_path, name)
         tf_data[name] = array
@@ -285,8 +285,8 @@ class ConvBertSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                "The hidden size (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
+                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
+                f"heads ({config.num_attention_heads})"
             )

         new_num_attention_heads = config.num_attention_heads // config.head_ratio
...
@@ -147,8 +147,8 @@ class TFConvBertSelfAttention(tf.keras.layers.Layer):
         if config.hidden_size % config.num_attention_heads != 0:
             raise ValueError(
-                "The hidden size (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
+                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
+                f"heads ({config.num_attention_heads})"
             )

         new_num_attention_heads = int(config.num_attention_heads / config.head_ratio)
@@ -442,7 +442,7 @@ class TFConvBertEncoder(tf.keras.layers.Layer):
     def __init__(self, config, **kwargs):
         super().__init__(**kwargs)

-        self.layer = [TFConvBertLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)]
+        self.layer = [TFConvBertLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]

     def call(
         self,
...
@@ -234,7 +234,7 @@ class TFCTRLMainLayer(tf.keras.layers.Layer):
                 config.resid_pdrop,
                 config.layer_norm_epsilon,
                 self.output_attentions,
-                name="h_._{}".format(i),
+                name=f"h_._{i}",
             )
             for i in range(config.n_layer)
         ]
...
@@ -226,7 +226,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
         if not os.path.isdir(save_directory):
-            logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
             return
         vocab_file = os.path.join(
             save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
@@ -244,8 +244,8 @@ class CTRLTokenizer(PreTrainedTokenizer):
             for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]):
                 if index != token_index:
                     logger.warning(
-                        "Saving vocabulary to {}: BPE merge indices are not consecutive."
-                        " Please check that the tokenizer is not corrupted!".format(merge_file)
+                        f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive."
+                        " Please check that the tokenizer is not corrupted!"
                     )
                     index = token_index
                 writer.write(" ".join(bpe_tokens) + "\n")
...
@@ -492,8 +492,8 @@ class DisentangledSelfAttention(torch.nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0:
             raise ValueError(
-                "The hidden size (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
+                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
+                f"heads ({config.num_attention_heads})"
             )
         self.num_attention_heads = config.num_attention_heads
         self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
...
@@ -549,8 +549,8 @@ class DebertaTokenizer(PreTrainedTokenizer):
         if not os.path.isfile(vocab_file):
             raise ValueError(
-                "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
-                "model use `tokenizer = XxxTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)
+                f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained "
+                "model use `tokenizer = XxxTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
             )
         self.do_lower_case = do_lower_case
         self.gpt2_tokenizer = GPT2Tokenizer(vocab_file)
...
@@ -561,8 +561,8 @@ class DisentangledSelfAttention(torch.nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0:
             raise ValueError(
-                "The hidden size (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
+                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
+                f"heads ({config.num_attention_heads})"
             )
         self.num_attention_heads = config.num_attention_heads
         _attention_head_size = config.hidden_size // config.num_attention_heads
...
@@ -107,8 +107,8 @@ class DebertaV2Tokenizer(PreTrainedTokenizer):
         if not os.path.isfile(vocab_file):
             raise ValueError(
-                "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
-                "model use `tokenizer = DebertaV2Tokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)
+                f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained "
+                "model use `tokenizer = DebertaV2Tokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
             )
         self.do_lower_case = do_lower_case
         self.split_by_punct = split_by_punct
@@ -481,11 +481,11 @@ def convert_to_unicode(text):
         elif isinstance(text, bytes):
             return text.decode("utf-8", "ignore")
         else:
-            raise ValueError("Unsupported string type: %s" % (type(text)))
+            raise ValueError(f"Unsupported string type: {type(text)}")
     elif six.PY2:
         if isinstance(text, str):
             return text.decode("utf-8", "ignore")
         else:
-            raise ValueError("Unsupported string type: %s" % (type(text)))
+            raise ValueError(f"Unsupported string type: {type(text)}")
     else:
         raise ValueError("Not running on Python2 or Python 3?")
@@ -159,7 +159,7 @@ class MultiHeadSelfAttention(nn.Module):
         """
         bs, q_length, dim = query.size()
         k_length = key.size(1)
-        # assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim)
+        # assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured'
         # assert key.size() == value.size()

         dim_per_head = self.dim // self.n_heads
@@ -208,9 +208,7 @@ class FFN(nn.Module):
         self.seq_len_dim = 1
         self.lin1 = nn.Linear(in_features=config.dim, out_features=config.hidden_dim)
         self.lin2 = nn.Linear(in_features=config.hidden_dim, out_features=config.dim)
-        assert config.activation in ["relu", "gelu"], "activation ({}) must be in ['relu', 'gelu']".format(
-            config.activation
-        )
+        assert config.activation in ["relu", "gelu"], f"activation ({config.activation}) must be in ['relu', 'gelu']"
         self.activation = gelu if config.activation == "gelu" else nn.ReLU()

     def forward(self, input):
...
@@ -168,7 +168,7 @@ class TFMultiHeadSelfAttention(tf.keras.layers.Layer):
         """
         bs, q_length, dim = shape_list(query)
         k_length = shape_list(key)[1]
-        # assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim)
+        # assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured'
         # assert key.size() == value.size()
         dim_per_head = tf.math.divide(self.dim, self.n_heads)
         dim_per_head = tf.cast(dim_per_head, dtype=tf.int32)
@@ -221,9 +221,7 @@ class TFFFN(tf.keras.layers.Layer):
         self.lin2 = tf.keras.layers.Dense(
             config.dim, kernel_initializer=get_initializer(config.initializer_range), name="lin2"
         )
-        assert config.activation in ["relu", "gelu"], "activation ({}) must be in ['relu', 'gelu']".format(
-            config.activation
-        )
+        assert config.activation in ["relu", "gelu"], f"activation ({config.activation}) must be in ['relu', 'gelu']"
         self.activation = get_tf_activation(config.activation)

     def call(self, input, training=False):
@@ -290,7 +288,7 @@ class TFTransformer(tf.keras.layers.Layer):
         self.output_hidden_states = config.output_hidden_states
         self.output_attentions = config.output_attentions

-        self.layer = [TFTransformerBlock(config, name="layer_._{}".format(i)) for i in range(config.n_layers)]
+        self.layer = [TFTransformerBlock(config, name=f"layer_._{i}") for i in range(config.n_layers)]

     def call(self, x, attn_mask, head_mask, output_attentions, output_hidden_states, return_dict, training=False):
         # docstyle-ignore
...
@@ -28,7 +28,7 @@ CheckpointState = collections.namedtuple(
 def load_states_from_checkpoint(model_file: str) -> CheckpointState:
-    print("Reading saved model from %s", model_file)
+    print(f"Reading saved model from {model_file}")
     state_dict = torch.load(model_file, map_location=lambda s, l: default_restore_location(s, "cpu"))
     return CheckpointState(**state_dict)
@@ -55,7 +55,7 @@ class DPRState:
 class DPRContextEncoderState(DPRState):
     def load_dpr_model(self):
         model = DPRContextEncoder(DPRConfig(**BertConfig.get_config_dict("bert-base-uncased")[0]))
-        print("Loading DPR biencoder from {}".format(self.src_file))
+        print(f"Loading DPR biencoder from {self.src_file}")
         saved_state = load_states_from_checkpoint(self.src_file)
         encoder, prefix = model.ctx_encoder, "ctx_model."
         # Fix changes from https://github.com/huggingface/transformers/commit/614fef1691edb806de976756d4948ecbcd0c0ca3
@@ -73,7 +73,7 @@ class DPRContextEncoderState(DPRState):
 class DPRQuestionEncoderState(DPRState):
     def load_dpr_model(self):
         model = DPRQuestionEncoder(DPRConfig(**BertConfig.get_config_dict("bert-base-uncased")[0]))
-        print("Loading DPR biencoder from {}".format(self.src_file))
+        print(f"Loading DPR biencoder from {self.src_file}")
         saved_state = load_states_from_checkpoint(self.src_file)
         encoder, prefix = model.question_encoder, "question_model."
         # Fix changes from https://github.com/huggingface/transformers/commit/614fef1691edb806de976756d4948ecbcd0c0ca3
@@ -91,7 +91,7 @@ class DPRQuestionEncoderState(DPRState):
 class DPRReaderState(DPRState):
     def load_dpr_model(self):
         model = DPRReader(DPRConfig(**BertConfig.get_config_dict("bert-base-uncased")[0]))
-        print("Loading DPR reader from {}".format(self.src_file))
+        print(f"Loading DPR reader from {self.src_file}")
         saved_state = load_states_from_checkpoint(self.src_file)
         # Fix changes from https://github.com/huggingface/transformers/commit/614fef1691edb806de976756d4948ecbcd0c0ca3
         state_dict = {
...
@@ -239,7 +239,7 @@ class CustomDPRReaderTokenizerMixin:
         questions = questions if not isinstance(questions, str) else [questions] * n_passages
         assert len(titles) == len(
             texts
-        ), "There should be as many titles than texts but got {} titles and {} texts.".format(len(titles), len(texts))
+        ), f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts."
         encoded_question_and_titles = super().__call__(questions, titles, padding=False, truncation=False)["input_ids"]
         encoded_texts = super().__call__(texts, add_special_tokens=False, padding=False, truncation=False)["input_ids"]
         encoded_inputs = {
@@ -350,9 +350,9 @@ class CustomDPRReaderTokenizerMixin:
         scores = sorted(scores, key=lambda x: x[1], reverse=True)
         chosen_span_intervals = []
         for (start_index, end_index), score in scores:
-            assert start_index <= end_index, "Wrong span indices: [{}:{}]".format(start_index, end_index)
+            assert start_index <= end_index, f"Wrong span indices: [{start_index}:{end_index}]"
             length = end_index - start_index + 1
-            assert length <= max_answer_length, "Span is too long: {} > {}".format(length, max_answer_length)
+            assert length <= max_answer_length, f"Span is too long: {length} > {max_answer_length}"
             if any(
                 [
                     start_index <= prev_start_index <= prev_end_index <= end_index
...
@@ -240,7 +240,7 @@ class CustomDPRReaderTokenizerMixin:
         questions = questions if not isinstance(questions, str) else [questions] * n_passages
         assert len(titles) == len(
             texts
-        ), "There should be as many titles than texts but got {} titles and {} texts.".format(len(titles), len(texts))
+        ), f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts."
         encoded_question_and_titles = super().__call__(questions, titles, padding=False, truncation=False)["input_ids"]
         encoded_texts = super().__call__(texts, add_special_tokens=False, padding=False, truncation=False)["input_ids"]
         encoded_inputs = {
@@ -351,9 +351,9 @@ class CustomDPRReaderTokenizerMixin:
         scores = sorted(scores, key=lambda x: x[1], reverse=True)
         chosen_span_intervals = []
         for (start_index, end_index), score in scores:
-            assert start_index <= end_index, "Wrong span indices: [{}:{}]".format(start_index, end_index)
+            assert start_index <= end_index, f"Wrong span indices: [{start_index}:{end_index}]"
             length = end_index - start_index + 1
-            assert length <= max_answer_length, "Span is too long: {} > {}".format(length, max_answer_length)
+            assert length <= max_answer_length, f"Span is too long: {length} > {max_answer_length}"
             if any(
                 [
                     start_index <= prev_start_index <= prev_end_index <= end_index
...
@@ -29,7 +29,7 @@ logging.set_verbosity_info()
 def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path, discriminator_or_generator):
     # Initialise PyTorch model
     config = ElectraConfig.from_json_file(config_file)
-    print("Building PyTorch model from configuration: {}".format(str(config)))
+    print(f"Building PyTorch model from configuration: {config}")

     if discriminator_or_generator == "discriminator":
         model = ElectraForPreTraining(config)
@@ -44,7 +44,7 @@ def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path, discriminator_or_generator):
     )

     # Save pytorch-model
-    print("Save PyTorch model to {}".format(pytorch_dump_path))
+    print(f"Save PyTorch model to {pytorch_dump_path}")
     torch.save(model.state_dict(), pytorch_dump_path)
...
@@ -83,13 +83,13 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_
         )
         raise
     tf_path = os.path.abspath(tf_checkpoint_path)
-    logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
+    logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
     # Load weights from TF model
     init_vars = tf.train.list_variables(tf_path)
     names = []
     arrays = []
     for name, shape in init_vars:
-        logger.info("Loading TF weight {} with shape {}".format(name, shape))
+        logger.info(f"Loading TF weight {name} with shape {shape}")
         array = tf.train.load_variable(tf_path, name)
         names.append(name)
         arrays.append(array)
@@ -112,7 +112,7 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_
             # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v
             # which are not required for using pretrained model
             if any(n in ["global_step", "temperature"] for n in name):
-                logger.info("Skipping {}".format(original_name))
+                logger.info(f"Skipping {original_name}")
                 continue
             pointer = model
             for m_name in name:
@@ -144,10 +144,10 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_
             except AssertionError as e:
                 e.args += (pointer.shape, array.shape)
                 raise
-            print("Initialize PyTorch weight {}".format(name), original_name)
+            print(f"Initialize PyTorch weight {name}", original_name)
             pointer.data = torch.from_numpy(array)
         except AttributeError as e:
-            print("Skipping {}".format(original_name), name, e)
+            print(f"Skipping {original_name}", name, e)
             continue

     return model
@@ -206,8 +206,8 @@ class ElectraSelfAttention(nn.Module):
         super().__init__()
         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
             raise ValueError(
-                "The hidden size (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
+                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
+                f"heads ({config.num_attention_heads})"
             )

         self.num_attention_heads = config.num_attention_heads
...
@@ -285,7 +285,7 @@ class TFElectraEncoder(tf.keras.layers.Layer):
     def __init__(self, config: ElectraConfig, **kwargs):
         super().__init__(**kwargs)

-        self.layer = [TFElectraLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)]
+        self.layer = [TFElectraLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]

     def call(
         self,
...
@@ -159,9 +159,7 @@ class EncoderDecoderModel(PreTrainedModel):
         if config is None:
             config = EncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config)
         else:
-            assert isinstance(config, self.config_class), "config: {} has to be of type {}".format(
-                config, self.config_class
-            )
+            assert isinstance(config, self.config_class), f"config: {config} has to be of type {self.config_class}"

         # initialize with config
         super().__init__(config)
...
@@ -337,7 +337,7 @@ class TFFlaubertMultiHeadAttention(tf.keras.layers.Layer):
         else:
             klen = shape_list(kv)[1]

-        # assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim)
+        # assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured'
         dim_per_head = self.dim // self.n_heads
         mask_reshape = (bs, 1, qlen, klen) if len(shape_list(mask)) == 3 else (bs, 1, 1, klen)
@@ -450,21 +450,19 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer):
         for i in range(self.n_layers):
             self.attentions.append(
-                TFFlaubertMultiHeadAttention(self.n_heads, self.dim, config=config, name="attentions_._{}".format(i))
+                TFFlaubertMultiHeadAttention(self.n_heads, self.dim, config=config, name=f"attentions_._{i}")
             )
             self.layer_norm1.append(
-                tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm1_._{}".format(i))
+                tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name=f"layer_norm1_._{i}")
             )
             # if self.is_decoder:
             # self.layer_norm15.append(nn.LayerNorm(self.dim, eps=config.layer_norm_eps))
             # self.encoder_attn.append(MultiHeadAttention(self.n_heads, self.dim, dropout=self.attention_dropout))
             self.ffns.append(
-                TFFlaubertTransformerFFN(
-                    self.dim, self.hidden_dim, self.dim, config=config, name="ffns_._{}".format(i)
-                )
+                TFFlaubertTransformerFFN(self.dim, self.hidden_dim, self.dim, config=config, name=f"ffns_._{i}")
             )
             self.layer_norm2.append(
-                tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm2_._{}".format(i))
+                tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name=f"layer_norm2_._{i}")
             )

     def build(self, input_shape):
...
@@ -71,7 +71,7 @@ def convert_to_unicode(text):
         elif isinstance(s, six.text_type):
             return s
         else:
-            raise TypeError("not expecting type '%s'" % type(s))
+            raise TypeError(f"not expecting type '{type(s)}'")

     return six_ensure_text(text, encoding="utf-8", errors="ignore")
...