Unverified Commit 5e8c8eb5 authored by Aaron Gokaslan, committed by GitHub

Apply ruff flake8-comprehensions (#21694)

parent df06fb1f
@@ -191,7 +191,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer):
words = re.findall(r"\S+\n?", text)
for token in words:
-split_tokens.extend([t for t in self.bpe(token).split(" ")])
+split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
def _convert_token_to_id(self, token: str) -> int:
......
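The change above is the pattern flake8-comprehensions flags as C416 (unnecessary comprehension): a comprehension that only copies its input can be replaced by a direct list() call. A minimal sketch of the equivalence, with illustrative values rather than real BPE output:

# a comprehension that merely copies each element is equivalent to list()
pieces = "hel lo wor ld".split(" ")
copied_old = [t for t in pieces]  # flagged form
copied_new = list(pieces)         # autofixed form
assert copied_old == copied_new == ["hel", "lo", "wor", "ld"]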
@@ -89,7 +89,7 @@ def convert_bloom_checkpoint_to_pytorch(
if shard_model:
file_names = os.listdir(bloom_checkpoint_path)
-file_names = list(sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names)))
+file_names = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))
index_dict = {"weight_map": {}, "metadata": {}}
total_size = 0
@@ -157,7 +157,7 @@ def convert_bloom_checkpoint_to_pytorch(
model = BloomModel(config)
file_names = os.listdir(bloom_checkpoint_path)
-file_names = list(sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names)))
+file_names = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))
missing_keys = None
for i, file in enumerate(file_names):
......
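Both Bloom conversion hunks drop a redundant list() wrapper around sorted(), which already returns a new list (likely the C413 check). A standalone sketch with made-up shard names:

# sorted() already returns a list, so wrapping it in list() adds nothing
names = ["layer_02-model_00", "layer_01-model_00", "readme.txt"]
kept_old = list(sorted(filter(lambda s: s.startswith("layer"), names)))
kept_new = sorted(filter(lambda s: s.startswith("layer"), names))
assert kept_old == kept_new == ["layer_01-model_00", "layer_02-model_00"]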
@@ -85,7 +85,7 @@ def duplicate_interleave(m):
# Copied from transformers.models.gptj.modeling_gptj.apply_rotary_pos_emb
def apply_rotary_pos_emb(x, sincos, offset=0):
-sin, cos = map(lambda t: duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :], sincos)
+sin, cos = (duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :] for t in sincos)
# einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2)
return (x * cos) + (rotate_every_two(x) * sin)
......
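Here map(lambda ...) over the two-element sincos tuple becomes a generator expression (the C417 pattern, unnecessary map usage); both forms still unpack into exactly two values. A toy sketch using plain floats instead of the sin/cos tensors:

# map(lambda ...) over a 2-tuple versus the equivalent generator expression
sincos = (0.5, 0.25)
sin_old, cos_old = map(lambda t: t * 2, sincos)  # flagged form
sin_new, cos_new = (t * 2 for t in sincos)       # rewritten form
assert (sin_old, cos_old) == (sin_new, cos_new) == (1.0, 0.5)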
@@ -604,7 +604,7 @@ def binary_mask_to_rle(mask):
pixels = np.concatenate([[0], pixels, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
-return [x for x in runs]
+return list(runs)
# Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
......
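In binary_mask_to_rle the comprehension iterated a NumPy array only to build a Python list, which list() does directly (again C416). A small sketch, assuming NumPy is installed; the run lengths are invented:

import numpy as np

runs = np.array([2, 3, 7, 1])
as_list_old = [x for x in runs]  # element-by-element copy
as_list_new = list(runs)         # same result in one call
assert as_list_old == as_list_new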
@@ -495,7 +495,7 @@ class ConvNextBackbone(ConvNextPreTrainedModel, BackboneMixin):
self.out_feature_channels = out_feature_channels
# Add layer norms to hidden states of out_features
-hidden_states_norms = dict()
+hidden_states_norms = {}
for stage, num_channels in zip(self.out_features, self.channels):
hidden_states_norms[stage] = ConvNextLayerNorm(num_channels, data_format="channels_first")
self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)
......
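dict() with no arguments is just a slower spelling of the empty-dict literal (the C408 check); the literal avoids a global name lookup and a call. For the record:

# dict() and {} produce equal (empty) dictionaries; the literal is cheaper
empty_old = dict()
empty_new = {}
assert empty_old == empty_new == {}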
@@ -208,7 +208,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
words = re.findall(r"\S+\n?", text)
for token in words:
-split_tokens.extend([t for t in self.bpe(token).split(" ")])
+split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
def _convert_token_to_id(self, token):
......
@@ -596,7 +596,7 @@ class TFData2VecVisionEncoder(tf.keras.layers.Layer):
self.relative_position_bias = None
# stochastic depth decay rule
-dpr = [x for x in tf.linspace(0.0, config.drop_path_rate, config.num_hidden_layers)]
+dpr = list(tf.linspace(0.0, config.drop_path_rate, config.num_hidden_layers))
self.layer = [
TFData2VecVisionLayer(
config,
......
@@ -602,7 +602,7 @@ def binary_mask_to_rle(mask):
pixels = np.concatenate([[0], pixels, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
-return [x for x in runs]
+return list(runs)
# Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
......
@@ -590,7 +590,7 @@ def binary_mask_to_rle(mask):
pixels = np.concatenate([[0], pixels, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
-return [x for x in runs]
+return list(runs)
# TODO - (Amy) make compatible with other frameworks
......
@@ -899,7 +899,7 @@ class DinatBackbone(DinatPreTrainedModel, BackboneMixin):
self.out_feature_channels[stage] = num_features[i]
# Add layer norms to hidden states of out_features
-hidden_states_norms = dict()
+hidden_states_norms = {}
for stage, num_channels in zip(self.out_features, self.channels):
hidden_states_norms[stage] = nn.LayerNorm(num_channels)
self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)
......
@@ -130,7 +130,7 @@ class DonutProcessor(ProcessorMixin):
if added_vocab is None:
added_vocab = self.tokenizer.get_added_vocab()
-output = dict()
+output = {}
while tokens:
start_token = re.search(r"<s_(.*?)>", tokens, re.IGNORECASE)
......
@@ -133,8 +133,8 @@ class ErnieMTokenizer(PreTrainedTokenizer):
if vocab_file is not None:
self.vocab = self.load_vocab(filepath=vocab_file)
else:
-self.vocab = dict((self.sp_model.id_to_piece(id), id) for id in range(self.sp_model.get_piece_size()))
-self.reverse_vocab = dict((v, k) for k, v in self.vocab.items())
+self.vocab = {self.sp_model.id_to_piece(id): id for id in range(self.sp_model.get_piece_size())}
+self.reverse_vocab = {v: k for k, v in self.vocab.items()}
def get_offset_mapping(self, text):
if text is None:
@@ -325,7 +325,7 @@ class ErnieMTokenizer(PreTrainedTokenizer):
"You should not supply a second sequence if the provided sequence of "
"ids is already formatted with special tokens for the model."
)
-return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
+return [1 if x in [self.sep_token_id, self.cls_token_id] else 0 for x in token_ids_0]
if token_ids_1 is not None:
return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1]
......
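The ErnieM tokenizer picks up two rewrites: dict() over a generator of pairs becomes a dict comprehension (roughly C402), and list(map(lambda ...)) becomes a list comprehension (C417). An illustrative sketch with invented pieces and special-token ids:

pieces = ["<s>", "hello", "</s>"]
special_ids = [0, 2]

# dict((k, v) for ...)  ->  {k: v for ...}
vocab_old = dict((p, i) for i, p in enumerate(pieces))
vocab_new = {p: i for i, p in enumerate(pieces)}

# list(map(lambda x: ..., seq))  ->  [... for x in seq]
mask_old = list(map(lambda x: 1 if x in special_ids else 0, range(3)))
mask_new = [1 if x in special_ids else 0 for x in range(3)]

assert vocab_old == vocab_new and mask_old == mask_new == [1, 0, 1]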
@@ -201,9 +201,9 @@ def collate_dense_tensors(samples: List[torch.Tensor], pad_v: float = 0) -> torc
"""
if len(samples) == 0:
return torch.Tensor()
-if len(set(x.dim() for x in samples)) != 1:
+if len({x.dim() for x in samples}) != 1:
raise RuntimeError(f"Samples has varying dimensions: {[x.dim() for x in samples]}")
-(device,) = tuple(set(x.device for x in samples)) # assumes all on same device
+(device,) = tuple({x.device for x in samples}) # assumes all on same device
max_shape = [max(lst) for lst in zip(*[x.shape for x in samples])]
result = torch.empty(len(samples), *max_shape, dtype=samples[0].dtype, device=device)
result.fill_(pad_v)
......
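set(x.dim() for x in samples) builds a throwaway generator just to feed set(); the set comprehension (C401) is the direct form. A sketch that stands in plain lists for tensors, with len() playing the role of .dim():

# generator-into-set() versus a set comprehension
samples = [[1, 2], [3, 4, 5], [6]]
dims_old = set(len(s) for s in samples)  # flagged form
dims_new = {len(s) for s in samples}     # comprehension form
assert dims_old == dims_new == {1, 2, 3}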
@@ -83,7 +83,7 @@ def _get_minimal_slice_set(
# Base cases. Either start/end are empty and we're done, or the final,
# one-dimensional tensor can be simply sliced
if len(start) == 0:
-return [tuple()]
+return [()]
elif len(start) == 1:
return [(slice(start[0], end[0] + 1),)]
......
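tuple() with no arguments is the callable spelling of the empty tuple, so the base case can return the () literal instead (the same C408 family as dict() -> {}). A quick check:

# [tuple()] and [()] build the same one-element list holding the empty tuple
base_old = [tuple()]
base_new = [()]
assert base_old == base_new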
@@ -282,10 +282,10 @@ class FlaubertTokenizer(PreTrainedTokenizer):
self.sm = sacremoses
# cache of sm.MosesPunctNormalizer instance
-self.cache_moses_punct_normalizer = dict()
+self.cache_moses_punct_normalizer = {}
# cache of sm.MosesTokenizer instance
-self.cache_moses_tokenizer = dict()
-self.lang_with_custom_tokenizer = set(["zh", "th", "ja"])
+self.cache_moses_tokenizer = {}
+self.lang_with_custom_tokenizer = {"zh", "th", "ja"}
self.lang2id = lang2id
self.id2lang = id2lang
if lang2id is not None and id2lang is not None:
@@ -452,7 +452,7 @@ class FlaubertTokenizer(PreTrainedTokenizer):
split_tokens = []
for token in text:
if token:
-split_tokens.extend([t for t in self.bpe(token).split(" ")])
+split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
......
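The Flaubert tokenizer hunks combine the dict() -> {} rewrite with set([...]) -> set literal (C405, unnecessary list literal): the list built only to initialize the set is skipped entirely. Sketch:

# set([...]) allocates an intermediate list; the set literal does not
langs_old = set(["zh", "th", "ja"])
langs_new = {"zh", "th", "ja"}
assert langs_old == langs_new and "th" in langs_new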
@@ -226,10 +226,10 @@ class FSMTTokenizer(PreTrainedTokenizer):
self.do_lower_case = do_lower_case
# cache of sm.MosesPunctNormalizer instance
-self.cache_moses_punct_normalizer = dict()
+self.cache_moses_punct_normalizer = {}
# cache of sm.MosesTokenizer instance
-self.cache_moses_tokenizer = dict()
-self.cache_moses_detokenizer = dict()
+self.cache_moses_tokenizer = {}
+self.cache_moses_detokenizer = {}
if langs and len(langs) == 2:
self.src_lang, self.tgt_lang = langs
@@ -379,7 +379,7 @@ class FSMTTokenizer(PreTrainedTokenizer):
split_tokens = []
for token in text:
if token:
-split_tokens.extend([t for t in self.bpe(token).split(" ")])
+split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
......
@@ -78,7 +78,7 @@ def duplicate_interleave(m):
def apply_rotary_pos_emb(x, sincos, offset=0):
-sin, cos = map(lambda t: duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :], sincos)
+sin, cos = (duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :] for t in sincos)
# einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2)
return (x * cos) + (rotate_every_two(x) * sin)
......
@@ -348,10 +348,10 @@ class HerbertTokenizer(PreTrainedTokenizer):
self.sm = sacremoses
# cache of sm.MosesPunctNormalizer instance
-self.cache_moses_punct_normalizer = dict()
+self.cache_moses_punct_normalizer = {}
# cache of sm.MosesTokenizer instance
-self.cache_moses_tokenizer = dict()
-self.lang_with_custom_tokenizer = set(["zh", "th", "ja"])
+self.cache_moses_tokenizer = {}
+self.lang_with_custom_tokenizer = {"zh", "th", "ja"}
# True for current supported model (v1.2.0), False for XLM-17 & 100
self.do_lowercase_and_remove_accent = do_lowercase_and_remove_accent
self.lang2id = lang2id
@@ -490,7 +490,7 @@ class HerbertTokenizer(PreTrainedTokenizer):
split_tokens = []
for token in pre_tokens:
if token:
-split_tokens.extend([t for t in self.bpe(token).split(" ")])
+split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
......
@@ -138,7 +138,7 @@ def get_alignment(music_tokens, labels, prior, config):
hop_length = int(config.hop_fraction[-level - 1] * prior.n_ctx)
alignment_head, alignment_layer = config.prior_alignment_head[0], config.prior_alignment_layer[0]
-attn_layers = set([alignment_layer])
+attn_layers = {alignment_layer}
alignment_hops = {}
indices_hops = {}
for start in tqdm(get_starts(total_length, n_ctx, hop_length), desc="Computing lyric to music alignment "):
@@ -436,7 +436,7 @@ class JukeboxBottleneckBlock(nn.Module):
used_curr = (_codebook_elem >= self.threshold).sum()
usage = torch.sum(usage)
dk = torch.norm(self.codebook - old_codebook) / np.sqrt(np.prod(old_codebook.shape))
-return dict(entropy=entropy, used_curr=used_curr, usage=usage, dk=dk)
+return {"entropy": entropy, "used_curr": used_curr, "usage": usage, "dk": dk}
def preprocess(self, hidden_states):
hidden_states = hidden_states.permute(0, 2, 1).contiguous()
@@ -2213,11 +2213,11 @@ class JukeboxPrior(PreTrainedModel):
loss = self.encoder_loss_fraction * encoder_loss * self.nb_relevant_lyric_tokens / self.total_loss_dims
loss += next_token_prediction_loss * self.next_token_prediction_loss_dims / self.total_loss_dims
-metrics = dict(
-    bpd=next_token_prediction_loss.clone().detach(),
-    encoder_loss=encoder_loss.clone().detach(),
-    next_token_prediction_loss=next_token_prediction_loss.clone().detach(),
-)
+metrics = {
+    "bpd": next_token_prediction_loss.clone().detach(),
+    "encoder_loss": encoder_loss.clone().detach(),
+    "next_token_prediction_loss": next_token_prediction_loss.clone().detach(),
+}
if get_preds:
metrics["preds"] = preds.clone().detach()
if get_attn_weights:
@@ -2533,11 +2533,11 @@ class JukeboxModel(JukeboxPreTrainedModel):
# total length of the signal, might be bit different from the actual generated length
self.total_length = total_length
for level in sample_levels:
-sampling_kwargs = dict(
-    temp=0.99 if level == len(self.priors) - 1 else sampling_temperature,
-    chunk_size=chunk_size,
-    sample_tokens=sample_tokens,
-)
+sampling_kwargs = {
+    "temp": 0.99 if level == len(self.priors) - 1 else sampling_temperature,
+    "chunk_size": chunk_size,
+    "sample_tokens": sample_tokens,
+}
# Set correct total_length, hop_length, labels and sampling_kwargs for level
total_token_to_sample = total_length // self.priors[level].raw_to_tokens
......
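The Jukebox hunks show dict(key=value, ...) calls turned into dict literals, also under C408; the keys become explicit strings and the function call disappears. An isolated sketch with placeholder values rather than the real tensors and sampling arguments:

# keyword-argument dict() call versus the equivalent dict literal
kwargs_old = dict(temp=0.99, chunk_size=32, sample_tokens=None)
kwargs_new = {"temp": 0.99, "chunk_size": 32, "sample_tokens": None}
assert kwargs_old == kwargs_new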
@@ -187,7 +187,7 @@ class JukeboxTokenizer(PreTrainedTokenizer):
Do NOT take care of added tokens. Only the lyrics are split into character for the character-based vocabulary.
"""
# only lyrics are not tokenized, but character based is easily handled
-return [character for character in lyrics]
+return list(lyrics)
def tokenize(self, artist, genre, lyrics, **kwargs):
"""
......
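Because iterating a string yields its characters, list(lyrics) splits the lyrics into single characters exactly as the comprehension did, which is why this last C416 fix is safe for the character-based vocabulary. A one-line check:

assert [character for character in "abc"] == list("abc") == ["a", "b", "c"]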