Unverified commit 5e8c8eb5 authored by Aaron Gokaslan, committed by GitHub

Apply ruff flake8-comprehensions (#21694)

parent df06fb1f
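For context: flake8-comprehensions is a lint rule family (C400 to C417, also implemented by ruff) that rewrites redundant list/set/dict constructions into their direct equivalents. Below is a minimal sketch of the main patterns this commit applies; the rule numbers follow ruff's naming and the variable names are illustrative only:

```python
data = {"b": 2, "a": 1}

# C414: sorted() accepts any iterable, so list() around its argument is redundant.
keys = sorted(data.keys())          # instead of sorted(list(data.keys()))

# C413: sorted() already returns a list, so list() around it is redundant.
ordered = sorted(data.values())     # instead of list(sorted(data.values()))

# C416: a comprehension that only copies elements is just a cast.
pairs = list(data.items())          # instead of [item for item in data.items()]

# C401: set() over a generator expression is a set comprehension.
upper = {k.upper() for k in data}   # instead of set(k.upper() for k in data)

# C408: dict() with no or keyword arguments is a dict literal.
config = {"size": 10}               # instead of dict(size=10)
```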
@@ -362,9 +362,7 @@ class TFSegformerModelTest(TFModelTesterMixin, unittest.TestCase):
_, prepared_for_class = self.model_tester.prepare_config_and_inputs_for_keras_fit(
for_segmentation=for_segmentation
)
-added_label = prepared_for_class[
-sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0]
-]
+added_label = prepared_for_class[sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0]]
loss_size = tf.size(added_label)
# Test that model correctly compute the loss with kwargs
......
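The hunk above folds three lines into one because no intermediate `list()` is needed: `dict.keys()` returns a set-like view, so the `-` difference works directly and `sorted()` consumes the result as-is. A small sketch with hypothetical dicts:

```python
prepared_for_class = {"labels": 0, "input_ids": 1}
inputs_dict = {"input_ids": 1}

# Key views support set operations; sorted() takes any iterable.
added = sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)
print(added)  # ['labels']
```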
@@ -372,7 +372,7 @@ class SpeechT5FeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest
)
self.assertIn("attention_mask", processed_pad)
self.assertListEqual(
-list(processed_pad.attention_mask.shape), list((processed_pad[input_name].shape[0], max_length))
+list(processed_pad.attention_mask.shape), [processed_pad[input_name].shape[0], max_length]
)
self.assertListEqual(
processed_pad.attention_mask[:, :max_length].sum(-1).tolist(), [max_length for x in speech_inputs]
......
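The `list((a, b))` to `[a, b]` change is ruff's C410 (unnecessary literal within a `list()` call): the tuple is built only to be copied into a list, where a list literal says it in one step. A sketch with made-up shape values:

```python
batch_size, max_length = 4, 16  # hypothetical stand-ins for the test's shapes

as_call = list((batch_size, max_length))   # builds a tuple, then copies it
as_literal = [batch_size, max_length]      # one literal, same result
assert as_call == as_literal == [4, 16]
```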
@@ -387,7 +387,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_get_sentinel_token_ids(self):
tokenizer = T5Tokenizer(SAMPLE_VOCAB, extra_ids=10)
-self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted([i for i in range(1000, 1010)]))
+self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted(range(1000, 1010)))
def test_get_sentinel_tokens_for_fasttokenizer(self):
tokenizer = T5TokenizerFast(SAMPLE_VOCAB, extra_ids=10)
@@ -398,4 +398,4 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_get_sentinel_token_ids_for_fasttokenizer(self):
tokenizer = T5TokenizerFast(SAMPLE_VOCAB, extra_ids=10)
-self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted([i for i in range(1000, 1010)]))
+self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted(range(1000, 1010)))
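Both T5 assertions drop an identity comprehension: `[i for i in range(1000, 1010)]` only re-copies the range, so `sorted(range(1000, 1010))` is equivalent. A quick check:

```python
sentinel_ids = sorted(range(1000, 1010))
assert sentinel_ids == [i for i in range(1000, 1010)]
assert sentinel_ids == list(range(1000, 1010))  # a range is already ascending
```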
@@ -347,7 +347,7 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
# Retrieve the cutoffs and copy them
copied_cutoffs = copy.copy(model_embed.cutoffs)
-test_layers = [x for x in range(config.div_val)]
+test_layers = list(range(config.div_val))
for layer in test_layers:
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
model_embed = model.resize_token_embeddings(model_vocab_size + 10, layer)
......
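Same rule (C416, unnecessary comprehension) in its plain form: `[x for x in iterable]` is just `list(iterable)`. For example, with a hypothetical `div_val`:

```python
div_val = 4  # stand-in for config.div_val
test_layers = list(range(div_val))
assert test_layers == [x for x in range(div_val)] == [0, 1, 2, 3]
```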
@@ -581,7 +581,7 @@ class TvltModelIntegrationTest(unittest.TestCase):
audio = prepare_audio()
video_inputs = image_processor(video, return_tensors="pt").to(torch_device)
audio_inputs = audio_feature_extractor(audio, return_tensors="pt").to(torch_device)
-inputs = dict()
+inputs = {}
inputs.update(video_inputs)
inputs.update(audio_inputs)
@@ -606,7 +606,7 @@ class TvltModelIntegrationTest(unittest.TestCase):
video_mixed_inputs = image_processor(video_mixed, is_mixed=True, return_tensors="pt").to(torch_device)
audio_inputs = audio_feature_extractor(audio, return_tensors="pt", mask_audio=True).to(torch_device)
labels = torch.tensor([[0.0]], device=torch_device)
-inputs = dict()
+inputs = {}
inputs.update(video_inputs)
inputs.update(video_mixed_inputs)
inputs.update(audio_inputs)
......
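`dict()` to `{}` is ruff's C408: the literal avoids a name lookup and a call, and reads the same. The surrounding update pattern is unchanged; placeholder values below are illustrative only:

```python
video_inputs = {"pixel_values": [0.0]}   # hypothetical processor outputs
audio_inputs = {"audio_values": [0.0]}

inputs = {}                 # instead of inputs = dict()
inputs.update(video_inputs)
inputs.update(audio_inputs)
assert set(inputs) == {"pixel_values", "audio_values"}
```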
@@ -333,7 +333,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-tf_main_layer_classes = set(
+tf_main_layer_classes = {
module_member
for model_class in self.all_model_classes
for module in (import_module(model_class.__module__),)
@@ -345,7 +345,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase):
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
-)
+}
num_patches = int((config.image_size // config.patch_size) ** 2)
noise = np.random.uniform(size=(self.model_tester.batch_size, num_patches))
......
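Here `set(` ... `)` around a multi-line generator expression becomes a `{` ... `}` set comprehension, ruff's C401; only the delimiters change, the clauses stay put. A compact sketch of the same shape:

```python
import keyword

# Before: set(name for name in dir(keyword) if not name.startswith("_"))
public_names = {
    name
    for name in dir(keyword)
    if not name.startswith("_")
}  # after: same clauses, set-comprehension delimiters
assert "kwlist" in public_names
```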
@@ -231,7 +231,7 @@ class Wav2Vec2TokenizerTest(unittest.TestCase):
tokenizer_files = tokenizer.save_pretrained(tmpdirname2)
self.assertSequenceEqual(
sorted(tuple(VOCAB_FILES_NAMES.values()) + ("special_tokens_map.json", "added_tokens.json")),
-sorted(tuple(x.split(os.path.sep)[-1] for x in tokenizer_files)),
+sorted(x.split(os.path.sep)[-1] for x in tokenizer_files),
)
# Checks everything loads correctly in the same way
@@ -456,7 +456,7 @@ class Wav2Vec2CTCTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
def test_special_characters_in_vocab(self):
sent = "ʈʰ æ æ̃ ˧ kʰ"
-vocab_dict = {k: v for v, k in enumerate({phoneme for phoneme in sent.split()})}
+vocab_dict = {k: v for v, k in enumerate(set(sent.split()))}
vocab_file = os.path.join(self.tmpdirname, "vocab_special.json")
with open(vocab_file, "w") as f:
......
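Note the direction is reversed in the `vocab_dict` hunk: a set comprehension that merely copies its iterable is itself the redundancy, so `{phoneme for phoneme in sent.split()}` collapses to `set(sent.split())`:

```python
sent = "ʈʰ æ æ̃ ˧ kʰ"
vocab_dict = {k: v for v, k in enumerate(set(sent.split()))}
# Same token set as the comprehension form, with one less layer of syntax.
assert set(vocab_dict) == {phoneme for phoneme in sent.split()}
```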
@@ -215,7 +215,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
with get_context(pool_context).Pool() as pool:
decoded_processor = processor.batch_decode(logits, pool)
-logits_list = [array for array in logits]
+logits_list = list(logits)
with get_context("fork").Pool() as p:
decoded_beams = decoder.decode_beams_batch(p, logits_list)
@@ -252,7 +252,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
)
decoded_processor = decoded_processor_out.text
-logits_list = [array for array in logits]
+logits_list = list(logits)
with get_context("fork").Pool() as pool:
decoded_decoder_out = decoder.decode_beams_batch(
@@ -299,7 +299,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
)
decoded_processor = decoded_processor_out.text
-logits_list = [array for array in logits]
+logits_list = list(logits)
decoder.reset_params(
alpha=alpha,
beta=beta,
......
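All three `logits_list` hunks are the same C416 fix: iterating an array just to rebuild a list is a plain `list()` cast, which splits along the first axis exactly like the comprehension did. A NumPy sketch with hypothetical shapes:

```python
import numpy as np

logits = np.zeros((2, 10, 32))  # hypothetical (batch, time, vocab) scores
logits_list = list(logits)      # same result as [array for array in logits]
assert len(logits_list) == 2 and logits_list[0].shape == (10, 32)
```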
@@ -400,7 +400,7 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
# The number of elements in the loss should be the same as the number of elements in the label
prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
added_label = prepared_for_class[
-sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0]
+sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0]
]
expected_loss_size = added_label.shape.as_list()[:1]
......
@@ -606,7 +606,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelineIterator(dummy_dataset, add, {"extra": 2})
self.assertEqual(len(dataset), 4)
-outputs = [item for item in dataset]
+outputs = list(dataset)
self.assertEqual(outputs, [2, 3, 4, 5])
@require_torch
@@ -624,7 +624,7 @@ class PipelineUtilsTest(unittest.TestCase):
with self.assertRaises(TypeError):
len(dataset)
-outputs = [item for item in dataset]
+outputs = list(dataset)
self.assertEqual(outputs, [2, 3, 4, 5])
@require_torch
@@ -638,7 +638,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
-outputs = [item for item in dataset]
+outputs = list(dataset)
self.assertEqual(outputs, [{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}])
@require_torch
@@ -654,7 +654,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
-outputs = [item for item in dataset]
+outputs = list(dataset)
self.assertEqual(
nested_simplify(outputs), [{"id": [[12, 22]]}, {"id": [[2, 3]]}, {"id": [[2, 4]]}, {"id": [[5]]}]
)
@@ -671,7 +671,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelineChunkIterator(dataset, preprocess_chunk, {}, loader_batch_size=3)
-outputs = [item for item in dataset]
+outputs = list(dataset)
self.assertEqual(outputs, [0, 1, 0, 1, 2])
@@ -692,7 +692,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelinePackIterator(dataset, pack, {})
-outputs = [item for item in dataset]
+outputs = list(dataset)
self.assertEqual(
outputs,
[
@@ -719,7 +719,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
-outputs = [item for item in dataset]
+outputs = list(dataset)
self.assertEqual(outputs, [[{"id": 2}, {"id": 3}], [{"id": 4}, {"id": 5}]])
# is_false Across batch
@@ -730,7 +730,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
-outputs = [item for item in dataset]
+outputs = list(dataset)
self.assertEqual(outputs, [[{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}]])
@slow
......
@@ -281,7 +281,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
def run_test_targets(self, model, tokenizer):
vocab = tokenizer.get_vocab()
-targets = list(sorted(vocab.keys()))[:2]
+targets = sorted(vocab.keys())[:2]
# Pipeline argument
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, targets=targets)
outputs = fill_masker(f"This is a {tokenizer.mask_token}")
@@ -293,8 +293,8 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
],
)
target_ids = {vocab[el] for el in targets}
-self.assertEqual(set(el["token"] for el in outputs), target_ids)
-self.assertEqual(set(el["token_str"] for el in outputs), set(targets))
+self.assertEqual({el["token"] for el in outputs}, target_ids)
+self.assertEqual({el["token_str"] for el in outputs}, set(targets))
# Call argument
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
@@ -307,8 +307,8 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
],
)
target_ids = {vocab[el] for el in targets}
-self.assertEqual(set(el["token"] for el in outputs), target_ids)
-self.assertEqual(set(el["token_str"] for el in outputs), set(targets))
+self.assertEqual({el["token"] for el in outputs}, target_ids)
+self.assertEqual({el["token_str"] for el in outputs}, set(targets))
# Score equivalence
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=targets)
@@ -354,7 +354,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
# top_k=2, ntargets=3
-targets = list(sorted(vocab.keys()))[:3]
+targets = sorted(vocab.keys())[:3]
outputs = fill_masker(f"This is a {tokenizer.mask_token}", top_k=2, targets=targets)
# If we use the most probably targets, and filter differently, we should still
@@ -369,7 +369,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
vocab = tokenizer.get_vocab()
# String duplicates + id duplicates
-targets = list(sorted(vocab.keys()))[:3]
+targets = sorted(vocab.keys())[:3]
targets = [targets[0], targets[1], targets[0], targets[2], targets[1]]
outputs = fill_masker(f"My name is {tokenizer.mask_token}", targets=targets, top_k=10)
......
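The `targets` changes are ruff's C413: `sorted()` already returns a new list, so wrapping it in `list()` duplicates work. A sketch with a toy vocab:

```python
vocab = {"world": 1, "hello": 0, "hi": 2}
targets = sorted(vocab.keys())[:2]  # already a list; no list(sorted(...)) needed
assert targets == ["hello", "hi"]
```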
@@ -63,7 +63,7 @@ class VideoClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
def test_small_model_pt(self):
small_model = "hf-internal-testing/tiny-random-VideoMAEForVideoClassification"
small_feature_extractor = VideoMAEFeatureExtractor(
-size=dict(shortest_edge=10), crop_size=dict(height=10, width=10)
+size={"shortest_edge": 10}, crop_size={"height": 10, "width": 10}
)
video_classifier = pipeline(
"video-classification", model=small_model, feature_extractor=small_feature_extractor, frame_sampling_rate=4
......
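C408 also covers `dict()` with keyword arguments: each keyword maps to a string key in the literal, which makes keys like `"shortest_edge"` greppable as strings. A quick equivalence check:

```python
size = {"shortest_edge": 10}             # instead of dict(shortest_edge=10)
crop_size = {"height": 10, "width": 10}  # instead of dict(height=10, width=10)
assert size == dict(shortest_edge=10)
assert crop_size == dict(height=10, width=10)
```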
@@ -56,9 +56,9 @@ class CheckDummiesTester(unittest.TestCase):
"pytorch_utils.py",
"models/bert/configuration_bert.py",
]
-expected_deps = set(os.path.join(transformers_path, f) for f in expected_deps)
+expected_deps = {os.path.join(transformers_path, f) for f in expected_deps}
repo = Repo(git_repo_path)
with checkout_commit(repo, GIT_TEST_SHA):
deps = get_module_dependencies(bert_module)
-deps = set(os.path.expanduser(f) for f in deps)
+deps = {os.path.expanduser(f) for f in deps}
self.assertEqual(deps, expected_deps)
@@ -362,12 +362,12 @@ def main():
):
# Some have all caps in their config, some don't.
label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
-if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
+if sorted(label_name_to_id.keys()) == sorted(label_list):
label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
-f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
+f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
"\nIgnoring the model labels as a result.",
)
......
@@ -1643,7 +1643,7 @@ class ModelTesterMixin:
params = dict(model_reloaded.named_parameters())
params.update(dict(model_reloaded.named_buffers()))
# param_names = set(k[len(prefix) :] if k.startswith(prefix) else k for k in params.keys())
-param_names = set(k[len(prefix) :] if k.startswith(prefix) else k for k in params.keys())
+param_names = {k[len(prefix) :] if k.startswith(prefix) else k for k in params.keys()}
missing_keys = set(infos["missing_keys"])
@@ -1770,8 +1770,8 @@ class ModelTesterMixin:
def _postprocessing_to_ignore_test_cases(self, tf_outputs, pt_outputs, model_class):
"""For temporarily ignoring some failed test cases (issues to be fixed)"""
-tf_keys = set([k for k, v in tf_outputs.items() if v is not None])
-pt_keys = set([k for k, v in pt_outputs.items() if v is not None])
+tf_keys = {k for k, v in tf_outputs.items() if v is not None}
+pt_keys = {k for k, v in pt_outputs.items() if v is not None}
key_differences = tf_keys.symmetric_difference(pt_keys)
@@ -2995,7 +2995,7 @@ class ModelUtilsTest(TestCasePlus):
index = json.loads(f.read())
all_shards = set(index["weight_map"].values())
-shards_found = set(f for f in os.listdir(tmp_dir) if f.endswith(".bin"))
+shards_found = {f for f in os.listdir(tmp_dir) if f.endswith(".bin")}
self.assertSetEqual(all_shards, shards_found)
# Finally, check the model can be reloaded
......
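The `tf_keys`/`pt_keys` and `shards_found` changes are C403/C401: a list comprehension or generator fed straight into `set()` becomes a set comprehension, filling the set directly instead of via a throwaway intermediate. Sketch:

```python
tf_outputs = {"logits": 1.0, "hidden_states": None}

# Before: set([k for k, v in tf_outputs.items() if v is not None])
tf_keys = {k for k, v in tf_outputs.items() if v is not None}
assert tf_keys == {"logits"}
```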
@@ -1099,7 +1099,7 @@ class FlaxModelTesterMixin:
index = json.loads(f.read())
all_shards = set(index["weight_map"].values())
-shards_found = set(f for f in os.listdir(tmp_dir) if f.endswith(".msgpack"))
+shards_found = {f for f in os.listdir(tmp_dir) if f.endswith(".msgpack")}
self.assertSetEqual(all_shards, shards_found)
# Finally, check the model can be reloaded
......
@@ -398,7 +398,7 @@ class TFModelTesterMixin:
def test_keras_save_load(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-tf_main_layer_classes = set(
+tf_main_layer_classes = {
module_member
for model_class in self.all_model_classes
for module in (import_module(model_class.__module__),)
@@ -410,7 +410,7 @@ class TFModelTesterMixin:
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
-)
+}
for main_layer_class in tf_main_layer_classes:
# T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
if "T5" in main_layer_class.__name__:
@@ -498,8 +498,8 @@ class TFModelTesterMixin:
def _postprocessing_to_ignore_test_cases(self, tf_outputs, pt_outputs, model_class):
"""For temporarily ignoring some failed test cases (issues to be fixed)"""
-tf_keys = set([k for k, v in tf_outputs.items() if v is not None])
-pt_keys = set([k for k, v in pt_outputs.items() if v is not None])
+tf_keys = {k for k, v in tf_outputs.items() if v is not None}
+pt_keys = {k for k, v in pt_outputs.items() if v is not None}
key_differences = tf_keys.symmetric_difference(pt_keys)
@@ -1455,7 +1455,7 @@ class TFModelTesterMixin:
continue
# The number of elements in the loss should be the same as the number of elements in the label
prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
-added_label_names = sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)
+added_label_names = sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)
if not added_label_names:
continue # This test is only for models with easily-separable labels
added_label = prepared_for_class[added_label_names[0]]
@@ -1713,7 +1713,7 @@ class TFModelTesterMixin:
}
signature = inspect.signature(model.call)
-if set(head_masking.keys()) < set([*signature.parameters.keys()]):
+if set(head_masking.keys()) < {*signature.parameters.keys()}:
continue
for attn_name, (name, mask) in zip(attention_names, head_masking.items()):
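A subtler case in the last hunk above: `set([*signature.parameters.keys()])` builds a list literal just to feed it to `set()`; the star-unpacking works directly in a set display, `{*...}`. A sketch against a hypothetical `call` signature:

```python
import inspect

def call(input_ids=None, attention_mask=None, head_mask=None):  # hypothetical
    pass

params = {*inspect.signature(call).parameters.keys()}  # instead of set([*...])
assert {"head_mask"} < params  # strict-subset check, as in the hunk
```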
@@ -2274,7 +2274,7 @@ class UtilsFunctionsTest(unittest.TestCase):
index = json.loads(f.read())
all_shards = set(index["weight_map"].values())
-shards_found = set(f for f in os.listdir(tmp_dir) if f.endswith(".h5"))
+shards_found = {f for f in os.listdir(tmp_dir) if f.endswith(".h5")}
self.assertSetEqual(all_shards, shards_found)
# Finally, check the model can be reloaded
......
@@ -417,7 +417,7 @@ class SequenceFeatureExtractionTestMixin(FeatureExtractionSavingTestMixin):
)
self.assertIn("attention_mask", processed_pad)
self.assertListEqual(
-list(processed_pad.attention_mask.shape), list((processed_pad[input_name].shape[0], max_length))
+list(processed_pad.attention_mask.shape), [processed_pad[input_name].shape[0], max_length]
)
self.assertListEqual(
processed_pad.attention_mask[:, :max_length].sum(-1).tolist(), [max_length for x in speech_inputs]
......
@@ -1148,7 +1148,13 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
# won't be the same since the training dataloader is shuffled).
with tempfile.TemporaryDirectory() as tmpdir:
-kwargs = dict(output_dir=tmpdir, train_len=128, save_steps=5, learning_rate=0.1, logging_steps=5)
+kwargs = {
+"output_dir": tmpdir,
+"train_len": 128,
+"save_steps": 5,
+"learning_rate": 0.1,
+"logging_steps": 5,
+}
trainer = get_regression_trainer(**kwargs)
trainer.train()
(a, b) = trainer.model.a.item(), trainer.model.b.item()
@@ -1181,7 +1187,13 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
# With a regular model that is not a PreTrainedModel
with tempfile.TemporaryDirectory() as tmpdir:
-kwargs = dict(output_dir=tmpdir, train_len=128, save_steps=5, learning_rate=0.1, pretrained=False)
+kwargs = {
+"output_dir": tmpdir,
+"train_len": 128,
+"save_steps": 5,
+"learning_rate": 0.1,
+"pretrained": False,
+}
trainer = get_regression_trainer(**kwargs)
trainer.train()
......
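The two `kwargs` hunks are C408 again; once the keyword-argument `dict()` call becomes a literal, the formatter splits it one key per line past the line-length limit, which is why a one-line change grows to seven lines here. Sketch:

```python
import tempfile

with tempfile.TemporaryDirectory() as tmpdir:
    kwargs = {
        "output_dir": tmpdir,
        "train_len": 128,
        "save_steps": 5,
        "learning_rate": 0.1,
        "logging_steps": 5,
    }  # instead of dict(output_dir=tmpdir, train_len=128, ...)
    assert kwargs["save_steps"] == 5
```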
@@ -108,8 +108,8 @@ class TrainerCallbackTest(unittest.TestCase):
self.assertEqual(len(cbs1), len(cbs2))
# Order doesn't matter
-cbs1 = list(sorted(cbs1, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__))
-cbs2 = list(sorted(cbs2, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__))
+cbs1 = sorted(cbs1, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__)
+cbs2 = sorted(cbs2, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__)
for cb1, cb2 in zip(cbs1, cbs2):
if isinstance(cb1, type) and isinstance(cb2, type):