Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
5e8c8eb5
Unverified
Commit
5e8c8eb5
authored
Feb 22, 2023
by
Aaron Gokaslan
Committed by
GitHub
Feb 22, 2023
Browse files
Apply ruff flake8-comprehensions (#21694)
parent
df06fb1f
Changes
230
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
64 additions
and
54 deletions
+64
-54
tests/models/segformer/test_modeling_tf_segformer.py
tests/models/segformer/test_modeling_tf_segformer.py
+1
-3
tests/models/speecht5/test_feature_extraction_speecht5.py
tests/models/speecht5/test_feature_extraction_speecht5.py
+1
-1
tests/models/t5/test_tokenization_t5.py
tests/models/t5/test_tokenization_t5.py
+2
-2
tests/models/transfo_xl/test_modeling_transfo_xl.py
tests/models/transfo_xl/test_modeling_transfo_xl.py
+1
-1
tests/models/tvlt/test_modeling_tvlt.py
tests/models/tvlt/test_modeling_tvlt.py
+2
-2
tests/models/vit_mae/test_modeling_tf_vit_mae.py
tests/models/vit_mae/test_modeling_tf_vit_mae.py
+2
-2
tests/models/wav2vec2/test_tokenization_wav2vec2.py
tests/models/wav2vec2/test_tokenization_wav2vec2.py
+2
-2
tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
...odels/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
+3
-3
tests/models/xlnet/test_modeling_tf_xlnet.py
tests/models/xlnet/test_modeling_tf_xlnet.py
+1
-1
tests/pipelines/test_pipelines_common.py
tests/pipelines/test_pipelines_common.py
+8
-8
tests/pipelines/test_pipelines_fill_mask.py
tests/pipelines/test_pipelines_fill_mask.py
+7
-7
tests/pipelines/test_pipelines_video_classification.py
tests/pipelines/test_pipelines_video_classification.py
+1
-1
tests/repo_utils/test_tests_fetcher.py
tests/repo_utils/test_tests_fetcher.py
+2
-2
tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
...s/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
+2
-2
tests/test_modeling_common.py
tests/test_modeling_common.py
+4
-4
tests/test_modeling_flax_common.py
tests/test_modeling_flax_common.py
+1
-1
tests/test_modeling_tf_common.py
tests/test_modeling_tf_common.py
+7
-7
tests/test_sequence_feature_extraction_common.py
tests/test_sequence_feature_extraction_common.py
+1
-1
tests/trainer/test_trainer.py
tests/trainer/test_trainer.py
+14
-2
tests/trainer/test_trainer_callback.py
tests/trainer/test_trainer_callback.py
+2
-2
No files found.
tests/models/segformer/test_modeling_tf_segformer.py
View file @
5e8c8eb5
...
...
@@ -362,9 +362,7 @@ class TFSegformerModelTest(TFModelTesterMixin, unittest.TestCase):
_
,
prepared_for_class
=
self
.
model_tester
.
prepare_config_and_inputs_for_keras_fit
(
for_segmentation
=
for_segmentation
)
added_label
=
prepared_for_class
[
sorted
(
list
(
prepared_for_class
.
keys
()
-
inputs_dict
.
keys
()),
reverse
=
True
)[
0
]
]
added_label
=
prepared_for_class
[
sorted
(
prepared_for_class
.
keys
()
-
inputs_dict
.
keys
(),
reverse
=
True
)[
0
]]
loss_size
=
tf
.
size
(
added_label
)
# Test that model correctly compute the loss with kwargs
...
...
tests/models/speecht5/test_feature_extraction_speecht5.py
View file @
5e8c8eb5
...
...
@@ -372,7 +372,7 @@ class SpeechT5FeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest
)
self
.
assertIn
(
"attention_mask"
,
processed_pad
)
self
.
assertListEqual
(
list
(
processed_pad
.
attention_mask
.
shape
),
list
((
processed_pad
[
input_name
].
shape
[
0
],
max_length
))
list
(
processed_pad
.
attention_mask
.
shape
),
[
processed_pad
[
input_name
].
shape
[
0
],
max_length
]
)
self
.
assertListEqual
(
processed_pad
.
attention_mask
[:,
:
max_length
].
sum
(
-
1
).
tolist
(),
[
max_length
for
x
in
speech_inputs
]
...
...
tests/models/t5/test_tokenization_t5.py
View file @
5e8c8eb5
...
...
@@ -387,7 +387,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def
test_get_sentinel_token_ids
(
self
):
tokenizer
=
T5Tokenizer
(
SAMPLE_VOCAB
,
extra_ids
=
10
)
self
.
assertListEqual
(
sorted
(
tokenizer
.
get_sentinel_token_ids
()),
sorted
(
[
i
for
i
in
range
(
1000
,
1010
)
]
))
self
.
assertListEqual
(
sorted
(
tokenizer
.
get_sentinel_token_ids
()),
sorted
(
range
(
1000
,
1010
)))
def
test_get_sentinel_tokens_for_fasttokenizer
(
self
):
tokenizer
=
T5TokenizerFast
(
SAMPLE_VOCAB
,
extra_ids
=
10
)
...
...
@@ -398,4 +398,4 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def
test_get_sentinel_token_ids_for_fasttokenizer
(
self
):
tokenizer
=
T5TokenizerFast
(
SAMPLE_VOCAB
,
extra_ids
=
10
)
self
.
assertListEqual
(
sorted
(
tokenizer
.
get_sentinel_token_ids
()),
sorted
(
[
i
for
i
in
range
(
1000
,
1010
)
]
))
self
.
assertListEqual
(
sorted
(
tokenizer
.
get_sentinel_token_ids
()),
sorted
(
range
(
1000
,
1010
)))
tests/models/transfo_xl/test_modeling_transfo_xl.py
View file @
5e8c8eb5
...
...
@@ -347,7 +347,7 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
# Retrieve the cutoffs and copy them
copied_cutoffs
=
copy
.
copy
(
model_embed
.
cutoffs
)
test_layers
=
[
x
for
x
in
range
(
config
.
div_val
)
]
test_layers
=
list
(
range
(
config
.
div_val
)
)
for
layer
in
test_layers
:
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
model_embed
=
model
.
resize_token_embeddings
(
model_vocab_size
+
10
,
layer
)
...
...
tests/models/tvlt/test_modeling_tvlt.py
View file @
5e8c8eb5
...
...
@@ -581,7 +581,7 @@ class TvltModelIntegrationTest(unittest.TestCase):
audio
=
prepare_audio
()
video_inputs
=
image_processor
(
video
,
return_tensors
=
"pt"
).
to
(
torch_device
)
audio_inputs
=
audio_feature_extractor
(
audio
,
return_tensors
=
"pt"
).
to
(
torch_device
)
inputs
=
dict
()
inputs
=
{}
inputs
.
update
(
video_inputs
)
inputs
.
update
(
audio_inputs
)
...
...
@@ -606,7 +606,7 @@ class TvltModelIntegrationTest(unittest.TestCase):
video_mixed_inputs
=
image_processor
(
video_mixed
,
is_mixed
=
True
,
return_tensors
=
"pt"
).
to
(
torch_device
)
audio_inputs
=
audio_feature_extractor
(
audio
,
return_tensors
=
"pt"
,
mask_audio
=
True
).
to
(
torch_device
)
labels
=
torch
.
tensor
([[
0.0
]],
device
=
torch_device
)
inputs
=
dict
()
inputs
=
{}
inputs
.
update
(
video_inputs
)
inputs
.
update
(
video_mixed_inputs
)
inputs
.
update
(
audio_inputs
)
...
...
tests/models/vit_mae/test_modeling_tf_vit_mae.py
View file @
5e8c8eb5
...
...
@@ -333,7 +333,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
tf_main_layer_classes
=
set
(
tf_main_layer_classes
=
{
module_member
for
model_class
in
self
.
all_model_classes
for
module
in
(
import_module
(
model_class
.
__module__
),)
...
...
@@ -345,7 +345,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase):
if
isinstance
(
module_member
,
type
)
and
tf
.
keras
.
layers
.
Layer
in
module_member
.
__bases__
and
getattr
(
module_member
,
"_keras_serializable"
,
False
)
)
}
num_patches
=
int
((
config
.
image_size
//
config
.
patch_size
)
**
2
)
noise
=
np
.
random
.
uniform
(
size
=
(
self
.
model_tester
.
batch_size
,
num_patches
))
...
...
tests/models/wav2vec2/test_tokenization_wav2vec2.py
View file @
5e8c8eb5
...
...
@@ -231,7 +231,7 @@ class Wav2Vec2TokenizerTest(unittest.TestCase):
tokenizer_files
=
tokenizer
.
save_pretrained
(
tmpdirname2
)
self
.
assertSequenceEqual
(
sorted
(
tuple
(
VOCAB_FILES_NAMES
.
values
())
+
(
"special_tokens_map.json"
,
"added_tokens.json"
)),
sorted
(
tuple
(
x
.
split
(
os
.
path
.
sep
)[
-
1
]
for
x
in
tokenizer_files
)
)
,
sorted
(
x
.
split
(
os
.
path
.
sep
)[
-
1
]
for
x
in
tokenizer_files
),
)
# Checks everything loads correctly in the same way
...
...
@@ -456,7 +456,7 @@ class Wav2Vec2CTCTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
def
test_special_characters_in_vocab
(
self
):
sent
=
"ʈʰ æ æ̃ ˧ kʰ"
vocab_dict
=
{
k
:
v
for
v
,
k
in
enumerate
(
{
phoneme
for
phoneme
in
sent
.
split
()
}
)}
vocab_dict
=
{
k
:
v
for
v
,
k
in
enumerate
(
set
(
sent
.
split
()
)
)}
vocab_file
=
os
.
path
.
join
(
self
.
tmpdirname
,
"vocab_special.json"
)
with
open
(
vocab_file
,
"w"
)
as
f
:
...
...
tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
View file @
5e8c8eb5
...
...
@@ -215,7 +215,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
with
get_context
(
pool_context
).
Pool
()
as
pool
:
decoded_processor
=
processor
.
batch_decode
(
logits
,
pool
)
logits_list
=
[
array
for
array
in
logits
]
logits_list
=
list
(
logits
)
with
get_context
(
"fork"
).
Pool
()
as
p
:
decoded_beams
=
decoder
.
decode_beams_batch
(
p
,
logits_list
)
...
...
@@ -252,7 +252,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
)
decoded_processor
=
decoded_processor_out
.
text
logits_list
=
[
array
for
array
in
logits
]
logits_list
=
list
(
logits
)
with
get_context
(
"fork"
).
Pool
()
as
pool
:
decoded_decoder_out
=
decoder
.
decode_beams_batch
(
...
...
@@ -299,7 +299,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
)
decoded_processor
=
decoded_processor_out
.
text
logits_list
=
[
array
for
array
in
logits
]
logits_list
=
list
(
logits
)
decoder
.
reset_params
(
alpha
=
alpha
,
beta
=
beta
,
...
...
tests/models/xlnet/test_modeling_tf_xlnet.py
View file @
5e8c8eb5
...
...
@@ -400,7 +400,7 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
# The number of elements in the loss should be the same as the number of elements in the label
prepared_for_class
=
self
.
_prepare_for_class
(
inputs_dict
.
copy
(),
model_class
,
return_labels
=
True
)
added_label
=
prepared_for_class
[
sorted
(
list
(
prepared_for_class
.
keys
()
-
inputs_dict
.
keys
()
)
,
reverse
=
True
)[
0
]
sorted
(
prepared_for_class
.
keys
()
-
inputs_dict
.
keys
(),
reverse
=
True
)[
0
]
]
expected_loss_size
=
added_label
.
shape
.
as_list
()[:
1
]
...
...
tests/pipelines/test_pipelines_common.py
View file @
5e8c8eb5
...
...
@@ -606,7 +606,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset
=
PipelineIterator
(
dummy_dataset
,
add
,
{
"extra"
:
2
})
self
.
assertEqual
(
len
(
dataset
),
4
)
outputs
=
[
item
for
item
in
dataset
]
outputs
=
list
(
dataset
)
self
.
assertEqual
(
outputs
,
[
2
,
3
,
4
,
5
])
@
require_torch
...
...
@@ -624,7 +624,7 @@ class PipelineUtilsTest(unittest.TestCase):
with
self
.
assertRaises
(
TypeError
):
len
(
dataset
)
outputs
=
[
item
for
item
in
dataset
]
outputs
=
list
(
dataset
)
self
.
assertEqual
(
outputs
,
[
2
,
3
,
4
,
5
])
@
require_torch
...
...
@@ -638,7 +638,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset
=
PipelineIterator
(
dummy_dataset
,
add
,
{
"extra"
:
2
},
loader_batch_size
=
3
)
outputs
=
[
item
for
item
in
dataset
]
outputs
=
list
(
dataset
)
self
.
assertEqual
(
outputs
,
[{
"id"
:
2
},
{
"id"
:
3
},
{
"id"
:
4
},
{
"id"
:
5
}])
@
require_torch
...
...
@@ -654,7 +654,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset
=
PipelineIterator
(
dummy_dataset
,
add
,
{
"extra"
:
2
},
loader_batch_size
=
3
)
outputs
=
[
item
for
item
in
dataset
]
outputs
=
list
(
dataset
)
self
.
assertEqual
(
nested_simplify
(
outputs
),
[{
"id"
:
[[
12
,
22
]]},
{
"id"
:
[[
2
,
3
]]},
{
"id"
:
[[
2
,
4
]]},
{
"id"
:
[[
5
]]}]
)
...
...
@@ -671,7 +671,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset
=
PipelineChunkIterator
(
dataset
,
preprocess_chunk
,
{},
loader_batch_size
=
3
)
outputs
=
[
item
for
item
in
dataset
]
outputs
=
list
(
dataset
)
self
.
assertEqual
(
outputs
,
[
0
,
1
,
0
,
1
,
2
])
...
...
@@ -692,7 +692,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset
=
PipelinePackIterator
(
dataset
,
pack
,
{})
outputs
=
[
item
for
item
in
dataset
]
outputs
=
list
(
dataset
)
self
.
assertEqual
(
outputs
,
[
...
...
@@ -719,7 +719,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset
=
PipelinePackIterator
(
dummy_dataset
,
add
,
{
"extra"
:
2
},
loader_batch_size
=
3
)
outputs
=
[
item
for
item
in
dataset
]
outputs
=
list
(
dataset
)
self
.
assertEqual
(
outputs
,
[[{
"id"
:
2
},
{
"id"
:
3
}],
[{
"id"
:
4
},
{
"id"
:
5
}]])
# is_false Across batch
...
...
@@ -730,7 +730,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset
=
PipelinePackIterator
(
dummy_dataset
,
add
,
{
"extra"
:
2
},
loader_batch_size
=
3
)
outputs
=
[
item
for
item
in
dataset
]
outputs
=
list
(
dataset
)
self
.
assertEqual
(
outputs
,
[[{
"id"
:
2
},
{
"id"
:
3
},
{
"id"
:
4
},
{
"id"
:
5
}]])
@
slow
...
...
tests/pipelines/test_pipelines_fill_mask.py
View file @
5e8c8eb5
...
...
@@ -281,7 +281,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
def
run_test_targets
(
self
,
model
,
tokenizer
):
vocab
=
tokenizer
.
get_vocab
()
targets
=
list
(
sorted
(
vocab
.
keys
())
)
[:
2
]
targets
=
sorted
(
vocab
.
keys
())[:
2
]
# Pipeline argument
fill_masker
=
FillMaskPipeline
(
model
=
model
,
tokenizer
=
tokenizer
,
targets
=
targets
)
outputs
=
fill_masker
(
f
"This is a
{
tokenizer
.
mask_token
}
"
)
...
...
@@ -293,8 +293,8 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
],
)
target_ids
=
{
vocab
[
el
]
for
el
in
targets
}
self
.
assertEqual
(
set
(
el
[
"token"
]
for
el
in
outputs
)
,
target_ids
)
self
.
assertEqual
(
set
(
el
[
"token_str"
]
for
el
in
outputs
)
,
set
(
targets
))
self
.
assertEqual
(
{
el
[
"token"
]
for
el
in
outputs
}
,
target_ids
)
self
.
assertEqual
(
{
el
[
"token_str"
]
for
el
in
outputs
}
,
set
(
targets
))
# Call argument
fill_masker
=
FillMaskPipeline
(
model
=
model
,
tokenizer
=
tokenizer
)
...
...
@@ -307,8 +307,8 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
],
)
target_ids
=
{
vocab
[
el
]
for
el
in
targets
}
self
.
assertEqual
(
set
(
el
[
"token"
]
for
el
in
outputs
)
,
target_ids
)
self
.
assertEqual
(
set
(
el
[
"token_str"
]
for
el
in
outputs
)
,
set
(
targets
))
self
.
assertEqual
(
{
el
[
"token"
]
for
el
in
outputs
}
,
target_ids
)
self
.
assertEqual
(
{
el
[
"token_str"
]
for
el
in
outputs
}
,
set
(
targets
))
# Score equivalence
outputs
=
fill_masker
(
f
"This is a
{
tokenizer
.
mask_token
}
"
,
targets
=
targets
)
...
...
@@ -354,7 +354,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
fill_masker
=
FillMaskPipeline
(
model
=
model
,
tokenizer
=
tokenizer
)
# top_k=2, ntargets=3
targets
=
list
(
sorted
(
vocab
.
keys
())
)
[:
3
]
targets
=
sorted
(
vocab
.
keys
())[:
3
]
outputs
=
fill_masker
(
f
"This is a
{
tokenizer
.
mask_token
}
"
,
top_k
=
2
,
targets
=
targets
)
# If we use the most probably targets, and filter differently, we should still
...
...
@@ -369,7 +369,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
fill_masker
=
FillMaskPipeline
(
model
=
model
,
tokenizer
=
tokenizer
)
vocab
=
tokenizer
.
get_vocab
()
# String duplicates + id duplicates
targets
=
list
(
sorted
(
vocab
.
keys
())
)
[:
3
]
targets
=
sorted
(
vocab
.
keys
())[:
3
]
targets
=
[
targets
[
0
],
targets
[
1
],
targets
[
0
],
targets
[
2
],
targets
[
1
]]
outputs
=
fill_masker
(
f
"My name is
{
tokenizer
.
mask_token
}
"
,
targets
=
targets
,
top_k
=
10
)
...
...
tests/pipelines/test_pipelines_video_classification.py
View file @
5e8c8eb5
...
...
@@ -63,7 +63,7 @@ class VideoClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
def
test_small_model_pt
(
self
):
small_model
=
"hf-internal-testing/tiny-random-VideoMAEForVideoClassification"
small_feature_extractor
=
VideoMAEFeatureExtractor
(
size
=
dict
(
shortest_edge
=
10
)
,
crop_size
=
dict
(
height
=
10
,
width
=
10
)
size
=
{
"
shortest_edge
"
:
10
}
,
crop_size
=
{
"
height
"
:
10
,
"
width
"
:
10
}
)
video_classifier
=
pipeline
(
"video-classification"
,
model
=
small_model
,
feature_extractor
=
small_feature_extractor
,
frame_sampling_rate
=
4
...
...
tests/repo_utils/test_tests_fetcher.py
View file @
5e8c8eb5
...
...
@@ -56,9 +56,9 @@ class CheckDummiesTester(unittest.TestCase):
"pytorch_utils.py"
,
"models/bert/configuration_bert.py"
,
]
expected_deps
=
set
(
os
.
path
.
join
(
transformers_path
,
f
)
for
f
in
expected_deps
)
expected_deps
=
{
os
.
path
.
join
(
transformers_path
,
f
)
for
f
in
expected_deps
}
repo
=
Repo
(
git_repo_path
)
with
checkout_commit
(
repo
,
GIT_TEST_SHA
):
deps
=
get_module_dependencies
(
bert_module
)
deps
=
set
(
os
.
path
.
expanduser
(
f
)
for
f
in
deps
)
deps
=
{
os
.
path
.
expanduser
(
f
)
for
f
in
deps
}
self
.
assertEqual
(
deps
,
expected_deps
)
tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
View file @
5e8c8eb5
...
...
@@ -362,12 +362,12 @@ def main():
):
# Some have all caps in their config, some don't.
label_name_to_id
=
{
k
.
lower
():
v
for
k
,
v
in
model
.
config
.
label2id
.
items
()}
if
list
(
sorted
(
label_name_to_id
.
keys
())
)
==
list
(
sorted
(
label_list
)
)
:
if
sorted
(
label_name_to_id
.
keys
())
==
sorted
(
label_list
):
label_to_id
=
{
i
:
int
(
label_name_to_id
[
label_list
[
i
]])
for
i
in
range
(
num_labels
)}
else
:
logger
.
warning
(
"Your model seems to have been trained with labels, but they don't match the dataset: "
,
f
"model labels:
{
list
(
sorted
(
label_name_to_id
.
keys
())
)
}
, dataset labels:
{
list
(
sorted
(
label_list
)
)
}
."
f
"model labels:
{
sorted
(
label_name_to_id
.
keys
())
}
, dataset labels:
{
sorted
(
label_list
)
}
."
"
\n
Ignoring the model labels as a result."
,
)
elif
data_args
.
task_name
is
None
and
not
is_regression
:
...
...
tests/test_modeling_common.py
View file @
5e8c8eb5
...
...
@@ -1643,7 +1643,7 @@ class ModelTesterMixin:
params
=
dict
(
model_reloaded
.
named_parameters
())
params
.
update
(
dict
(
model_reloaded
.
named_buffers
()))
# param_names = set(k[len(prefix) :] if k.startswith(prefix) else k for k in params.keys())
param_names
=
set
(
k
[
len
(
prefix
)
:]
if
k
.
startswith
(
prefix
)
else
k
for
k
in
params
.
keys
()
)
param_names
=
{
k
[
len
(
prefix
)
:]
if
k
.
startswith
(
prefix
)
else
k
for
k
in
params
.
keys
()
}
missing_keys
=
set
(
infos
[
"missing_keys"
])
...
...
@@ -1770,8 +1770,8 @@ class ModelTesterMixin:
def
_postprocessing_to_ignore_test_cases
(
self
,
tf_outputs
,
pt_outputs
,
model_class
):
"""For temporarily ignoring some failed test cases (issues to be fixed)"""
tf_keys
=
set
([
k
for
k
,
v
in
tf_outputs
.
items
()
if
v
is
not
None
])
pt_keys
=
set
([
k
for
k
,
v
in
pt_outputs
.
items
()
if
v
is
not
None
])
tf_keys
=
{
k
for
k
,
v
in
tf_outputs
.
items
()
if
v
is
not
None
}
pt_keys
=
{
k
for
k
,
v
in
pt_outputs
.
items
()
if
v
is
not
None
}
key_differences
=
tf_keys
.
symmetric_difference
(
pt_keys
)
...
...
@@ -2995,7 +2995,7 @@ class ModelUtilsTest(TestCasePlus):
index
=
json
.
loads
(
f
.
read
())
all_shards
=
set
(
index
[
"weight_map"
].
values
())
shards_found
=
set
(
f
for
f
in
os
.
listdir
(
tmp_dir
)
if
f
.
endswith
(
".bin"
)
)
shards_found
=
{
f
for
f
in
os
.
listdir
(
tmp_dir
)
if
f
.
endswith
(
".bin"
)
}
self
.
assertSetEqual
(
all_shards
,
shards_found
)
# Finally, check the model can be reloaded
...
...
tests/test_modeling_flax_common.py
View file @
5e8c8eb5
...
...
@@ -1099,7 +1099,7 @@ class FlaxModelTesterMixin:
index
=
json
.
loads
(
f
.
read
())
all_shards
=
set
(
index
[
"weight_map"
].
values
())
shards_found
=
set
(
f
for
f
in
os
.
listdir
(
tmp_dir
)
if
f
.
endswith
(
".msgpack"
)
)
shards_found
=
{
f
for
f
in
os
.
listdir
(
tmp_dir
)
if
f
.
endswith
(
".msgpack"
)
}
self
.
assertSetEqual
(
all_shards
,
shards_found
)
# Finally, check the model can be reloaded
...
...
tests/test_modeling_tf_common.py
View file @
5e8c8eb5
...
...
@@ -398,7 +398,7 @@ class TFModelTesterMixin:
def
test_keras_save_load
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
tf_main_layer_classes
=
set
(
tf_main_layer_classes
=
{
module_member
for
model_class
in
self
.
all_model_classes
for
module
in
(
import_module
(
model_class
.
__module__
),)
...
...
@@ -410,7 +410,7 @@ class TFModelTesterMixin:
if
isinstance
(
module_member
,
type
)
and
tf
.
keras
.
layers
.
Layer
in
module_member
.
__bases__
and
getattr
(
module_member
,
"_keras_serializable"
,
False
)
)
}
for
main_layer_class
in
tf_main_layer_classes
:
# T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
if
"T5"
in
main_layer_class
.
__name__
:
...
...
@@ -498,8 +498,8 @@ class TFModelTesterMixin:
def
_postprocessing_to_ignore_test_cases
(
self
,
tf_outputs
,
pt_outputs
,
model_class
):
"""For temporarily ignoring some failed test cases (issues to be fixed)"""
tf_keys
=
set
([
k
for
k
,
v
in
tf_outputs
.
items
()
if
v
is
not
None
])
pt_keys
=
set
([
k
for
k
,
v
in
pt_outputs
.
items
()
if
v
is
not
None
])
tf_keys
=
{
k
for
k
,
v
in
tf_outputs
.
items
()
if
v
is
not
None
}
pt_keys
=
{
k
for
k
,
v
in
pt_outputs
.
items
()
if
v
is
not
None
}
key_differences
=
tf_keys
.
symmetric_difference
(
pt_keys
)
...
...
@@ -1455,7 +1455,7 @@ class TFModelTesterMixin:
continue
# The number of elements in the loss should be the same as the number of elements in the label
prepared_for_class
=
self
.
_prepare_for_class
(
inputs_dict
.
copy
(),
model_class
,
return_labels
=
True
)
added_label_names
=
sorted
(
list
(
prepared_for_class
.
keys
()
-
inputs_dict
.
keys
()
)
,
reverse
=
True
)
added_label_names
=
sorted
(
prepared_for_class
.
keys
()
-
inputs_dict
.
keys
(),
reverse
=
True
)
if
not
added_label_names
:
continue
# This test is only for models with easily-separable labels
added_label
=
prepared_for_class
[
added_label_names
[
0
]]
...
...
@@ -1713,7 +1713,7 @@ class TFModelTesterMixin:
}
signature
=
inspect
.
signature
(
model
.
call
)
if
set
(
head_masking
.
keys
())
<
set
([
*
signature
.
parameters
.
keys
()
])
:
if
set
(
head_masking
.
keys
())
<
{
*
signature
.
parameters
.
keys
()
}
:
continue
for
attn_name
,
(
name
,
mask
)
in
zip
(
attention_names
,
head_masking
.
items
()):
...
...
@@ -2274,7 +2274,7 @@ class UtilsFunctionsTest(unittest.TestCase):
index
=
json
.
loads
(
f
.
read
())
all_shards
=
set
(
index
[
"weight_map"
].
values
())
shards_found
=
set
(
f
for
f
in
os
.
listdir
(
tmp_dir
)
if
f
.
endswith
(
".h5"
)
)
shards_found
=
{
f
for
f
in
os
.
listdir
(
tmp_dir
)
if
f
.
endswith
(
".h5"
)
}
self
.
assertSetEqual
(
all_shards
,
shards_found
)
# Finally, check the model can be reloaded
...
...
tests/test_sequence_feature_extraction_common.py
View file @
5e8c8eb5
...
...
@@ -417,7 +417,7 @@ class SequenceFeatureExtractionTestMixin(FeatureExtractionSavingTestMixin):
)
self
.
assertIn
(
"attention_mask"
,
processed_pad
)
self
.
assertListEqual
(
list
(
processed_pad
.
attention_mask
.
shape
),
list
((
processed_pad
[
input_name
].
shape
[
0
],
max_length
))
list
(
processed_pad
.
attention_mask
.
shape
),
[
processed_pad
[
input_name
].
shape
[
0
],
max_length
]
)
self
.
assertListEqual
(
processed_pad
.
attention_mask
[:,
:
max_length
].
sum
(
-
1
).
tolist
(),
[
max_length
for
x
in
speech_inputs
]
...
...
tests/trainer/test_trainer.py
View file @
5e8c8eb5
...
...
@@ -1148,7 +1148,13 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
# won't be the same since the training dataloader is shuffled).
with
tempfile
.
TemporaryDirectory
()
as
tmpdir
:
kwargs
=
dict
(
output_dir
=
tmpdir
,
train_len
=
128
,
save_steps
=
5
,
learning_rate
=
0.1
,
logging_steps
=
5
)
kwargs
=
{
"output_dir"
:
tmpdir
,
"train_len"
:
128
,
"save_steps"
:
5
,
"learning_rate"
:
0.1
,
"logging_steps"
:
5
,
}
trainer
=
get_regression_trainer
(
**
kwargs
)
trainer
.
train
()
(
a
,
b
)
=
trainer
.
model
.
a
.
item
(),
trainer
.
model
.
b
.
item
()
...
...
@@ -1181,7 +1187,13 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
# With a regular model that is not a PreTrainedModel
with
tempfile
.
TemporaryDirectory
()
as
tmpdir
:
kwargs
=
dict
(
output_dir
=
tmpdir
,
train_len
=
128
,
save_steps
=
5
,
learning_rate
=
0.1
,
pretrained
=
False
)
kwargs
=
{
"output_dir"
:
tmpdir
,
"train_len"
:
128
,
"save_steps"
:
5
,
"learning_rate"
:
0.1
,
"pretrained"
:
False
,
}
trainer
=
get_regression_trainer
(
**
kwargs
)
trainer
.
train
()
...
...
tests/trainer/test_trainer_callback.py
View file @
5e8c8eb5
...
...
@@ -108,8 +108,8 @@ class TrainerCallbackTest(unittest.TestCase):
self
.
assertEqual
(
len
(
cbs1
),
len
(
cbs2
))
# Order doesn't matter
cbs1
=
list
(
sorted
(
cbs1
,
key
=
lambda
cb
:
cb
.
__name__
if
isinstance
(
cb
,
type
)
else
cb
.
__class__
.
__name__
)
)
cbs2
=
list
(
sorted
(
cbs2
,
key
=
lambda
cb
:
cb
.
__name__
if
isinstance
(
cb
,
type
)
else
cb
.
__class__
.
__name__
)
)
cbs1
=
sorted
(
cbs1
,
key
=
lambda
cb
:
cb
.
__name__
if
isinstance
(
cb
,
type
)
else
cb
.
__class__
.
__name__
)
cbs2
=
sorted
(
cbs2
,
key
=
lambda
cb
:
cb
.
__name__
if
isinstance
(
cb
,
type
)
else
cb
.
__class__
.
__name__
)
for
cb1
,
cb2
in
zip
(
cbs1
,
cbs2
):
if
isinstance
(
cb1
,
type
)
and
isinstance
(
cb2
,
type
):
...
...
Prev
1
…
7
8
9
10
11
12
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment