Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
83439012
Unverified
Commit
83439012
authored
Jun 03, 2022
by
Sylvain Gugger
Committed by
GitHub
Jun 03, 2022
Browse files
Fix all offload and MP tests (#17533)
parent
1c57242d
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
9 additions
and
21 deletions
+9
-21
src/transformers/modeling_utils.py
src/transformers/modeling_utils.py
+2
-1
tests/models/opt/test_modeling_opt.py
tests/models/opt/test_modeling_opt.py
+1
-1
tests/models/t5/test_modeling_t5.py
tests/models/t5/test_modeling_t5.py
+2
-0
tests/test_modeling_common.py
tests/test_modeling_common.py
+4
-19
No files found.
src/transformers/modeling_utils.py
View file @
83439012
...
...
@@ -574,7 +574,6 @@ def _load_state_dict_into_meta_model(
for
param_name
,
param
in
state_dict
.
items
():
# First part of the test is always true as loaded_state_dict_keys always contains state_dict keys.
if
param_name
not
in
loaded_state_dict_keys
or
param_name
not
in
expected_keys
:
print
(
param_name
)
continue
if
param_name
.
startswith
(
start_prefix
):
...
...
@@ -2124,6 +2123,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
if
model
.
_no_split_modules
is
None
:
raise
ValueError
(
f
"
{
model
.
__class__
.
__name__
}
does not support `device_map='auto'` yet."
)
no_split_modules
=
model
.
_no_split_modules
# Make sure tied weights are tied before creating the device map.
model
.
tie_weights
()
device_map
=
infer_auto_device_map
(
model
,
no_split_module_classes
=
no_split_modules
,
dtype
=
torch_dtype
,
max_memory
=
max_memory
)
...
...
tests/models/opt/test_modeling_opt.py
View file @
83439012
...
...
@@ -63,7 +63,7 @@ class OPTModelTester:
use_labels
=
False
,
vocab_size
=
99
,
hidden_size
=
16
,
num_hidden_layers
=
2
,
num_hidden_layers
=
5
,
num_attention_heads
=
4
,
intermediate_size
=
4
,
hidden_act
=
"gelu"
,
...
...
tests/models/t5/test_modeling_t5.py
View file @
83439012
...
...
@@ -515,6 +515,8 @@ class T5ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
test_resize_embeddings
=
True
test_model_parallel
=
True
is_encoder_decoder
=
True
# The small T5 model needs higher percentages for CPU/MP tests
model_split_percents
=
[
0.8
,
0.9
]
def
setUp
(
self
):
self
.
model_tester
=
T5ModelTester
(
self
)
...
...
tests/test_modeling_common.py
View file @
83439012
...
...
@@ -153,6 +153,7 @@ class ModelTesterMixin:
test_model_parallel
=
False
is_encoder_decoder
=
False
has_attentions
=
True
model_split_percents
=
[
0.5
,
0.7
,
0.9
]
def
_prepare_for_class
(
self
,
inputs_dict
,
model_class
,
return_labels
=
False
):
inputs_dict
=
copy
.
deepcopy
(
inputs_dict
)
...
...
@@ -2217,12 +2218,7 @@ class ModelTesterMixin:
@
require_accelerate
@
require_torch_gpu
def
test_disk_offload
(
self
):
if
all
([
model_class
.
_no_split_modules
is
None
for
model_class
in
self
.
all_model_classes
]):
return
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
if
isinstance
(
getattr
(
config
,
"num_hidden_layers"
,
None
),
int
)
and
config
.
num_hidden_layers
<
4
:
config
.
num_hidden_layers
=
4
for
model_class
in
self
.
all_model_classes
:
if
model_class
.
_no_split_modules
is
None
:
...
...
@@ -2234,8 +2230,7 @@ class ModelTesterMixin:
base_output
=
model
(
**
inputs_dict
)
model_size
=
compute_module_sizes
(
model
)[
""
]
# We test several splits of sizes to make sure it works.
max_size
=
int
(
0.4
*
model_size
)
max_size
=
int
(
self
.
model_split_percents
[
0
]
*
model_size
)
with
tempfile
.
TemporaryDirectory
()
as
tmp_dir
:
model
.
cpu
().
save_pretrained
(
tmp_dir
)
...
...
@@ -2256,12 +2251,7 @@ class ModelTesterMixin:
@
require_accelerate
@
require_torch_gpu
def
test_cpu_offload
(
self
):
if
all
([
model_class
.
_no_split_modules
is
None
for
model_class
in
self
.
all_model_classes
]):
return
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
if
isinstance
(
getattr
(
config
,
"num_hidden_layers"
,
None
),
int
)
and
config
.
num_hidden_layers
<
4
:
config
.
num_hidden_layers
=
4
for
model_class
in
self
.
all_model_classes
:
if
model_class
.
_no_split_modules
is
None
:
...
...
@@ -2274,7 +2264,7 @@ class ModelTesterMixin:
model_size
=
compute_module_sizes
(
model
)[
""
]
# We test several splits of sizes to make sure it works.
max_gpu_sizes
=
[
int
(
p
*
model_size
)
for
p
in
[
0.5
,
0.7
,
0.9
]
]
max_gpu_sizes
=
[
int
(
p
*
model_size
)
for
p
in
self
.
model_split_percents
]
with
tempfile
.
TemporaryDirectory
()
as
tmp_dir
:
model
.
cpu
().
save_pretrained
(
tmp_dir
)
...
...
@@ -2292,12 +2282,7 @@ class ModelTesterMixin:
@
require_accelerate
@
require_torch_multi_gpu
def
test_model_parallelism
(
self
):
if
all
([
model_class
.
_no_split_modules
is
None
for
model_class
in
self
.
all_model_classes
]):
return
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
if
isinstance
(
getattr
(
config
,
"num_hidden_layers"
,
None
),
int
)
and
config
.
num_hidden_layers
<
4
:
config
.
num_hidden_layers
=
4
for
model_class
in
self
.
all_model_classes
:
if
model_class
.
_no_split_modules
is
None
:
...
...
@@ -2310,7 +2295,7 @@ class ModelTesterMixin:
model_size
=
compute_module_sizes
(
model
)[
""
]
# We test several splits of sizes to make sure it works.
max_gpu_sizes
=
[
int
(
p
*
model_size
)
for
p
in
[
0.5
,
0.7
,
0.9
]
]
max_gpu_sizes
=
[
int
(
p
*
model_size
)
for
p
in
self
.
model_split_percents
]
with
tempfile
.
TemporaryDirectory
()
as
tmp_dir
:
model
.
cpu
().
save_pretrained
(
tmp_dir
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment