Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
83439012
Unverified
Commit
83439012
authored
Jun 03, 2022
by
Sylvain Gugger
Committed by
GitHub
Jun 03, 2022
Browse files
Fix all offload and MP tests (#17533)
parent
1c57242d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
9 additions
and
21 deletions
+9
-21
src/transformers/modeling_utils.py
src/transformers/modeling_utils.py
+2
-1
tests/models/opt/test_modeling_opt.py
tests/models/opt/test_modeling_opt.py
+1
-1
tests/models/t5/test_modeling_t5.py
tests/models/t5/test_modeling_t5.py
+2
-0
tests/test_modeling_common.py
tests/test_modeling_common.py
+4
-19
No files found.
src/transformers/modeling_utils.py
View file @
83439012
...
@@ -574,7 +574,6 @@ def _load_state_dict_into_meta_model(
...
@@ -574,7 +574,6 @@ def _load_state_dict_into_meta_model(
for
param_name
,
param
in
state_dict
.
items
():
for
param_name
,
param
in
state_dict
.
items
():
# First part of the test is always true as load_state_dict_keys always contains state_dict keys.
# First part of the test is always true as load_state_dict_keys always contains state_dict keys.
if
param_name
not
in
loaded_state_dict_keys
or
param_name
not
in
expected_keys
:
if
param_name
not
in
loaded_state_dict_keys
or
param_name
not
in
expected_keys
:
print
(
param_name
)
continue
continue
if
param_name
.
startswith
(
start_prefix
):
if
param_name
.
startswith
(
start_prefix
):
...
@@ -2124,6 +2123,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
...
@@ -2124,6 +2123,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
if
model
.
_no_split_modules
is
None
:
if
model
.
_no_split_modules
is
None
:
raise
ValueError
(
f
"
{
model
.
__class__
.
__name__
}
does not support `device_map='auto'` yet."
)
raise
ValueError
(
f
"
{
model
.
__class__
.
__name__
}
does not support `device_map='auto'` yet."
)
no_split_modules
=
model
.
_no_split_modules
no_split_modules
=
model
.
_no_split_modules
# Make sure tied weights are tied before creating the device map.
model
.
tie_weights
()
device_map
=
infer_auto_device_map
(
device_map
=
infer_auto_device_map
(
model
,
no_split_module_classes
=
no_split_modules
,
dtype
=
torch_dtype
,
max_memory
=
max_memory
model
,
no_split_module_classes
=
no_split_modules
,
dtype
=
torch_dtype
,
max_memory
=
max_memory
)
)
...
...
tests/models/opt/test_modeling_opt.py
View file @
83439012
...
@@ -63,7 +63,7 @@ class OPTModelTester:
...
@@ -63,7 +63,7 @@ class OPTModelTester:
use_labels
=
False
,
use_labels
=
False
,
vocab_size
=
99
,
vocab_size
=
99
,
hidden_size
=
16
,
hidden_size
=
16
,
num_hidden_layers
=
2
,
num_hidden_layers
=
5
,
num_attention_heads
=
4
,
num_attention_heads
=
4
,
intermediate_size
=
4
,
intermediate_size
=
4
,
hidden_act
=
"gelu"
,
hidden_act
=
"gelu"
,
...
...
tests/models/t5/test_modeling_t5.py
View file @
83439012
...
@@ -515,6 +515,8 @@ class T5ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
...
@@ -515,6 +515,8 @@ class T5ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
test_resize_embeddings
=
True
test_resize_embeddings
=
True
test_model_parallel
=
True
test_model_parallel
=
True
is_encoder_decoder
=
True
is_encoder_decoder
=
True
# The small T5 model needs higher percentages for CPU/MP tests
model_split_percents
=
[
0.8
,
0.9
]
def
setUp
(
self
):
def
setUp
(
self
):
self
.
model_tester
=
T5ModelTester
(
self
)
self
.
model_tester
=
T5ModelTester
(
self
)
...
...
tests/test_modeling_common.py
View file @
83439012
...
@@ -153,6 +153,7 @@ class ModelTesterMixin:
...
@@ -153,6 +153,7 @@ class ModelTesterMixin:
test_model_parallel
=
False
test_model_parallel
=
False
is_encoder_decoder
=
False
is_encoder_decoder
=
False
has_attentions
=
True
has_attentions
=
True
model_split_percents
=
[
0.5
,
0.7
,
0.9
]
def
_prepare_for_class
(
self
,
inputs_dict
,
model_class
,
return_labels
=
False
):
def
_prepare_for_class
(
self
,
inputs_dict
,
model_class
,
return_labels
=
False
):
inputs_dict
=
copy
.
deepcopy
(
inputs_dict
)
inputs_dict
=
copy
.
deepcopy
(
inputs_dict
)
...
@@ -2217,12 +2218,7 @@ class ModelTesterMixin:
...
@@ -2217,12 +2218,7 @@ class ModelTesterMixin:
@
require_accelerate
@
require_accelerate
@
require_torch_gpu
@
require_torch_gpu
def
test_disk_offload
(
self
):
def
test_disk_offload
(
self
):
if
all
([
model_class
.
_no_split_modules
is
None
for
model_class
in
self
.
all_model_classes
]):
return
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
if
isinstance
(
getattr
(
config
,
"num_hidden_layers"
,
None
),
int
)
and
config
.
num_hidden_layers
<
4
:
config
.
num_hidden_layers
=
4
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
if
model_class
.
_no_split_modules
is
None
:
if
model_class
.
_no_split_modules
is
None
:
...
@@ -2234,8 +2230,7 @@ class ModelTesterMixin:
...
@@ -2234,8 +2230,7 @@ class ModelTesterMixin:
base_output
=
model
(
**
inputs_dict
)
base_output
=
model
(
**
inputs_dict
)
model_size
=
compute_module_sizes
(
model
)[
""
]
model_size
=
compute_module_sizes
(
model
)[
""
]
# We test several splits of sizes to make sure it works.
max_size
=
int
(
self
.
model_split_percents
[
0
]
*
model_size
)
max_size
=
int
(
0.4
*
model_size
)
with
tempfile
.
TemporaryDirectory
()
as
tmp_dir
:
with
tempfile
.
TemporaryDirectory
()
as
tmp_dir
:
model
.
cpu
().
save_pretrained
(
tmp_dir
)
model
.
cpu
().
save_pretrained
(
tmp_dir
)
...
@@ -2256,12 +2251,7 @@ class ModelTesterMixin:
...
@@ -2256,12 +2251,7 @@ class ModelTesterMixin:
@
require_accelerate
@
require_accelerate
@
require_torch_gpu
@
require_torch_gpu
def
test_cpu_offload
(
self
):
def
test_cpu_offload
(
self
):
if
all
([
model_class
.
_no_split_modules
is
None
for
model_class
in
self
.
all_model_classes
]):
return
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
if
isinstance
(
getattr
(
config
,
"num_hidden_layers"
,
None
),
int
)
and
config
.
num_hidden_layers
<
4
:
config
.
num_hidden_layers
=
4
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
if
model_class
.
_no_split_modules
is
None
:
if
model_class
.
_no_split_modules
is
None
:
...
@@ -2274,7 +2264,7 @@ class ModelTesterMixin:
...
@@ -2274,7 +2264,7 @@ class ModelTesterMixin:
model_size
=
compute_module_sizes
(
model
)[
""
]
model_size
=
compute_module_sizes
(
model
)[
""
]
# We test several splits of sizes to make sure it works.
# We test several splits of sizes to make sure it works.
max_gpu_sizes
=
[
int
(
p
*
model_size
)
for
p
in
[
0.5
,
0.7
,
0.9
]
]
max_gpu_sizes
=
[
int
(
p
*
model_size
)
for
p
in
self
.
model_split_percents
]
with
tempfile
.
TemporaryDirectory
()
as
tmp_dir
:
with
tempfile
.
TemporaryDirectory
()
as
tmp_dir
:
model
.
cpu
().
save_pretrained
(
tmp_dir
)
model
.
cpu
().
save_pretrained
(
tmp_dir
)
...
@@ -2292,12 +2282,7 @@ class ModelTesterMixin:
...
@@ -2292,12 +2282,7 @@ class ModelTesterMixin:
@
require_accelerate
@
require_accelerate
@
require_torch_multi_gpu
@
require_torch_multi_gpu
def
test_model_parallelism
(
self
):
def
test_model_parallelism
(
self
):
if
all
([
model_class
.
_no_split_modules
is
None
for
model_class
in
self
.
all_model_classes
]):
return
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
if
isinstance
(
getattr
(
config
,
"num_hidden_layers"
,
None
),
int
)
and
config
.
num_hidden_layers
<
4
:
config
.
num_hidden_layers
=
4
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
if
model_class
.
_no_split_modules
is
None
:
if
model_class
.
_no_split_modules
is
None
:
...
@@ -2310,7 +2295,7 @@ class ModelTesterMixin:
...
@@ -2310,7 +2295,7 @@ class ModelTesterMixin:
model_size
=
compute_module_sizes
(
model
)[
""
]
model_size
=
compute_module_sizes
(
model
)[
""
]
# We test several splits of sizes to make sure it works.
# We test several splits of sizes to make sure it works.
max_gpu_sizes
=
[
int
(
p
*
model_size
)
for
p
in
[
0.5
,
0.7
,
0.9
]
]
max_gpu_sizes
=
[
int
(
p
*
model_size
)
for
p
in
self
.
model_split_percents
]
with
tempfile
.
TemporaryDirectory
()
as
tmp_dir
:
with
tempfile
.
TemporaryDirectory
()
as
tmp_dir
:
model
.
cpu
().
save_pretrained
(
tmp_dir
)
model
.
cpu
().
save_pretrained
(
tmp_dir
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment