chenpangpang/transformers · Commits · d14e0af2
"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "bf64b8cf095a23303315e1347d8eac0bce9d73be"
Commit d14e0af2 (unverified), authored Jun 09, 2021 by Stas Bekman; committed by GitHub on Jun 09, 2021:
sync LayerDrop for Wav2Vec2Encoder + tests (#12076)
parent 82a2b76c
Changes: 2 changed files with 38 additions and 15 deletions (+38, -15)

  examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py   +29 -12
  src/transformers/models/wav2vec2/modeling_wav2vec2.py            +9  -3
examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py (view file @ d14e0af2)
@@ -24,6 +24,7 @@ sys.path.insert(1, str(git_repo_path))
 import dataclasses  # noqa
 import io  # noqa
+import itertools  # noqa
 import json  # noqa
 import os  # noqa
 import unittest  # noqa
@@ -50,48 +51,62 @@ from transformers.trainer_utils import set_seed # noqa
 set_seed(42)

-WAV2VEC2_TINY = "patrickvonplaten/wav2vec2_tiny_random_robust"
+models = dict(base="patrickvonplaten/wav2vec2_tiny_random", robust="patrickvonplaten/wav2vec2_tiny_random_robust")

 ZERO2 = "zero2"
 ZERO3 = "zero3"
 stages = [ZERO2, ZERO3]


+def custom_name_func(func, param_num, param):
+    # customize the test name generator function as we want both params to appear in the sub-test
+    # name, as by default it shows only the first param
+    param_based_name = parameterized.to_safe_name("_".join(str(x) for x in param.args))
+    return f"{func.__name__}_{param_based_name}"
+
+
+# Cartesian-product of zero stages with models to test
+params = list(itertools.product(stages, models.keys()))
+
+
 @slow
 @require_deepspeed
 @require_torch_gpu
 class TestDeepSpeedWav2Vec2(TestCasePlus):
-    @parameterized.expand(stages)
-    def test_fp32_non_distributed(self, stage):
+    @parameterized.expand(params, name_func=custom_name_func)
+    def test_fp32_non_distributed(self, stage, model):
         self.run_and_check(
             stage=stage,
+            model=model,
             distributed=False,
             fp16=False,
         )

     @require_torch_multi_gpu
-    @parameterized.expand(stages)
-    def test_fp32_distributed(self, stage):
+    @parameterized.expand(params, name_func=custom_name_func)
+    def test_fp32_distributed(self, stage, model):
         self.run_and_check(
             stage=stage,
+            model=model,
             distributed=True,
             fp16=False,
         )

-    @parameterized.expand(stages)
-    def test_fp16_non_distributed(self, stage):
+    @parameterized.expand(params, name_func=custom_name_func)
+    def test_fp16_non_distributed(self, stage, model):
         self.run_and_check(
             stage=stage,
+            model=model,
             distributed=False,
             fp16=True,
         )

     @require_torch_multi_gpu
-    @parameterized.expand(stages)
-    def test_fp16_distributed(self, stage):
+    @parameterized.expand(params, name_func=custom_name_func)
+    def test_fp16_distributed(self, stage, model):
         self.run_and_check(
             stage=stage,
+            model=model,
             distributed=True,
             fp16=True,
         )
@@ -104,14 +119,16 @@ class TestDeepSpeedWav2Vec2(TestCasePlus):
     # XXX: need to do better validation beyond just that the run was successful
     def run_and_check(
         self,
-        stage,
-        model_name: str = WAV2VEC2_TINY,
+        stage: str,
+        model: str,
         eval_steps: int = 10,
         distributed: bool = True,
         quality_checks: bool = True,
         fp16: bool = True,
     ):
+        model_name = models[model]
+
         output_dir = self.run_trainer(
             stage=stage,
             model_name=model_name,
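Aside on the test changes: by default, parameterized.expand embeds only the first parameter in each generated sub-test name, so with (stage, model) pairs the model would be invisible in the test output; that is what custom_name_func fixes. A minimal, self-contained sketch of the naming behavior, assuming the `parameterized` PyPI package and stand-in checkpoint names rather than the real ones from the diff:

    # Sketch only: `parameterized` is the PyPI package; stage/model values
    # are stand-ins for the real constants defined in the test file above.
    import itertools
    import unittest

    from parameterized import parameterized

    stages = ["zero2", "zero3"]
    models = dict(base="tiny_random", robust="tiny_random_robust")
    params = list(itertools.product(stages, models.keys()))


    def custom_name_func(func, param_num, param):
        # join *all* params into the name; the default name_func shows only the first
        param_based_name = parameterized.to_safe_name("_".join(str(x) for x in param.args))
        return f"{func.__name__}_{param_based_name}"


    class TestNaming(unittest.TestCase):
        @parameterized.expand(params, name_func=custom_name_func)
        def test_demo(self, stage, model):
            # generates: test_demo_zero2_base, test_demo_zero2_robust,
            #            test_demo_zero3_base, test_demo_zero3_robust
            self.assertIn(stage, stages)


    if __name__ == "__main__":
        unittest.main()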
src/transformers/models/wav2vec2/modeling_wav2vec2.py (view file @ d14e0af2)
@@ -548,15 +548,18 @@ class Wav2Vec2Encoder(nn.Module):
         hidden_states = self.layer_norm(hidden_states)
         hidden_states = self.dropout(hidden_states)

+        deepspeed_zero3_is_enabled = is_deepspeed_zero3_enabled()
+
         for layer in self.layers:
             if output_hidden_states:
                 all_hidden_states = all_hidden_states + (hidden_states,)

             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             dropout_probability = np.random.uniform(0, 1)
-            if self.training and (dropout_probability < self.config.layerdrop):  # skip the layer
-                layer_outputs = (None, None)
-            else:
+
+            skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False
+            if not skip_the_layer or deepspeed_zero3_is_enabled:
+                # under deepspeed zero3 all gpus must run in sync
                 if getattr(self.config, "gradient_checkpointing", False) and self.training:
                     # create gradient checkpointing function
                     def create_custom_forward(module):
@@ -576,6 +579,9 @@ class Wav2Vec2Encoder(nn.Module):
                 )
                 hidden_states = layer_outputs[0]

+            if skip_the_layer:
+                layer_outputs = (None, None)
+
             if output_attentions:
                 all_self_attentions = all_self_attentions + (layer_outputs[1],)
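Why the layer must still run when it is "dropped": under DeepSpeed ZeRO-3 each layer's parameters are partitioned across ranks and re-assembled by collective ops during that layer's forward pass, so if one rank skipped a layer while another did not, the collectives would desynchronize and hang. The rewritten loop therefore enters the layer on every rank whenever ZeRO-3 is enabled and only records the skip afterwards. A standalone sketch of that control flow, with toy nn.Linear layers and `zero3_enabled` standing in for is_deepspeed_zero3_enabled():

    # Toy illustration of the synced-LayerDrop control flow from the diff above;
    # no DeepSpeed dependency, plain nn.Linear layers stand in for encoder layers.
    import numpy as np
    import torch
    from torch import nn

    layerdrop = 0.5       # probability of skipping a layer while training
    zero3_enabled = True  # pretend ZeRO-3 has partitioned the layer weights

    layers = nn.ModuleList([nn.Linear(8, 8) for _ in range(4)])
    layers.train()

    hidden_states = torch.randn(2, 8)
    for layer in layers:
        dropout_probability = np.random.uniform(0, 1)
        skip_the_layer = layers.training and dropout_probability < layerdrop

        if not skip_the_layer or zero3_enabled:
            # every rank executes the forward, so the all-gather of this layer's
            # partitioned parameters stays in sync across GPUs
            layer_outputs = (layer(hidden_states),)
            hidden_states = layer_outputs[0]

        if skip_the_layer:
            # record the skip for downstream bookkeeping (the encoder uses this
            # to append None in place of attention weights)
            layer_outputs = (None, None)

As the diff reads, when ZeRO-3 is enabled a "skipped" layer's output still flows into hidden_states; the skip only affects the recorded layer_outputs, trading exact LayerDrop semantics for collective-op safety.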