OpenDAS / deepspeed · commit a76572dc

Unverified commit a76572dc, authored Mar 25, 2020 by Shaden Smith, committed via GitHub on Mar 25, 2020.

Adding static loss scaling for ZeRO. (#166)
Parent: 012d91df

Showing 3 changed files with 48 additions and 4 deletions:

    .gitignore                                   +2   -0
    deepspeed/pt/deepspeed_zero_optimizer.py     +3   -4
    tests/unit/test_fp16.py                      +43  -0
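For context, the static scale this commit wires into the ZeRO optimizer is driven by the `fp16` section of the DeepSpeed config, as exercised by the new test below (which uses `"loss_scale": 138.`). A minimal sketch of the two config shapes, assuming the usual DeepSpeed convention that a nonzero `loss_scale` selects the fixed `LossScaler` while omitting it (or setting it to 0) keeps `DynamicLossScaler`; that convention is an assumption, not something shown in this diff:

# Sketch only; names static_fp16_config / dynamic_fp16_config are illustrative.

# Static loss scaling: a fixed factor (138. here, matching the new test).
static_fp16_config = {
    "fp16": {
        "enabled": True,
        "loss_scale": 138.
    }
}

# Dynamic loss scaling: the scale is adjusted at runtime by DynamicLossScaler.
# Assumption: omitting "loss_scale" (or setting it to 0) keeps the dynamic path.
dynamic_fp16_config = {
    "fp16": {
        "enabled": True
    }
}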
.gitignore

@@ -16,3 +16,5 @@ docs/code-docs/build
 .sass-cache/
 .jekyll-cache/
 .jekyll-metadata
+
+tests/unit/saved_checkpoint/
deepspeed/pt/deepspeed_zero_optimizer.py

@@ -12,7 +12,7 @@ import torch.distributed as dist
 import math
 from torch._six import inf
-from deepspeed.pt.loss_scaler import DynamicLossScaler
+from deepspeed.pt.loss_scaler import LossScaler, DynamicLossScaler
 from deepspeed.pt.deepspeed_utils import get_grad_norm, CheckOverflow

@@ -175,15 +175,14 @@ class FP16_DeepSpeedZeroOptimizer(object):
         # we may have a way of fusing dynamic scale. Do not support for now
         if dynamic_loss_scale:
+            self.dynamic_loss_scale = True
             if dynamic_loss_args is None:
                 self.loss_scaler = DynamicLossScaler()
             else:
                 self.loss_scaler = DynamicLossScaler(**dynamic_loss_args)
-
-            self.dynamic_loss_scale = True
-
         else:
             self.dynamic_loss_scale = False
+            self.loss_scaler = LossScaler(scale=static_loss_scale)

         self.cur_iter = 0
         self.mpu = mpu
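The behavioral difference in the new `else` branch: with dynamic scaling off, the optimizer now holds a fixed-factor scaler instead of one that adjusts on overflow. A minimal sketch of what such a fixed scaler does, to illustrate the round trip; the class `FixedLossScaler` and its methods `scale_loss` / `unscale_grads` are hypothetical names for illustration, not DeepSpeed's `LossScaler` API:

class FixedLossScaler:
    """Hypothetical sketch of a static (fixed-factor) loss scaler.

    The loss is multiplied by a constant before backward() so small fp16
    gradients do not underflow, and the gradients are divided by the same
    constant before the optimizer step. The scale never changes, which is
    the behavior selected by LossScaler(scale=static_loss_scale) above.
    """

    def __init__(self, scale=138.0):
        self.loss_scale = scale

    def scale_loss(self, loss):
        # Scale up the loss so its gradients stay representable in fp16.
        return loss * self.loss_scale

    def unscale_grads(self, parameters):
        # Undo the scaling on accumulated gradients before stepping.
        for p in parameters:
            if p.grad is not None:
                p.grad.div_(self.loss_scale)


# Usage sketch (assuming torch tensors and an optimizer are in scope):
#   scaler = FixedLossScaler(scale=138.0)
#   scaler.scale_loss(loss).backward()
#   scaler.unscale_grads(model.parameters())
#   optimizer.step()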
tests/unit/test_fp16.py

@@ -246,3 +246,46 @@ def test_adam_fp16_zero_onecycle_compatibility(tmpdir):
     _test_adam_fp16_zero_onecycle_compatibility(args=args,
                                                 model=model,
                                                 hidden_dim=hidden_dim)
+
+
+def test_zero_static_scale(tmpdir):
+    config_dict = {
+        "train_batch_size": 4,
+        "steps_per_print": 1,
+        "optimizer": {
+            "type": "Adam",
+            "params": {
+                "lr": 0.00015
+            }
+        },
+        "fp16": {
+            "enabled": True,
+            "loss_scale": 138.
+        },
+        "zero_optimization": True
+    }
+    args = args_from_dict(tmpdir, config_dict)
+
+    @distributed_test(world_size=2)
+    def _test_zero_static_scale(args):
+        hidden_dim = 10
+        model = SimpleModel(hidden_dim, empty_grad=True)
+        model, optim, _, _ = deepspeed.initialize(args=args,
+                                                  model=model,
+                                                  model_parameters=model.parameters())
+
+        # Ensure the static scaler is configured.
+        assert optim.dynamic_loss_scale == False
+        assert optim.loss_scaler.loss_scale == 138.
+
+        # Now make sure things work..
+        data_loader = random_dataloader(model=model,
+                                        total_samples=10,
+                                        hidden_dim=hidden_dim,
+                                        device=model.device)
+        for n, batch in enumerate(data_loader):
+            loss = model(batch[0], batch[1])
+            model.backward(loss)
+            model.step()
+
+    _test_zero_static_scale(args)