Commit adad5996 (OpenDAS/apex), authored Aug 12, 2019 by Deyu Fu
Parent: 4d6ed501

    keep old fused* name and rename new optimizers without prefix

Showing 11 changed files with 35 additions and 30 deletions.
apex/optimizers/__init__.py                        +4  -4
apex/optimizers/adam.py                            +4  -4
apex/optimizers/fp16_optimizer.py                  +2  -1
apex/optimizers/fused_adam.py                      +5  -4
apex/optimizers/fused_sgd.py                       +3  -0
apex/optimizers/novograd.py                        +6  -6
apex/optimizers/sgd.py                             +3  -3
tests/L0/run_mixed_adam/test_fp16_optimizer.py     +4  -4
tests/L0/run_mixed_adam/test_mixed_adam.py         +3  -3
tests/L0/run_optimizers/__init__.py                +0  -0
tests/L0/run_optimizers/test_adam.py               +1  -1
apex/optimizers/__init__.py

 from .fused_sgd import FusedSGD
-from .novograd import FusedNovoGrad
-from .fused_adam_v1 import FusedAdam_v1
-from .adam import FusedAdam
-#from .sgd import FusedSGD
+from .fused_adam import FusedAdam
 from .fp16_optimizer import FP16_Optimizer
+from .sgd import SGD
+from .adam import Adam
+from .novograd import NovoGrad
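
Taken together, the new __init__.py keeps the legacy Fused* classes and adds the renamed, prefix-free optimizers. A minimal sketch of the resulting import surface for downstream code (names taken directly from the diff above; requires an apex build with --cuda_ext --cpp_ext):

    # Legacy fused optimizers keep their Fused* names after this commit ...
    from apex.optimizers import FusedAdam, FusedSGD, FP16_Optimizer
    # ... while the new multi_tensor-based optimizers drop the prefix.
    from apex.optimizers import Adam, SGD, NovoGrad
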
apex/optimizers/adam.py

@@ -2,7 +2,7 @@ import torch
 from apex.multi_tensor_apply import multi_tensor_applier
 from amp_C import multi_tensor_adam

-class FusedAdam(torch.optim.Optimizer):
+class Adam(torch.optim.Optimizer):

    """Implements Adam algorithm. Currently GPU-only. Requires Apex to be installed via
    ``python setup.py install --cuda_ext --cpp_ext``.

@@ -45,7 +45,7 @@ class FusedAdam(torch.optim.Optimizer):
            raise RuntimeError('FusedAdam does not support the AMSGrad variant.')
        defaults = dict(lr=lr, bias_correction=bias_correction,
                        betas=betas, eps=eps, weight_decay=weight_decay)
-        super(FusedAdam, self).__init__(params, defaults)
+        super(Adam, self).__init__(params, defaults)
        self.eps_mode = 0 if eps_inside_sqrt else 1
        self.dummy_overflow_buf = torch.cuda.IntTensor([0])

@@ -57,8 +57,8 @@ class FusedAdam(torch.optim.Optimizer):
        and returns the loss.
        """
        if any(p is not None for p in [grads, output_params, scale, grad_norms]):
-            raise RuntimeError('FusedAdam has been updated, please use with AMP for mixed precision. '
-                               'For legacy code using fp16_optimizer, use FusedAdam_v1.')
+            raise RuntimeError('Adam has been updated, please use with AMP for mixed precision. '
+                               'For legacy code using fp16_optimizer, use FusedAdam.')
        loss = None
        if closure is not None:
            loss = closure()
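
The new RuntimeError above means the renamed Adam no longer takes grads/output_params/scale/grad_norms in step(); mixed precision is expected to go through AMP instead. A minimal sketch of that path, assuming apex.amp is installed; the model, loss, and opt_level are illustrative, not part of this commit:

    import torch
    from apex import amp
    import apex.optimizers

    # Illustrative model; any torch.nn.Module works the same way.
    model = torch.nn.Linear(512, 512).cuda()
    optimizer = apex.optimizers.Adam(model.parameters(), lr=1e-3)

    # AMP patches the model/optimizer for mixed precision; "O2" is just an example level.
    model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

    x = torch.randn(8, 512, device="cuda")
    loss = model(x).float().pow(2).mean()

    optimizer.zero_grad()
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()   # loss scaling handled by AMP, not by step() arguments
    optimizer.step()
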
apex/optimizers/fp16_optimizer.py

@@ -35,7 +35,8 @@ class FP16_Optimizer(object):
                 dynamic_loss_args=None,
                 verbose=True):
-        print("\nfp16_optimizer will be removed in future. To update, use fused optimizers with AMP.")
+        print("\nfp16_optimizer is designed to work with apex.optimizers.Fused*, and will be removed in future")
+        print("To update, use updated optimizers without Fused prefix with AMP.")
        # The fused optimizer does all the work. We need this layer for two reason:
        # 1. maintain same user API from apex.fp16_utils
        # 2. keep common stuff here in case we need to add new fused optimizer later
apex/optimizers/fused_adam_v1.py → apex/optimizers/fused_adam.py

@@ -2,9 +2,9 @@ import types
 import torch
 import importlib
-from ..multi_tensor_apply import multi_tensor_applier
+from apex.multi_tensor_apply import multi_tensor_applier

-class FusedAdam_v1(torch.optim.Optimizer):
+class FusedAdam(torch.optim.Optimizer):

    """Implements Adam algorithm. Currently GPU-only. Requires Apex to be installed via
    ``python setup.py install --cuda_ext --cpp_ext``.

@@ -40,6 +40,8 @@ class FusedAdam_v1(torch.optim.Optimizer):
                 betas=(0.9, 0.999), eps=1e-8, eps_inside_sqrt=False,
                 weight_decay=0., max_grad_norm=0., amsgrad=False, use_mt=False,
                 amp_scale_adjustment=1.0):
+        print("\nFusedAdam will be removed in future. To update, use apex.optimizers.Adam with AMP.")
        global fused_adam_cuda
        fused_adam_cuda = importlib.import_module("fused_adam_cuda")

@@ -58,7 +60,7 @@ class FusedAdam_v1(torch.optim.Optimizer):
        defaults = dict(lr=lr, bias_correction=bias_correction,
                        betas=betas, eps=eps, weight_decay=weight_decay,
                        max_grad_norm=max_grad_norm)
-        super(FusedAdam_v1, self).__init__(params, defaults)
+        super(FusedAdam, self).__init__(params, defaults)
        self.eps_mode = 0 if eps_inside_sqrt else 1

    def step(self, closure=None, grads=None, output_params=None, scale=1., grad_norms=None):

@@ -195,4 +197,3 @@ class FusedAdam_v1(torch.optim.Optimizer):
                                  group['weight_decay'])
        return loss
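
For code that cannot move to AMP yet, the legacy pairing this file preserves is the one the updated tests below use: the kept FusedAdam wrapped in apex.optimizers.FP16_Optimizer. A minimal sketch, assuming FP16_Optimizer mirrors the apex.fp16_utils API (backward(loss)/step()), as the comment in fp16_optimizer.py says it does; the model and loss-scale value are illustrative:

    import torch
    import apex.optimizers

    # Illustrative fp16 model; the legacy path expects half-precision parameters.
    model = torch.nn.Linear(512, 512).cuda().half()

    # Legacy fused optimizer keeps its FusedAdam name after this commit ...
    optimizer = apex.optimizers.FusedAdam(model.parameters(), lr=1e-3)
    # ... and is still driven through the (deprecated) FP16_Optimizer wrapper.
    optimizer = apex.optimizers.FP16_Optimizer(optimizer, static_loss_scale=128.0)

    x = torch.randn(8, 512, device="cuda", dtype=torch.float16)
    loss = model(x).float().pow(2).mean()

    optimizer.zero_grad()
    optimizer.backward(loss)   # FP16_Optimizer applies the loss scale internally
    optimizer.step()
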
apex/optimizers/fused_sgd.py

@@ -53,6 +53,9 @@ class FusedSGD(Optimizer):
                 weight_decay=0, nesterov=False,
                 wd_after_momentum=False,
                 materialize_master_grads=True):
+        print("\nFusedSGD will be removed in future. To update, use apex.optimizers.SGD with AMP.")
        if lr is not required and lr < 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if momentum < 0.0:
apex/optimizers/novograd.py

@@ -2,7 +2,7 @@ import torch
 from apex.multi_tensor_apply import multi_tensor_applier
 from amp_C import multi_tensor_novograd

-class FusedNovoGrad(torch.optim.Optimizer):
+class NovoGrad(torch.optim.Optimizer):

    """Implements NovoGrad algorithm. Currently GPU-only. Requires Apex to be installed via
    ``python setup.py install --cuda_ext --cpp_ext``.

@@ -48,12 +48,12 @@ class FusedNovoGrad(torch.optim.Optimizer):
                 grad_averaging=True, norm_type=2, init_zero=False,
                 set_grad_none=True):
        if amsgrad:
-            raise RuntimeError('FusedNovoGrad does not support the AMSGrad variant.')
+            raise RuntimeError('NovoGrad does not support the AMSGrad variant.')
        defaults = dict(lr=lr, bias_correction=bias_correction,
                        betas=betas, eps=eps, weight_decay=weight_decay,
                        grad_averaging=grad_averaging, norm_type=norm_type,
                        init_zero=init_zero)
-        super(FusedNovoGrad, self).__init__(params, defaults)
+        super(NovoGrad, self).__init__(params, defaults)
        self.moment_mode = 0 if reg_inside_moment else 1
        self.dummy_overflow_buf = torch.cuda.IntTensor([0])
        self.set_grad_none = set_grad_none

@@ -64,7 +64,7 @@ class FusedNovoGrad(torch.optim.Optimizer):
                for p in group['params']:
                    p.grad = None
        else:
-            super(FusedNovoGrad, self).zero_grad()
+            super(NovoGrad, self).zero_grad()

    def step(self, closure=None):
        """Performs a single optimization step.

@@ -96,7 +96,7 @@ class FusedNovoGrad(torch.optim.Optimizer):
                if p.grad is None:
                    continue
                if p.grad.data.is_sparse:
-                    raise RuntimeError('FusedNovoGrad does not support sparse gradients, please consider SparseAdam instead')
+                    raise RuntimeError('NovoGrad does not support sparse gradients, please consider SparseAdam instead')

                state = self.state[p]
                # State initialization

@@ -119,7 +119,7 @@ class FusedNovoGrad(torch.optim.Optimizer):
                elif group['norm_type'] == 2:
                    m2 = [torch.sum(torch.pow(g, 2)).sqrt().item() for g in g_list]
                else:
-                    raise RuntimeError('FusedNovoGrad only support l2/inf norm now.')
+                    raise RuntimeError('NovoGrad only support l2/inf norm now.')
                group['exp_avg_sq'] = torch.cuda.FloatTensor(m2)
            else:
                assert(len(g_list) == group['exp_avg_sq'].numel())
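
The renamed NovoGrad keeps the constructor options visible in the diff above (grad_averaging, norm_type, init_zero, set_grad_none). A minimal construction sketch; the model and hyperparameter values are illustrative, not recommendations from this commit:

    import torch
    import apex.optimizers

    model = torch.nn.Linear(512, 512).cuda()   # illustrative model

    # grad_averaging and norm_type are the options shown in the diff; lr/betas are example values.
    optimizer = apex.optimizers.NovoGrad(model.parameters(), lr=1e-2,
                                         betas=(0.95, 0.98),
                                         grad_averaging=True, norm_type=2)

    optimizer.zero_grad()   # with set_grad_none=True (the default above), this sets p.grad = None
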
apex/optimizers/sgd.py

@@ -4,7 +4,7 @@ from torch.optim import Optimizer
 from amp_C import multi_tensor_axpby
 from apex.multi_tensor_apply import multi_tensor_applier

-class FusedSGD(Optimizer):
+class SGD(Optimizer):
    r"""Implements stochastic gradient descent (optionally with momentum).

    Nesterov momentum is based on the formula from
    `On the importance of initialization and momentum in deep learning`__.

@@ -52,10 +52,10 @@ class FusedSGD(Optimizer):
                        weight_decay=weight_decay, nesterov=nesterov)
        if nesterov and (momentum <= 0 or dampening != 0):
            raise ValueError("Nesterov momentum requires a momentum and zero dampening")
-        super(FusedSGD, self).__init__(params, defaults)
+        super(SGD, self).__init__(params, defaults)

    def __setstate__(self, state):
-        super(FusedSGD, self).__setstate__(state)
+        super(SGD, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('nesterov', False)
tests/L0/run_mixed_adam/test_fp16_optimizer.py

@@ -36,7 +36,7 @@ class TestFP16Optimizer(unittest.TestCase):
        ref_optim = torch.optim.Adam(self.ref_model.parameters())
        ref_optim = apex.fp16_utils.FP16_Optimizer(ref_optim, verbose=False)

-        tst_optim = apex.optimizers.FusedAdam_v1(self.tst_model.parameters())
+        tst_optim = apex.optimizers.FusedAdam(self.tst_model.parameters())
        tst_optim = apex.optimizers.FP16_Optimizer(tst_optim)

        for i in range(self.iters):

@@ -58,7 +58,7 @@ class TestFP16Optimizer(unittest.TestCase):
        ref_optim = torch.optim.Adam(self.ref_model.parameters())
        ref_optim = apex.fp16_utils.FP16_Optimizer(ref_optim, static_loss_scale=128.0, verbose=False)

-        tst_optim = apex.optimizers.FusedAdam_v1(self.tst_model.parameters())
+        tst_optim = apex.optimizers.FusedAdam(self.tst_model.parameters())
        tst_optim = apex.optimizers.FP16_Optimizer(tst_optim, static_loss_scale=128.0)

        for i in range(self.iters):

@@ -81,7 +81,7 @@ class TestFP16Optimizer(unittest.TestCase):
        ref_optim = apex.fp16_utils.FP16_Optimizer(ref_optim, verbose=False)

        tst_groups = [{'params': [self.tst_model.weight]},{'params': [self.tst_model.bias]}]
-        tst_optim = apex.optimizers.FusedAdam_v1(tst_groups)
+        tst_optim = apex.optimizers.FusedAdam(tst_groups)
        tst_optim = apex.optimizers.FP16_Optimizer(tst_optim)

        for i in range(self.iters):

@@ -101,7 +101,7 @@ class TestFP16Optimizer(unittest.TestCase):
        ref_optim = torch.optim.Adam(self.ref_model.parameters())
        ref_optim = apex.fp16_utils.FP16_Optimizer(ref_optim, verbose=False)

-        tst_optim = apex.optimizers.FusedAdam_v1(self.tst_model.parameters(), max_grad_norm=0.01)
+        tst_optim = apex.optimizers.FusedAdam(self.tst_model.parameters(), max_grad_norm=0.01)
        tst_optim = apex.optimizers.FP16_Optimizer(tst_optim)

        for i in range(self.iters):
tests/L0/run_mixed_adam/test_mixed_adam.py

@@ -24,10 +24,10 @@ class TestFusedAdam(unittest.TestCase):
        ref_optim = torch.optim.Adam(ref_param, **ref_adam_option)

        if tst_adam_option:
-            tst_optim = apex.optimizers.FusedAdam_v1(tst_param, **tst_adam_option)
+            tst_optim = apex.optimizers.FusedAdam(tst_param, **tst_adam_option)
        else:
-            tst_optim = apex.optimizers.FusedAdam_v1(tst_param, **ref_adam_option)
+            tst_optim = apex.optimizers.FusedAdam(tst_param, **ref_adam_option)

        return (ref_param, tst_param, ref_optim, tst_optim)

    def gen_grad(self, ref_param, tst_param):
tests/L0/run_fused_optimizers/__init__.py → tests/L0/run_optimizers/__init__.py

File moved (no content changes).
tests/L0/run_fused_optimizers/test_fused_adam.py → tests/L0/run_optimizers/test_adam.py

@@ -23,7 +23,7 @@ class TestFusedAdam(unittest.TestCase):
            tst_param.append(torch.nn.Parameter(tensor.clone()))

        ref_optim = torch.optim.Adam(ref_param, **adam_option)
-        tst_optim = apex.optimizers.FusedAdam(tst_param, **adam_option)
+        tst_optim = apex.optimizers.Adam(tst_param, **adam_option)

        return (ref_param, tst_param, ref_optim, tst_optim)