Commit 59e992da authored Mar 08, 2019 by Michael Carilli

Stashing to test on the cluster

parent 40555b3a
Showing 8 changed files with 73 additions and 38 deletions
apex/amp/_amp_state.py                       +25  -0
apex/amp/frontend.py                         +19  -15
apex/amp/handle.py                            +4  -7
apex/amp/opt.py                               +2  -3
apex/amp/scaler.py                            +8  -8
apex/fp16_utils/fp16_optimizer.py             +6  -2
tests/L1/common/run_test.sh                   +8  -2
tests/L1/cross_product_distributed/run.sh     +1  -1
apex/amp/_amp_state.py
@@ -2,13 +2,19 @@
# I'm a C++ guy, not a python guy. I decided this approach because it seemed most C++-like.
# But apparently it's ok:
# http://effbot.org/pyfaq/how-do-i-share-global-variables-across-modules.htm
import os

import torch


class AmpState(object):
    def __init__(self):
        self.hard_override = False
        self.verbosity = 1

# Attribute stash. Could also just stash things as global module attributes.
_amp_state = AmpState()


def warn_or_err(msg):
    if _amp_state.hard_override:
        print("Warning: " + msg)
@@ -18,11 +24,30 @@ def warn_or_err(msg):
# + " If you're sure you know what you're doing, supply " +
# "hard_override=True to amp.initialize.")
distributed
=
False
if
'WORLD_SIZE'
in
os
.
environ
:
distributed
=
int
(
os
.
environ
[
'WORLD_SIZE'
])
>
1
def
maybe_print
(
msg
,
rank0
=
False
):
if
_amp_state
.
verbosity
>
0
:
if
rank0
:
if
distributed
:
if
torch
.
distributed
.
get_rank
()
==
0
:
print
(
msg
)
else
:
print
(
msg
)
else
:
print
(
msg
)
# def iter_params(param_groups):
# for group in param_groups:
# for p in group['params']:
# yield p
def
master_params
(
optimizer
):
"""
Generator expression that iterates over the params owned by ``optimizer``.
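For reference, a minimal sketch of how the new maybe_print helper behaves (illustrative only; the import path follows the file above, and the rank0=True branch additionally assumes torch.distributed has been initialized when WORLD_SIZE > 1):

    from apex.amp._amp_state import _amp_state, maybe_print

    maybe_print("printed on every rank")               # verbosity defaults to 1
    maybe_print("printed on rank 0 only", rank0=True)  # gated by torch.distributed.get_rank() when WORLD_SIZE > 1

    _amp_state.verbosity = 0
    maybe_print("suppressed entirely")                 # verbosity 0 silences all Amp output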
apex/amp/frontend.py
import torch
from ._initialize import _initialize
-from ._amp_state import _amp_state, warn_or_err
+from ._amp_state import _amp_state, warn_or_err, maybe_print


class Properties(object):
@@ -199,7 +199,8 @@ def initialize(
               patch_torch_functions=None,
               keep_batchnorm_fp32=None,
               master_weights=None,
-              loss_scale=None
+              loss_scale=None,
+              verbosity=1,
               ):
    """
    Initialize your models, optimizers, and the Torch tensor and functional namespace according to the
@@ -219,7 +220,7 @@ def initialize(
        optimizers (torch.optim.Optimizer or list of torch.optim.Optimizers): Optimizers to modify/cast.
        enabled (bool, optional, default=True): If False, renders all Amp calls no-ops, so your script
            should run as if Amp were not present.
-       opt_level(str, required): Pure or mixed precision optimization level. Accepted values are
+       opt_level (str, required): Pure or mixed precision optimization level. Accepted values are
            "O0", "O1", "O2", and "O3", explained in detail above.
        cast_model_type (``torch.dtype``, optional, default=None): Optional property override, see
            above.
@@ -227,8 +228,9 @@ def initialize(
        keep_batchnorm_fp32 (bool or str, optional, default=None): Optional property override. If
            passed as a string, must be the string "True" or "False".
        master_weights (bool, optional, default=None): Optional property override.
-       loss_scale(float or str, default=None): Optional property override. If passed as a string,
+       loss_scale (float or str, default=None): Optional property override. If passed as a string,
            must be a string representing a number, e.g., "128.0", or the string "dynamic".
+       verbosity (int, default=1): Set to 0 to suppress Amp-related output.

    Returns:
        Model(s) and optimizer(s) modified according to the ``opt_level``.
@@ -266,8 +268,10 @@ def initialize(
    .. _`Imagenet example`:
        https://github.com/NVIDIA/apex/tree/master/examples/imagenet
    """
+   _amp_state.opt_properties = Properties()
+   _amp_state.opt_properties.verbosity = verbosity
    if not enabled:
        _amp_state.opt_properties = Properties()
        return models, optimizers

    if opt_level not in opt_levels:
@@ -275,16 +279,16 @@ def initialize(
"Unexpected optimization level {}. "
.
format
(
opt_level
)
+
"Options are 'O0', 'O1', 'O2', 'O3'."
)
else
:
_amp_state
.
opt_properties
=
opt_levels
[
opt_level
](
P
roperties
()
)
print
(
"Selected optimization level {}"
.
format
(
opt_levels
[
opt_level
].
brief
))
print
(
"Defaults for this optimization level are:"
)
print
(
_amp_state
.
opt_properties
.
options
)
_amp_state
.
opt_properties
=
opt_levels
[
opt_level
](
_amp_state
.
opt_p
roperties
)
maybe_
print
(
"Selected optimization level {}"
.
format
(
opt_levels
[
opt_level
].
brief
)
,
True
)
maybe_
print
(
"Defaults for this optimization level are:"
,
True
)
maybe_
print
(
_amp_state
.
opt_properties
.
options
,
True
)
for
k
,
v
in
_amp_state
.
opt_properties
.
options
.
items
():
print
(
"{:22} : {}"
.
format
(
k
,
v
))
maybe_
print
(
"{:22} : {}"
.
format
(
k
,
v
)
,
True
)
print
(
"Processing user overrides (additional kwargs that are not None)..."
)
# I chose to have the keyword arguments listed directly in the argument list,
so I
# can't use kwargs.items() here.
maybe_
print
(
"Processing user overrides (additional kwargs that are not None)..."
,
True
)
# I chose to have the keyword arguments listed directly in the argument list,
#
instead of **kwargs, so I
can't use kwargs.items() here.
if
enabled
is
not
None
:
_amp_state
.
opt_properties
.
enabled
=
enabled
if
opt_level
is
not
None
:
...
...
@@ -300,9 +304,9 @@ def initialize(
    if loss_scale is not None:
        _amp_state.opt_properties.loss_scale = loss_scale

-   print("After processing overrides, optimization options are:")
+   maybe_print("After processing overrides, optimization options are:", True)
    for k, v in _amp_state.opt_properties.options.items():
-       print("{:22} : {}".format(k, v))
+       maybe_print("{:22} : {}".format(k, v), True)

    return _initialize(models, optimizers, _amp_state.opt_properties)
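Putting the updated signature and docstring together, a minimal usage sketch of the new verbosity knob (model and optimizer here are placeholders, not part of this commit):

    from apex import amp

    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level="O1",
                                      loss_scale="dynamic",
                                      verbosity=0)  # 0 suppresses the maybe_print output above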
apex/amp/handle.py
import contextlib
import logging
import warnings
import torch

from . import utils
from .opt import OptimWrapper
from .scaler import LossScaler
-from ._amp_state import _amp_state, master_params
+from ._amp_state import _amp_state, master_params, maybe_print
from ..fp16_utils import FP16_Optimizer as FP16_Optimizer_general
from ..optimizers import FP16_Optimizer as FP16_Optimizer_for_fused
@@ -106,9 +105,8 @@ def scale_loss(loss,
    if should_skip:
        optimizer_step = optimizer.step
        def skip_step():
-           logger = logging.getLogger('apex.amp')
-           logger.warning("Gradient overflow. Skipping step, reducing " +
-                          "loss scale to {}".format(optimizer.loss_scaler.loss_scale()))
+           maybe_print("Gradient overflow. Skipping step, reducing " +
+                       "loss scale to {}".format(optimizer.loss_scaler.loss_scale()))
            optimizer.step = optimizer_step
        optimizer.step = skip_step

    # Probably ok to skip this if not delay_unscale
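The skip_step patch above is installed inside the scale_loss context manager when an overflow is detected. For context, the usual (sketched) call pattern in user training code looks like this; it assumes `from apex import amp` and that loss and optimizer are placeholders:

    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
    optimizer.step()  # temporarily rebound to skip_step if gradients overflowed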
@@ -171,8 +169,7 @@ class AmpHandle(object):
        if should_skip:
            optimizer_step = optimizer.step
            def skip_step():
-               logger = logging.getLogger('apex.amp')
-               logger.warning('Gradient overflow, skipping update')
+               maybe_print('Gradient overflow, skipping update')
                optimizer.step = optimizer_step
            optimizer.step = skip_step
apex/amp/opt.py
import contextlib
import logging
import warnings

from .scaler import LossScaler, master_params
+from ._amp_state import maybe_print

import numpy as np
@@ -71,8 +71,7 @@ class OptimWrapper(object):
                'The `closure` argument is unsupported by the amp ' +
                'optimizer wrapper.')
        if any(self._skip_next):
-           logger = logging.getLogger('apex.amp')
-           logger.info('Gradient overflow, skipping update')
+           maybe_print('Gradient overflow, skipping update')
            self._skip_next = [False] * self._num_loss
        else:
            return self._optimizer.step(closure=closure)
apex/amp/scaler.py
import torch
import logging
from ..multi_tensor_apply import multi_tensor_applier
-from ._amp_state import _amp_state, master_params
+from ._amp_state import _amp_state, master_params, maybe_print
from itertools import product

# from apex_C import scale_check_overflow
@@ -46,10 +45,12 @@ class LossScaler(object):
            LossScaler.multi_tensor_scale_cuda = amp_C.multi_tensor_scale
        else:
            if not LossScaler.warned_no_fused_kernel:
-               print("Warning: multi_tensor_applier fused unscale kernel is unavailable, "
-                     "possibly because apex was installed without --cuda_ext --cpp_ext. "
-                     "Using Python fallback. Original ImportError was: ",
-                     multi_tensor_applier.import_err)
+               maybe_print("Warning: multi_tensor_applier fused unscale kernel is unavailable, "
+                           "possibly because apex was installed without --cuda_ext --cpp_ext. "
+                           "Using Python fallback. Original ImportError was: " +
+                           multi_tensor_applier.import_err,
+                           True)
            LossScaler.has_fused_kernel = False
            LossScaler.warned_no_fused_kernel = True
@@ -61,8 +62,7 @@ class LossScaler(object):
        if model is not None:
            if not LossScaler.warned_unscaling_non_fp32_grad:
                if master.type() != "torch.cuda.FloatTensor":
-                   logger = logging.getLogger("apex.amp")
-                   logger.warning("Attempting to unscale a grad with type {} ".format(master.type()) +
-                                  "Unscaling non-fp32 grads may indicate an error. "
-                                  "When using Amp, you don't need to call .half() on your model.")
+                   maybe_print("Attempting to unscale a grad with type {} ".format(master.type()) +
+                               "Unscaling non-fp32 grads may indicate an error. "
+                               "When using Amp, you don't need to call .half() on your model.")
apex/fp16_utils/fp16_optimizer.py
@@ -4,6 +4,7 @@ from torch.autograd import Variable
from torch.nn.parameter import Parameter
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
+from ..amp._amp_state import _amp_state, maybe_print
from ..amp.scaler import LossScaler
from ..multi_tensor_apply import multi_tensor_applier
from .fp16util import model_grads_to_master_grads, master_params_to_model_params, clip_grad_norm
@@ -193,6 +194,8 @@ class FP16_Optimizer(object):
            self.multi_tensor_scale = amp_C.multi_tensor_scale
            self._dummy_overflow_buf = torch.cuda.IntTensor([0]);

+   # Having self.maybe_print distinct from _amp_state.maybe_print is another artifact
+   # of having to support FP16_Optimizer separately, for the time being.
    def maybe_print(self, msg):
        if self.verbose:
            print(msg)
@@ -401,8 +404,9 @@ class FP16_Optimizer(object):
        # self._update_scale(self.overflow)
        if self.overflow:
-           print("Gradient overflow. Skipping step, reducing " +
-                 "loss scale to {}".format(self.loss_scaler.loss_scale()))
+           # Using _amp_state.maybe_print instead of self.print here is intentional.
+           maybe_print("Gradient overflow. Skipping step, reducing " +
+                       "loss scale to {}".format(self.loss_scaler.loss_scale()))
            return
        if closure is not None:
tests/L1/common/run_test.sh
@@ -6,8 +6,14 @@ print_banner() {
print_banner "Distributed status: $1"

-# DATADIR="/home/mcarilli/Desktop/pt18data/apex/examples/imagenet/bare_metal_train_val/"
-DATADIR="/opt/home/apex/examples/imagenet/"
+echo $2
+if [ -n "$2" ]
+then
+  DATADIR="$2"
+else
+  # DATADIR="/home/mcarilli/Desktop/pt18data/apex/examples/imagenet/bare_metal_train_val/"
+  DATADIR="/opt/home/apex/examples/imagenet/"
+fi

if [ "$1" == "single_gpu" ]
then
tests/L1/cross_product_distributed/run.sh
#!/bin/bash
cp ../common/* .

-bash run_test.sh distributed
+bash run_test.sh distributed $1
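Taken together, the two script changes let a dataset directory be passed down from the top level, e.g. bash run.sh /path/to/imagenet (the path is a placeholder): run.sh forwards its first argument to run_test.sh, which receives it as $2 and assigns it to DATADIR, falling back to the hard-coded default when no path is given.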