Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
apex
Commits
2f0bf594
Unverified
Commit
2f0bf594
authored
Mar 08, 2019
by
Deyu Fu
Committed by
GitHub
Mar 08, 2019
Browse files
Merge branch 'master' into deyuf/update_norm
parents
99495376
40555b3a
Changes
91
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
145 additions
and
161 deletions
+145
-161
tests/L1/common/run_test.sh
tests/L1/common/run_test.sh
+137
-0
tests/L1/cross_product/run.sh
tests/L1/cross_product/run.sh
+4
-0
tests/L1/cross_product_distributed/run.sh
tests/L1/cross_product_distributed/run.sh
+4
-0
tests/RNN/RNN_tests.py
tests/RNN/RNN_tests.py
+0
-118
tests/distributed/DDP/ddp_race_condition_test.py
tests/distributed/DDP/ddp_race_condition_test.py
+0
-0
tests/distributed/DDP/run_race_test.sh
tests/distributed/DDP/run_race_test.sh
+0
-0
tests/distributed/synced_batchnorm/single_gpu_unit_test.py
tests/distributed/synced_batchnorm/single_gpu_unit_test.py
+0
-0
tests/distributed/synced_batchnorm/two_gpu_unit_test.py
tests/distributed/synced_batchnorm/two_gpu_unit_test.py
+0
-0
tests/distributed/synced_batchnorm/unit_test.sh
tests/distributed/synced_batchnorm/unit_test.sh
+0
-0
tests/run_fp16_optimizer/test_fp16_optimizer.py
tests/run_fp16_optimizer/test_fp16_optimizer.py
+0
-43
tests/run_mixed_adam/__init__.py
tests/run_mixed_adam/__init__.py
+0
-0
No files found.
tests/L1/common/run_test.sh
0 → 100644
View file @
2f0bf594
#!/bin/bash

# L1 integration-test driver for apex.amp on the imagenet example.
# Usage: run_test.sh <single_gpu|distributed>
# Installs apex with and without the cpp/cuda extensions, sweeps the
# (opt_level, loss_scale, keep_batchnorm) grid in both installs, then
# compares the two runs for bitwise accuracy.

print_banner()
{
  # Pass the message as a %s argument rather than splicing it into the
  # format string, so a '%' in the message cannot be misread as a printf
  # directive.  Black-on-green so banners stand out in long logs.
  printf "\n\n\n\e[30m\e[42m%s\e[0m\n\n\n\n" "$1"
}

print_banner "Distributed status:  $1"

# DATADIR="/home/mcarilli/Desktop/pt18data/apex/examples/imagenet/bare_metal_train_val/"
DATADIR="/opt/home/apex/examples/imagenet/"

if [ "$1" == "single_gpu" ]
then
  BASE_CMD="python main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
fi

if [ "$1" == "distributed" ]
then
  BASE_CMD="python -m torch.distributed.launch --nproc_per_node=2 main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
fi

# FusedAdam has limited support, so it gets its own pass below.
ADAM_ARGS="--opt-level O2 --keep-batchnorm-fp32 False --fused-adam"

keep_batchnorms=(
""
"--keep-batchnorm-fp32 True"
"--keep-batchnorm-fp32 False"
)

loss_scales=(
""
"--loss-scale 1.0"
"--loss-scale 128.0"
"--loss-scale dynamic"
)

opt_levels=(
"O0"
"O1"
"O2"
"O3"
)

# Remove result dumps left over from a previous run.  -f keeps a clean
# checkout (no matching files) from printing spurious rm errors.
rm -f True*
rm -f False*

set -e

print_banner "Installing Apex with --cuda_ext and --cpp_ext"

pushd ../../..
python setup.py install --cuda_ext --cpp_ext
popd

# Sweep the full option grid with the compiled extensions installed.
for opt_level in "${opt_levels[@]}"
do
  for loss_scale in "${loss_scales[@]}"
  do
    for keep_batchnorm in "${keep_batchnorms[@]}"
    do
      # --keep-batchnorm-fp32 is not meaningful for O1.
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
      then
        print_banner "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      print_banner "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --has-ext $DATADIR"
      set -x
      ${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --has-ext $DATADIR
      set +x
    done
  done
done

# Handle FusedAdam separately due to limited support.
# FusedAdam will not be tested for bitwise accuracy against the Python implementation.
# The L0 tests already do so. These tests are here to ensure that it actually runs,
# and get an idea of performance.
for loss_scale in "${loss_scales[@]}"
do
  print_banner "${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext $DATADIR"
  set -x
  ${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext $DATADIR
  set +x
done

print_banner "Reinstalling apex without extensions"

pushd ../../..
python setup.py install
popd

# Repeat the sweep with the pure-Python install (note: no --has-ext flag).
for opt_level in "${opt_levels[@]}"
do
  for loss_scale in "${loss_scales[@]}"
  do
    for keep_batchnorm in "${keep_batchnorms[@]}"
    do
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
      then
        print_banner "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      print_banner "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} $DATADIR"
      set -x
      ${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} $DATADIR
      set +x
    done
  done
done

print_banner "Checking for bitwise accuracy between Python-only and cpp/cuda extension installs"

# compare.py diffs the result dumps produced by the two sweeps above.
for opt_level in "${opt_levels[@]}"
do
  for loss_scale in "${loss_scales[@]}"
  do
    for keep_batchnorm in "${keep_batchnorms[@]}"
    do
      echo ""
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
      then
        echo "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      echo "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} [--has-ext] $DATADIR"
      set -x
      python compare.py --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm}
      set +x
    done
  done
done

print_banner "Reinstalling Apex with --cuda_ext and --cpp_ext"

pushd ../../..
python setup.py install --cuda_ext --cpp_ext
popd
tests/L1/cross_product/run.sh
0 → 100644
View file @
2f0bf594
#!/bin/bash

# Pull the shared L1 harness into this directory, then exercise the full
# option cross-product in single-GPU mode.
cp ../common/* .
bash run_test.sh single_gpu
tests/L1/cross_product_distributed/run.sh
0 → 100644
View file @
2f0bf594
#!/bin/bash

# Pull the shared L1 harness into this directory, then exercise the full
# option cross-product in distributed (multi-process) mode.
cp ../common/* .
bash run_test.sh distributed
tests/RNN/RNN_tests.py
deleted
100644 → 0
View file @
99495376
import
torch
import
torch.nn
as
nn
from
torch.autograd
import
Variable
import
apex
from
apex.RNN.models
import
bidirectionalRNN
,
stackedRNN
,
RNNCell
from
torch.nn._functions.rnn
import
LSTMCell
import
itertools
# Disable cuDNN so PyTorch falls back to its reference RNN math, which is
# what the apex implementation is being compared against.
torch.backends.cudnn.enabled = False

batch_first = False    # not implemented yet
dropout = 0.0          # How to validate?
bidirectional = False  # True works, but differs in definition to PyTorch

# Cell types under test; 'ReLU' and 'Tanh' select the vanilla-RNN nonlinearity.
rnn_types = ['LSTM', 'GRU', 'ReLU', 'Tanh']

# Every dimension of the test grid draws from the same small size list.
sizes = [8, 4, 2]
seq_sizes = hidden_sizes = inp_sizes = batch_sizes = num_layerss = sizes
biases = [True]
def copy_param_set(pyt_rnn, my_rnn, layer=0, reverse=False):
    """Copy one layer's parameters from the apex RNN into the PyTorch RNN.

    `reverse` selects the backward-direction cell of a bidirectionalRNN.
    Raises RuntimeError if `my_rnn` is neither a bidirectionalRNN nor a
    stackedRNN.
    """
    # Locate the apex cell that holds the source parameters.
    if isinstance(my_rnn, bidirectionalRNN):
        direction = my_rnn.bckwrd if reverse else my_rnn.fwd
        src_cell = direction.rnns[layer]
    elif isinstance(my_rnn, stackedRNN):
        src_cell = my_rnn.rnns[layer]
    else:
        raise RuntimeError()

    # Bias-less cells expose only the two weight tensors.
    src_names = ['w_ih', 'w_hh', 'b_ih', 'b_hh']
    if not hasattr(src_cell, 'b_hh'):
        src_names = src_names[:2]
    src_params = [getattr(src_cell, name) for name in src_names]

    # Matching destination attributes on the PyTorch module, e.g.
    # weight_ih_l0, with a '_reverse' suffix for the backward direction.
    dst_names = ['weight_ih_', 'weight_hh_', 'bias_ih_', 'bias_hh_']
    suffix = '_reverse' if reverse else ''
    if not hasattr(pyt_rnn, 'bias_hh_l0'):
        dst_names = dst_names[:2]
    dst_params = [getattr(pyt_rnn, name + 'l' + str(layer) + suffix)
                  for name in dst_names]

    # In-place copy so both models start from identical weights.
    for dst, src in zip(dst_params, src_params):
        dst.data.copy_(src.data)
def copy_all_params(pyt_rnn, my_rnn):
    """Mirror every layer of `my_rnn`'s parameters into `pyt_rnn`.

    NOTE(review): reads the module-level globals `num_layers` and
    `bidirectional` that the driver loop below assigns per test case.
    """
    for layer_idx in range(num_layers):
        copy_param_set(pyt_rnn, my_rnn, layer_idx)
        # For bidirectional nets, also copy the reverse-direction cell.
        if bidirectional:
            copy_param_set(pyt_rnn, my_rnn, layer_idx, bidirectional)
def compare_variables(v1, v2, msg, params):
    """Print a report if two tensors/Variables differ by more than 1e-5.

    Compares the maximum absolute elementwise difference of `.data`;
    stays silent when the values agree within tolerance.
    """
    max_err = float((v1.data - v2.data).abs().max())
    if max_err <= 1e-5:
        return
    print("Error of ", max_err, " found for ", msg, " for case: ", str(params))
def compare_tuple_variables(t1, t2, msg, params):
    """Compare two sequences of tensors pairwise via compare_variables.

    zip stops at the shorter sequence, so extra trailing entries in the
    longer one are ignored.
    """
    for lhs, rhs in zip(t1, t2):
        compare_variables(lhs, rhs, msg, params)
def maybe_compare(v1, v2, msg, params):
    """Dispatch to the right comparison for single values vs. tuples.

    Two bare Variables go straight to compare_variables; anything else is
    treated as a pair of sequences of Variables.
    """
    both_single = isinstance(v1, Variable) and isinstance(v2, Variable)
    if both_single:
        compare_variables(v1, v2, msg, params)
    else:
        compare_tuple_variables(v1, v2, msg, params)
# Full cartesian test grid over cell type and every size dimension.
product = list(itertools.product(rnn_types, seq_sizes, hidden_sizes,
                                 inp_sizes, batch_sizes, num_layerss, biases))

for test_case in product:
    # NOTE: num_layers and bidirectional are read as globals by
    # copy_all_params, so these exact names must be bound here.
    rnn_type, seq_size, hidden_size, inp_size, batch_size, num_layers, bias = test_case

    # One shared random input so both models see identical data.
    inp = torch.cuda.FloatTensor(seq_size, batch_size, inp_size).uniform_()

    if rnn_type == 'ReLU' or rnn_type == 'Tanh':
        # Vanilla RNN: the "type" is really the nonlinearity.
        pytorch_rnn = nn.RNN(inp_size, hidden_size, num_layers, bias,
                             batch_first, dropout, bidirectional,
                             nonlinearity=rnn_type.lower()).cuda()
    else:
        pytorch_rnn = getattr(nn, rnn_type)(inp_size, hidden_size, num_layers,
                                            bias, batch_first, dropout,
                                            bidirectional).cuda()
    my_rnn = getattr(apex.RNN.models, rnn_type)(inp_size, hidden_size,
                                                num_layers, bias, batch_first,
                                                dropout, bidirectional).cuda()

    # Start both models from identical weights.
    copy_all_params(pytorch_rnn, my_rnn)

    pyt_inp = Variable(inp, requires_grad=True)
    my_inp = Variable(inp, requires_grad=True)

    my_out, my_hiddens = my_rnn(my_inp)
    pyt_out, pyt_hiddens = pytorch_rnn(pyt_inp)
    pyt_out.sum().backward()
    my_out.sum().backward()

    maybe_compare(pyt_out, my_out, "out", test_case)

    # If there's only one hidden state PyTorch doesn't return it in a tuple,
    # apex does, so we wrap PyTorch's returned hidden state in a tuple.
    if not isinstance(pyt_hiddens, tuple):
        pyt_hiddens = (pyt_hiddens,)

    try:
        for i, (pyt_hid, my_hid) in enumerate(zip(pyt_hiddens, my_hiddens)):
            maybe_compare(pyt_hid, my_hid, "hx_" + str(i), test_case)
    except ValueError:
        # Hidden states were not iterable pairs; compare them wholesale.
        maybe_compare(pyt_hiddens, my_hiddens, "hx_0", test_case)

    maybe_compare(pyt_inp.grad, my_inp.grad, "inp.grad", test_case)

print("Test passed.")
tests/distributed/ddp_race_condition_test.py
→
tests/distributed/
DDP/
ddp_race_condition_test.py
View file @
2f0bf594
File moved
tests/distributed/run_race_test.sh
→
tests/distributed/
DDP/
run_race_test.sh
View file @
2f0bf594
File moved
tests/synced_batchnorm/single_gpu_unit_test.py
→
tests/
distributed/
synced_batchnorm/single_gpu_unit_test.py
View file @
2f0bf594
File moved
tests/synced_batchnorm/two_gpu_unit_test.py
→
tests/
distributed/
synced_batchnorm/two_gpu_unit_test.py
View file @
2f0bf594
File moved
tests/synced_batchnorm/unit_test.sh
→
tests/
distributed/
synced_batchnorm/unit_test.sh
View file @
2f0bf594
File moved
tests/run_fp16_optimizer/test_fp16_optimizer.py
deleted
100644 → 0
View file @
99495376
import
unittest
import
functools
as
ft
import
itertools
as
it
import
torch
from
apex.fp16_utils
import
FP16_Optimizer
# Currently no-ops (tested via examples).
# FP16_Optimizer to be deprecated and moved under unified Amp API.
class
TestFP16Optimizer
(
unittest
.
TestCase
):
def
setUp
(
self
):
N
,
D_in
,
D_out
=
64
,
1024
,
16
self
.
N
=
N
self
.
D_in
=
D_in
self
.
D_out
=
D_out
self
.
x
=
torch
.
randn
((
N
,
D_in
),
dtype
=
torch
.
float16
,
device
=
'cuda'
)
self
.
y
=
torch
.
randn
((
N
,
D_out
),
dtype
=
torch
.
float16
,
device
=
'cuda'
)
self
.
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
# def tearDown(self):
# pass
def
test_minimal
(
self
):
pass
def
test_minimal_static
(
self
):
pass
def
test_minimal_dynamic
(
self
):
pass
def
test_closure
(
self
):
pass
def
test_closure_dynamic
(
self
):
pass
def
test_save_load
(
self
):
pass
if __name__ == '__main__':
    # Allow running this file directly as a standalone test script.
    unittest.main()
tests/run_mixed_adam/__init__.py
deleted
100644 → 0
View file @
99495376
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment