OpenDAS / apex · Commit cc8f03c8

Multi-op sequence for ddp_race_condition_test.py

Authored May 14, 2018 by Michael Carilli · parent 47ac5c2b
Showing 2 changed files with 22 additions and 30 deletions (+22 −30):

    apex/fp16_utils/fp16util.py                   +3   −11
    tests/distributed/ddp_race_condition_test.py  +19  −19
apex/fp16_utils/fp16util.py  (+3 −11)
@@ -73,18 +73,10 @@ def prep_param_lists(model, flat_master=False):
             # flatten_dense_tensors returns a contiguous flat array.
             # http://pytorch.org/docs/master/_modules/torch/_utils.html
             master_params = _flatten_dense_tensors([param.data for param in model_params]).float()
-        except TypeError as instance:
-            # This is brittle, and depends on how cat chooses to word its error message.
-            if "cat received an invalid combination of arguments" not in instance.args[0]:
-                raise
-            else:
-                # If you append a message to the exception instance, via
-                # instance.args = instance.args + ("Error...",)
-                # this messes up the terminal-formatted printing of the instance's original message.
-                # Basic solution for now:
-                print("Error in prep_param_lists: model likely contains a mixture of parameters "
+        except:
+            print("Error in prep_param_lists: model may contain a mixture of parameters "
                   "of different types. Use flat_master=False, or use F16_Optimizer.")
-                raise
+            raise
         master_params = torch.nn.Parameter(master_params)
         master_params.requires_grad = True
         # master_params.register_hook(backwards_debug_hook)
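Note on the fp16util.py change: the old handler keyed off the exact wording of torch.cat's error message, which (as its own comment admitted) is brittle across PyTorch versions; the commit replaces it with a broad except that prints a hint and re-raises. For context, the guarded operation is the flat-master-weights step of mixed-precision training: flatten every parameter's data into one contiguous tensor and keep an fp32 master copy of it. A minimal, self-contained sketch of that pattern (the toy Linear model and its size are illustrative, not from the commit):

    # Sketch of the flat fp32-master-weights step guarded above. The toy
    # model is illustrative; only the flatten/except/raise pattern mirrors
    # the committed code.
    import torch
    from torch._utils import _flatten_dense_tensors

    model = torch.nn.Linear(4, 4).half()  # fp16 model parameters
    model_params = [p for p in model.parameters() if p.requires_grad]

    try:
        # Concatenates all parameter data into one contiguous fp32 tensor.
        master_params = _flatten_dense_tensors(
            [param.data for param in model_params]).float()
    except:
        # The underlying torch.cat can fail if parameters mix dtypes, and
        # its message wording varies across versions, hence a broad except
        # that re-raises after printing a hint.
        print("Error in prep_param_lists: model may contain a mixture of "
              "parameters of different types.")
        raise

    master_params = torch.nn.Parameter(master_params)
    master_params.requires_grad = True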
tests/distributed/ddp_race_condition_test.py  (+19 −19)
@@ -24,40 +24,40 @@ args.distributed = args.world_size > 1
if
args
.
distributed
:
torch
.
cuda
.
set_device
(
args
.
rank
%
torch
.
cuda
.
device_count
())
dist
.
init_process_group
(
args
.
dist_backend
,
init_method
=
args
.
dist_url
,
world_size
=
args
.
world_size
)
rank
=
torch
.
distributed
.
get_rank
()
dist
.
init_process_group
(
args
.
dist_backend
,
init_method
=
args
.
dist_url
,
world_size
=
args
.
world_size
,
rank
=
args
.
rank
)
torch
.
set_printoptions
(
precision
=
10
)
class
Model
(
Module
):
def
__init__
(
self
):
super
(
Model
,
self
).
__init__
()
self
.
x
=
Parameter
(
torch
.
cuda
.
FloatTensor
(
1
,
4096
*
4096
).
fill_
(
1.0
))
self
.
a
=
Parameter
(
torch
.
cuda
.
FloatTensor
(
4096
*
4096
).
fill_
(
1.0
))
self
.
b
=
Parameter
(
torch
.
cuda
.
FloatTensor
(
4096
*
4096
).
fill_
(
2.0
))
def
forward
(
self
,
input
):
return
self
.
x
*
input
return
(
input
*
self
.
a
)
*
self
.
b
model
=
DDP
(
Model
(),
message_size
=
1
)
z
=
torch
.
cuda
.
FloatTensor
(
4096
*
4096
)
x
=
torch
.
cuda
.
FloatTensor
(
4096
*
4096
)
for
i
in
range
(
10
):
z
.
fill_
(
i
+
rank
)
# fill
z
with new values every iteration for sanity
x
.
fill_
(
i
+
args
.
rank
)
# fill
x
with new values every iteration for sanity
model
.
zero_grad
()
out
=
model
(
z
)
out
=
model
(
x
)
loss
=
out
.
sum
()
torch
.
cuda
.
nvtx
.
range_push
(
"backward"
)
loss
.
backward
()
torch
.
cuda
.
nvtx
.
range_pop
()
torch
.
cuda
.
nvtx
.
range_push
(
"synchronize() +
sum
"
)
torch
.
cuda
.
nvtx
.
range_push
(
"synchronize() +
info
"
)
torch
.
cuda
.
synchronize
()
for
param
in
model
.
parameters
():
print
(
"i = {},
\n
"
"param.grad.data_ptr() = {}
\n
"
"expected {},
\n
"
" got {}
\n
"
.
format
(
i
,
param
.
grad
.
data_ptr
(),
4096
*
4096
*
(
2.
*
i
+
1
)
/
2.
,
param
.
grad
.
data
.
sum
().
item
()))
print
(
"i = {}"
.
format
(
i
))
def
info
(
name
,
param
,
val
):
print
(
name
+
": grad.data_ptr() = {}, expected sum {}, got {}"
.
format
(
param
.
grad
.
data_ptr
(),
val
*
4096
*
4096
*
(
2.
*
i
+
1
)
/
2.
,
param
.
grad
.
data
.
sum
().
item
()))
info
(
"model.a"
,
model
.
module
.
a
,
2.
)
info
(
"model.b"
,
model
.
module
.
b
,
1.
)
torch
.
cuda
.
nvtx
.
range_pop
()
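Note on the test change: the forward pass now chains two elementwise multiplies over two parameters instead of one, so backward produces a sequence of gradients for DDP to reduce, which appears to be the "multi-op sequence" of the commit title. The expected sums printed by info() follow from that forward: with out = (input * a) * b and loss = out.sum(), the elementwise gradients are dL/da = input * b and dL/db = input * a, and rank r fills the input with i + r on iteration i. A sketch of the arithmetic, assuming world_size == 2 and gradient averaging across ranks (both suggested by the (2.*i + 1)/2. factor in the test, not stated in the diff):

    # Sketch of where info()'s expected values come from, assuming
    # world_size == 2 (ranks 0 and 1) and gradients averaged across ranks.
    N = 4096 * 4096  # elements per parameter

    def expected_grad_sums(i, world_size=2):
        # forward: out = (input * a) * b, loss = out.sum()
        # => dL/da = input * b and dL/db = input * a, elementwise
        a_val, b_val = 1.0, 2.0                 # the fill_ values of a and b
        mean_input = sum(i + r for r in range(world_size)) / world_size
        grad_a_sum = b_val * N * mean_input     # info("model.a", ..., 2.)
        grad_b_sum = a_val * N * mean_input     # info("model.b", ..., 1.)
        return grad_a_sum, grad_b_sum

    print(expected_grad_sums(0))  # (16777216.0, 8388608.0) at i = 0

A gradient sum that deviates from these values after torch.cuda.synchronize() is the signal for the race condition the test probes; the grad.data_ptr() print presumably lets one also confirm the gradient buffers themselves stay put across iterations.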
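The loop also brackets both the backward pass and the verification step with NVTX ranges, so each appears as a named region when the test is run under nvprof or Nsight. A minimal sketch of that annotation pattern (toy tensor, illustrative; requires a CUDA device):

    # Sketch of the NVTX range annotation used in the test loop. The ranges
    # are cheap markers that show up as named regions in profiler timelines.
    import torch

    x = torch.ones(1024, device="cuda", requires_grad=True)
    loss = (x * x).sum()

    torch.cuda.nvtx.range_push("backward")  # open a named range
    loss.backward()
    torch.cuda.nvtx.range_pop()             # close it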