Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
apex
Commits
f29b3f8d
Commit
f29b3f8d
authored
Jun 21, 2019
by
Michael Carilli
Browse files
Make main_amp.py more profiling-friendly
parent
4b9858ec
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
29 additions
and
14 deletions
+29
-14
examples/imagenet/README.md
examples/imagenet/README.md
+6
-0
examples/imagenet/main_amp.py
examples/imagenet/main_amp.py
+23
-14
No files found.
examples/imagenet/README.md
View file @
f29b3f8d
...
@@ -173,3 +173,9 @@ Running with the `--deterministic` flag should produce bitwise identical outputs
...
@@ -173,3 +173,9 @@ Running with the `--deterministic` flag should produce bitwise identical outputs
regardless of what other options are used (see
[
Pytorch docs on reproducibility
](
https://pytorch.org/docs/stable/notes/randomness.html
)
).
regardless of what other options are used (see
[
Pytorch docs on reproducibility
](
https://pytorch.org/docs/stable/notes/randomness.html
)
).
Since
`--deterministic`
disables
`torch.backends.cudnn.benchmark`
,
`--deterministic`
may
Since
`--deterministic`
disables
`torch.backends.cudnn.benchmark`
,
`--deterministic`
may
cause a modest performance decrease.
cause a modest performance decrease.
## Profiling
If you're curious how the network actually looks on the CPU and GPU timelines (for example, how good is the overall utilization?
Is the prefetcher really overlapping data transfers?), try profiling
`main_amp.py`
.
[
Detailed instructions can be found here
](
https://gist.github.com/mcarilli/213a4e698e4a0ae2234ddee56f4f3f95
)
.
examples/imagenet/main_amp.py
View file @
f29b3f8d
...
@@ -60,7 +60,7 @@ parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
...
@@ -60,7 +60,7 @@ parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
parser
.
add_argument
(
'--pretrained'
,
dest
=
'pretrained'
,
action
=
'store_true'
,
parser
.
add_argument
(
'--pretrained'
,
dest
=
'pretrained'
,
action
=
'store_true'
,
help
=
'use pre-trained model'
)
help
=
'use pre-trained model'
)
parser
.
add_argument
(
'--prof'
,
de
st
=
'prof'
,
action
=
'store_true'
,
parser
.
add_argument
(
'--prof'
,
de
fault
=-
1
,
type
=
int
,
help
=
'Only run 10 iterations for profiling.'
)
help
=
'Only run 10 iterations for profiling.'
)
parser
.
add_argument
(
'--deterministic'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--deterministic'
,
action
=
'store_true'
)
...
@@ -236,8 +236,7 @@ def main():
...
@@ -236,8 +236,7 @@ def main():
# train for one epoch
# train for one epoch
train
(
train_loader
,
model
,
criterion
,
optimizer
,
epoch
)
train
(
train_loader
,
model
,
criterion
,
optimizer
,
epoch
)
if
args
.
prof
:
break
# evaluate on validation set
# evaluate on validation set
prec1
=
validate
(
val_loader
,
model
,
criterion
)
prec1
=
validate
(
val_loader
,
model
,
criterion
)
...
@@ -323,33 +322,34 @@ def train(train_loader, model, criterion, optimizer, epoch):
...
@@ -323,33 +322,34 @@ def train(train_loader, model, criterion, optimizer, epoch):
i
=
0
i
=
0
while
input
is
not
None
:
while
input
is
not
None
:
i
+=
1
i
+=
1
if
args
.
prof
>=
0
and
i
==
args
.
prof
:
print
(
"Profiling begun at iteration {}"
.
format
(
i
))
torch
.
cuda
.
cudart
().
cudaProfilerStart
()
adjust_learning_rate
(
optimizer
,
epoch
,
i
,
len
(
train_loader
))
if
args
.
prof
>=
0
:
torch
.
cuda
.
nvtx
.
range_push
(
"Body of iteration {}"
.
format
(
i
))
if
args
.
prof
:
adjust_learning_rate
(
optimizer
,
epoch
,
i
,
len
(
train_loader
))
if
i
>
10
:
break
# compute output
# compute output
if
args
.
prof
:
torch
.
cuda
.
nvtx
.
range_push
(
"forward"
)
if
args
.
prof
>=
0
:
torch
.
cuda
.
nvtx
.
range_push
(
"forward"
)
output
=
model
(
input
)
output
=
model
(
input
)
if
args
.
prof
:
torch
.
cuda
.
nvtx
.
range_pop
()
if
args
.
prof
>=
0
:
torch
.
cuda
.
nvtx
.
range_pop
()
loss
=
criterion
(
output
,
target
)
loss
=
criterion
(
output
,
target
)
# compute gradient and do SGD step
# compute gradient and do SGD step
optimizer
.
zero_grad
()
optimizer
.
zero_grad
()
if
args
.
prof
:
torch
.
cuda
.
nvtx
.
range_push
(
"backward"
)
if
args
.
prof
>=
0
:
torch
.
cuda
.
nvtx
.
range_push
(
"backward"
)
with
amp
.
scale_loss
(
loss
,
optimizer
)
as
scaled_loss
:
with
amp
.
scale_loss
(
loss
,
optimizer
)
as
scaled_loss
:
scaled_loss
.
backward
()
scaled_loss
.
backward
()
if
args
.
prof
:
torch
.
cuda
.
nvtx
.
range_pop
()
if
args
.
prof
>=
0
:
torch
.
cuda
.
nvtx
.
range_pop
()
# for param in model.parameters():
# for param in model.parameters():
# print(param.data.double().sum().item(), param.grad.data.double().sum().item())
# print(param.data.double().sum().item(), param.grad.data.double().sum().item())
if
args
.
prof
:
torch
.
cuda
.
nvtx
.
range_push
(
"step"
)
if
args
.
prof
>=
0
:
torch
.
cuda
.
nvtx
.
range_push
(
"
optimizer.
step
()
"
)
optimizer
.
step
()
optimizer
.
step
()
if
args
.
prof
:
torch
.
cuda
.
nvtx
.
range_pop
()
if
args
.
prof
>=
0
:
torch
.
cuda
.
nvtx
.
range_pop
()
if
i
%
args
.
print_freq
==
0
:
if
i
%
args
.
print_freq
==
0
:
# Every print_freq iterations, check the loss, accuracy, and speed.
# Every print_freq iterations, check the loss, accuracy, and speed.
...
@@ -388,8 +388,17 @@ def train(train_loader, model, criterion, optimizer, epoch):
...
@@ -388,8 +388,17 @@ def train(train_loader, model, criterion, optimizer, epoch):
args
.
world_size
*
args
.
batch_size
/
batch_time
.
avg
,
args
.
world_size
*
args
.
batch_size
/
batch_time
.
avg
,
batch_time
=
batch_time
,
batch_time
=
batch_time
,
loss
=
losses
,
top1
=
top1
,
top5
=
top5
))
loss
=
losses
,
top1
=
top1
,
top5
=
top5
))
if
args
.
prof
>=
0
:
torch
.
cuda
.
nvtx
.
range_push
(
"prefetcher.next()"
)
input
,
target
=
prefetcher
.
next
()
input
,
target
=
prefetcher
.
next
()
if
args
.
prof
>=
0
:
torch
.
cuda
.
nvtx
.
range_pop
()
# Pop range "Body of iteration {}".format(i)
if
args
.
prof
>=
0
:
torch
.
cuda
.
nvtx
.
range_pop
()
if
args
.
prof
>=
0
and
i
==
args
.
prof
+
10
:
print
(
"Profiling ended at iteration {}"
.
format
(
i
))
torch
.
cuda
.
cudart
().
cudaProfilerStop
()
quit
()
def
validate
(
val_loader
,
model
,
criterion
):
def
validate
(
val_loader
,
model
,
criterion
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment