OpenDAS / apex / Commits / 327b2446

Commit 327b2446 authored Sep 13, 2018 by Michael Carilli

Fixing imagenet main.py and main_reducer.py to save and load master params

parent b7025fc9
Showing 3 changed files with 54 additions and 36 deletions:

  examples/imagenet/main.py                  +19 -12
  examples/imagenet/main_fp16_optimizer.py   +16 -12
  examples/imagenet/main_reducer.py          +19 -12
examples/imagenet/main.py

@@ -139,19 +139,25 @@ def main():
                                 momentum=args.momentum,
                                 weight_decay=args.weight_decay)
 
-    # optionally resume from a checkpoint
+    # Optionally resume from a checkpoint
     if args.resume:
-        if os.path.isfile(args.resume):
-            print("=> loading checkpoint '{}'".format(args.resume))
-            checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(args.gpu))
-            args.start_epoch = checkpoint['epoch']
-            best_prec1 = checkpoint['best_prec1']
-            model.load_state_dict(checkpoint['state_dict'])
-            optimizer.load_state_dict(checkpoint['optimizer'])
-            print("=> loaded checkpoint '{}' (epoch {})"
-                  .format(args.resume, checkpoint['epoch']))
-        else:
-            print("=> no checkpoint found at '{}'".format(args.resume))
+        # Use a local scope to avoid dangling references
+        def resume():
+            if os.path.isfile(args.resume):
+                print("=> loading checkpoint '{}'".format(args.resume))
+                checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(args.gpu))
+                args.start_epoch = checkpoint['epoch']
+                best_prec1 = checkpoint['best_prec1']
+                model.load_state_dict(checkpoint['state_dict'])
+                saved_master_params = checkpoint['master_params']
+                for master, saved in zip(master_params, saved_master_params):
+                    master.data.copy_(saved.data)
+                optimizer.load_state_dict(checkpoint['optimizer'])
+                print("=> loaded checkpoint '{}' (epoch {})"
+                      .format(args.resume, checkpoint['epoch']))
+            else:
+                print("=> no checkpoint found at '{}'".format(args.resume))
+        resume()
 
     # Data loading code
     traindir = os.path.join(args.data, 'train')

@@ -219,6 +225,7 @@ def main():
             'state_dict': model.state_dict(),
             'best_prec1': best_prec1,
             'optimizer' : optimizer.state_dict(),
+            'master_params' : master_params,
         }, is_best)
 
 class data_prefetcher():
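For context on what the main.py hunks are doing: this example keeps the FP32 master parameters in a separate master_params list (restoring the masters from the rounded FP16 model weights would lose the extra precision they accumulate), so the checkpoint must store that list and copy it back element-wise on resume. A minimal, self-contained sketch of the same round trip follows; the tiny Linear model, optimizer settings, and file name are placeholders of mine, not code from the repo.

    import torch

    # Stand-in for the FP16 ImageNet model.
    model = torch.nn.Linear(10, 10).half()

    # FP32 "master" copies that the optimizer actually steps on.
    master_params = [p.detach().clone().float() for p in model.parameters()]
    for p in master_params:
        p.requires_grad = True
    optimizer = torch.optim.SGD(master_params, lr=0.1, momentum=0.9)

    # Save: master_params goes into the checkpoint next to the usual entries,
    # which is exactly what the @@ -219,6 +225,7 @@ hunk adds.
    torch.save({'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'master_params': master_params}, 'checkpoint.pth.tar')

    # Resume: restore the FP32 masters in place, mirroring the loop in the diff.
    checkpoint = torch.load('checkpoint.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    for master, saved in zip(master_params, checkpoint['master_params']):
        master.data.copy_(saved.data)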
examples/imagenet/main_fp16_optimizer.py

@@ -144,19 +144,23 @@ def main():
                                static_loss_scale=args.static_loss_scale,
                                dynamic_loss_scale=args.dynamic_loss_scale)
 
-    # optionally resume from a checkpoint
+    # Optionally resume from a checkpoint
     if args.resume:
-        if os.path.isfile(args.resume):
-            print("=> loading checkpoint '{}'".format(args.resume))
-            checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(args.gpu))
-            args.start_epoch = checkpoint['epoch']
-            best_prec1 = checkpoint['best_prec1']
-            model.load_state_dict(checkpoint['state_dict'])
-            optimizer.load_state_dict(checkpoint['optimizer'])
-            print("=> loaded checkpoint '{}' (epoch {})"
-                  .format(args.resume, checkpoint['epoch']))
-        else:
-            print("=> no checkpoint found at '{}'".format(args.resume))
+        # Use a local scope to avoid dangling references
+        def resume():
+            if os.path.isfile(args.resume):
+                print("=> loading checkpoint '{}'".format(args.resume))
+                checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(args.gpu))
+                args.start_epoch = checkpoint['epoch']
+                best_prec1 = checkpoint['best_prec1']
+                model.load_state_dict(checkpoint['state_dict'])
+                # An FP16_Optimizer instance's state dict internally stashes the master params.
+                optimizer.load_state_dict(checkpoint['optimizer'])
+                print("=> loaded checkpoint '{}' (epoch {})"
+                      .format(args.resume, checkpoint['epoch']))
+            else:
+                print("=> no checkpoint found at '{}'".format(args.resume))
+        resume()
 
     # Data loading code
     traindir = os.path.join(args.data, 'train')
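main_fp16_optimizer.py takes a different route: the FP16_Optimizer wrapper owns the FP32 master params and, as the new in-code comment notes, stashes them in its own state dict, so the checkpoint needs no separate 'master_params' entry. A hedged sketch of that round trip, assuming apex is installed, a CUDA device is available, and FP16_Optimizer is importable from apex.fp16_utils as in this era of the library (the model, loss scale, and file name are placeholders):

    import torch
    from apex.fp16_utils import FP16_Optimizer

    model = torch.nn.Linear(10, 10).cuda().half()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.0)

    # Save: the wrapper's state_dict() already carries the FP32 masters.
    torch.save({'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()}, 'checkpoint.pth.tar')

    # Resume: loading the optimizer state brings the masters back too, which is
    # why this file's diff only wraps the existing logic in resume().
    checkpoint = torch.load('checkpoint.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])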
examples/imagenet/main_reducer.py

@@ -139,19 +139,25 @@ def main():
                                 momentum=args.momentum,
                                 weight_decay=args.weight_decay)
 
-    # optionally resume from a checkpoint
+    # Optionally resume from a checkpoint
     if args.resume:
-        if os.path.isfile(args.resume):
-            print("=> loading checkpoint '{}'".format(args.resume))
-            checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(args.gpu))
-            args.start_epoch = checkpoint['epoch']
-            best_prec1 = checkpoint['best_prec1']
-            model.load_state_dict(checkpoint['state_dict'])
-            optimizer.load_state_dict(checkpoint['optimizer'])
-            print("=> loaded checkpoint '{}' (epoch {})"
-                  .format(args.resume, checkpoint['epoch']))
-        else:
-            print("=> no checkpoint found at '{}'".format(args.resume))
+        # Use a local scope to avoid dangling references
+        def resume():
+            if os.path.isfile(args.resume):
+                print("=> loading checkpoint '{}'".format(args.resume))
+                checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(args.gpu))
+                args.start_epoch = checkpoint['epoch']
+                best_prec1 = checkpoint['best_prec1']
+                model.load_state_dict(checkpoint['state_dict'])
+                saved_master_params = checkpoint['master_params']
+                for master, saved in zip(master_params, saved_master_params):
+                    master.data.copy_(saved.data)
+                optimizer.load_state_dict(checkpoint['optimizer'])
+                print("=> loaded checkpoint '{}' (epoch {})"
+                      .format(args.resume, checkpoint['epoch']))
+            else:
+                print("=> no checkpoint found at '{}'".format(args.resume))
+        resume()
 
     # Data loading code
     traindir = os.path.join(args.data, 'train')

@@ -219,6 +225,7 @@ def main():
             'state_dict': model.state_dict(),
             'best_prec1': best_prec1,
             'optimizer' : optimizer.state_dict(),
+            'master_params' : master_params,
         }, is_best)
 
 class data_prefetcher():
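One more note on the shared pattern: all three files now wrap the resume logic in a nested resume() function, per the new "# Use a local scope to avoid dangling references" comment. The point is that names such as checkpoint, which reference large tensors loaded onto the GPU, become unreachable as soon as resume() returns, rather than lingering as locals of main() for the rest of training. A toy, torch-free illustration of that scoping behaviour (all names here are hypothetical):

    def main_like():
        # Mirrors the diffs' nested resume(): everything it loads stays local to it.
        def resume():
            checkpoint = {'epoch': 7, 'payload': bytearray(8 * 1024 * 1024)}  # stands in for torch.load(...)
            return checkpoint['epoch']

        start_epoch = resume()
        # Here `checkpoint` (and its payload) is already unreachable and can be freed;
        # written inline, it would stay alive as a local of main_like() until return.
        return start_epoch

    print(main_like())  # prints 7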