OpenDAS / apex · Commit 48f105d9
"vscode:/vscode.git/clone" did not exist on "8c507d92c0950305d376b19137b5d8cccccea457"
Commit 48f105d9, authored Sep 14, 2018 by Michael Carilli
Only save and load master params if training with FP16
parent 327b2446
Showing 2 changed files with 34 additions and 22 deletions (+34 −22):

examples/imagenet/main.py (+17 −11)
examples/imagenet/main_reducer.py (+17 −11)
examples/imagenet/main.py
@@ -149,9 +149,10 @@ def main():
             args.start_epoch = checkpoint['epoch']
             best_prec1 = checkpoint['best_prec1']
             model.load_state_dict(checkpoint['state_dict'])
-            saved_master_params = checkpoint['master_params']
-            for master, saved in zip(master_params, saved_master_params):
-                master.data.copy_(saved.data)
+            if args.fp16:
+                saved_master_params = checkpoint['master_params']
+                for master, saved in zip(master_params, saved_master_params):
+                    master.data.copy_(saved.data)
             optimizer.load_state_dict(checkpoint['optimizer'])
             print("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.resume, checkpoint['epoch']))
@@ -219,14 +220,19 @@ def main():
         if args.local_rank == 0:
             is_best = prec1 > best_prec1
             best_prec1 = max(prec1, best_prec1)
-            save_checkpoint({
-                'epoch': epoch + 1,
-                'arch': args.arch,
-                'state_dict': model.state_dict(),
-                'best_prec1': best_prec1,
-                'optimizer': optimizer.state_dict(),
-                'master_params': master_params,
-            }, is_best)
+            # Use local scope to avoid dangling references
+            def create_and_save_checkpoint():
+                checkpoint_dict = {
+                    'epoch': epoch + 1,
+                    'arch': args.arch,
+                    'state_dict': model.state_dict(),
+                    'best_prec1': best_prec1,
+                    'optimizer': optimizer.state_dict(),
+                }
+                if args.fp16:
+                    checkpoint_dict['master_params'] = master_params
+                save_checkpoint(checkpoint_dict, is_best)
+            create_and_save_checkpoint()

 class data_prefetcher():
     def __init__(self, loader):
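For context, the "master params" guarded here are the FP32 master copies that apex-style mixed-precision training keeps alongside the FP16 model weights: the optimizer steps on the FP32 masters, and they only exist when args.fp16 is set, which is why both the save and the load now check that flag. Below is a minimal sketch of the surrounding pattern, assuming apex's fp16_utils helpers of this era (prep_param_lists, model_grads_to_master_grads, master_params_to_model_params); the toy model, fp16 flag, and learning rate are illustrative stand-ins for the example's args:

import torch
import torch.nn as nn
from apex.fp16_utils import (prep_param_lists,
                             model_grads_to_master_grads,
                             master_params_to_model_params)

fp16 = True                       # stands in for args.fp16
model = nn.Linear(64, 10).cuda()  # toy model, for illustration only

if fp16:
    model = model.half()
    # model_params: the FP16 tensors used by forward/backward.
    # master_params: FP32 copies the optimizer actually updates --
    # the extra state this commit saves/loads only in FP16 mode.
    model_params, master_params = prep_param_lists(model)
    optimizer = torch.optim.SGD(master_params, lr=0.1)
else:
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

x = torch.randn(8, 64).cuda()
model.zero_grad()
loss = model(x.half() if fp16 else x).float().pow(2).mean()
loss.backward()

if fp16:
    # Route the FP16 grads into the FP32 masters, step the optimizer,
    # then copy the updated masters back into the FP16 model weights.
    model_grads_to_master_grads(model_params, master_params)
    optimizer.step()
    master_params_to_model_params(model_params, master_params)
else:
    optimizer.step()

In a pure FP32 run there is no separate master copy at all, so a checkpoint has nothing extra to carry and resume should not expect a 'master_params' entry, which is exactly what the guarded save and load above encode.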
examples/imagenet/main_reducer.py
@@ -149,9 +149,10 @@ def main():
             args.start_epoch = checkpoint['epoch']
             best_prec1 = checkpoint['best_prec1']
             model.load_state_dict(checkpoint['state_dict'])
-            saved_master_params = checkpoint['master_params']
-            for master, saved in zip(master_params, saved_master_params):
-                master.data.copy_(saved.data)
+            if args.fp16:
+                saved_master_params = checkpoint['master_params']
+                for master, saved in zip(master_params, saved_master_params):
+                    master.data.copy_(saved.data)
             optimizer.load_state_dict(checkpoint['optimizer'])
             print("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.resume, checkpoint['epoch']))
@@ -219,14 +220,19 @@ def main():
         if args.local_rank == 0:
             is_best = prec1 > best_prec1
             best_prec1 = max(prec1, best_prec1)
-            save_checkpoint({
-                'epoch': epoch + 1,
-                'arch': args.arch,
-                'state_dict': model.state_dict(),
-                'best_prec1': best_prec1,
-                'optimizer': optimizer.state_dict(),
-                'master_params': master_params,
-            }, is_best)
+            # Use local scope to avoid dangling references
+            def create_and_save_checkpoint():
+                checkpoint_dict = {
+                    'epoch': epoch + 1,
+                    'arch': args.arch,
+                    'state_dict': model.state_dict(),
+                    'best_prec1': best_prec1,
+                    'optimizer': optimizer.state_dict(),
+                }
+                if args.fp16:
+                    checkpoint_dict['master_params'] = master_params
+                save_checkpoint(checkpoint_dict, is_best)
+            create_and_save_checkpoint()

 class data_prefetcher():
     def __init__(self, loader):
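The identical change is applied to both examples, and it has a resume-compatibility consequence: a checkpoint written by an FP32 run carries no 'master_params' key, so an unconditional checkpoint['master_params'] would raise KeyError. A hedged sketch of a loader that follows this commit's pattern while also tolerating such checkpoints (resume_from is an illustrative helper, not part of the diff):

import torch

def resume_from(path, model, optimizer, master_params=None):
    # Restore training state; the master copies are only touched when
    # the checkpoint actually carries them (i.e. it came from an FP16 run).
    checkpoint = torch.load(path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])

    saved_masters = checkpoint.get('master_params')  # None for FP32 runs
    if master_params is not None and saved_masters is not None:
        for master, saved in zip(master_params, saved_masters):
            master.data.copy_(saved.data)

    return checkpoint['epoch'], checkpoint['best_prec1']

The create_and_save_checkpoint closure in the diff also explains its own comment: building checkpoint_dict inside a local scope means the dict (and, in FP16 mode, its reference to the master tensors) can be released as soon as save_checkpoint returns, rather than lingering as a dangling reference for the rest of the epoch loop.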