Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
apex
Commits
920da6da
Unverified
Commit
920da6da
authored
Dec 04, 2018
by
mcarilli
Committed by
GitHub
Dec 04, 2018
Browse files
Merge pull request #89 from ptrblck/update_examples
Update examples to PyTorch >=0.4.0
parents
0273d7ad
9ccebe5b
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
49 additions
and
90 deletions
+49
-90
examples/FP16_Optimizer_simple/closure.py
examples/FP16_Optimizer_simple/closure.py
+3
-4
examples/FP16_Optimizer_simple/distributed_apex/distributed_data_parallel.py
...izer_simple/distributed_apex/distributed_data_parallel.py
+2
-3
examples/FP16_Optimizer_simple/distributed_apex_legacy_launcher/distributed_data_parallel.py
...ributed_apex_legacy_launcher/distributed_data_parallel.py
+2
-3
examples/FP16_Optimizer_simple/distributed_pytorch/distributed_data_parallel.py
...r_simple/distributed_pytorch/distributed_data_parallel.py
+2
-3
examples/FP16_Optimizer_simple/minimal.py
examples/FP16_Optimizer_simple/minimal.py
+2
-3
examples/FP16_Optimizer_simple/save_load.py
examples/FP16_Optimizer_simple/save_load.py
+2
-3
examples/distributed/main.py
examples/distributed/main.py
+1
-7
examples/imagenet/main.py
examples/imagenet/main.py
+4
-10
examples/imagenet/main_fp16_optimizer.py
examples/imagenet/main_fp16_optimizer.py
+4
-12
examples/imagenet/main_reducer.py
examples/imagenet/main_reducer.py
+4
-10
examples/word_language_model/generate.py
examples/word_language_model/generate.py
+14
-16
examples/word_language_model/main.py
examples/word_language_model/main.py
+3
-6
examples/word_language_model/main_fp16_optimizer.py
examples/word_language_model/main_fp16_optimizer.py
+3
-6
examples/word_language_model/model.py
examples/word_language_model/model.py
+3
-4
No files found.
examples/FP16_Optimizer_simple/closure.py
View file @
920da6da
import
torch
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
FP16_Optimizer
torch
.
backends
.
cudnn
.
benchmark
=
True
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
...
...
examples/FP16_Optimizer_simple/distributed_apex/distributed_data_parallel.py
View file @
920da6da
import
torch
from
torch.autograd
import
Variable
import
argparse
from
apex.parallel
import
DistributedDataParallel
as
DDP
from
apex.fp16_utils
import
FP16_Optimizer
...
...
@@ -16,8 +15,8 @@ torch.backends.cudnn.benchmark = True
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
DDP
(
model
)
...
...
examples/FP16_Optimizer_simple/distributed_apex_legacy_launcher/distributed_data_parallel.py
View file @
920da6da
import
torch
from
torch.autograd
import
Variable
import
argparse
from
apex.parallel
import
DistributedDataParallel
as
DDP
from
apex.fp16_utils
import
FP16_Optimizer
...
...
@@ -24,8 +23,8 @@ torch.backends.cudnn.benchmark = True
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
DDP
(
model
)
...
...
examples/FP16_Optimizer_simple/distributed_pytorch/distributed_data_parallel.py
View file @
920da6da
import
torch
from
torch.autograd
import
Variable
import
argparse
from
apex.fp16_utils
import
FP16_Optimizer
...
...
@@ -15,8 +14,8 @@ torch.backends.cudnn.benchmark = True
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
parallel
.
DistributedDataParallel
(
model
,
...
...
examples/FP16_Optimizer_simple/minimal.py
View file @
920da6da
import
torch
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
FP16_Optimizer
torch
.
backends
.
cudnn
.
benchmark
=
True
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
...
...
examples/FP16_Optimizer_simple/save_load.py
View file @
920da6da
import
torch
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
FP16_Optimizer
torch
.
backends
.
cudnn
.
benchmark
=
True
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
...
...
examples/distributed/main.py
View file @
920da6da
...
...
@@ -6,7 +6,6 @@ import torch.nn as nn
import
torch.nn.functional
as
F
import
torch.optim
as
optim
from
torchvision
import
datasets
,
transforms
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
to_python_float
#=====START: ADDED FOR DISTRIBUTED======
...
...
@@ -82,9 +81,6 @@ if args.distributed:
#=====END: ADDED FOR DISTRIBUTED======
torch
.
manual_seed
(
args
.
seed
)
if
args
.
cuda
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
kwargs
=
{
'num_workers'
:
1
,
'pin_memory'
:
True
}
if
args
.
cuda
else
{}
...
...
@@ -158,7 +154,6 @@ def train(epoch):
for
batch_idx
,
(
data
,
target
)
in
enumerate
(
train_loader
):
if
args
.
cuda
:
data
,
target
=
data
.
cuda
(),
target
.
cuda
()
data
,
target
=
Variable
(
data
),
Variable
(
target
)
optimizer
.
zero_grad
()
output
=
model
(
data
)
loss
=
F
.
nll_loss
(
output
,
target
)
...
...
@@ -177,11 +172,10 @@ def test():
with
torch
.
no_grad
():
if
args
.
cuda
:
data
,
target
=
data
.
cuda
(),
target
.
cuda
()
data
,
target
=
Variable
(
data
),
Variable
(
target
)
output
=
model
(
data
)
test_loss
+=
to_python_float
(
F
.
nll_loss
(
output
,
target
,
size_average
=
False
).
data
)
# sum up batch loss
pred
=
output
.
data
.
max
(
1
,
keepdim
=
True
)[
1
]
# get the index of the max log-probability
correct
+=
pred
.
eq
(
target
.
data
.
view_as
(
pred
)).
cpu
().
sum
()
correct
+=
pred
.
eq
(
target
.
data
.
view_as
(
pred
)).
float
().
cpu
().
sum
()
test_loss
/=
len
(
test_loader
.
dataset
)
...
...
examples/imagenet/main.py
View file @
920da6da
...
...
@@ -4,7 +4,6 @@ import shutil
import
time
import
torch
from
torch.autograd
import
Variable
import
torch.nn
as
nn
import
torch.nn.parallel
import
torch.backends.cudnn
as
cudnn
...
...
@@ -315,12 +314,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
data_time
.
update
(
time
.
time
()
-
end
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
output
=
model
(
input
_var
)
loss
=
criterion
(
output
,
target
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
@@ -392,13 +388,11 @@ def validate(val_loader, model, criterion):
i
+=
1
target
=
target
.
cuda
(
async
=
True
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
with
torch
.
no_grad
():
output
=
model
(
input
_var
)
loss
=
criterion
(
output
,
target
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
examples/imagenet/main_fp16_optimizer.py
View file @
920da6da
...
...
@@ -4,7 +4,6 @@ import shutil
import
time
import
torch
from
torch.autograd
import
Variable
import
torch.nn
as
nn
import
torch.nn.parallel
import
torch.backends.cudnn
as
cudnn
...
...
@@ -307,12 +306,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
data_time
.
update
(
time
.
time
()
-
end
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
output
=
model
(
input
_var
)
loss
=
criterion
(
output
,
target
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
@@ -375,14 +371,10 @@ def validate(val_loader, model, criterion):
while
input
is
not
None
:
i
+=
1
target
=
target
.
cuda
(
async
=
True
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
with
torch
.
no_grad
():
output
=
model
(
input
_var
)
loss
=
criterion
(
output
,
target
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
examples/imagenet/main_reducer.py
View file @
920da6da
...
...
@@ -4,7 +4,6 @@ import shutil
import
time
import
torch
from
torch.autograd
import
Variable
import
torch.nn
as
nn
import
torch.nn.parallel
import
torch.backends.cudnn
as
cudnn
...
...
@@ -301,12 +300,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
data_time
.
update
(
time
.
time
()
-
end
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
output
=
model
(
input
_var
)
loss
=
criterion
(
output
,
target
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
@@ -382,13 +378,11 @@ def validate(val_loader, model, criterion):
i
+=
1
target
=
target
.
cuda
(
async
=
True
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
with
torch
.
no_grad
():
output
=
model
(
input
_var
)
loss
=
criterion
(
output
,
target
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
examples/word_language_model/generate.py
View file @
920da6da
...
...
@@ -8,7 +8,6 @@
import
argparse
import
torch
from
torch.autograd
import
Variable
import
data
...
...
@@ -38,8 +37,6 @@ torch.manual_seed(args.seed)
if
torch
.
cuda
.
is_available
():
if
not
args
.
cuda
:
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
else
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
if
args
.
temperature
<
1e-3
:
parser
.
error
(
"--temperature has to be greater or equal 1e-3"
)
...
...
@@ -56,14 +53,15 @@ else:
corpus
=
data
.
Corpus
(
args
.
data
)
ntokens
=
len
(
corpus
.
dictionary
)
hidden
=
model
.
init_hidden
(
1
)
input
=
Variable
(
torch
.
rand
(
1
,
1
).
mul
(
ntokens
).
long
(),
volatile
=
True
)
if
args
.
cuda
:
input
.
data
=
input
.
data
.
cuda
()
with
torch
.
no_grad
():
input
=
torch
.
rand
(
1
,
1
).
mul
(
ntokens
).
long
()
if
args
.
cuda
:
input
=
input
.
cuda
()
with
open
(
args
.
outf
,
'w'
)
as
outf
:
with
open
(
args
.
outf
,
'w'
)
as
outf
:
for
i
in
range
(
args
.
words
):
output
,
hidden
=
model
(
input
,
hidden
)
word_weights
=
output
.
squeeze
().
data
.
div
(
args
.
temperature
).
exp
().
cpu
()
word_weights
=
output
.
squeeze
().
float
().
data
.
div
(
args
.
temperature
).
exp
().
cpu
()
word_idx
=
torch
.
multinomial
(
word_weights
,
1
)[
0
]
input
.
data
.
fill_
(
word_idx
)
word
=
corpus
.
dictionary
.
idx2word
[
word_idx
]
...
...
examples/word_language_model/main.py
View file @
920da6da
...
...
@@ -4,7 +4,6 @@ import time
import
math
import
torch
import
torch.nn
as
nn
from
torch.autograd
import
Variable
import
data
import
model
...
...
@@ -58,8 +57,6 @@ torch.manual_seed(args.seed)
if
torch
.
cuda
.
is_available
():
if
not
args
.
cuda
:
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
else
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
if
args
.
fp16
and
not
args
.
cuda
:
print
(
"WARNING: --fp16 requires --cuda, ignoring --fp16 option"
)
...
...
@@ -117,7 +114,7 @@ criterion = nn.CrossEntropyLoss()
def
repackage_hidden
(
h
):
"""
Wrap
s hidden states
in new Variables, to detach them
from their history."""
"""
Detache
s hidden states from their history."""
if
torch
.
is_tensor
(
h
):
return
h
.
detach
()
else
:
...
...
@@ -136,8 +133,8 @@ def repackage_hidden(h):
def
get_batch
(
source
,
i
):
seq_len
=
min
(
args
.
bptt
,
len
(
source
)
-
1
-
i
)
data
=
Variable
(
source
[
i
:
i
+
seq_len
]
)
target
=
Variable
(
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
)
data
=
source
[
i
:
i
+
seq_len
]
target
=
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
return
data
,
target
...
...
examples/word_language_model/main_fp16_optimizer.py
View file @
920da6da
...
...
@@ -4,7 +4,6 @@ import time
import
math
import
torch
import
torch.nn
as
nn
from
torch.autograd
import
Variable
import
data
import
model
...
...
@@ -61,8 +60,6 @@ torch.manual_seed(args.seed)
if
torch
.
cuda
.
is_available
():
if
not
args
.
cuda
:
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
else
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
if
args
.
fp16
and
not
args
.
cuda
:
print
(
"WARNING: --fp16 requires --cuda, ignoring --fp16 option"
)
...
...
@@ -132,7 +129,7 @@ if args.cuda and args.fp16:
def
repackage_hidden
(
h
):
"""
Wrap
s hidden states
in new Variables, to detach them
from their history."""
"""
Detache
s hidden states from their history."""
if
torch
.
is_tensor
(
h
):
return
h
.
detach
()
else
:
...
...
@@ -151,8 +148,8 @@ def repackage_hidden(h):
def
get_batch
(
source
,
i
):
seq_len
=
min
(
args
.
bptt
,
len
(
source
)
-
1
-
i
)
data
=
Variable
(
source
[
i
:
i
+
seq_len
]
)
target
=
Variable
(
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
)
data
=
source
[
i
:
i
+
seq_len
]
target
=
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
return
data
,
target
...
...
examples/word_language_model/model.py
View file @
920da6da
import
torch.nn
as
nn
from
torch.autograd
import
Variable
class
RNNModel
(
nn
.
Module
):
...
...
@@ -53,7 +52,7 @@ class RNNModel(nn.Module):
def
init_hidden
(
self
,
bsz
):
weight
=
next
(
self
.
parameters
()).
data
if
self
.
rnn_type
==
'LSTM'
:
return
(
Variable
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
()
)
,
Variable
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
())
)
return
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
(),
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
())
else
:
return
Variable
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
()
)
return
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment