Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
apex
Commits
920da6da
Unverified
Commit
920da6da
authored
Dec 04, 2018
by
mcarilli
Committed by
GitHub
Dec 04, 2018
Browse files
Merge pull request #89 from ptrblck/update_examples
Update examples to PyTorch >=0.4.0
parents
0273d7ad
9ccebe5b
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
49 additions
and
90 deletions
+49
-90
examples/FP16_Optimizer_simple/closure.py
examples/FP16_Optimizer_simple/closure.py
+3
-4
examples/FP16_Optimizer_simple/distributed_apex/distributed_data_parallel.py
...izer_simple/distributed_apex/distributed_data_parallel.py
+2
-3
examples/FP16_Optimizer_simple/distributed_apex_legacy_launcher/distributed_data_parallel.py
...ributed_apex_legacy_launcher/distributed_data_parallel.py
+2
-3
examples/FP16_Optimizer_simple/distributed_pytorch/distributed_data_parallel.py
...r_simple/distributed_pytorch/distributed_data_parallel.py
+2
-3
examples/FP16_Optimizer_simple/minimal.py
examples/FP16_Optimizer_simple/minimal.py
+2
-3
examples/FP16_Optimizer_simple/save_load.py
examples/FP16_Optimizer_simple/save_load.py
+2
-3
examples/distributed/main.py
examples/distributed/main.py
+1
-7
examples/imagenet/main.py
examples/imagenet/main.py
+4
-10
examples/imagenet/main_fp16_optimizer.py
examples/imagenet/main_fp16_optimizer.py
+4
-12
examples/imagenet/main_reducer.py
examples/imagenet/main_reducer.py
+4
-10
examples/word_language_model/generate.py
examples/word_language_model/generate.py
+14
-16
examples/word_language_model/main.py
examples/word_language_model/main.py
+3
-6
examples/word_language_model/main_fp16_optimizer.py
examples/word_language_model/main_fp16_optimizer.py
+3
-6
examples/word_language_model/model.py
examples/word_language_model/model.py
+3
-4
No files found.
examples/FP16_Optimizer_simple/closure.py
View file @
920da6da
import
torch
import
torch
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
torch
.
backends
.
cudnn
.
benchmark
=
True
torch
.
backends
.
cudnn
.
benchmark
=
True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
...
@@ -29,4 +28,4 @@ for t in range(5):
...
@@ -29,4 +28,4 @@ for t in range(5):
return
loss
return
loss
loss
=
optimizer
.
step
(
closure
)
loss
=
optimizer
.
step
(
closure
)
print
(
"final loss = "
,
loss
)
print
(
"final loss = "
,
loss
)
examples/FP16_Optimizer_simple/distributed_apex/distributed_data_parallel.py
View file @
920da6da
import
torch
import
torch
from
torch.autograd
import
Variable
import
argparse
import
argparse
from
apex.parallel
import
DistributedDataParallel
as
DDP
from
apex.parallel
import
DistributedDataParallel
as
DDP
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
...
@@ -16,8 +15,8 @@ torch.backends.cudnn.benchmark = True
...
@@ -16,8 +15,8 @@ torch.backends.cudnn.benchmark = True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
DDP
(
model
)
model
=
DDP
(
model
)
...
...
examples/FP16_Optimizer_simple/distributed_apex_legacy_launcher/distributed_data_parallel.py
View file @
920da6da
import
torch
import
torch
from
torch.autograd
import
Variable
import
argparse
import
argparse
from
apex.parallel
import
DistributedDataParallel
as
DDP
from
apex.parallel
import
DistributedDataParallel
as
DDP
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
...
@@ -24,8 +23,8 @@ torch.backends.cudnn.benchmark = True
...
@@ -24,8 +23,8 @@ torch.backends.cudnn.benchmark = True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
DDP
(
model
)
model
=
DDP
(
model
)
...
...
examples/FP16_Optimizer_simple/distributed_pytorch/distributed_data_parallel.py
View file @
920da6da
import
torch
import
torch
from
torch.autograd
import
Variable
import
argparse
import
argparse
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
...
@@ -15,8 +14,8 @@ torch.backends.cudnn.benchmark = True
...
@@ -15,8 +14,8 @@ torch.backends.cudnn.benchmark = True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
parallel
.
DistributedDataParallel
(
model
,
model
=
torch
.
nn
.
parallel
.
DistributedDataParallel
(
model
,
...
...
examples/FP16_Optimizer_simple/minimal.py
View file @
920da6da
import
torch
import
torch
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
torch
.
backends
.
cudnn
.
benchmark
=
True
torch
.
backends
.
cudnn
.
benchmark
=
True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
...
...
examples/FP16_Optimizer_simple/save_load.py
View file @
920da6da
import
torch
import
torch
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
torch
.
backends
.
cudnn
.
benchmark
=
True
torch
.
backends
.
cudnn
.
benchmark
=
True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
...
...
examples/distributed/main.py
View file @
920da6da
...
@@ -6,7 +6,6 @@ import torch.nn as nn
...
@@ -6,7 +6,6 @@ import torch.nn as nn
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
import
torch.optim
as
optim
import
torch.optim
as
optim
from
torchvision
import
datasets
,
transforms
from
torchvision
import
datasets
,
transforms
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
to_python_float
from
apex.fp16_utils
import
to_python_float
#=====START: ADDED FOR DISTRIBUTED======
#=====START: ADDED FOR DISTRIBUTED======
...
@@ -82,9 +81,6 @@ if args.distributed:
...
@@ -82,9 +81,6 @@ if args.distributed:
#=====END: ADDED FOR DISTRIBUTED======
#=====END: ADDED FOR DISTRIBUTED======
torch
.
manual_seed
(
args
.
seed
)
torch
.
manual_seed
(
args
.
seed
)
if
args
.
cuda
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
kwargs
=
{
'num_workers'
:
1
,
'pin_memory'
:
True
}
if
args
.
cuda
else
{}
kwargs
=
{
'num_workers'
:
1
,
'pin_memory'
:
True
}
if
args
.
cuda
else
{}
...
@@ -158,7 +154,6 @@ def train(epoch):
...
@@ -158,7 +154,6 @@ def train(epoch):
for
batch_idx
,
(
data
,
target
)
in
enumerate
(
train_loader
):
for
batch_idx
,
(
data
,
target
)
in
enumerate
(
train_loader
):
if
args
.
cuda
:
if
args
.
cuda
:
data
,
target
=
data
.
cuda
(),
target
.
cuda
()
data
,
target
=
data
.
cuda
(),
target
.
cuda
()
data
,
target
=
Variable
(
data
),
Variable
(
target
)
optimizer
.
zero_grad
()
optimizer
.
zero_grad
()
output
=
model
(
data
)
output
=
model
(
data
)
loss
=
F
.
nll_loss
(
output
,
target
)
loss
=
F
.
nll_loss
(
output
,
target
)
...
@@ -177,11 +172,10 @@ def test():
...
@@ -177,11 +172,10 @@ def test():
with
torch
.
no_grad
():
with
torch
.
no_grad
():
if
args
.
cuda
:
if
args
.
cuda
:
data
,
target
=
data
.
cuda
(),
target
.
cuda
()
data
,
target
=
data
.
cuda
(),
target
.
cuda
()
data
,
target
=
Variable
(
data
),
Variable
(
target
)
output
=
model
(
data
)
output
=
model
(
data
)
test_loss
+=
to_python_float
(
F
.
nll_loss
(
output
,
target
,
size_average
=
False
).
data
)
# sum up batch loss
test_loss
+=
to_python_float
(
F
.
nll_loss
(
output
,
target
,
size_average
=
False
).
data
)
# sum up batch loss
pred
=
output
.
data
.
max
(
1
,
keepdim
=
True
)[
1
]
# get the index of the max log-probability
pred
=
output
.
data
.
max
(
1
,
keepdim
=
True
)[
1
]
# get the index of the max log-probability
correct
+=
pred
.
eq
(
target
.
data
.
view_as
(
pred
)).
cpu
().
sum
()
correct
+=
pred
.
eq
(
target
.
data
.
view_as
(
pred
)).
float
().
cpu
().
sum
()
test_loss
/=
len
(
test_loader
.
dataset
)
test_loss
/=
len
(
test_loader
.
dataset
)
...
...
examples/imagenet/main.py
View file @
920da6da
...
@@ -4,7 +4,6 @@ import shutil
...
@@ -4,7 +4,6 @@ import shutil
import
time
import
time
import
torch
import
torch
from
torch.autograd
import
Variable
import
torch.nn
as
nn
import
torch.nn
as
nn
import
torch.nn.parallel
import
torch.nn.parallel
import
torch.backends.cudnn
as
cudnn
import
torch.backends.cudnn
as
cudnn
...
@@ -315,12 +314,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
...
@@ -315,12 +314,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
# measure data loading time
data_time
.
update
(
time
.
time
()
-
end
)
data_time
.
update
(
time
.
time
()
-
end
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
@@ -392,13 +388,11 @@ def validate(val_loader, model, criterion):
...
@@ -392,13 +388,11 @@ def validate(val_loader, model, criterion):
i
+=
1
i
+=
1
target
=
target
.
cuda
(
async
=
True
)
target
=
target
.
cuda
(
async
=
True
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
with
torch
.
no_grad
():
with
torch
.
no_grad
():
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
examples/imagenet/main_fp16_optimizer.py
View file @
920da6da
...
@@ -4,7 +4,6 @@ import shutil
...
@@ -4,7 +4,6 @@ import shutil
import
time
import
time
import
torch
import
torch
from
torch.autograd
import
Variable
import
torch.nn
as
nn
import
torch.nn
as
nn
import
torch.nn.parallel
import
torch.nn.parallel
import
torch.backends.cudnn
as
cudnn
import
torch.backends.cudnn
as
cudnn
...
@@ -307,12 +306,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
...
@@ -307,12 +306,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
# measure data loading time
data_time
.
update
(
time
.
time
()
-
end
)
data_time
.
update
(
time
.
time
()
-
end
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
@@ -375,14 +371,10 @@ def validate(val_loader, model, criterion):
...
@@ -375,14 +371,10 @@ def validate(val_loader, model, criterion):
while
input
is
not
None
:
while
input
is
not
None
:
i
+=
1
i
+=
1
target
=
target
.
cuda
(
async
=
True
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
with
torch
.
no_grad
():
with
torch
.
no_grad
():
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
examples/imagenet/main_reducer.py
View file @
920da6da
...
@@ -4,7 +4,6 @@ import shutil
...
@@ -4,7 +4,6 @@ import shutil
import
time
import
time
import
torch
import
torch
from
torch.autograd
import
Variable
import
torch.nn
as
nn
import
torch.nn
as
nn
import
torch.nn.parallel
import
torch.nn.parallel
import
torch.backends.cudnn
as
cudnn
import
torch.backends.cudnn
as
cudnn
...
@@ -301,12 +300,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
...
@@ -301,12 +300,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
# measure data loading time
data_time
.
update
(
time
.
time
()
-
end
)
data_time
.
update
(
time
.
time
()
-
end
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
@@ -382,13 +378,11 @@ def validate(val_loader, model, criterion):
...
@@ -382,13 +378,11 @@ def validate(val_loader, model, criterion):
i
+=
1
i
+=
1
target
=
target
.
cuda
(
async
=
True
)
target
=
target
.
cuda
(
async
=
True
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
with
torch
.
no_grad
():
with
torch
.
no_grad
():
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
examples/word_language_model/generate.py
View file @
920da6da
...
@@ -8,7 +8,6 @@
...
@@ -8,7 +8,6 @@
import
argparse
import
argparse
import
torch
import
torch
from
torch.autograd
import
Variable
import
data
import
data
...
@@ -38,8 +37,6 @@ torch.manual_seed(args.seed)
...
@@ -38,8 +37,6 @@ torch.manual_seed(args.seed)
if
torch
.
cuda
.
is_available
():
if
torch
.
cuda
.
is_available
():
if
not
args
.
cuda
:
if
not
args
.
cuda
:
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
else
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
if
args
.
temperature
<
1e-3
:
if
args
.
temperature
<
1e-3
:
parser
.
error
(
"--temperature has to be greater or equal 1e-3"
)
parser
.
error
(
"--temperature has to be greater or equal 1e-3"
)
...
@@ -56,19 +53,20 @@ else:
...
@@ -56,19 +53,20 @@ else:
corpus
=
data
.
Corpus
(
args
.
data
)
corpus
=
data
.
Corpus
(
args
.
data
)
ntokens
=
len
(
corpus
.
dictionary
)
ntokens
=
len
(
corpus
.
dictionary
)
hidden
=
model
.
init_hidden
(
1
)
hidden
=
model
.
init_hidden
(
1
)
input
=
Variable
(
torch
.
rand
(
1
,
1
).
mul
(
ntokens
).
long
(),
volatile
=
True
)
with
torch
.
no_grad
():
if
args
.
cuda
:
input
=
torch
.
rand
(
1
,
1
).
mul
(
ntokens
).
long
()
input
.
data
=
input
.
data
.
cuda
()
if
args
.
cuda
:
input
=
input
.
cuda
()
with
open
(
args
.
outf
,
'w'
)
as
outf
:
with
open
(
args
.
outf
,
'w'
)
as
outf
:
for
i
in
range
(
args
.
words
):
for
i
in
range
(
args
.
words
):
output
,
hidden
=
model
(
input
,
hidden
)
output
,
hidden
=
model
(
input
,
hidden
)
word_weights
=
output
.
squeeze
().
data
.
div
(
args
.
temperature
).
exp
().
cpu
()
word_weights
=
output
.
squeeze
().
float
().
data
.
div
(
args
.
temperature
).
exp
().
cpu
()
word_idx
=
torch
.
multinomial
(
word_weights
,
1
)[
0
]
word_idx
=
torch
.
multinomial
(
word_weights
,
1
)[
0
]
input
.
data
.
fill_
(
word_idx
)
input
.
data
.
fill_
(
word_idx
)
word
=
corpus
.
dictionary
.
idx2word
[
word_idx
]
word
=
corpus
.
dictionary
.
idx2word
[
word_idx
]
outf
.
write
(
word
+
(
'
\n
'
if
i
%
20
==
19
else
' '
))
outf
.
write
(
word
+
(
'
\n
'
if
i
%
20
==
19
else
' '
))
if
i
%
args
.
log_interval
==
0
:
if
i
%
args
.
log_interval
==
0
:
print
(
'| Generated {}/{} words'
.
format
(
i
,
args
.
words
))
print
(
'| Generated {}/{} words'
.
format
(
i
,
args
.
words
))
examples/word_language_model/main.py
View file @
920da6da
...
@@ -4,7 +4,6 @@ import time
...
@@ -4,7 +4,6 @@ import time
import
math
import
math
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
torch.autograd
import
Variable
import
data
import
data
import
model
import
model
...
@@ -58,8 +57,6 @@ torch.manual_seed(args.seed)
...
@@ -58,8 +57,6 @@ torch.manual_seed(args.seed)
if
torch
.
cuda
.
is_available
():
if
torch
.
cuda
.
is_available
():
if
not
args
.
cuda
:
if
not
args
.
cuda
:
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
else
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
if
args
.
fp16
and
not
args
.
cuda
:
if
args
.
fp16
and
not
args
.
cuda
:
print
(
"WARNING: --fp16 requires --cuda, ignoring --fp16 option"
)
print
(
"WARNING: --fp16 requires --cuda, ignoring --fp16 option"
)
...
@@ -117,7 +114,7 @@ criterion = nn.CrossEntropyLoss()
...
@@ -117,7 +114,7 @@ criterion = nn.CrossEntropyLoss()
def
repackage_hidden
(
h
):
def
repackage_hidden
(
h
):
"""Wraps hidden states in new Variables, to detach them from their history."""
"""Detaches hidden states from their history."""
if
torch
.
is_tensor
(
h
):
if
torch
.
is_tensor
(
h
):
return
h
.
detach
()
return
h
.
detach
()
else
:
else
:
...
@@ -136,8 +133,8 @@ def repackage_hidden(h):
...
@@ -136,8 +133,8 @@ def repackage_hidden(h):
def
get_batch
(
source
,
i
):
def
get_batch
(
source
,
i
):
seq_len
=
min
(
args
.
bptt
,
len
(
source
)
-
1
-
i
)
seq_len
=
min
(
args
.
bptt
,
len
(
source
)
-
1
-
i
)
data
=
Variable
(
source
[
i
:
i
+
seq_len
]
)
data
=
source
[
i
:
i
+
seq_len
]
target
=
Variable
(
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
)
target
=
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
return
data
,
target
return
data
,
target
...
...
examples/word_language_model/main_fp16_optimizer.py
View file @
920da6da
...
@@ -4,7 +4,6 @@ import time
...
@@ -4,7 +4,6 @@ import time
import
math
import
math
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
torch.autograd
import
Variable
import
data
import
data
import
model
import
model
...
@@ -61,8 +60,6 @@ torch.manual_seed(args.seed)
...
@@ -61,8 +60,6 @@ torch.manual_seed(args.seed)
if
torch
.
cuda
.
is_available
():
if
torch
.
cuda
.
is_available
():
if
not
args
.
cuda
:
if
not
args
.
cuda
:
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
else
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
if
args
.
fp16
and
not
args
.
cuda
:
if
args
.
fp16
and
not
args
.
cuda
:
print
(
"WARNING: --fp16 requires --cuda, ignoring --fp16 option"
)
print
(
"WARNING: --fp16 requires --cuda, ignoring --fp16 option"
)
...
@@ -132,7 +129,7 @@ if args.cuda and args.fp16:
...
@@ -132,7 +129,7 @@ if args.cuda and args.fp16:
def
repackage_hidden
(
h
):
def
repackage_hidden
(
h
):
"""Wraps hidden states in new Variables, to detach them from their history."""
"""Detaches hidden states from their history."""
if
torch
.
is_tensor
(
h
):
if
torch
.
is_tensor
(
h
):
return
h
.
detach
()
return
h
.
detach
()
else
:
else
:
...
@@ -151,8 +148,8 @@ def repackage_hidden(h):
...
@@ -151,8 +148,8 @@ def repackage_hidden(h):
def
get_batch
(
source
,
i
):
def
get_batch
(
source
,
i
):
seq_len
=
min
(
args
.
bptt
,
len
(
source
)
-
1
-
i
)
seq_len
=
min
(
args
.
bptt
,
len
(
source
)
-
1
-
i
)
data
=
Variable
(
source
[
i
:
i
+
seq_len
]
)
data
=
source
[
i
:
i
+
seq_len
]
target
=
Variable
(
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
)
target
=
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
return
data
,
target
return
data
,
target
...
...
examples/word_language_model/model.py
View file @
920da6da
import
torch.nn
as
nn
import
torch.nn
as
nn
from
torch.autograd
import
Variable
class
RNNModel
(
nn
.
Module
):
class
RNNModel
(
nn
.
Module
):
...
@@ -53,7 +52,7 @@ class RNNModel(nn.Module):
...
@@ -53,7 +52,7 @@ class RNNModel(nn.Module):
def
init_hidden
(
self
,
bsz
):
def
init_hidden
(
self
,
bsz
):
weight
=
next
(
self
.
parameters
()).
data
weight
=
next
(
self
.
parameters
()).
data
if
self
.
rnn_type
==
'LSTM'
:
if
self
.
rnn_type
==
'LSTM'
:
return
(
Variable
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
()
)
,
return
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
(),
Variable
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
())
)
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
())
else
:
else
:
return
Variable
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
()
)
return
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment