Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
apex
Commits
28bdc04e
Commit
28bdc04e
authored
Dec 02, 2018
by
ptrblck
Browse files
update examples to PyTorch >=0.4.0
parent
bc62f325
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
49 additions
and
88 deletions
+49
-88
examples/FP16_Optimizer_simple/closure.py
examples/FP16_Optimizer_simple/closure.py
+3
-4
examples/FP16_Optimizer_simple/distributed_apex/distributed_data_parallel.py
...izer_simple/distributed_apex/distributed_data_parallel.py
+2
-3
examples/FP16_Optimizer_simple/distributed_apex_legacy_launcher/distributed_data_parallel.py
...ributed_apex_legacy_launcher/distributed_data_parallel.py
+2
-3
examples/FP16_Optimizer_simple/distributed_pytorch/distributed_data_parallel.py
...r_simple/distributed_pytorch/distributed_data_parallel.py
+2
-3
examples/FP16_Optimizer_simple/minimal.py
examples/FP16_Optimizer_simple/minimal.py
+2
-3
examples/FP16_Optimizer_simple/save_load.py
examples/FP16_Optimizer_simple/save_load.py
+2
-3
examples/distributed/main.py
examples/distributed/main.py
+1
-7
examples/imagenet/main.py
examples/imagenet/main.py
+4
-10
examples/imagenet/main_fp16_optimizer.py
examples/imagenet/main_fp16_optimizer.py
+4
-10
examples/imagenet/main_reducer.py
examples/imagenet/main_reducer.py
+4
-10
examples/word_language_model/generate.py
examples/word_language_model/generate.py
+14
-16
examples/word_language_model/main.py
examples/word_language_model/main.py
+3
-6
examples/word_language_model/main_fp16_optimizer.py
examples/word_language_model/main_fp16_optimizer.py
+3
-6
examples/word_language_model/model.py
examples/word_language_model/model.py
+3
-4
No files found.
examples/FP16_Optimizer_simple/closure.py
View file @
28bdc04e
import
torch
import
torch
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
torch
.
backends
.
cudnn
.
benchmark
=
True
torch
.
backends
.
cudnn
.
benchmark
=
True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
...
@@ -29,4 +28,4 @@ for t in range(5):
...
@@ -29,4 +28,4 @@ for t in range(5):
return
loss
return
loss
loss
=
optimizer
.
step
(
closure
)
loss
=
optimizer
.
step
(
closure
)
print
(
"final loss = "
,
loss
)
print
(
"final loss = "
,
loss
)
examples/FP16_Optimizer_simple/distributed_apex/distributed_data_parallel.py
View file @
28bdc04e
import
torch
import
torch
from
torch.autograd
import
Variable
import
argparse
import
argparse
from
apex.parallel
import
DistributedDataParallel
as
DDP
from
apex.parallel
import
DistributedDataParallel
as
DDP
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
...
@@ -16,8 +15,8 @@ torch.backends.cudnn.benchmark = True
...
@@ -16,8 +15,8 @@ torch.backends.cudnn.benchmark = True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
DDP
(
model
)
model
=
DDP
(
model
)
...
...
examples/FP16_Optimizer_simple/distributed_apex_legacy_launcher/distributed_data_parallel.py
View file @
28bdc04e
import
torch
import
torch
from
torch.autograd
import
Variable
import
argparse
import
argparse
from
apex.parallel
import
DistributedDataParallel
as
DDP
from
apex.parallel
import
DistributedDataParallel
as
DDP
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
...
@@ -24,8 +23,8 @@ torch.backends.cudnn.benchmark = True
...
@@ -24,8 +23,8 @@ torch.backends.cudnn.benchmark = True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
DDP
(
model
)
model
=
DDP
(
model
)
...
...
examples/FP16_Optimizer_simple/distributed_pytorch/distributed_data_parallel.py
View file @
28bdc04e
import
torch
import
torch
from
torch.autograd
import
Variable
import
argparse
import
argparse
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
...
@@ -15,8 +14,8 @@ torch.backends.cudnn.benchmark = True
...
@@ -15,8 +14,8 @@ torch.backends.cudnn.benchmark = True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
parallel
.
DistributedDataParallel
(
model
,
model
=
torch
.
nn
.
parallel
.
DistributedDataParallel
(
model
,
...
...
examples/FP16_Optimizer_simple/minimal.py
View file @
28bdc04e
import
torch
import
torch
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
torch
.
backends
.
cudnn
.
benchmark
=
True
torch
.
backends
.
cudnn
.
benchmark
=
True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
...
...
examples/FP16_Optimizer_simple/save_load.py
View file @
28bdc04e
import
torch
import
torch
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
FP16_Optimizer
from
apex.fp16_utils
import
FP16_Optimizer
torch
.
backends
.
cudnn
.
benchmark
=
True
torch
.
backends
.
cudnn
.
benchmark
=
True
N
,
D_in
,
D_out
=
64
,
1024
,
16
N
,
D_in
,
D_out
=
64
,
1024
,
16
x
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_in
).
normal_
())
.
half
(
)
x
=
torch
.
randn
(
N
,
D_in
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
y
=
Variable
(
torch
.
cuda
.
FloatTensor
(
N
,
D_out
).
normal_
())
.
half
(
)
y
=
torch
.
randn
(
N
,
D_out
,
device
=
'cuda'
,
dtype
=
torch
.
half
)
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
model
=
torch
.
nn
.
Linear
(
D_in
,
D_out
).
cuda
().
half
()
...
...
examples/distributed/main.py
View file @
28bdc04e
...
@@ -6,7 +6,6 @@ import torch.nn as nn
...
@@ -6,7 +6,6 @@ import torch.nn as nn
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
import
torch.optim
as
optim
import
torch.optim
as
optim
from
torchvision
import
datasets
,
transforms
from
torchvision
import
datasets
,
transforms
from
torch.autograd
import
Variable
from
apex.fp16_utils
import
to_python_float
from
apex.fp16_utils
import
to_python_float
#=====START: ADDED FOR DISTRIBUTED======
#=====START: ADDED FOR DISTRIBUTED======
...
@@ -82,9 +81,6 @@ if args.distributed:
...
@@ -82,9 +81,6 @@ if args.distributed:
#=====END: ADDED FOR DISTRIBUTED======
#=====END: ADDED FOR DISTRIBUTED======
torch
.
manual_seed
(
args
.
seed
)
torch
.
manual_seed
(
args
.
seed
)
if
args
.
cuda
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
kwargs
=
{
'num_workers'
:
1
,
'pin_memory'
:
True
}
if
args
.
cuda
else
{}
kwargs
=
{
'num_workers'
:
1
,
'pin_memory'
:
True
}
if
args
.
cuda
else
{}
...
@@ -158,7 +154,6 @@ def train(epoch):
...
@@ -158,7 +154,6 @@ def train(epoch):
for
batch_idx
,
(
data
,
target
)
in
enumerate
(
train_loader
):
for
batch_idx
,
(
data
,
target
)
in
enumerate
(
train_loader
):
if
args
.
cuda
:
if
args
.
cuda
:
data
,
target
=
data
.
cuda
(),
target
.
cuda
()
data
,
target
=
data
.
cuda
(),
target
.
cuda
()
data
,
target
=
Variable
(
data
),
Variable
(
target
)
optimizer
.
zero_grad
()
optimizer
.
zero_grad
()
output
=
model
(
data
)
output
=
model
(
data
)
loss
=
F
.
nll_loss
(
output
,
target
)
loss
=
F
.
nll_loss
(
output
,
target
)
...
@@ -177,11 +172,10 @@ def test():
...
@@ -177,11 +172,10 @@ def test():
with
torch
.
no_grad
():
with
torch
.
no_grad
():
if
args
.
cuda
:
if
args
.
cuda
:
data
,
target
=
data
.
cuda
(),
target
.
cuda
()
data
,
target
=
data
.
cuda
(),
target
.
cuda
()
data
,
target
=
Variable
(
data
),
Variable
(
target
)
output
=
model
(
data
)
output
=
model
(
data
)
test_loss
+=
to_python_float
(
F
.
nll_loss
(
output
,
target
,
size_average
=
False
).
data
)
# sum up batch loss
test_loss
+=
to_python_float
(
F
.
nll_loss
(
output
,
target
,
size_average
=
False
).
data
)
# sum up batch loss
pred
=
output
.
data
.
max
(
1
,
keepdim
=
True
)[
1
]
# get the index of the max log-probability
pred
=
output
.
data
.
max
(
1
,
keepdim
=
True
)[
1
]
# get the index of the max log-probability
correct
+=
pred
.
eq
(
target
.
data
.
view_as
(
pred
)).
cpu
().
sum
()
correct
+=
pred
.
eq
(
target
.
data
.
view_as
(
pred
)).
cpu
().
float
().
sum
()
test_loss
/=
len
(
test_loader
.
dataset
)
test_loss
/=
len
(
test_loader
.
dataset
)
...
...
examples/imagenet/main.py
View file @
28bdc04e
...
@@ -4,7 +4,6 @@ import shutil
...
@@ -4,7 +4,6 @@ import shutil
import
time
import
time
import
torch
import
torch
from
torch.autograd
import
Variable
import
torch.nn
as
nn
import
torch.nn
as
nn
import
torch.nn.parallel
import
torch.nn.parallel
import
torch.backends.cudnn
as
cudnn
import
torch.backends.cudnn
as
cudnn
...
@@ -315,12 +314,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
...
@@ -315,12 +314,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
# measure data loading time
data_time
.
update
(
time
.
time
()
-
end
)
data_time
.
update
(
time
.
time
()
-
end
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
@@ -392,13 +388,11 @@ def validate(val_loader, model, criterion):
...
@@ -392,13 +388,11 @@ def validate(val_loader, model, criterion):
i
+=
1
i
+=
1
target
=
target
.
cuda
(
async
=
True
)
target
=
target
.
cuda
(
async
=
True
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
with
torch
.
no_grad
():
with
torch
.
no_grad
():
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
examples/imagenet/main_fp16_optimizer.py
View file @
28bdc04e
...
@@ -4,7 +4,6 @@ import shutil
...
@@ -4,7 +4,6 @@ import shutil
import
time
import
time
import
torch
import
torch
from
torch.autograd
import
Variable
import
torch.nn
as
nn
import
torch.nn
as
nn
import
torch.nn.parallel
import
torch.nn.parallel
import
torch.backends.cudnn
as
cudnn
import
torch.backends.cudnn
as
cudnn
...
@@ -307,12 +306,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
...
@@ -307,12 +306,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
# measure data loading time
data_time
.
update
(
time
.
time
()
-
end
)
data_time
.
update
(
time
.
time
()
-
end
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
@@ -376,13 +372,11 @@ def validate(val_loader, model, criterion):
...
@@ -376,13 +372,11 @@ def validate(val_loader, model, criterion):
i
+=
1
i
+=
1
target
=
target
.
cuda
(
async
=
True
)
target
=
target
.
cuda
(
async
=
True
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
with
torch
.
no_grad
():
with
torch
.
no_grad
():
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
examples/imagenet/main_reducer.py
View file @
28bdc04e
...
@@ -4,7 +4,6 @@ import shutil
...
@@ -4,7 +4,6 @@ import shutil
import
time
import
time
import
torch
import
torch
from
torch.autograd
import
Variable
import
torch.nn
as
nn
import
torch.nn
as
nn
import
torch.nn.parallel
import
torch.nn.parallel
import
torch.backends.cudnn
as
cudnn
import
torch.backends.cudnn
as
cudnn
...
@@ -301,12 +300,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
...
@@ -301,12 +300,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
# measure data loading time
data_time
.
update
(
time
.
time
()
-
end
)
data_time
.
update
(
time
.
time
()
-
end
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
@@ -382,13 +378,11 @@ def validate(val_loader, model, criterion):
...
@@ -382,13 +378,11 @@ def validate(val_loader, model, criterion):
i
+=
1
i
+=
1
target
=
target
.
cuda
(
async
=
True
)
target
=
target
.
cuda
(
async
=
True
)
input_var
=
Variable
(
input
)
target_var
=
Variable
(
target
)
# compute output
# compute output
with
torch
.
no_grad
():
with
torch
.
no_grad
():
output
=
model
(
input
_var
)
output
=
model
(
input
)
loss
=
criterion
(
output
,
target
_var
)
loss
=
criterion
(
output
,
target
)
# measure accuracy and record loss
# measure accuracy and record loss
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
prec1
,
prec5
=
accuracy
(
output
.
data
,
target
,
topk
=
(
1
,
5
))
...
...
examples/word_language_model/generate.py
View file @
28bdc04e
...
@@ -8,7 +8,6 @@
...
@@ -8,7 +8,6 @@
import
argparse
import
argparse
import
torch
import
torch
from
torch.autograd
import
Variable
import
data
import
data
...
@@ -38,8 +37,6 @@ torch.manual_seed(args.seed)
...
@@ -38,8 +37,6 @@ torch.manual_seed(args.seed)
if
torch
.
cuda
.
is_available
():
if
torch
.
cuda
.
is_available
():
if
not
args
.
cuda
:
if
not
args
.
cuda
:
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
else
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
if
args
.
temperature
<
1e-3
:
if
args
.
temperature
<
1e-3
:
parser
.
error
(
"--temperature has to be greater or equal 1e-3"
)
parser
.
error
(
"--temperature has to be greater or equal 1e-3"
)
...
@@ -56,19 +53,20 @@ else:
...
@@ -56,19 +53,20 @@ else:
corpus
=
data
.
Corpus
(
args
.
data
)
corpus
=
data
.
Corpus
(
args
.
data
)
ntokens
=
len
(
corpus
.
dictionary
)
ntokens
=
len
(
corpus
.
dictionary
)
hidden
=
model
.
init_hidden
(
1
)
hidden
=
model
.
init_hidden
(
1
)
input
=
Variable
(
torch
.
rand
(
1
,
1
).
mul
(
ntokens
).
long
(),
volatile
=
True
)
with
torch
.
no_grad
():
if
args
.
cuda
:
input
=
torch
.
rand
(
1
,
1
).
mul
(
ntokens
).
long
()
input
.
data
=
input
.
data
.
cuda
()
if
args
.
cuda
:
input
=
input
.
cuda
()
with
open
(
args
.
outf
,
'w'
)
as
outf
:
with
open
(
args
.
outf
,
'w'
)
as
outf
:
for
i
in
range
(
args
.
words
):
for
i
in
range
(
args
.
words
):
output
,
hidden
=
model
(
input
,
hidden
)
output
,
hidden
=
model
(
input
,
hidden
)
word_weights
=
output
.
squeeze
().
data
.
div
(
args
.
temperature
).
exp
().
cpu
()
word_weights
=
output
.
squeeze
().
float
().
data
.
div
(
args
.
temperature
).
exp
().
cpu
()
word_idx
=
torch
.
multinomial
(
word_weights
,
1
)[
0
]
word_idx
=
torch
.
multinomial
(
word_weights
,
1
)[
0
]
input
.
data
.
fill_
(
word_idx
)
input
.
data
.
fill_
(
word_idx
)
word
=
corpus
.
dictionary
.
idx2word
[
word_idx
]
word
=
corpus
.
dictionary
.
idx2word
[
word_idx
]
outf
.
write
(
word
+
(
'
\n
'
if
i
%
20
==
19
else
' '
))
outf
.
write
(
word
+
(
'
\n
'
if
i
%
20
==
19
else
' '
))
if
i
%
args
.
log_interval
==
0
:
if
i
%
args
.
log_interval
==
0
:
print
(
'| Generated {}/{} words'
.
format
(
i
,
args
.
words
))
print
(
'| Generated {}/{} words'
.
format
(
i
,
args
.
words
))
examples/word_language_model/main.py
View file @
28bdc04e
...
@@ -4,7 +4,6 @@ import time
...
@@ -4,7 +4,6 @@ import time
import
math
import
math
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
torch.autograd
import
Variable
import
data
import
data
import
model
import
model
...
@@ -58,8 +57,6 @@ torch.manual_seed(args.seed)
...
@@ -58,8 +57,6 @@ torch.manual_seed(args.seed)
if
torch
.
cuda
.
is_available
():
if
torch
.
cuda
.
is_available
():
if
not
args
.
cuda
:
if
not
args
.
cuda
:
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
else
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
if
args
.
fp16
and
not
args
.
cuda
:
if
args
.
fp16
and
not
args
.
cuda
:
print
(
"WARNING: --fp16 requires --cuda, ignoring --fp16 option"
)
print
(
"WARNING: --fp16 requires --cuda, ignoring --fp16 option"
)
...
@@ -117,7 +114,7 @@ criterion = nn.CrossEntropyLoss()
...
@@ -117,7 +114,7 @@ criterion = nn.CrossEntropyLoss()
def
repackage_hidden
(
h
):
def
repackage_hidden
(
h
):
"""
Wrap
s hidden states
in new Variables, to detach them
from their history."""
"""
Detache
s hidden states from their history."""
if
torch
.
is_tensor
(
h
):
if
torch
.
is_tensor
(
h
):
return
h
.
detach
()
return
h
.
detach
()
else
:
else
:
...
@@ -136,8 +133,8 @@ def repackage_hidden(h):
...
@@ -136,8 +133,8 @@ def repackage_hidden(h):
def
get_batch
(
source
,
i
):
def
get_batch
(
source
,
i
):
seq_len
=
min
(
args
.
bptt
,
len
(
source
)
-
1
-
i
)
seq_len
=
min
(
args
.
bptt
,
len
(
source
)
-
1
-
i
)
data
=
Variable
(
source
[
i
:
i
+
seq_len
]
)
data
=
source
[
i
:
i
+
seq_len
]
target
=
Variable
(
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
)
target
=
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
return
data
,
target
return
data
,
target
...
...
examples/word_language_model/main_fp16_optimizer.py
View file @
28bdc04e
...
@@ -4,7 +4,6 @@ import time
...
@@ -4,7 +4,6 @@ import time
import
math
import
math
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
torch.autograd
import
Variable
import
data
import
data
import
model
import
model
...
@@ -61,8 +60,6 @@ torch.manual_seed(args.seed)
...
@@ -61,8 +60,6 @@ torch.manual_seed(args.seed)
if
torch
.
cuda
.
is_available
():
if
torch
.
cuda
.
is_available
():
if
not
args
.
cuda
:
if
not
args
.
cuda
:
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
print
(
"WARNING: You have a CUDA device, so you should probably run with --cuda"
)
else
:
torch
.
cuda
.
manual_seed
(
args
.
seed
)
if
args
.
fp16
and
not
args
.
cuda
:
if
args
.
fp16
and
not
args
.
cuda
:
print
(
"WARNING: --fp16 requires --cuda, ignoring --fp16 option"
)
print
(
"WARNING: --fp16 requires --cuda, ignoring --fp16 option"
)
...
@@ -132,7 +129,7 @@ if args.cuda and args.fp16:
...
@@ -132,7 +129,7 @@ if args.cuda and args.fp16:
def
repackage_hidden
(
h
):
def
repackage_hidden
(
h
):
"""
Wrap
s hidden states
in new Variables, to detach them
from their history."""
"""
Detache
s hidden states from their history."""
if
torch
.
is_tensor
(
h
):
if
torch
.
is_tensor
(
h
):
return
h
.
detach
()
return
h
.
detach
()
else
:
else
:
...
@@ -151,8 +148,8 @@ def repackage_hidden(h):
...
@@ -151,8 +148,8 @@ def repackage_hidden(h):
def
get_batch
(
source
,
i
):
def
get_batch
(
source
,
i
):
seq_len
=
min
(
args
.
bptt
,
len
(
source
)
-
1
-
i
)
seq_len
=
min
(
args
.
bptt
,
len
(
source
)
-
1
-
i
)
data
=
Variable
(
source
[
i
:
i
+
seq_len
]
)
data
=
source
[
i
:
i
+
seq_len
]
target
=
Variable
(
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
)
target
=
source
[
i
+
1
:
i
+
1
+
seq_len
].
view
(
-
1
)
return
data
,
target
return
data
,
target
...
...
examples/word_language_model/model.py
View file @
28bdc04e
import
torch.nn
as
nn
import
torch.nn
as
nn
from
torch.autograd
import
Variable
class
RNNModel
(
nn
.
Module
):
class
RNNModel
(
nn
.
Module
):
...
@@ -53,7 +52,7 @@ class RNNModel(nn.Module):
...
@@ -53,7 +52,7 @@ class RNNModel(nn.Module):
def
init_hidden
(
self
,
bsz
):
def
init_hidden
(
self
,
bsz
):
weight
=
next
(
self
.
parameters
()).
data
weight
=
next
(
self
.
parameters
()).
data
if
self
.
rnn_type
==
'LSTM'
:
if
self
.
rnn_type
==
'LSTM'
:
return
(
Variable
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
()
)
,
return
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
(),
Variable
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
())
)
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
())
else
:
else
:
return
Variable
(
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
()
)
return
weight
.
new
(
self
.
nlayers
,
bsz
,
self
.
nhid
).
zero_
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment