Commit 28bdc04e authored by ptrblck
Browse files

update examples to PyTorch >=0.4.0

parent bc62f325
import torch
from torch.autograd import Variable
from apex.fp16_utils import FP16_Optimizer
torch.backends.cudnn.benchmark = True
N, D_in, D_out = 64, 1024, 16
x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
model = torch.nn.Linear(D_in, D_out).cuda().half()
......@@ -29,4 +28,4 @@ for t in range(5):
return loss
loss = optimizer.step(closure)
print("final loss = ", loss)
print("final loss = ", loss)
import torch
from torch.autograd import Variable
import argparse
from apex.parallel import DistributedDataParallel as DDP
from apex.fp16_utils import FP16_Optimizer
......@@ -16,8 +15,8 @@ torch.backends.cudnn.benchmark = True
N, D_in, D_out = 64, 1024, 16
x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
model = torch.nn.Linear(D_in, D_out).cuda().half()
model = DDP(model)
......
import torch
from torch.autograd import Variable
import argparse
from apex.parallel import DistributedDataParallel as DDP
from apex.fp16_utils import FP16_Optimizer
......@@ -24,8 +23,8 @@ torch.backends.cudnn.benchmark = True
N, D_in, D_out = 64, 1024, 16
x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
model = torch.nn.Linear(D_in, D_out).cuda().half()
model = DDP(model)
......
import torch
from torch.autograd import Variable
import argparse
from apex.fp16_utils import FP16_Optimizer
......@@ -15,8 +14,8 @@ torch.backends.cudnn.benchmark = True
N, D_in, D_out = 64, 1024, 16
x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
model = torch.nn.Linear(D_in, D_out).cuda().half()
model = torch.nn.parallel.DistributedDataParallel(model,
......
import torch
from torch.autograd import Variable
from apex.fp16_utils import FP16_Optimizer
torch.backends.cudnn.benchmark = True
N, D_in, D_out = 64, 1024, 16
x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
model = torch.nn.Linear(D_in, D_out).cuda().half()
......
import torch
from torch.autograd import Variable
from apex.fp16_utils import FP16_Optimizer
torch.backends.cudnn.benchmark = True
N, D_in, D_out = 64, 1024, 16
x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
model = torch.nn.Linear(D_in, D_out).cuda().half()
......
......@@ -6,7 +6,6 @@ import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from apex.fp16_utils import to_python_float
#=====START: ADDED FOR DISTRIBUTED======
......@@ -82,9 +81,6 @@ if args.distributed:
#=====END: ADDED FOR DISTRIBUTED======
torch.manual_seed(args.seed)
if args.cuda:
torch.cuda.manual_seed(args.seed)
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
......@@ -158,7 +154,6 @@ def train(epoch):
for batch_idx, (data, target) in enumerate(train_loader):
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
optimizer.zero_grad()
output = model(data)
loss = F.nll_loss(output, target)
......@@ -177,11 +172,10 @@ def test():
with torch.no_grad():
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
output = model(data)
test_loss += to_python_float(F.nll_loss(output, target, size_average=False).data) # sum up batch loss
pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
correct += pred.eq(target.data.view_as(pred)).cpu().float().sum()
test_loss /= len(test_loader.dataset)
......
......@@ -4,7 +4,6 @@ import shutil
import time
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
......@@ -315,12 +314,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
data_time.update(time.time() - end)
input_var = Variable(input)
target_var = Variable(target)
# compute output
output = model(input_var)
loss = criterion(output, target_var)
output = model(input)
loss = criterion(output, target)
# measure accuracy and record loss
prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
......@@ -392,13 +388,11 @@ def validate(val_loader, model, criterion):
i += 1
target = target.cuda(async=True)
input_var = Variable(input)
target_var = Variable(target)
# compute output
with torch.no_grad():
output = model(input_var)
loss = criterion(output, target_var)
output = model(input)
loss = criterion(output, target)
# measure accuracy and record loss
prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
......
......@@ -4,7 +4,6 @@ import shutil
import time
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
......@@ -307,12 +306,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
data_time.update(time.time() - end)
input_var = Variable(input)
target_var = Variable(target)
# compute output
output = model(input_var)
loss = criterion(output, target_var)
output = model(input)
loss = criterion(output, target)
# measure accuracy and record loss
prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
......@@ -376,13 +372,11 @@ def validate(val_loader, model, criterion):
i += 1
target = target.cuda(async=True)
input_var = Variable(input)
target_var = Variable(target)
# compute output
with torch.no_grad():
output = model(input_var)
loss = criterion(output, target_var)
output = model(input)
loss = criterion(output, target)
# measure accuracy and record loss
prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
......
......@@ -4,7 +4,6 @@ import shutil
import time
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
......@@ -301,12 +300,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
# measure data loading time
data_time.update(time.time() - end)
input_var = Variable(input)
target_var = Variable(target)
# compute output
output = model(input_var)
loss = criterion(output, target_var)
output = model(input)
loss = criterion(output, target)
# measure accuracy and record loss
prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
......@@ -382,13 +378,11 @@ def validate(val_loader, model, criterion):
i += 1
target = target.cuda(async=True)
input_var = Variable(input)
target_var = Variable(target)
# compute output
with torch.no_grad():
output = model(input_var)
loss = criterion(output, target_var)
output = model(input)
loss = criterion(output, target)
# measure accuracy and record loss
prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
......
......@@ -8,7 +8,6 @@
import argparse
import torch
from torch.autograd import Variable
import data
......@@ -38,8 +37,6 @@ torch.manual_seed(args.seed)
if torch.cuda.is_available():
if not args.cuda:
print("WARNING: You have a CUDA device, so you should probably run with --cuda")
else:
torch.cuda.manual_seed(args.seed)
if args.temperature < 1e-3:
parser.error("--temperature has to be greater or equal 1e-3")
......@@ -56,19 +53,20 @@ else:
corpus = data.Corpus(args.data)
ntokens = len(corpus.dictionary)
hidden = model.init_hidden(1)
input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True)
if args.cuda:
input.data = input.data.cuda()
with torch.no_grad():
input = torch.rand(1, 1).mul(ntokens).long()
if args.cuda:
input = input.cuda()
with open(args.outf, 'w') as outf:
for i in range(args.words):
output, hidden = model(input, hidden)
word_weights = output.squeeze().data.div(args.temperature).exp().cpu()
word_idx = torch.multinomial(word_weights, 1)[0]
input.data.fill_(word_idx)
word = corpus.dictionary.idx2word[word_idx]
with open(args.outf, 'w') as outf:
for i in range(args.words):
output, hidden = model(input, hidden)
word_weights = output.squeeze().float().data.div(args.temperature).exp().cpu()
word_idx = torch.multinomial(word_weights, 1)[0]
input.data.fill_(word_idx)
word = corpus.dictionary.idx2word[word_idx]
outf.write(word + ('\n' if i % 20 == 19 else ' '))
outf.write(word + ('\n' if i % 20 == 19 else ' '))
if i % args.log_interval == 0:
print('| Generated {}/{} words'.format(i, args.words))
if i % args.log_interval == 0:
print('| Generated {}/{} words'.format(i, args.words))
......@@ -4,7 +4,6 @@ import time
import math
import torch
import torch.nn as nn
from torch.autograd import Variable
import data
import model
......@@ -58,8 +57,6 @@ torch.manual_seed(args.seed)
if torch.cuda.is_available():
if not args.cuda:
print("WARNING: You have a CUDA device, so you should probably run with --cuda")
else:
torch.cuda.manual_seed(args.seed)
if args.fp16 and not args.cuda:
print("WARNING: --fp16 requires --cuda, ignoring --fp16 option")
......@@ -117,7 +114,7 @@ criterion = nn.CrossEntropyLoss()
def repackage_hidden(h):
"""Wraps hidden states in new Variables, to detach them from their history."""
"""Detaches hidden states from their history."""
if torch.is_tensor(h):
return h.detach()
else:
......@@ -136,8 +133,8 @@ def repackage_hidden(h):
def get_batch(source, i):
    """Slice one input/target pair out of ``source`` starting at row ``i``.

    The window length is capped by ``args.bptt`` and by the rows remaining
    after ``i``; one row is held back so every input row has a following
    target row.

    Returns:
        ``(data, target)`` where ``data`` is ``source[i:i+seq_len]`` and
        ``target`` is the same window shifted one row ahead, flattened with
        ``view(-1)``.
    """
    seq_len = min(args.bptt, len(source) - 1 - i)
    # Plain tensor slices are returned directly; the examples target
    # PyTorch >= 0.4.0, so no Variable wrapper is applied first.
    data = source[i:i+seq_len]
    target = source[i+1:i+1+seq_len].view(-1)
    return data, target
......
......@@ -4,7 +4,6 @@ import time
import math
import torch
import torch.nn as nn
from torch.autograd import Variable
import data
import model
......@@ -61,8 +60,6 @@ torch.manual_seed(args.seed)
if torch.cuda.is_available():
if not args.cuda:
print("WARNING: You have a CUDA device, so you should probably run with --cuda")
else:
torch.cuda.manual_seed(args.seed)
if args.fp16 and not args.cuda:
print("WARNING: --fp16 requires --cuda, ignoring --fp16 option")
......@@ -132,7 +129,7 @@ if args.cuda and args.fp16:
def repackage_hidden(h):
"""Wraps hidden states in new Variables, to detach them from their history."""
"""Detaches hidden states from their history."""
if torch.is_tensor(h):
return h.detach()
else:
......@@ -151,8 +148,8 @@ def repackage_hidden(h):
def get_batch(source, i):
    """Return the input window at row ``i`` of ``source`` and its targets.

    ``seq_len`` is the smaller of ``args.bptt`` and ``len(source) - 1 - i``,
    so the shifted target window never runs past the end of ``source``.

    Returns:
        ``(data, target)``: ``data`` is ``source[i:i+seq_len]``; ``target`` is
        ``source[i+1:i+1+seq_len]`` flattened to one dimension via ``view(-1)``.
    """
    seq_len = min(args.bptt, len(source) - 1 - i)
    # Slices are used as-is (no Variable wrapper); the examples target
    # PyTorch >= 0.4.0.
    data = source[i:i+seq_len]
    target = source[i+1:i+1+seq_len].view(-1)
    return data, target
......
import torch.nn as nn
from torch.autograd import Variable
class RNNModel(nn.Module):
......@@ -53,7 +52,7 @@ class RNNModel(nn.Module):
def init_hidden(self, bsz):
    """Create the zero-filled initial hidden state for a batch of size ``bsz``.

    The state is allocated with ``weight.new`` from the model's first
    parameter, so it is derived from that parameter's tensor rather than from
    a hard-coded tensor type.

    Returns:
        A pair of tensors when ``self.rnn_type == 'LSTM'`` (presumably the
        hidden and cell states), otherwise a single tensor. Every tensor has
        shape ``(self.nlayers, bsz, self.nhid)`` and is filled with zeros.
    """
    weight = next(self.parameters()).data
    if self.rnn_type == 'LSTM':
        # Two independent allocations: the pair must not share storage.
        return (weight.new(self.nlayers, bsz, self.nhid).zero_(),
                weight.new(self.nlayers, bsz, self.nhid).zero_())
    else:
        return weight.new(self.nlayers, bsz, self.nhid).zero_()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment