Commit 28bdc04e authored by ptrblck

update examples to PyTorch >=0.4.0

parent bc62f325
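All of the diffs below apply the same PyTorch 0.4.0 migration: Variable was merged into Tensor, the tensor factory functions grew device= and dtype= arguments, and volatile=True gave way to the torch.no_grad() context. A minimal before/after sketch of the pattern (illustrative, not lines from this commit):

import torch

# Pre-0.4 style, now deprecated:
#   from torch.autograd import Variable
#   x = Variable(torch.cuda.FloatTensor(64, 1024).normal_()).half()

# 0.4.0 style: create the tensor on the right device/dtype directly.
x = torch.randn(64, 1024, device='cuda', dtype=torch.half)

# Inference: volatile=True is gone; use the no_grad context instead.
with torch.no_grad():
    y = x.float().sum()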
 import torch
-from torch.autograd import Variable
 from apex.fp16_utils import FP16_Optimizer
 torch.backends.cudnn.benchmark = True
 N, D_in, D_out = 64, 1024, 16
-x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
-y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
+x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
+y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
 model = torch.nn.Linear(D_in, D_out).cuda().half()
...
 import torch
-from torch.autograd import Variable
 import argparse
 from apex.parallel import DistributedDataParallel as DDP
 from apex.fp16_utils import FP16_Optimizer
@@ -16,8 +15,8 @@ torch.backends.cudnn.benchmark = True
 N, D_in, D_out = 64, 1024, 16
-x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
-y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
+x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
+y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
 model = torch.nn.Linear(D_in, D_out).cuda().half()
 model = DDP(model)
...
 import torch
-from torch.autograd import Variable
 import argparse
 from apex.parallel import DistributedDataParallel as DDP
 from apex.fp16_utils import FP16_Optimizer
@@ -24,8 +23,8 @@ torch.backends.cudnn.benchmark = True
 N, D_in, D_out = 64, 1024, 16
-x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
-y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
+x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
+y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
 model = torch.nn.Linear(D_in, D_out).cuda().half()
 model = DDP(model)
...
 import torch
-from torch.autograd import Variable
 import argparse
 from apex.fp16_utils import FP16_Optimizer
@@ -15,8 +14,8 @@ torch.backends.cudnn.benchmark = True
 N, D_in, D_out = 64, 1024, 16
-x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
-y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
+x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
+y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
 model = torch.nn.Linear(D_in, D_out).cuda().half()
 model = torch.nn.parallel.DistributedDataParallel(model,
...
 import torch
-from torch.autograd import Variable
 from apex.fp16_utils import FP16_Optimizer
 torch.backends.cudnn.benchmark = True
 N, D_in, D_out = 64, 1024, 16
-x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
-y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
+x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
+y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
 model = torch.nn.Linear(D_in, D_out).cuda().half()
...
 import torch
-from torch.autograd import Variable
 from apex.fp16_utils import FP16_Optimizer
 torch.backends.cudnn.benchmark = True
 N, D_in, D_out = 64, 1024, 16
-x = Variable(torch.cuda.FloatTensor(N, D_in ).normal_()).half()
-y = Variable(torch.cuda.FloatTensor(N, D_out).normal_()).half()
+x = torch.randn(N, D_in, device='cuda', dtype=torch.half)
+y = torch.randn(N, D_out, device='cuda', dtype=torch.half)
 model = torch.nn.Linear(D_in, D_out).cuda().half()
...
@@ -6,7 +6,6 @@ import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 from torchvision import datasets, transforms
-from torch.autograd import Variable
 from apex.fp16_utils import to_python_float
 #=====START: ADDED FOR DISTRIBUTED======
@@ -82,9 +81,6 @@ if args.distributed:
 #=====END: ADDED FOR DISTRIBUTED======
 torch.manual_seed(args.seed)
-if args.cuda:
-    torch.cuda.manual_seed(args.seed)
 kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
@@ -158,7 +154,6 @@ def train(epoch):
     for batch_idx, (data, target) in enumerate(train_loader):
         if args.cuda:
             data, target = data.cuda(), target.cuda()
-        data, target = Variable(data), Variable(target)
         optimizer.zero_grad()
         output = model(data)
         loss = F.nll_loss(output, target)
@@ -177,11 +172,10 @@ def test():
     with torch.no_grad():
         if args.cuda:
             data, target = data.cuda(), target.cuda()
-        data, target = Variable(data), Variable(target)
         output = model(data)
         test_loss += to_python_float(F.nll_loss(output, target, size_average=False).data)  # sum up batch loss
         pred = output.data.max(1, keepdim=True)[1]  # get the index of the max log-probability
-        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
+        correct += pred.eq(target.data.view_as(pred)).cpu().float().sum()
     test_loss /= len(test_loader.dataset)
...
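Two smaller changes in this file are easy to miss. First, from 0.4.0 torch.manual_seed also seeds the CUDA RNG, which is why the separate torch.cuda.manual_seed call is dropped. Second, pred.eq(...) returns a uint8 tensor in 0.4.x, and the added .float() cast guards the subsequent sum and averaging (summing uint8 values can overflow). A standalone sketch of the accuracy computation (stand-in data, not the commit's code):

import torch

output = torch.randn(64, 10)           # fake logits for one batch
target = torch.randint(0, 10, (64,))

pred = output.max(1, keepdim=True)[1]  # index of the max log-probability
# eq() yields uint8 in 0.4.x; cast before summing so the count is exact
# and the later division gives a float accuracy.
correct = pred.eq(target.view_as(pred)).cpu().float().sum()
accuracy = correct / target.size(0)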
@@ -4,7 +4,6 @@ import shutil
 import time
 import torch
-from torch.autograd import Variable
 import torch.nn as nn
 import torch.nn.parallel
 import torch.backends.cudnn as cudnn
@@ -315,12 +314,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
         # measure data loading time
         data_time.update(time.time() - end)
-        input_var = Variable(input)
-        target_var = Variable(target)
         # compute output
-        output = model(input_var)
-        loss = criterion(output, target_var)
+        output = model(input)
+        loss = criterion(output, target)
         # measure accuracy and record loss
         prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
@@ -392,13 +388,11 @@ def validate(val_loader, model, criterion):
         i += 1
         target = target.cuda(async=True)
-        input_var = Variable(input)
-        target_var = Variable(target)
         # compute output
         with torch.no_grad():
-            output = model(input_var)
-            loss = criterion(output, target_var)
+            output = model(input)
+            loss = criterion(output, target)
         # measure accuracy and record loss
         prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
...
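One line the commit leaves untouched is target.cuda(async=True). In 0.4.0 that argument was renamed to non_blocking (async later became a reserved word in Python 3.7), so on newer releases the equivalent call is:

target = target.cuda(non_blocking=True)  # 0.4.0+ spelling of cuda(async=True)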
@@ -4,7 +4,6 @@ import shutil
 import time
 import torch
-from torch.autograd import Variable
 import torch.nn as nn
 import torch.nn.parallel
 import torch.backends.cudnn as cudnn
@@ -307,12 +306,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
         # measure data loading time
         data_time.update(time.time() - end)
-        input_var = Variable(input)
-        target_var = Variable(target)
         # compute output
-        output = model(input_var)
-        loss = criterion(output, target_var)
+        output = model(input)
+        loss = criterion(output, target)
         # measure accuracy and record loss
         prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
@@ -376,13 +372,11 @@ def validate(val_loader, model, criterion):
         i += 1
         target = target.cuda(async=True)
-        input_var = Variable(input)
-        target_var = Variable(target)
         # compute output
         with torch.no_grad():
-            output = model(input_var)
-            loss = criterion(output, target_var)
+            output = model(input)
+            loss = criterion(output, target)
         # measure accuracy and record loss
         prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
...
@@ -4,7 +4,6 @@ import shutil
 import time
 import torch
-from torch.autograd import Variable
 import torch.nn as nn
 import torch.nn.parallel
 import torch.backends.cudnn as cudnn
@@ -301,12 +300,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
         # measure data loading time
         data_time.update(time.time() - end)
-        input_var = Variable(input)
-        target_var = Variable(target)
         # compute output
-        output = model(input_var)
-        loss = criterion(output, target_var)
+        output = model(input)
+        loss = criterion(output, target)
         # measure accuracy and record loss
         prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
@@ -382,13 +378,11 @@ def validate(val_loader, model, criterion):
         i += 1
         target = target.cuda(async=True)
-        input_var = Variable(input)
-        target_var = Variable(target)
         # compute output
         with torch.no_grad():
-            output = model(input_var)
-            loss = criterion(output, target_var)
+            output = model(input)
+            loss = criterion(output, target)
         # measure accuracy and record loss
         prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
...
...@@ -8,7 +8,6 @@ ...@@ -8,7 +8,6 @@
import argparse import argparse
import torch import torch
from torch.autograd import Variable
import data import data
...@@ -38,8 +37,6 @@ torch.manual_seed(args.seed) ...@@ -38,8 +37,6 @@ torch.manual_seed(args.seed)
if torch.cuda.is_available(): if torch.cuda.is_available():
if not args.cuda: if not args.cuda:
print("WARNING: You have a CUDA device, so you should probably run with --cuda") print("WARNING: You have a CUDA device, so you should probably run with --cuda")
else:
torch.cuda.manual_seed(args.seed)
if args.temperature < 1e-3: if args.temperature < 1e-3:
parser.error("--temperature has to be greater or equal 1e-3") parser.error("--temperature has to be greater or equal 1e-3")
...@@ -56,14 +53,15 @@ else: ...@@ -56,14 +53,15 @@ else:
corpus = data.Corpus(args.data) corpus = data.Corpus(args.data)
ntokens = len(corpus.dictionary) ntokens = len(corpus.dictionary)
hidden = model.init_hidden(1) hidden = model.init_hidden(1)
input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True) with torch.no_grad():
if args.cuda: input = torch.rand(1, 1).mul(ntokens).long()
input.data = input.data.cuda() if args.cuda:
input = input.cuda()
with open(args.outf, 'w') as outf: with open(args.outf, 'w') as outf:
for i in range(args.words): for i in range(args.words):
output, hidden = model(input, hidden) output, hidden = model(input, hidden)
word_weights = output.squeeze().data.div(args.temperature).exp().cpu() word_weights = output.squeeze().float().data.div(args.temperature).exp().cpu()
word_idx = torch.multinomial(word_weights, 1)[0] word_idx = torch.multinomial(word_weights, 1)[0]
input.data.fill_(word_idx) input.data.fill_(word_idx)
word = corpus.dictionary.idx2word[word_idx] word = corpus.dictionary.idx2word[word_idx]
......
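The removed volatile=True flag has no effect in 0.4.0; torch.no_grad() is its replacement, as shown above. Note that the commit's no_grad block only covers the creation of input, so the forward passes in the generation loop still build autograd state. Wrapping the whole loop would avoid that; a sketch of such a variant, reusing the script's own names (an assumption, not the commit's code):

with torch.no_grad():  # no autograd bookkeeping during generation
    input = torch.rand(1, 1).mul(ntokens).long()
    if args.cuda:
        input = input.cuda()
    with open(args.outf, 'w') as outf:
        for i in range(args.words):
            output, hidden = model(input, hidden)
            word_weights = output.squeeze().float().div(args.temperature).exp().cpu()
            word_idx = torch.multinomial(word_weights, 1)[0]
            input.fill_(word_idx)
            outf.write(corpus.dictionary.idx2word[word_idx] + ' ')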
...@@ -4,7 +4,6 @@ import time ...@@ -4,7 +4,6 @@ import time
import math import math
import torch import torch
import torch.nn as nn import torch.nn as nn
from torch.autograd import Variable
import data import data
import model import model
...@@ -58,8 +57,6 @@ torch.manual_seed(args.seed) ...@@ -58,8 +57,6 @@ torch.manual_seed(args.seed)
if torch.cuda.is_available(): if torch.cuda.is_available():
if not args.cuda: if not args.cuda:
print("WARNING: You have a CUDA device, so you should probably run with --cuda") print("WARNING: You have a CUDA device, so you should probably run with --cuda")
else:
torch.cuda.manual_seed(args.seed)
if args.fp16 and not args.cuda: if args.fp16 and not args.cuda:
print("WARNING: --fp16 requires --cuda, ignoring --fp16 option") print("WARNING: --fp16 requires --cuda, ignoring --fp16 option")
...@@ -117,7 +114,7 @@ criterion = nn.CrossEntropyLoss() ...@@ -117,7 +114,7 @@ criterion = nn.CrossEntropyLoss()
def repackage_hidden(h): def repackage_hidden(h):
"""Wraps hidden states in new Variables, to detach them from their history.""" """Detaches hidden states from their history."""
if torch.is_tensor(h): if torch.is_tensor(h):
return h.detach() return h.detach()
else: else:
...@@ -136,8 +133,8 @@ def repackage_hidden(h): ...@@ -136,8 +133,8 @@ def repackage_hidden(h):
def get_batch(source, i): def get_batch(source, i):
seq_len = min(args.bptt, len(source) - 1 - i) seq_len = min(args.bptt, len(source) - 1 - i)
data = Variable(source[i:i+seq_len]) data = source[i:i+seq_len]
target = Variable(source[i+1:i+1+seq_len].view(-1)) target = source[i+1:i+1+seq_len].view(-1)
return data, target return data, target
......
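The diff elides the else branch of repackage_hidden. The full post-0.4 helper, matching the upstream word_language_model example, recurses over LSTM state tuples:

def repackage_hidden(h):
    """Detaches hidden states from their history."""
    if torch.is_tensor(h):
        return h.detach()
    else:
        # LSTM state is a tuple (h, c); detach each element recursively.
        return tuple(repackage_hidden(v) for v in h)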
...@@ -4,7 +4,6 @@ import time ...@@ -4,7 +4,6 @@ import time
import math import math
import torch import torch
import torch.nn as nn import torch.nn as nn
from torch.autograd import Variable
import data import data
import model import model
...@@ -61,8 +60,6 @@ torch.manual_seed(args.seed) ...@@ -61,8 +60,6 @@ torch.manual_seed(args.seed)
if torch.cuda.is_available(): if torch.cuda.is_available():
if not args.cuda: if not args.cuda:
print("WARNING: You have a CUDA device, so you should probably run with --cuda") print("WARNING: You have a CUDA device, so you should probably run with --cuda")
else:
torch.cuda.manual_seed(args.seed)
if args.fp16 and not args.cuda: if args.fp16 and not args.cuda:
print("WARNING: --fp16 requires --cuda, ignoring --fp16 option") print("WARNING: --fp16 requires --cuda, ignoring --fp16 option")
...@@ -132,7 +129,7 @@ if args.cuda and args.fp16: ...@@ -132,7 +129,7 @@ if args.cuda and args.fp16:
def repackage_hidden(h): def repackage_hidden(h):
"""Wraps hidden states in new Variables, to detach them from their history.""" """Detaches hidden states from their history."""
if torch.is_tensor(h): if torch.is_tensor(h):
return h.detach() return h.detach()
else: else:
...@@ -151,8 +148,8 @@ def repackage_hidden(h): ...@@ -151,8 +148,8 @@ def repackage_hidden(h):
def get_batch(source, i): def get_batch(source, i):
seq_len = min(args.bptt, len(source) - 1 - i) seq_len = min(args.bptt, len(source) - 1 - i)
data = Variable(source[i:i+seq_len]) data = source[i:i+seq_len]
target = Variable(source[i+1:i+1+seq_len].view(-1)) target = source[i+1:i+1+seq_len].view(-1)
return data, target return data, target
......
 import torch.nn as nn
-from torch.autograd import Variable
 class RNNModel(nn.Module):
@@ -53,7 +52,7 @@ class RNNModel(nn.Module):
     def init_hidden(self, bsz):
         weight = next(self.parameters()).data
         if self.rnn_type == 'LSTM':
-            return (Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()),
-                    Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()))
+            return (weight.new(self.nlayers, bsz, self.nhid).zero_(),
+                    weight.new(self.nlayers, bsz, self.nhid).zero_())
         else:
-            return Variable(weight.new(self.nlayers, bsz, self.nhid).zero_())
+            return weight.new(self.nlayers, bsz, self.nhid).zero_()
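Dropping the Variable wrappers makes init_hidden return plain tensors. Since 0.4.0 the same thing is more idiomatically written with the new_zeros factory, which inherits weight's dtype and device; an equivalent sketch (not the commit's code):

def init_hidden(self, bsz):
    weight = next(self.parameters())
    if self.rnn_type == 'LSTM':
        # new_zeros keeps the dtype/device of weight automatically.
        return (weight.new_zeros(self.nlayers, bsz, self.nhid),
                weight.new_zeros(self.nlayers, bsz, self.nhid))
    else:
        return weight.new_zeros(self.nlayers, bsz, self.nhid)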