Commit d77687a6 authored by Benjamin Graham's avatar Benjamin Graham Committed by Benjamin Thomas Graham
Browse files

Rename ValidConvolutions to SubmanifoldConvolutions, update for PyTorch 0.4 Tensor/Variable merge

parent 297e04c0
...@@ -6,12 +6,13 @@ ...@@ -6,12 +6,13 @@
import sparseconvnet import sparseconvnet
import torch.nn.functional as F import torch.nn.functional as F
from torch.autograd import Function, Variable from torch.autograd import Function
from torch.nn import Module, Parameter from torch.nn import Module, Parameter
from .utils import * from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor from .sparseConvNetTensor import SparseConvNetTensor
from .batchNormalization import BatchNormalization from .batchNormalization import BatchNormalization
class Sigmoid(Module): class Sigmoid(Module):
def forward(self, input): def forward(self, input):
output = SparseConvNetTensor() output = SparseConvNetTensor()
...@@ -20,6 +21,7 @@ class Sigmoid(Module): ...@@ -20,6 +21,7 @@ class Sigmoid(Module):
output.spatial_size = input.spatial_size output.spatial_size = input.spatial_size
return output return output
class Tanh(Module): class Tanh(Module):
def forward(self, input): def forward(self, input):
output = SparseConvNetTensor() output = SparseConvNetTensor()
...@@ -28,6 +30,7 @@ class Tanh(Module): ...@@ -28,6 +30,7 @@ class Tanh(Module):
output.spatial_size = input.spatial_size output.spatial_size = input.spatial_size
return output return output
class ReLU(Module): class ReLU(Module):
def forward(self, input): def forward(self, input):
output = SparseConvNetTensor() output = SparseConvNetTensor()
...@@ -36,6 +39,7 @@ class ReLU(Module): ...@@ -36,6 +39,7 @@ class ReLU(Module):
output.spatial_size = input.spatial_size output.spatial_size = input.spatial_size
return output return output
class ELU(Module): class ELU(Module):
def forward(self, input): def forward(self, input):
output = SparseConvNetTensor() output = SparseConvNetTensor()
......
...@@ -4,11 +4,12 @@ ...@@ -4,11 +4,12 @@
# This source code is licensed under the license found in the # This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree. # LICENSE file in the root directory of this source tree.
from torch.autograd import Function, Variable from torch.autograd import Function
from torch.nn import Module from torch.nn import Module
from .utils import * from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor from .sparseConvNetTensor import SparseConvNetTensor
class AveragePoolingFunction(Function): class AveragePoolingFunction(Function):
@staticmethod @staticmethod
def forward( def forward(
...@@ -21,15 +22,11 @@ class AveragePoolingFunction(Function): ...@@ -21,15 +22,11 @@ class AveragePoolingFunction(Function):
pool_size, pool_size,
pool_stride, pool_stride,
nFeaturesToDrop): nFeaturesToDrop):
ctx.input_features=input_features ctx.input_metadata = input_metadata
ctx.input_metadata=input_metadata
ctx.input_spatial_size = input_spatial_size
ctx.output_spatial_size = output_spatial_size
ctx.dimension = dimension ctx.dimension = dimension
ctx.pool_size = pool_size
ctx.pool_stride = pool_stride
ctx.nFeaturesToDrop = nFeaturesToDrop ctx.nFeaturesToDrop = nFeaturesToDrop
output_features = input_features.new() output_features = input_features.new()
dim_typed_fn(dimension, input_features, 'AveragePooling_updateOutput')( dim_typed_fn(dimension, input_features, 'AveragePooling_updateOutput')(
input_spatial_size, input_spatial_size,
output_spatial_size, output_spatial_size,
...@@ -40,23 +37,36 @@ class AveragePoolingFunction(Function): ...@@ -40,23 +37,36 @@ class AveragePoolingFunction(Function):
output_features, output_features,
nFeaturesToDrop, nFeaturesToDrop,
torch.cuda.IntTensor() if input_features.is_cuda else nullptr) torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
ctx.save_for_backward(input_features,
output_features,
input_spatial_size,
output_spatial_size,
pool_size,
pool_stride)
return output_features return output_features
@staticmethod @staticmethod
def backward(ctx, grad_output): def backward(ctx, grad_output):
grad_input=Variable(grad_output.data.new()) input_features,\
output_features,\
input_spatial_size,\
output_spatial_size,\
pool_size,\
pool_stride = ctx.saved_tensors
grad_input = grad_output.new()
dim_typed_fn( dim_typed_fn(
ctx.dimension, ctx.input_features, 'AveragePooling_updateGradInput')( ctx.dimension, input_features, 'AveragePooling_updateGradInput')(
ctx.input_spatial_size, input_spatial_size,
ctx.output_spatial_size, output_spatial_size,
ctx.pool_size, pool_size,
ctx.pool_stride, pool_stride,
ctx.input_metadata.ffi, ctx.input_metadata.ffi,
ctx.input_features, input_features,
grad_input.data, grad_input,
grad_output.data.contiguous(), grad_output.contiguous(),
ctx.nFeaturesToDrop, ctx.nFeaturesToDrop,
torch.cuda.IntTensor() if ctx.input_features.is_cuda else nullptr) torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
return grad_input, None, None, None, None, None, None, None return grad_input, None, None, None, None, None, None, None
...@@ -67,29 +77,39 @@ class AveragePooling(Module): ...@@ -67,29 +77,39 @@ class AveragePooling(Module):
self.pool_size = toLongTensor(dimension, pool_size) self.pool_size = toLongTensor(dimension, pool_size)
self.pool_stride = toLongTensor(dimension, pool_stride) self.pool_stride = toLongTensor(dimension, pool_stride)
self.nFeaturesToDrop = nFeaturesToDrop self.nFeaturesToDrop = nFeaturesToDrop
def forward(self, input): def forward(self, input):
output = SparseConvNetTensor() output = SparseConvNetTensor()
output.metadata = input.metadata output.metadata = input.metadata
output.spatial_size = ( output.spatial_size = (
input.spatial_size - self.pool_size) / self.pool_stride + 1 input.spatial_size - self.pool_size) / self.pool_stride + 1
assert ((output.spatial_size-1)*self.pool_stride+self.pool_size==input.spatial_size).all() assert ((output.spatial_size - 1) * self.pool_stride +
output.features = AveragePoolingFunction().apply( self.pool_size == input.spatial_size).all()
input.features, input.metadata, input.spatial_size, output.features = AveragePoolingFunction.apply(
output.spatial_size, self.dimension,self.pool_size,self.pool_stride, input.features,
input.metadata,
input.spatial_size,
output.spatial_size,
self.dimension,
self.pool_size,
self.pool_stride,
self.nFeaturesToDrop) self.nFeaturesToDrop)
return output return output
def input_spatial_size(self, out_size): def input_spatial_size(self, out_size):
return (out_size - 1) * self.pool_stride + self.pool_size return (out_size - 1) * self.pool_stride + self.pool_size
def __repr__(self): def __repr__(self):
s = 'AveragePooling' s = 'AveragePooling'
if self.pool_size.max() == self.pool_size.min() and\ if self.pool_size.max() == self.pool_size.min() and\
self.pool_stride.max() == self.pool_stride.min(): self.pool_stride.max() == self.pool_stride.min():
s = s + str(self.pool_size[0]) + '/' + str(self.pool_stride[0]) s = s + str(self.pool_size[0].item()) + \
'/' + str(self.pool_stride[0].item())
else: else:
s = s + '(' + str(self.pool_size[0]) s = s + '(' + str(self.pool_size[0].item())
for i in self.pool_size[1:]: for i in self.pool_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i)
s = s + ')/(' + str(self.pool_stride[0]) s = s + ')/(' + str(self.pool_stride[0].item())
for i in self.pool_stride[1:]: for i in self.pool_stride[1:]:
s = s + ',' + str(i) s = s + ',' + str(i)
s = s + ')' s = s + ')'
......
...@@ -15,11 +15,12 @@ leakiness : Apply activation def inplace: 0<=leakiness<=1. ...@@ -15,11 +15,12 @@ leakiness : Apply activation def inplace: 0<=leakiness<=1.
0 for ReLU, values in (0,1) for LeakyReLU, 1 for no activation def. 0 for ReLU, values in (0,1) for LeakyReLU, 1 for no activation def.
""" """
from torch.autograd import Function, Variable from torch.autograd import Function
from torch.nn import Module, Parameter from torch.nn import Module, Parameter
from .utils import * from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor from .sparseConvNetTensor import SparseConvNetTensor
class BatchNormalizationFunction(Function): class BatchNormalizationFunction(Function):
@staticmethod @staticmethod
def forward( def forward(
...@@ -33,60 +34,72 @@ class BatchNormalizationFunction(Function): ...@@ -33,60 +34,72 @@ class BatchNormalizationFunction(Function):
momentum, momentum,
train, train,
leakiness): leakiness):
ctx.nPlanes=runningMean.shape[0] ctx.nPlanes = runningMean.shape[0]
ctx.input_features=input_features ctx.train = train
ctx.weight=weight ctx.leakiness = leakiness
ctx.bias=bias output_features = input_features.new()
ctx.runningMean=runningMean saveMean = input_features.new().resize_(ctx.nPlanes)
ctx.runningVar=runningVar saveInvStd = runningMean.clone().resize_(ctx.nPlanes)
ctx.train=train
ctx.leakiness=leakiness
ctx.output_features = input_features.new()
ctx.saveMean = input_features.new().resize_(ctx.nPlanes)
ctx.saveInvStd = runningMean.clone().resize_(ctx.nPlanes)
typed_fn(input_features, 'BatchNormalization_updateOutput')( typed_fn(input_features, 'BatchNormalization_updateOutput')(
input_features, input_features,
ctx.output_features, output_features,
ctx.saveMean, saveMean,
ctx.saveInvStd, saveInvStd,
ctx.runningMean, runningMean,
ctx.runningVar, runningVar,
ctx.weight if ctx.weight is not None else nullptr, weight if weight is not None else nullptr,
ctx.bias if ctx.bias is not None else nullptr, bias if bias is not None else nullptr,
eps, eps,
momentum, momentum,
ctx.train, ctx.train,
ctx.leakiness) ctx.leakiness)
return ctx.output_features ctx.save_for_backward(input_features,
output_features,
weight,
bias,
runningMean,
runningVar,
saveMean,
saveInvStd)
return output_features
@staticmethod @staticmethod
def backward(ctx, grad_output): def backward(ctx, grad_output):
input_features,\
output_features,\
weight,\
bias,\
runningMean,\
runningVar,\
saveMean,\
saveInvStd = ctx.saved_tensors
assert ctx.train assert ctx.train
grad_input=Variable(grad_output.data.new()) grad_input = grad_output.new()
if ctx.weight is None: if weight is None:
grad_weight=None grad_weight = None
else: else:
grad_weight=Variable(ctx.input_features.new().resize_(ctx.nPlanes).zero_()) grad_weight = input_features.new().resize_(ctx.nPlanes).zero_()
if ctx.bias is None: if bias is None:
grad_bias=None grad_bias = None
else: else:
grad_bias=Variable(ctx.input_features.new().resize_(ctx.nPlanes).zero_()) grad_bias = input_features.new().resize_(ctx.nPlanes).zero_()
typed_fn(ctx.input_features, 'BatchNormalization_backward')( typed_fn(input_features, 'BatchNormalization_backward')(
ctx.input_features, input_features,
grad_input.data, grad_input,
ctx.output_features, output_features,
grad_output.data.contiguous(), grad_output.contiguous(),
ctx.saveMean, saveMean,
ctx.saveInvStd, saveInvStd,
ctx.runningMean, runningMean,
ctx.runningVar, runningVar,
ctx.weight if ctx.weight is not None else nullptr, weight if weight is not None else nullptr,
ctx.bias if ctx.bias is not None else nullptr, bias if bias is not None else nullptr,
grad_weight.data if grad_weight is not None else nullptr, grad_weight.data if grad_weight is not None else nullptr,
grad_bias.data if grad_bias is not None else nullptr, grad_bias.data if grad_bias is not None else nullptr,
ctx.leakiness) ctx.leakiness)
return grad_input, grad_weight, grad_bias, None, None, None, None, None, None return grad_input, grad_weight, grad_bias, None, None, None, None, None, None
class BatchNormalization(Module): class BatchNormalization(Module):
def __init__( def __init__(
self, self,
...@@ -109,12 +122,13 @@ class BatchNormalization(Module): ...@@ -109,12 +122,13 @@ class BatchNormalization(Module):
else: else:
self.weight = None self.weight = None
self.bias = None self.bias = None
def forward(self, input): def forward(self, input):
assert input.features.ndimension()==0 or input.features.size(1) == self.nPlanes assert input.features.ndimension() == 0 or input.features.size(1) == self.nPlanes
output = SparseConvNetTensor() output = SparseConvNetTensor()
output.metadata = input.metadata output.metadata = input.metadata
output.spatial_size = input.spatial_size output.spatial_size = input.spatial_size
output.features = BatchNormalizationFunction().apply( output.features = BatchNormalizationFunction.apply(
input.features, input.features,
self.weight, self.weight,
self.bias, self.bias,
...@@ -125,8 +139,10 @@ class BatchNormalization(Module): ...@@ -125,8 +139,10 @@ class BatchNormalization(Module):
self.training, self.training,
self.leakiness) self.leakiness)
return output return output
def input_spatial_size(self, out_size): def input_spatial_size(self, out_size):
return out_size return out_size
def __repr__(self): def __repr__(self):
s = 'BatchNorm(' + str(self.nPlanes) + ',eps=' + str(self.eps) + \ s = 'BatchNorm(' + str(self.nPlanes) + ',eps=' + str(self.eps) + \
',momentum=' + str(self.momentum) + ',affine=' + str(self.affine) ',momentum=' + str(self.momentum) + ',affine=' + str(self.affine)
...@@ -135,17 +151,21 @@ class BatchNormalization(Module): ...@@ -135,17 +151,21 @@ class BatchNormalization(Module):
s = s + ')' s = s + ')'
return s return s
class BatchNormReLU(BatchNormalization): class BatchNormReLU(BatchNormalization):
def __init__(self, nPlanes, eps=1e-4, momentum=0.9): def __init__(self, nPlanes, eps=1e-4, momentum=0.9):
BatchNormalization.__init__(self, nPlanes, eps, momentum, True, 0) BatchNormalization.__init__(self, nPlanes, eps, momentum, True, 0)
def __repr__(self): def __repr__(self):
s = 'BatchNormReLU(' + str(self.nPlanes) + ',eps=' + str(self.eps) + \ s = 'BatchNormReLU(' + str(self.nPlanes) + ',eps=' + str(self.eps) + \
',momentum=' + str(self.momentum) + ',affine=' + str(self.affine) + ')' ',momentum=' + str(self.momentum) + ',affine=' + str(self.affine) + ')'
return s return s
class BatchNormLeakyReLU(BatchNormalization): class BatchNormLeakyReLU(BatchNormalization):
def __init__(self, nPlanes, eps=1e-4, momentum=0.9): def __init__(self, nPlanes, eps=1e-4, momentum=0.9):
BatchNormalization.__init__(self, nPlanes, eps, momentum, True, 0.333) BatchNormalization.__init__(self, nPlanes, eps, momentum, True, 0.333)
def __repr__(self): def __repr__(self):
s = 'BatchNormReLU(' + str(self.nPlanes) + ',eps=' + str(self.eps) + \ s = 'BatchNormReLU(' + str(self.nPlanes) + ',eps=' + str(self.eps) + \
',momentum=' + str(self.momentum) + ',affine=' + str(self.affine) + ')' ',momentum=' + str(self.momentum) + ',affine=' + str(self.affine) + ')'
......
...@@ -8,7 +8,6 @@ import torch ...@@ -8,7 +8,6 @@ import torch
import torch.nn as nn import torch.nn as nn
import torch.optim as optim import torch.optim as optim
import torch.nn.functional as F import torch.nn.functional as F
from torch.autograd import Variable
import sparseconvnet as s import sparseconvnet as s
import time import time
import os import os
...@@ -16,26 +15,28 @@ import math ...@@ -16,26 +15,28 @@ import math
import numpy as np import numpy as np
from PIL import Image from PIL import Image
def updateStats(stats, output, target, loss): def updateStats(stats, output, target, loss):
batchSize = output.size(0) batchSize = output.size(0)
nClasses= output.size(1) nClasses = output.size(1)
if not stats: if not stats:
stats['top1'] = 0 stats['top1'] = 0
stats['top5'] = 0 stats['top5'] = 0
stats['n'] = 0 stats['n'] = 0
stats['nll'] = 0 stats['nll'] = 0
stats['confusion matrix'] = output.new().resize_(nClasses,nClasses).zero_() stats['confusion matrix'] = output.new().resize_(
nClasses, nClasses).zero_()
stats['n'] = stats['n'] + batchSize stats['n'] = stats['n'] + batchSize
stats['nll'] = stats['nll'] + loss * batchSize stats['nll'] = stats['nll'] + loss * batchSize
_, predictions = output.float().sort(1, True) _, predictions = output.float().sort(1, True)
correct = predictions.eq( correct = predictions.eq(
target[:,None].expand_as(output)) target[:, None].expand_as(output))
# Top-1 score # Top-1 score
stats['top1'] += correct.narrow(1, 0, 1).sum() stats['top1'] += correct[:, :1].long().sum().item()
# Top-5 score # Top-5 score
l = min(5, correct.size(1)) l = min(5, correct.size(1))
stats['top5'] += correct.narrow(1, 0, l).sum() stats['top5'] += correct[:, :l].long().sum().item()
stats['confusion matrix'].index_add_(0,target,F.softmax(Variable(output),1).data) stats['confusion matrix'].index_add_(0, target, F.softmax(output, 1).data)
def ClassificationTrainValidate(model, dataset, p): def ClassificationTrainValidate(model, dataset, p):
...@@ -59,10 +60,10 @@ def ClassificationTrainValidate(model, dataset, p): ...@@ -59,10 +60,10 @@ def ClassificationTrainValidate(model, dataset, p):
if 'test_reps' not in p: if 'test_reps' not in p:
p['test_reps'] = 1 p['test_reps'] = 1
optimizer = optim.SGD(model.parameters(), optimizer = optim.SGD(model.parameters(),
lr=p['initial_lr'], lr=p['initial_lr'],
momentum = p['momentum'], momentum=p['momentum'],
weight_decay = p['weight_decay'], weight_decay=p['weight_decay'],
nesterov=True) nesterov=True)
if p['check_point'] and os.path.isfile('epoch.pth'): if p['check_point'] and os.path.isfile('epoch.pth'):
p['epoch'] = torch.load('epoch.pth') + 1 p['epoch'] = torch.load('epoch.pth') + 1
print('Restarting at epoch ' + print('Restarting at epoch ' +
...@@ -70,7 +71,7 @@ def ClassificationTrainValidate(model, dataset, p): ...@@ -70,7 +71,7 @@ def ClassificationTrainValidate(model, dataset, p):
' from model.pth ..') ' from model.pth ..')
model.load_state_dict(torch.load('model.pth')) model.load_state_dict(torch.load('model.pth'))
else: else:
p['epoch']=1 p['epoch'] = 1
print(p) print(p)
print('#parameters', sum([x.nelement() for x in model.parameters()])) print('#parameters', sum([x.nelement() for x in model.parameters()]))
for epoch in range(p['epoch'], p['n_epochs'] + 1): for epoch in range(p['epoch'], p['n_epochs'] + 1):
...@@ -78,18 +79,16 @@ def ClassificationTrainValidate(model, dataset, p): ...@@ -78,18 +79,16 @@ def ClassificationTrainValidate(model, dataset, p):
stats = {} stats = {}
for param_group in optimizer.param_groups: for param_group in optimizer.param_groups:
param_group['lr'] = p['initial_lr'] * \ param_group['lr'] = p['initial_lr'] * \
math.exp((1 - epoch) * p['lr_decay']) math.exp((1 - epoch) * p['lr_decay'])
start = time.time() start = time.time()
for batch in dataset['train'](): for batch in dataset['train']():
if p['use_gpu']: if p['use_gpu']:
batch['input']=batch['input'].cuda() batch['input'] = batch['input'].cuda()
batch['target'] = batch['target'].cuda() batch['target'] = batch['target'].cuda()
batch['input'].to_variable(requires_grad=True)
batch['target'] = Variable(batch['target'])
optimizer.zero_grad() optimizer.zero_grad()
output = model(batch['input']) output = model(batch['input'])
loss = criterion(output, batch['target']) loss = criterion(output, batch['target'])
updateStats(stats, output.data, batch['target'].data, loss.data[0]) updateStats(stats, output, batch['target'], loss.item())
loss.backward() loss.backward()
optimizer.step() optimizer.step()
print(epoch, 'train: top1=%.2f%% top5=%.2f%% nll:%.2f time:%.1fs' % print(epoch, 'train: top1=%.2f%% top5=%.2f%% nll:%.2f time:%.1fs' %
...@@ -102,73 +101,80 @@ def ClassificationTrainValidate(model, dataset, p): ...@@ -102,73 +101,80 @@ def ClassificationTrainValidate(model, dataset, p):
stats['n']), stats['nll'] / stats['n']), stats['nll'] /
stats['n'], time.time() - stats['n'], time.time() -
start)) start))
cm=stats['confusion matrix'].cpu().numpy() cm = stats['confusion matrix'].cpu().numpy()
np.savetxt('train confusion matrix.csv',cm,delimiter=',') np.savetxt('train confusion matrix.csv', cm, delimiter=',')
cm*=255/(cm.sum(1,keepdims=True)+1e-9) cm *= 255 / (cm.sum(1, keepdims=True) + 1e-9)
Image.fromarray(cm.astype('uint8'),mode='L').save('train confusion matrix.png') Image.fromarray(cm.astype('uint8'), mode='L').save(
'train confusion matrix.png')
if p['check_point']: if p['check_point']:
torch.save(epoch, 'epoch.pth') torch.save(epoch, 'epoch.pth')
torch.save(model.state_dict(),'model.pth') torch.save(model.state_dict(), 'model.pth')
model.eval() model.eval()
s.forward_pass_multiplyAdd_count = 0 s.forward_pass_multiplyAdd_count = 0
s.forward_pass_hidden_states = 0 s.forward_pass_hidden_states = 0
start = time.time() start = time.time()
if p['test_reps'] ==1: if p['test_reps'] == 1:
stats = {} stats = {}
for batch in dataset['val'](): for batch in dataset['val']():
if p['use_gpu']: if p['use_gpu']:
batch['input']=batch['input'].cuda() batch['input'] = batch['input'].cuda()
batch['target'] = batch['target'].cuda() batch['target'] = batch['target'].cuda()
batch['input'].to_variable()
batch['target'] = Variable(batch['target'])
output = model(batch['input']) output = model(batch['input'])
loss = criterion(output, batch['target']) loss = criterion(output, batch['target'])
updateStats(stats, output.data, batch['target'].data, loss.data[0]) updateStats(stats, output, batch['target'], loss.item())
print(epoch, 'test: top1=%.2f%% top5=%.2f%% nll:%.2f time:%.1fs' %( print(epoch, 'test: top1=%.2f%% top5=%.2f%% nll:%.2f time:%.1fs' %
100 * (1 - 1.0 * stats['top1'] / stats['n']), (100 *
100 * (1 - 1.0 * stats['top5'] / stats['n']), (1 -
stats['nll'] / stats['n'], 1.0 *
time.time() - start), stats['top1'] /
'%.3e MultiplyAdds/sample %.3e HiddenStates/sample' % ( stats['n']), 100 *
s.forward_pass_multiplyAdd_count / stats['n'], (1 -
s.forward_pass_hidden_states / stats['n'])) 1.0 *
stats['top5'] /
stats['n']), stats['nll'] /
stats['n'], time.time() -
start), '%.3e MultiplyAdds/sample %.3e HiddenStates/sample' %
(s.forward_pass_multiplyAdd_count /
stats['n'], s.forward_pass_hidden_states /
stats['n']))
else: else:
for rep in range(1,p['test_reps']+1): for rep in range(1, p['test_reps'] + 1):
pr=[] pr = []
ta=[] ta = []
idxs=[] idxs = []
for batch in dataset['val'](): for batch in dataset['val']():
if p['use_gpu']: if p['use_gpu']:
batch['input']=batch['input'].cuda() batch['input'] = batch['input'].cuda()
batch['target'] = batch['target'].cuda() batch['target'] = batch['target'].cuda()
batch['idx'] = batch['idx'].cuda() batch['idx'] = batch['idx'].cuda()
batch['input'].to_variable() batch['input'].to_variable()
output = model(batch['input']) output = model(batch['input'])
pr.append( output.data ) pr.append(output.data)
ta.append( batch['target'] ) ta.append(batch['target'])
idxs.append( batch['idx'] ) idxs.append(batch['idx'])
pr=torch.cat(pr,0) pr = torch.cat(pr, 0)
ta=torch.cat(ta,0) ta = torch.cat(ta, 0)
idxs=torch.cat(idxs,0) idxs = torch.cat(idxs, 0)
if rep==1: if rep == 1:
predictions=pr.new().resize_as_(pr).zero_().index_add_(0,idxs,pr) predictions = pr.new().resize_as_(pr).zero_().index_add_(0, idxs, pr)
targets=ta.new().resize_as_(ta).zero_().index_add_(0,idxs,ta) targets = ta.new().resize_as_(ta).zero_().index_add_(0, idxs, ta)
else: else:
predictions.index_add_(0,idxs,pr) predictions.index_add_(0, idxs, pr)
loss = criterion(predictions/rep, targets) loss = criterion(predictions / rep, targets)
stats = {} stats = {}
updateStats(stats, predictions, targets, loss.data[0]) updateStats(stats, predictions, targets, loss.item())
print(epoch, 'test rep ', rep, print(epoch, 'test rep ', rep,
': top1=%.2f%% top5=%.2f%% nll:%.2f time:%.1fs' %( ': top1=%.2f%% top5=%.2f%% nll:%.2f time:%.1fs' % (
100 * (1 - 1.0 * stats['top1'] / stats['n']), 100 * (1 - 1.0 * stats['top1'] / stats['n']),
100 * (1 - 1.0 * stats['top5'] / stats['n']), 100 * (1 - 1.0 * stats['top5'] / stats['n']),
stats['nll'] / stats['n'], stats['nll'] / stats['n'],
time.time() - start), time.time() - start),
'%.3e MultiplyAdds/sample %.3e HiddenStates/sample' % ( '%.3e MultiplyAdds/sample %.3e HiddenStates/sample' % (
s.forward_pass_multiplyAdd_count / stats['n'], s.forward_pass_multiplyAdd_count / stats['n'],
s.forward_pass_hidden_states / stats['n'])) s.forward_pass_hidden_states / stats['n']))
cm=stats['confusion matrix'].cpu().numpy() cm = stats['confusion matrix'].cpu().numpy()
np.savetxt('test confusion matrix.csv',cm,delimiter=',') np.savetxt('test confusion matrix.csv', cm, delimiter=',')
cm*=255/(cm.sum(1,keepdims=True)+1e-9) cm *= 255 / (cm.sum(1, keepdims=True) + 1e-9)
Image.fromarray(cm.astype('uint8'),mode='L').save('test confusion matrix.png') Image.fromarray(cm.astype('uint8'), mode='L').save(
'test confusion matrix.png')
...@@ -5,35 +5,42 @@ ...@@ -5,35 +5,42 @@
# LICENSE file in the root directory of this source tree. # LICENSE file in the root directory of this source tree.
import sparseconvnet import sparseconvnet
from torch.autograd import Function, Variable from torch.autograd import Function
from torch.nn import Module, Parameter from torch.nn import Module, Parameter
from .utils import * from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor from .sparseConvNetTensor import SparseConvNetTensor
class ConvolutionFunction(Function): class ConvolutionFunction(Function):
@staticmethod @staticmethod
def forward( def forward(
ctx, ctx,
input_features, input_features,
weight, weight,
bias, bias,
input_metadata, input_metadata,
input_spatial_size, input_spatial_size,
output_spatial_size, output_spatial_size,
dimension, dimension,
filter_size, filter_size,
filter_stride): filter_stride):
output_features=input_features.new() output_features = input_features.new()
ctx.input_features=input_features ctx.input_metadata = input_metadata
ctx.input_metadata=input_metadata ctx.dimension = dimension
ctx.input_spatial_size=input_spatial_size # ctx.weight=weight
ctx.weight=weight # ctx.bias=bias
ctx.bias=bias # ctx.output_spatial_size=output_spatial_size
ctx.output_features=input_features.new() # ctx.filter_size=filter_size
ctx.output_spatial_size=output_spatial_size # ctx.filter_stride=filter_stride
ctx.dimension=dimension # bias??
ctx.filter_size=filter_size ctx.save_for_backward(
ctx.filter_stride=filter_stride input_features,
input_spatial_size,
weight,
bias,
output_spatial_size,
filter_size,
filter_stride)
sparseconvnet.forward_pass_multiplyAdd_count +=\ sparseconvnet.forward_pass_multiplyAdd_count +=\
dim_typed_fn( dim_typed_fn(
dimension, input_features, 'Convolution_updateOutput')( dimension, input_features, 'Convolution_updateOutput')(
...@@ -46,35 +53,38 @@ class ConvolutionFunction(Function): ...@@ -46,35 +53,38 @@ class ConvolutionFunction(Function):
output_features, output_features,
weight, weight,
bias if bias is not None else nullptr, bias if bias is not None else nullptr,
0, #remove this parameter!! 0, # remove this parameter!!
torch.cuda.IntTensor() if input_features.is_cuda else nullptr) torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
sparseconvnet.forward_pass_hidden_states += output_features.nelement() sparseconvnet.forward_pass_hidden_states += output_features.nelement()
return output_features return output_features
@staticmethod @staticmethod
def backward(ctx, grad_output): def backward(ctx, grad_output):
grad_input=Variable(grad_output.data.new()) input_features, input_spatial_size, weight, bias, output_spatial_size, filter_size, filter_stride = ctx.saved_tensors
grad_weight=Variable(grad_output.data.new().resize_as_(ctx.weight).zero_()) grad_input = grad_output.new()
if ctx.bias is None: grad_weight = grad_output.new().resize_as_(weight).zero_()
grad_bias=None if bias is None:
grad_bias = None
else: else:
grad_bias = Variable(grad_output.data.new().resize_as_(ctx.bias).zero_()) grad_bias = grad_output.new().resize_as_(bias).zero_()
dim_typed_fn( dim_typed_fn(
ctx.dimension, ctx.input_features, 'Convolution_backward')( ctx.dimension, input_features, 'Convolution_backward')(
ctx.input_spatial_size, input_spatial_size,
ctx.output_spatial_size, output_spatial_size,
ctx.filter_size, filter_size,
ctx.filter_stride, filter_stride,
ctx.input_metadata.ffi, ctx.input_metadata.ffi,
ctx.input_features, input_features,
grad_input.data, grad_input,
grad_output.data.contiguous(), grad_output.contiguous(),
ctx.weight, weight,
grad_weight.data, grad_weight,
grad_bias.data if grad_bias is not None else nullptr, grad_bias.data if grad_bias is not None else nullptr,
0, #remove this parameter 0, # remove this parameter
torch.cuda.IntTensor() if ctx.input_features.is_cuda else nullptr) torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
return grad_input, grad_weight, grad_bias, None, None, None, None, None, None return grad_input, grad_weight, grad_bias, None, None, None, None, None, None
class Convolution(Module): class Convolution(Module):
def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias): def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias):
Module.__init__(self) Module.__init__(self)
...@@ -82,7 +92,7 @@ class Convolution(Module): ...@@ -82,7 +92,7 @@ class Convolution(Module):
self.nIn = nIn self.nIn = nIn
self.nOut = nOut self.nOut = nOut
self.filter_size = toLongTensor(dimension, filter_size) self.filter_size = toLongTensor(dimension, filter_size)
self.filter_volume = self.filter_size.prod() self.filter_volume = self.filter_size.prod().item()
self.filter_stride = toLongTensor(dimension, filter_stride) self.filter_stride = toLongTensor(dimension, filter_stride)
std = (2.0 / nIn / self.filter_volume)**0.5 std = (2.0 / nIn / self.filter_volume)**0.5
self.weight = Parameter(torch.Tensor( self.weight = Parameter(torch.Tensor(
...@@ -92,15 +102,17 @@ class Convolution(Module): ...@@ -92,15 +102,17 @@ class Convolution(Module):
if bias: if bias:
self.bias = Parameter(torch.Tensor(nOut).zero_()) self.bias = Parameter(torch.Tensor(nOut).zero_())
else: else:
self.bias=None self.bias = None
def forward(self, input): def forward(self, input):
assert input.features.ndimension()==0 or input.features.size(1) == self.nIn assert input.features.ndimension() == 0 or input.features.size(1) == self.nIn
output = SparseConvNetTensor() output = SparseConvNetTensor()
output.metadata = input.metadata output.metadata = input.metadata
output.spatial_size =\ output.spatial_size =\
(input.spatial_size - self.filter_size) / self.filter_stride + 1 (input.spatial_size - self.filter_size) / self.filter_stride + 1
assert ((output.spatial_size-1)*self.filter_stride+self.filter_size==input.spatial_size).all() assert ((output.spatial_size - 1) * self.filter_stride +
output.features=ConvolutionFunction().apply( self.filter_size == input.spatial_size).all()
output.features = ConvolutionFunction.apply(
input.features, input.features,
self.weight, self.weight,
self.bias, self.bias,
...@@ -117,12 +129,13 @@ class Convolution(Module): ...@@ -117,12 +129,13 @@ class Convolution(Module):
s = 'Convolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C' s = 'Convolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
if self.filter_size.max() == self.filter_size.min() and\ if self.filter_size.max() == self.filter_size.min() and\
self.filter_stride.max() == self.filter_stride.min(): self.filter_stride.max() == self.filter_stride.min():
s = s + str(self.filter_size[0]) + '/' + str(self.filter_stride[0]) s = s + str(self.filter_size[0].item()) + \
'/' + str(self.filter_stride[0].item())
else: else:
s = s + '(' + str(self.filter_size[0]) s = s + '(' + str(self.filter_size[0].item())
for i in self.filter_size[1:]: for i in self.filter_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i)
s = s + ')/(' + str(self.filter_stride[0]) s = s + ')/(' + str(self.filter_stride[0].item())
for i in self.filter_stride[1:]: for i in self.filter_stride[1:]:
s = s + ',' + str(i) s = s + ',' + str(i)
s = s + ')' s = s + ')'
......
...@@ -5,34 +5,29 @@ ...@@ -5,34 +5,29 @@
# LICENSE file in the root directory of this source tree. # LICENSE file in the root directory of this source tree.
import sparseconvnet import sparseconvnet
from torch.autograd import Function, Variable from torch.autograd import Function
from torch.nn import Module, Parameter from torch.nn import Module, Parameter
from .utils import * from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor from .sparseConvNetTensor import SparseConvNetTensor
class DeconvolutionFunction(Function): class DeconvolutionFunction(Function):
@staticmethod @staticmethod
def forward( def forward(
ctx, ctx,
input_features, input_features,
weight, weight,
bias, bias,
input_metadata, input_metadata,
input_spatial_size, input_spatial_size,
output_spatial_size, output_spatial_size,
dimension, dimension,
filter_size, filter_size,
filter_stride): filter_stride):
ctx.input_features=input_features ctx.input_metadata = input_metadata
ctx.input_metadata=input_metadata output_features = input_features.new()
ctx.input_spatial_size=input_spatial_size ctx.dimension = dimension
ctx.weight=weight
ctx.bias=bias
ctx.output_features=input_features.new()
ctx.output_spatial_size=output_spatial_size
ctx.dimension=dimension
ctx.filter_size=filter_size
ctx.filter_stride=filter_stride
sparseconvnet.forward_pass_multiplyAdd_count +=\ sparseconvnet.forward_pass_multiplyAdd_count +=\
dim_typed_fn( dim_typed_fn(
dimension, input_features, 'Deconvolution_updateOutput')( dimension, input_features, 'Deconvolution_updateOutput')(
...@@ -42,38 +37,56 @@ class DeconvolutionFunction(Function): ...@@ -42,38 +37,56 @@ class DeconvolutionFunction(Function):
filter_stride, filter_stride,
input_metadata.ffi, input_metadata.ffi,
input_features, input_features,
ctx.output_features, output_features,
weight, weight,
bias if bias is not None else nullptr, bias if bias is not None else nullptr,
0, #remove this parameter!! 0, # remove this parameter!!
torch.cuda.IntTensor() if input_features.is_cuda else nullptr) torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
sparseconvnet.forward_pass_hidden_states += ctx.output_features.nelement() sparseconvnet.forward_pass_hidden_states += output_features.nelement()
return ctx.output_features ctx.save_for_backward(input_features,
output_features,
input_spatial_size,
weight,
bias,
output_spatial_size,
filter_size,
filter_stride)
return output_features
@staticmethod @staticmethod
def backward(ctx, grad_output): def backward(ctx, grad_output):
grad_input=Variable(grad_output.data.new()) input_features,\
grad_weight=Variable(grad_output.data.new().resize_as_(ctx.weight).zero_()) output_features,\
if ctx.bias is None: input_spatial_size,\
grad_bias=None weight,\
bias,\
output_spatial_size,\
filter_size,\
filter_stride = ctx.saved_tensors
grad_input = grad_output.new()
grad_weight = grad_output.new().resize_as_(weight).zero_()
if bias is None:
grad_bias = None
else: else:
grad_bias = Variable(grad_output.data.new().resize_as_(ctx.bias).zero_()) grad_bias = grad_output.new().resize_as_(bias).zero_()
dim_typed_fn( dim_typed_fn(
ctx.dimension, ctx.input_features, 'Deconvolution_backward')( ctx.dimension, input_features, 'Deconvolution_backward')(
ctx.input_spatial_size, input_spatial_size,
ctx.output_spatial_size, output_spatial_size,
ctx.filter_size, filter_size,
ctx.filter_stride, filter_stride,
ctx.input_metadata.ffi, ctx.input_metadata.ffi,
ctx.input_features, input_features,
grad_input.data, grad_input,
grad_output.data.contiguous(), grad_output.contiguous(),
ctx.weight, weight,
grad_weight.data, grad_weight,
grad_bias.data if grad_bias is not None else nullptr, grad_bias.data if grad_bias is not None else nullptr,
0, #remove this parameter 0, # remove this parameter
torch.cuda.IntTensor() if ctx.input_features.is_cuda else nullptr) torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
return grad_input, grad_weight, grad_bias, None, None, None, None, None, None return grad_input, grad_weight, grad_bias, None, None, None, None, None, None
class Deconvolution(Module): class Deconvolution(Module):
def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias): def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias):
Module.__init__(self) Module.__init__(self)
...@@ -81,7 +94,7 @@ class Deconvolution(Module): ...@@ -81,7 +94,7 @@ class Deconvolution(Module):
self.nIn = nIn self.nIn = nIn
self.nOut = nOut self.nOut = nOut
self.filter_size = toLongTensor(dimension, filter_size) self.filter_size = toLongTensor(dimension, filter_size)
self.filter_volume = self.filter_size.prod() self.filter_volume = self.filter_size.prod().item()
self.filter_stride = toLongTensor(dimension, filter_stride) self.filter_stride = toLongTensor(dimension, filter_stride)
std = (2.0 / nIn / self.filter_volume)**0.5 std = (2.0 / nIn / self.filter_volume)**0.5
self.weight = Parameter(torch.Tensor( self.weight = Parameter(torch.Tensor(
...@@ -91,14 +104,15 @@ class Deconvolution(Module): ...@@ -91,14 +104,15 @@ class Deconvolution(Module):
if bias: if bias:
self.bias = Parameter(torch.Tensor(nOut).zero_()) self.bias = Parameter(torch.Tensor(nOut).zero_())
else: else:
self.bias=None self.bias = None
def forward(self, input): def forward(self, input):
assert input.features.ndimension()==0 or input.features.size(1) == self.nIn assert input.features.ndimension() == 0 or input.features.size(1) == self.nIn
output = SparseConvNetTensor() output = SparseConvNetTensor()
output.metadata = input.metadata output.metadata = input.metadata
output.spatial_size =\ output.spatial_size =\
(input.spatial_size - 1) * self.filter_stride + self.filter_size (input.spatial_size - 1) * self.filter_stride + self.filter_size
output.features=DeconvolutionFunction().apply( output.features = DeconvolutionFunction.apply(
input.features, input.features,
self.weight, self.weight,
self.bias, self.bias,
...@@ -115,12 +129,13 @@ class Deconvolution(Module): ...@@ -115,12 +129,13 @@ class Deconvolution(Module):
s = 'Deconvolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C' s = 'Deconvolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
if self.filter_size.max() == self.filter_size.min() and\ if self.filter_size.max() == self.filter_size.min() and\
self.filter_stride.max() == self.filter_stride.min(): self.filter_stride.max() == self.filter_stride.min():
s = s + str(self.filter_size[0]) + '/' + str(self.filter_stride[0]) s = s + str(self.filter_size[0].item()) + \
'/' + str(self.filter_stride[0].item())
else: else:
s = s + '(' + str(self.filter_size[0]) s = s + '(' + str(self.filter_size[0].item())
for i in self.filter_size[1:]: for i in self.filter_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i)
s = s + ')/(' + str(self.filter_stride[0]) s = s + ')/(' + str(self.filter_stride[0].item())
for i in self.filter_stride[1:]: for i in self.filter_stride[1:]:
s = s + ',' + str(i) s = s + ',' + str(i)
s = s + ')' s = s + ')'
...@@ -128,5 +143,6 @@ class Deconvolution(Module): ...@@ -128,5 +143,6 @@ class Deconvolution(Module):
def input_spatial_size(self, out_size): def input_spatial_size(self, out_size):
in_size = (out_size - self.filter_size) / self.filter_stride + 1 in_size = (out_size - self.filter_size) / self.filter_stride + 1
assert ((in_size - 1) * self.filter_stride + self.filter_size == out_size).all() assert ((in_size - 1) * self.filter_stride +
self.filter_size == out_size).all()
return in_size return in_size
...@@ -12,12 +12,13 @@ Parameters: ...@@ -12,12 +12,13 @@ Parameters:
dimension : of the input field dimension : of the input field
""" """
from torch.autograd import Function, Variable from torch.autograd import Function
from torch.nn import Module from torch.nn import Module
from .utils import * from .utils import *
from .metadata import Metadata from .metadata import Metadata
from .sparseConvNetTensor import SparseConvNetTensor from .sparseConvNetTensor import SparseConvNetTensor
class DenseToSparseFunction(Function): class DenseToSparseFunction(Function):
@staticmethod @staticmethod
def forward( def forward(
...@@ -26,32 +27,36 @@ class DenseToSparseFunction(Function): ...@@ -26,32 +27,36 @@ class DenseToSparseFunction(Function):
output_metadata, output_metadata,
output_spatial_size, output_spatial_size,
dimension): dimension):
ctx.dimension=dimension ctx.dimension = dimension
a=input aa = input.permute(
aa=a.permute(*([0,]+list(range(2,2+dimension))+[1,])).clone() *([0, ] + list(range(2, 2 + dimension)) + [1, ])).clone()
ctx.aas=aa.size() aas = aa.size()
nz=aa.abs().sum(dimension+1).view(aa.size()[0:-1]) nz = aa.abs().sum(dimension + 1).view(aa.size()[0:-1])
s=torch.LongTensor(nz.stride()).view(1,dimension+1) s = torch.LongTensor(nz.stride()).view(1, dimension + 1)
nz=nz.nonzero() nz = nz.nonzero()
s=s.type_as(nz) s = s.type_as(nz)
aa=aa.view(-1,a.size(1)) aa = aa.view(-1, input.size(1))
ctx.aas2=aa.size() aas2 = aa.size()
ctx.r=(nz*s.expand_as(nz)).sum(1).view(-1) r = (nz * s.expand_as(nz)).sum(1).view(-1)
ctx.output_features=aa.index_select(0,ctx.r) output_features = aa.index_select(0, ctx.r)
dim_fn(dimension, 'createMetadataForDenseToSparse')( dim_fn(dimension, 'createMetadataForDenseToSparse')(
output_metadata.ffi, output_metadata.ffi,
output_spatial_size, output_spatial_size,
nz.cpu(), nz.cpu(),
input.size(0)) input.size(0))
return ctx.output_features ctx.save_for_backwards(output_features, aas, aas2, r)
return output_features
@staticmethod @staticmethod
def backward(ctx, grad_output): def backward(ctx, grad_output):
grad_input=Variable(grad_output.data.new().resize_(ctx.aas2).zero_().index_copy_(0,ctx.r,grad_output.data)) output_features, aas, aas2, r = ctx.saved_tensors
grad_input=grad_input.view(ctx.aas).permute(*([0,ctx.dimension+1]+list(range(1,ctx.dimension+1)))) grad_input = grad_output.new().resize_(
aas2).zero_().index_copy_(0, r, grad_output.data)
grad_input = grad_input.view(aas).permute(
*([0, ctx.dimension + 1] + list(range(1, ctx.dimension + 1))))
return grad_input, None, None, None return grad_input, None, None, None
class DenseToSparse(Module): class DenseToSparse(Module):
def __init__(self, dimension): def __init__(self, dimension):
Module.__init__(self) Module.__init__(self)
...@@ -60,8 +65,8 @@ class DenseToSparse(Module): ...@@ -60,8 +65,8 @@ class DenseToSparse(Module):
def forward(self, input): def forward(self, input):
output = SparseConvNetTensor() output = SparseConvNetTensor()
output.metadata = Metadata(self.dimension) output.metadata = Metadata(self.dimension)
output.spatial_size=torch.LongTensor(list(input.size()[2:])) output.spatial_size = torch.LongTensor(list(input.size()[2:]))
output.features=DenseToSparseFunction().apply( output.features = DenseToSparseFunction.apply(
input, input,
output.metadata, output.metadata,
output.spatial_size, output.spatial_size,
......
...@@ -4,43 +4,49 @@ ...@@ -4,43 +4,49 @@
# This source code is licensed under the license found in the # This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree. # LICENSE file in the root directory of this source tree.
from torch.autograd import Function, Variable from torch.autograd import Function
from torch.nn import Module from torch.nn import Module
from .utils import * from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor from .sparseConvNetTensor import SparseConvNetTensor
class Dropout(Module): class Dropout(Module):
def __init__(self, p = 0.5): def __init__(self, p=0.5):
Module.__init__(self) Module.__init__(self)
self.p = p self.p = p
def forward(self, input): def forward(self, input):
output = SparseConvNetTensor() output = SparseConvNetTensor()
i = input.features i = input.features
if self.training: if self.training:
m = i.new().resize_(1).expand_as(i).fill_(1-self.p) m = i.new().resize_(1).expand_as(i).fill_(1 - self.p)
output.features = i * torch.bernoulli(m) output.features = i * torch.bernoulli(m)
else: else:
output.features = i * (1 - self.p) output.features = i * (1 - self.p)
output.metadata = input.metadata output.metadata = input.metadata
output.spatial_size = input.spatial_size output.spatial_size = input.spatial_size
return output return output
def input_spatial_size(self, out_size): def input_spatial_size(self, out_size):
return out_size return out_size
class BatchwiseDropout(Module): class BatchwiseDropout(Module):
def __init__(self, p = 0.5): def __init__(self, p=0.5):
Module.__init__(self) Module.__init__(self)
self.p = p self.p = p
def forward(self, input): def forward(self, input):
output = SparseConvNetTensor() output = SparseConvNetTensor()
i = input.features i = input.features
if self.training: if self.training:
m = i.new().resize_(1).expand(1,i.shape[1]).fill_(1-self.p) m = i.new().resize_(1).expand(1, i.shape[1]).fill_(1 - self.p)
output.features = i * torch.bernoulli(m) output.features = i * torch.bernoulli(m)
else: else:
output.features = i * (1 - self.p) output.features = i * (1 - self.p)
output.metadata = input.metadata output.metadata = input.metadata
output.spatial_size = input.spatial_size output.spatial_size = input.spatial_size
return output return output
def input_spatial_size(self, out_size): def input_spatial_size(self, out_size):
return out_size return out_size
...@@ -6,8 +6,10 @@ ...@@ -6,8 +6,10 @@
from torch.nn import Module from torch.nn import Module
class Identity(Module): class Identity(Module):
def forward(self, input): def forward(self, input):
return input return input
def input_spatial_size(self, out_size): def input_spatial_size(self, out_size):
return out_size return out_size
...@@ -9,6 +9,7 @@ from .metadata import Metadata ...@@ -9,6 +9,7 @@ from .metadata import Metadata
from .utils import toLongTensor, dim_fn from .utils import toLongTensor, dim_fn
from .sparseConvNetTensor import SparseConvNetTensor from .sparseConvNetTensor import SparseConvNetTensor
class InputBatch(SparseConvNetTensor): class InputBatch(SparseConvNetTensor):
def __init__(self, dimension, spatial_size): def __init__(self, dimension, spatial_size):
self.dimension = dimension self.dimension = dimension
...@@ -51,7 +52,7 @@ class InputBatch(SparseConvNetTensor): ...@@ -51,7 +52,7 @@ class InputBatch(SparseConvNetTensor):
to add point (1,2,3) to sample 7, and (4,5,6) to sample 9 (0-indexed). to add point (1,2,3) to sample 7, and (4,5,6) to sample 9 (0-indexed).
""" """
l = locations.narrow(1,0,self.dimension) l = locations[:, :self.dimension]
assert l.min() >= 0 and (self.spatial_size.expand_as(l) - l).min() > 0 assert l.min() >= 0 and (self.spatial_size.expand_as(l) - l).min() > 0
dim_fn(self.dimension, 'setInputSpatialLocations')( dim_fn(self.dimension, 'setInputSpatialLocations')(
self.metadata.ffi, self.features, locations, vectors, overwrite) self.metadata.ffi, self.features, locations, vectors, overwrite)
......
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from torch.autograd import Function
from torch.nn import Module, Parameter
from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor
from .metadata import Metadata
class InputLayerFunction(Function):
    """Autograd function feeding raw (coords, features) data into a sparse
    tensor via the C/CUDA 'InputLayer' kernels.

    Fix: the original assigned ``ctx.dimension = dimension`` twice; the
    duplicate assignment is removed.
    """
    @staticmethod
    def forward(
            ctx,
            dimension,
            metadata,
            spatial_size,
            coords,
            input_features,
            batch_size,
            mode):
        """Run the forward kernel; returns the gathered output features.

        metadata is a plain Python object, so it is stashed on ctx rather
        than passed through save_for_backward.
        """
        output_features = input_features.new()
        ctx.dimension = dimension
        ctx.metadata = metadata
        dim_typed_fn(dimension, input_features, 'InputLayer_updateOutput')(
            metadata.ffi,
            spatial_size,
            coords,
            input_features,
            output_features,
            batch_size,
            mode,
            # CUDA path needs a scratch IntTensor; CPU path takes nullptr.
            torch.cuda.IntTensor() if input_features.is_cuda else nullptr
        )
        return output_features

    @staticmethod
    def backward(ctx, grad_output):
        """Scatter grad_output back to the input features."""
        grad_input = grad_output.data.new()
        dim_typed_fn(
            ctx.dimension,
            grad_output.data,
            'InputLayer_updateGradInput')(
            ctx.metadata.ffi,
            grad_input.data,
            grad_output.contiguous().data,
            torch.cuda.IntTensor() if grad_output.data.is_cuda else nullptr)
        # One slot per forward argument (after ctx); only input_features
        # (the 5th argument) receives a gradient.
        return None, None, None, None, grad_input, None, None
class InputLayer(Module):
    """Module converting raw input into a SparseConvNetTensor.

    The forward input is a tuple:
        (coords, input_features)              -- batch size inferred as 0, or
        (coords, input_features, batch_size)

    mode controls how duplicate coordinates are merged (default 3).
    """
    def __init__(self, dimension, spatial_size, mode=3):
        Module.__init__(self)
        self.dimension = dimension
        self.spatial_size = toLongTensor(dimension, spatial_size)
        self.mode = mode

    def forward(self, input):
        output = SparseConvNetTensor(
            metadata=Metadata(
                self.dimension),
            spatial_size=self.spatial_size)
        output.features = InputLayerFunction.apply(
            self.dimension,
            output.metadata,
            self.spatial_size,
            input[0],
            input[1],
            # Bug fix: original read `len(input == 2)`, which compares the
            # tuple to 2 (always False) and then calls len() on a bool,
            # raising TypeError on every call.
            0 if len(input) == 2 else input[2],
            self.mode
        )
        return output
class BLInputLayerFunction(Function):
    """Autograd wrapper around the C/CUDA 'BLInputLayer' kernels:
    forward gathers (coords, features) into batch-level features,
    backward scatters the gradient back onto input_features."""
    @staticmethod
    def forward(
            ctx,
            dimension,
            metadata,
            spatial_size,
            coords,
            input_features,
            mode):
        # metadata is a plain Python object; keep it (and dimension) on ctx.
        ctx.dimension = dimension
        ctx.metadata = metadata
        out_feats = input_features.new()
        scratch = (torch.cuda.IntTensor()
                   if input_features.is_cuda else nullptr)
        dim_typed_fn(dimension, input_features, 'BLInputLayer_updateOutput')(
            metadata.ffi,
            spatial_size,
            coords,
            input_features,
            out_feats,
            mode,
            scratch
        )
        return out_feats

    @staticmethod
    def backward(ctx, grad_output):
        grad_in = grad_output.data.new()
        scratch = (torch.cuda.IntTensor()
                   if grad_output.data.is_cuda else nullptr)
        dim_typed_fn(
            ctx.dimension,
            grad_output.data,
            'BLInputLayer_updateGradInput')(
            ctx.metadata.ffi,
            grad_in.data,
            grad_output.contiguous().data,
            scratch)
        # Only input_features (5th forward argument) gets a gradient.
        return None, None, None, None, grad_in, None
class BLInputLayer(Module):
    """Module form of BLInputLayerFunction.

    The forward input is the pair (coords, input_features); mode controls
    how duplicate coordinates are merged (default 3).
    """
    def __init__(self, dimension, spatial_size, mode=3):
        Module.__init__(self)
        self.dimension = dimension
        self.spatial_size = toLongTensor(dimension, spatial_size)
        self.mode = mode

    def forward(self, input):
        coords, features = input[0], input[1]
        out = SparseConvNetTensor(
            metadata=Metadata(self.dimension),
            spatial_size=self.spatial_size)
        out.features = BLInputLayerFunction.apply(
            self.dimension,
            out.metadata,
            self.spatial_size,
            coords,
            features,
            self.mode)
        return out
class BLOutputLayerFunction(Function):
    """Autograd wrapper around the C/CUDA 'BLOutputLayer' kernels: maps a
    sparse tensor's features back out, with the metadata held on ctx for
    the backward pass."""
    @staticmethod
    def forward(
            ctx,
            dimension,
            metadata,
            input_features):
        # metadata is a plain Python object; keep it (and dimension) on ctx.
        ctx.dimension = dimension
        ctx.metadata = metadata
        out_feats = input_features.new()
        scratch = (torch.cuda.IntTensor()
                   if input_features.is_cuda else nullptr)
        dim_typed_fn(dimension, input_features, 'BLOutputLayer_updateOutput')(
            metadata.ffi,
            input_features,
            out_feats,
            scratch
        )
        return out_feats

    @staticmethod
    def backward(ctx, grad_output):
        grad_in = grad_output.data.new()
        scratch = (torch.cuda.IntTensor()
                   if grad_output.data.is_cuda else nullptr)
        dim_typed_fn(
            ctx.dimension,
            grad_output.data,
            'BLOutputLayer_updateGradInput')(
            ctx.metadata.ffi,
            grad_in.data,
            grad_output.contiguous().data,
            scratch)
        # Only input_features (3rd forward argument) gets a gradient.
        return None, None, grad_in
class BLOutputLayer(Module):
    """Module form of BLOutputLayerFunction: extracts the batch-level
    feature tensor from a SparseConvNetTensor input."""
    def __init__(self, dimension):
        Module.__init__(self)
        self.dimension = dimension

    def forward(self, input):
        return BLOutputLayerFunction.apply(
            self.dimension, input.metadata, input.features)
...@@ -25,7 +25,7 @@ from .networkInNetwork import NetworkInNetwork ...@@ -25,7 +25,7 @@ from .networkInNetwork import NetworkInNetwork
from .reLU import ReLU from .reLU import ReLU
from .sequential import Sequential from .sequential import Sequential
from .sparseToDense import SparseToDense from .sparseToDense import SparseToDense
from .validConvolution import ValidConvolution from .submanifoldConvolution import SubmanifoldConvolution
from .networkArchitectures import * from .networkArchitectures import *
from .classificationTrainValidate import ClassificationTrainValidate from .classificationTrainValidate import ClassificationTrainValidate
from .misc import * from .misc import *
...@@ -25,7 +25,10 @@ class AveragePooling(SparseModule): ...@@ -25,7 +25,10 @@ class AveragePooling(SparseModule):
self.output.metadata = input.metadata self.output.metadata = input.metadata
self.output.spatial_size =\ self.output.spatial_size =\
(input.spatial_size - self.pool_size) / self.pool_stride + 1 (input.spatial_size - self.pool_size) / self.pool_stride + 1
dim_typed_fn(self.dimension, input.features, 'AveragePooling_updateOutput')( dim_typed_fn(
self.dimension,
input.features,
'AveragePooling_updateOutput')(
input.spatial_size, input.spatial_size,
self.output.spatial_size, self.output.spatial_size,
self.pool_size, self.pool_size,
......
...@@ -21,6 +21,7 @@ from . import SparseModule ...@@ -21,6 +21,7 @@ from . import SparseModule
from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
class BatchNormalization(SparseModule): class BatchNormalization(SparseModule):
def __init__( def __init__(
self, self,
...@@ -48,7 +49,7 @@ class BatchNormalization(SparseModule): ...@@ -48,7 +49,7 @@ class BatchNormalization(SparseModule):
self.gradInput = torch.Tensor() self.gradInput = torch.Tensor()
def updateOutput(self, input): def updateOutput(self, input):
assert input.features.ndimension()==0 or input.features.size(1) == self.nPlanes assert input.features.ndimension() == 0 or input.features.size(1) == self.nPlanes
self.output.metadata = input.metadata self.output.metadata = input.metadata
self.output.spatial_size = input.spatial_size self.output.spatial_size = input.spatial_size
typed_fn(input.features, 'BatchNormalization_updateOutput')( typed_fn(input.features, 'BatchNormalization_updateOutput')(
...@@ -112,6 +113,8 @@ class BatchNormReLU(BatchNormalization): ...@@ -112,6 +113,8 @@ class BatchNormReLU(BatchNormalization):
s = 'BatchNormReLU(' + str(self.nPlanes) + ',eps=' + str(self.eps) + \ s = 'BatchNormReLU(' + str(self.nPlanes) + ',eps=' + str(self.eps) + \
',momentum=' + str(self.momentum) + ',affine=' + str(self.affine) + ')' ',momentum=' + str(self.momentum) + ',affine=' + str(self.affine) + ')'
return s return s
class BatchNormLeakyReLU(BatchNormalization): class BatchNormLeakyReLU(BatchNormalization):
def __init__(self, nPlanes, eps=1e-4, momentum=0.9): def __init__(self, nPlanes, eps=1e-4, momentum=0.9):
BatchNormalization.__init__(self, nPlanes, eps, momentum, True, 0.333) BatchNormalization.__init__(self, nPlanes, eps, momentum, True, 0.333)
...@@ -121,6 +124,7 @@ class BatchNormLeakyReLU(BatchNormalization): ...@@ -121,6 +124,7 @@ class BatchNormLeakyReLU(BatchNormalization):
',momentum=' + str(self.momentum) + ',affine=' + str(self.affine) + ')' ',momentum=' + str(self.momentum) + ',affine=' + str(self.affine) + ')'
return s return s
class BatchNormalizationInTensor(BatchNormalization): class BatchNormalizationInTensor(BatchNormalization):
def __init__( def __init__(
self, self,
......
...@@ -21,6 +21,7 @@ from . import SparseModule ...@@ -21,6 +21,7 @@ from . import SparseModule
from ..utils import toLongTensor, typed_fn from ..utils import toLongTensor, typed_fn
from ..sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
class BatchwiseDropout(SparseModule): class BatchwiseDropout(SparseModule):
def __init__( def __init__(
self, self,
...@@ -39,9 +40,9 @@ class BatchwiseDropout(SparseModule): ...@@ -39,9 +40,9 @@ class BatchwiseDropout(SparseModule):
def updateOutput(self, input): def updateOutput(self, input):
if self.train: if self.train:
self.noise.bernoulli_(1-self.p) self.noise.bernoulli_(1 - self.p)
else: else:
self.noise.fill_(1-self.p) self.noise.fill_(1 - self.p)
if self.inplace: if self.inplace:
self.output = input self.output = input
...@@ -50,11 +51,7 @@ class BatchwiseDropout(SparseModule): ...@@ -50,11 +51,7 @@ class BatchwiseDropout(SparseModule):
self.output.spatialSize = input.spatialSize self.output.spatialSize = input.spatialSize
typed_fn(input.features, 'BatchwiseMultiplicativeDropout_updateOutput')( typed_fn(input.features, 'BatchwiseMultiplicativeDropout_updateOutput')(
input.features, input.features, self.output.features, self.noise, self.leakiness)
self.output.features,
self.noise,
self.leakiness
)
return self.output return self.output
def updateGradInput(self, input, gradOutput): def updateGradInput(self, input, gradOutput):
...@@ -67,7 +64,7 @@ class BatchwiseDropout(SparseModule): ...@@ -67,7 +64,7 @@ class BatchwiseDropout(SparseModule):
gradOutput, gradOutput,
self.noise, self.noise,
self.leakiness self.leakiness
) )
return self.gradInput return self.gradInput
def type(self, t, tensorCache=None): def type(self, t, tensorCache=None):
...@@ -94,6 +91,7 @@ class BatchwiseDropout(SparseModule): ...@@ -94,6 +91,7 @@ class BatchwiseDropout(SparseModule):
s = s + ')' s = s + ')'
return s return s
class BatchwiseDropoutInTensor(BatchwiseDropout): class BatchwiseDropoutInTensor(BatchwiseDropout):
def __init__( def __init__(
self, self,
...@@ -106,9 +104,9 @@ class BatchwiseDropoutInTensor(BatchwiseDropout): ...@@ -106,9 +104,9 @@ class BatchwiseDropoutInTensor(BatchwiseDropout):
def updateOutput(self, input): def updateOutput(self, input):
if self.train: if self.train:
self.noise.bernoulli_(1-self.p) self.noise.bernoulli_(1 - self.p)
else: else:
self.noise.fill_(1-self.p) self.noise.fill_(1 - self.p)
self.output.metadata = input.metadata self.output.metadata = input.metadata
self.output.spatial_size = input.spatial_size self.output.spatial_size = input.spatial_size
...@@ -116,12 +114,13 @@ class BatchwiseDropoutInTensor(BatchwiseDropout): ...@@ -116,12 +114,13 @@ class BatchwiseDropoutInTensor(BatchwiseDropout):
o = self.output.features.narrow( o = self.output.features.narrow(
1, self.output_column_offset, self.nPlanes) 1, self.output_column_offset, self.nPlanes)
typed_fn(input.features, 'BatchwiseMultiplicativeDropout_updateOutput')( typed_fn(
input.features,
'BatchwiseMultiplicativeDropout_updateOutput')(
input.features, input.features,
o, o,
self.noise, self.noise,
self.leakiness self.leakiness)
)
return self.output return self.output
def updateGradInput(self, input, gradOutput): def updateGradInput(self, input, gradOutput):
...@@ -130,17 +129,12 @@ class BatchwiseDropoutInTensor(BatchwiseDropout): ...@@ -130,17 +129,12 @@ class BatchwiseDropoutInTensor(BatchwiseDropout):
d_o = gradOutput.narrow(1, self.output_column_offset, self.nPlanes) d_o = gradOutput.narrow(1, self.output_column_offset, self.nPlanes)
typed_fn(input.features, 'BatchwiseMultiplicativeDropout_updateGradInput')( typed_fn(input.features, 'BatchwiseMultiplicativeDropout_updateGradInput')(
input.features, input.features, self.gradInput, d_o, self.noise, self.leakiness)
self.gradInput,
d_o,
self.noise,
self.leakiness
)
return self.gradInput return self.gradInput
def __repr__(self): def __repr__(self):
s = 'BatchwiseDropoutInTensor(' + str(self.nPlanes) + ',p=' + str(self.p) + \ s = 'BatchwiseDropoutInTensor(' + str(self.nPlanes) + ',p=' + str(
',column_offset=' + str(self.output_column_offset) self.p) + ',column_offset=' + str(self.output_column_offset)
if self.leakiness > 0: if self.leakiness > 0:
s = s + ',leakiness=' + str(self.leakiness) s = s + ',leakiness=' + str(self.leakiness)
s = s + ')' s = s + ')'
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
""" """
Assume all the inputs have identical SparseGrids and input[i].nActive Assume all the inputs have identical SparseGrids and input[i].nActive
Assume input[0].nPlanes >= input[i].nPlanes for all i=1,#input Assume input[0].nPlanes >= input[i].nPlanes for all i=1,#input
output.validRules is taken from input[0].validRules (could do set union?) output.submanifoldRules is taken from input[0].submanifoldRules (could do set union?)
(for resnets, make sure the residual link is input[1]) (for resnets, make sure the residual link is input[1])
""" """
......
...@@ -36,7 +36,7 @@ class Convolution(SparseModule): ...@@ -36,7 +36,7 @@ class Convolution(SparseModule):
self.gradInput = torch.Tensor() self.gradInput = torch.Tensor()
def updateOutput(self, input): def updateOutput(self, input):
assert input.features.ndimension()==0 or input.features.size(1) == self.nIn assert input.features.ndimension() == 0 or input.features.size(1) == self.nIn
self.output.metadata = input.metadata self.output.metadata = input.metadata
self.output.spatial_size =\ self.output.spatial_size =\
(input.spatial_size - self.filter_size) / self.filter_stride + 1 (input.spatial_size - self.filter_size) / self.filter_stride + 1
......
...@@ -35,7 +35,7 @@ class Deconvolution(SparseModule): ...@@ -35,7 +35,7 @@ class Deconvolution(SparseModule):
self.gradInput = torch.Tensor() self.gradInput = torch.Tensor()
def updateOutput(self, input): def updateOutput(self, input):
assert input.features.ndimension()==0 or input.features.size(1) == self.nIn assert input.features.ndimension() == 0 or input.features.size(1) == self.nIn
self.output.metadata = input.metadata self.output.metadata = input.metadata
self.output.spatial_size =\ self.output.spatial_size =\
(input.spatial_size - 1) * self.filter_stride + self.filter_size (input.spatial_size - 1) * self.filter_stride + self.filter_size
......
...@@ -11,7 +11,7 @@ from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr, set ...@@ -11,7 +11,7 @@ from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr, set
from ..sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
from .batchNormalization import * from .batchNormalization import *
from .affineReLUTrivialConvolution import AffineReLUTrivialConvolution from .affineReLUTrivialConvolution import AffineReLUTrivialConvolution
from .validConvolution import ValidConvolution from .submanifoldConvolution import SubmanifoldConvolution
import math import math
...@@ -43,7 +43,7 @@ class DenseNetBlock(Container): ...@@ -43,7 +43,7 @@ class DenseNetBlock(Container):
self.add(BatchNormalization(nFeaturesB)) self.add(BatchNormalization(nFeaturesB))
# Module 4*i+3 # Module 4*i+3
self.add( self.add(
ValidConvolution( SubmanifoldConvolution(
dimension, dimension,
nFeaturesB, nFeaturesB,
growthRate, growthRate,
......
...@@ -18,26 +18,28 @@ from ..utils import dim_fn, nullptr ...@@ -18,26 +18,28 @@ from ..utils import dim_fn, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
from ..metadata import Metadata from ..metadata import Metadata
class DenseToSparse(SparseModule): class DenseToSparse(SparseModule):
def __init__(self, dimension): def __init__(self, dimension):
SparseModule.__init__(self) SparseModule.__init__(self)
self.dimension = dimension self.dimension = dimension
self.output = SparseConvNetTensor(torch.Tensor(),Metadata(dimension)) self.output = SparseConvNetTensor(torch.Tensor(), Metadata(dimension))
self.gradInput = torch.Tensor() self.gradInput = torch.Tensor()
def updateOutput(self, input): def updateOutput(self, input):
a=input a = input
aa=a.permute(*([0,]+list(range(2,2+self.dimension))+[1,])).clone() aa = a.permute(
self.aas=aa.size() *([0, ] + list(range(2, 2 + self.dimension)) + [1, ])).clone()
nz=aa.abs().sum(self.dimension+1).view(aa.size()[0:-1]) self.aas = aa.size()
s=torch.LongTensor(nz.stride()).view(1,self.dimension+1) nz = aa.abs().sum(self.dimension + 1).view(aa.size()[0:-1])
nz=nz.nonzero() s = torch.LongTensor(nz.stride()).view(1, self.dimension + 1)
s=s.type_as(nz) nz = nz.nonzero()
aa=aa.view(-1,a.size(1)) s = s.type_as(nz)
self.aas2=aa.size() aa = aa.view(-1, a.size(1))
self.r=(nz*s.expand_as(nz)).sum(1).view(-1) self.aas2 = aa.size()
self.output.features=aa.index_select(0,self.r) self.r = (nz * s.expand_as(nz)).sum(1).view(-1)
self.output.spatial_size=torch.LongTensor(list(input.size()[2:])) self.output.features = aa.index_select(0, self.r)
self.output.spatial_size = torch.LongTensor(list(input.size()[2:]))
dim_fn(self.dimension, 'createMetadataForDenseToSparse')( dim_fn(self.dimension, 'createMetadataForDenseToSparse')(
self.output.metadata.ffi, self.output.metadata.ffi,
self.output.spatial_size, self.output.spatial_size,
...@@ -47,13 +49,15 @@ class DenseToSparse(SparseModule): ...@@ -47,13 +49,15 @@ class DenseToSparse(SparseModule):
def updateGradInput(self, input, gradOutput): def updateGradInput(self, input, gradOutput):
self.gradInput.resize_(self.aas2).zero_() self.gradInput.resize_(self.aas2).zero_()
self.gradInput.index_copy_(0,self.r,gradOutput) self.gradInput.index_copy_(0, self.r, gradOutput)
self.gradInput=self.gradInput.view(self.aas).permute(*([0,self.dimension+1]+list(range(1,self.dimension+1)))) self.gradInput = self.gradInput.view(self.aas).permute(
*([0, self.dimension + 1] + list(range(1, self.dimension + 1))))
return self.gradInput return self.gradInput
def clearState(self): def clearState(self):
SparseModule.clearState(self) SparseModule.clearState(self)
self.aas=None self.aas = None
self.r=None self.r = None
def __repr__(self): def __repr__(self):
return 'DenseToSparse(' + str(self.dimension) + ')' return 'DenseToSparse(' + str(self.dimension) + ')'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment