Rename ValidConvolutions to SubmanifoldConvolutions, update for PyTorch 0.4 Tensor/Variable merge

d77687a6 · Benjamin Graham · Benjamin Thomas Graham · 297e04c0 · d77687a6 · d77687a6
Commit d77687a6, authored Mar 04, 2018 by Benjamin Graham; committed by Benjamin Thomas Graham on Mar 05, 2018
20 changed files
--- a/PyTorch/sparseconvnet/legacy/maxPooling.py
+++ b/PyTorch/sparseconvnet/legacy/maxPooling.py
@@ -26,7 +26,10 @@ class MaxPooling(SparseModule):
        self.output.metadata = input.metadata
        self.output.spatial_size =\
            (input.spatial_size - self.pool_size) / self.pool_stride + 1
-        dim_typed_fn(self.dimension, input.features, 'MaxPooling_updateOutput')(
+        dim_typed_fn(
+            self.dimension,
+            input.features,
+            'MaxPooling_updateOutput')(
            input.spatial_size,
            self.output.spatial_size,
            self.pool_size,
@@ -39,7 +42,10 @@ class MaxPooling(SparseModule):
        return self.output
    def updateGradInput(self, input, gradOutput):
-        dim_typed_fn(self.dimension, input.features, 'MaxPooling_updateGradInput')(
+        dim_typed_fn(
+            self.dimension,
+            input.features,
+            'MaxPooling_updateGradInput')(
            input.spatial_size,
            self.output.spatial_size,
            self.pool_size,

--- a/PyTorch/sparseconvnet/legacy/misc.py
+++ b/PyTorch/sparseconvnet/legacy/misc.py
@@ -10,12 +10,13 @@ from .sparseModule import SparseModule
 from ..sparseConvNetTensor import SparseConvNetTensor
 from .batchNormalization import BatchNormalization
 class Tanh(SparseModule):
    def __init__(self):
        SparseModule.__init__(self)
-        self.module=nn.Tanh()
+        self.module = nn.Tanh()
        self.output = SparseConvNetTensor()
-        self.output.features=self.module.output
+        self.output.features = self.module.output
        self.gradInput = self.module.gradInput
    def updateOutput(self, input):
@@ -25,19 +26,20 @@ class Tanh(SparseModule):
        return self.output
    def updateGradInput(self, input, gradOutput):
-        self.module.updateGradInput(input.features,gradOutput)
+        self.module.updateGradInput(input.features, gradOutput)
        return self.gradInput
    def type(self, t, tensorCache=None):
        if t:
-            self.module.type(t,tensorCache)
+            self.module.type(t, tensorCache)
-            self.output.features=self.module.output
+            self.output.features = self.module.output
            self.gradInput = self.module.gradInput
 class ELU(SparseModule):
    def __init__(self):
        SparseModule.__init__(self)
-        self.module=nn.ELU()
+        self.module = nn.ELU()
        self.output = SparseConvNetTensor()
        self.gradInput = self.module.gradInput
@@ -48,14 +50,20 @@ class ELU(SparseModule):
        return self.output
    def updateGradInput(self, input, gradOutput):
-        self.module.updateGradInput(input.features,gradOutput)
+        self.module.updateGradInput(input.features, gradOutput)
        return self.gradInput
    def type(self, t, tensorCache=None):
        if t:
-            self.module.type(t,tensorCache)
+            self.module.type(t, tensorCache)
-            self.output.features=self.module.output
+            self.output.features = self.module.output
            self.gradInput = self.module.gradInput
 def BatchNormELU(nPlanes, eps=1e-4, momentum=0.9):
-    return Sequential().add(BatchNormalization(nPlanes,eps,momentum)).add(ELU())
+    return Sequential().add(
+        BatchNormalization(
+            nPlanes,
+            eps,
+            momentum)).add(
+        ELU())
--- a/PyTorch/sparseconvnet/legacy/networkArchitectures.py
+++ b/PyTorch/sparseconvnet/legacy/networkArchitectures.py
@@ -20,7 +20,7 @@ from .cAddTable import CAddTable
 from .convolution import Convolution
 from .deconvolution import Deconvolution
 from .denseNetBlock import DenseNetBlock
-from .validConvolution import ValidConvolution
+from .submanifoldConvolution import SubmanifoldConvolution
 from .networkInNetwork import NetworkInNetwork
 from .batchNormalization import BatchNormReLU, BatchNormalizationInTensor
 from .maxPooling import MaxPooling
@@ -60,7 +60,7 @@ def DeepCNet(dimension, nInputPlanes, nPlanes, bn=True):
 def SparseVggNet(dimension, nInputPlanes, layers):
    """
    VGG style nets
-    Use valid convolutions
+    Use submanifold convolutions
    Also implements 'Plus'-augmented nets
    """
    nPlanes = nInputPlanes
@@ -71,19 +71,19 @@ def SparseVggNet(dimension, nInputPlanes, layers):
        elif x[0] == 'MP':
            m.add(MaxPooling(dimension, x[1], x[2]))
        elif x[0] == 'C' and len(x) == 2:
-            m.add(ValidConvolution(dimension, nPlanes, x[1], 3, False))
+            m.add(SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False))
            nPlanes = x[1]
            m.add(BatchNormReLU(nPlanes))
        elif x[0] == 'C' and len(x) == 3:
            m.add(ConcatTable()
                  .add(
-                ValidConvolution(dimension, nPlanes, x[1], 3, False)
+                SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False)
            )
                .add(
                Sequential()
                .add(Convolution(dimension, nPlanes, x[2], 3, 2, False))
                .add(BatchNormReLU(x[2]))
-                .add(ValidConvolution(dimension, x[2], x[2], 3, False))
+                .add(SubmanifoldConvolution(dimension, x[2], x[2], 3, False))
                .add(BatchNormReLU(x[2]))
                .add(Deconvolution(dimension, x[2], x[2], 3, 2, False))
            )).add(JoinTable([x[1], x[2]]))
@@ -92,28 +92,28 @@ def SparseVggNet(dimension, nInputPlanes, layers):
        elif x[0] == 'C' and len(x) == 4:
            m.add(ConcatTable()
                  .add(
-                ValidConvolution(dimension, nPlanes, x[1], 3, False)
+                SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False)
            )
                .add(
                Sequential()
                .add(Convolution(dimension, nPlanes, x[2], 3, 2, False))
                .add(BatchNormReLU(x[2]))
-                .add(ValidConvolution(dimension, x[2], x[2], 3, False))
+                .add(SubmanifoldConvolution(dimension, x[2], x[2], 3, False))
                .add(BatchNormReLU(x[2]))
                .add(Deconvolution(dimension, x[2], x[2], 3, 2, False))
            )
                .add(Sequential()
                     .add(Convolution(dimension, nPlanes, x[3], 3, 2, False))
                     .add(BatchNormReLU(x[3]))
-                     .add(ValidConvolution(dimension, x[3], x[3], 3, False))
+                     .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                     .add(BatchNormReLU(x[3]))
                     .add(Convolution(dimension, x[3], x[3], 3, 2, False))
                     .add(BatchNormReLU(x[3]))
-                     .add(ValidConvolution(dimension, x[3], x[3], 3, False))
+                     .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                     .add(BatchNormReLU(x[3]))
                     .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                     .add(BatchNormReLU(x[3]))
-                     .add(ValidConvolution(dimension, x[3], x[3], 3, False))
+                     .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                     .add(BatchNormReLU(x[3]))
                     .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                     )).add(JoinTable([x[1], x[2], x[3]]))
@@ -145,7 +145,7 @@ def SparseResNet(dimension, nInputPlanes, layers):
                    m.add(
                        ConcatTable().add(
                            Sequential().add(
-                                ValidConvolution(
+                                SubmanifoldConvolution(
                                    dimension,
                                    nPlanes,
                                    n,
@@ -158,7 +158,7 @@ def SparseResNet(dimension, nInputPlanes, layers):
                                    stride,
                                    False)) .add(
                                BatchNormReLU(n)) .add(
-                                ValidConvolution(
+                                SubmanifoldConvolution(
                                    dimension,
                                    n,
                                    n,
@@ -173,14 +173,14 @@ def SparseResNet(dimension, nInputPlanes, layers):
                        ConcatTable().add(
                            Sequential().add(
                                BatchNormReLU(nPlanes)) .add(
-                                ValidConvolution(
+                                SubmanifoldConvolution(
                                    dimension,
                                    nPlanes,
                                    n,
                                    3,
                                    False)) .add(
                                BatchNormReLU(n)) .add(
-                                ValidConvolution(
+                                SubmanifoldConvolution(
                                    dimension,
                                    n,
                                    n,
@@ -195,7 +195,7 @@ def SparseResNet(dimension, nInputPlanes, layers):
 def SparseDenseNet(dimension, nInputPlanes, layers):
    """
-    SparseConvNet meets DenseNets using valid convolutions
+    SparseConvNet meets DenseNets using submanifold convolutions
    Could do with a less confusing name
    """
    nPlanes = nInputPlanes

--- a/PyTorch/sparseconvnet/legacy/sparseToDense.py
+++ b/PyTorch/sparseconvnet/legacy/sparseToDense.py
@@ -27,12 +27,15 @@ class SparseToDense(SparseModule):
        self.dimension = dimension
        self.output = torch.Tensor()
        self.gradInput = torch.FloatTensor()
-        self.nPlanes=nPlanes
+        self.nPlanes = nPlanes
    def updateOutput(self, input):
        if not self.nPlanes:
-            self.nPlanes=input.features.size(1)
+            self.nPlanes = input.features.size(1)
-        dim_typed_fn(self.dimension, input.features, 'SparseToDense_updateOutput')(
+        dim_typed_fn(
+            self.dimension,
+            input.features,
+            'SparseToDense_updateOutput')(
            input.spatial_size,
            input.metadata.ffi,
            input.features,
@@ -42,7 +45,10 @@ class SparseToDense(SparseModule):
        return self.output
    def updateGradInput(self, input, gradOutput):
-        dim_typed_fn(self.dimension, input.features, 'SparseToDense_updateGradInput')(
+        dim_typed_fn(
+            self.dimension,
+            input.features,
+            'SparseToDense_updateGradInput')(
            input.spatial_size,
            input.metadata.ffi,
            input.features,

--- a/PyTorch/sparseconvnet/legacy/validConvolution.py
+++ b/PyTorch/sparseconvnet/legacy/validConvolution.py
@@ -10,7 +10,8 @@ from . import SparseModule
 from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr
 from ..sparseConvNetTensor import SparseConvNetTensor
-class ValidConvolution(SparseModule):
+class SubmanifoldConvolution(SparseModule):
    def __init__(self, dimension, nIn, nOut, filter_size, bias):
        SparseModule.__init__(self)
        self.dimension = dimension
@@ -31,11 +32,11 @@ class ValidConvolution(SparseModule):
        self.gradInput = torch.Tensor()
    def updateOutput(self, input):
-        assert input.features.ndimension()==0 or input.features.size(1) == self.nIn
+        assert input.features.ndimension() == 0 or input.features.size(1) == self.nIn
        self.output.metadata = input.metadata
        self.output.spatial_size = input.spatial_size
        s.forward_pass_multiplyAdd_count +=\
-            dim_typed_fn(self.dimension, input.features, 'ValidConvolution_updateOutput')(
+            dim_typed_fn(self.dimension, input.features, 'SubmanifoldConvolution_updateOutput')(
                input.spatial_size,
                self.filter_size,
                input.metadata.ffi,
@@ -51,7 +52,10 @@ class ValidConvolution(SparseModule):
    def backward(self, input, gradOutput, scale=1):
        assert scale == 1
-        dim_typed_fn(self.dimension, input.features, 'ValidConvolution_backward')(
+        dim_typed_fn(
+            self.dimension,
+            input.features,
+            'SubmanifoldConvolution_backward')(
            input.spatial_size,
            self.filter_size,
            input.metadata.ffi,
@@ -60,7 +64,9 @@ class ValidConvolution(SparseModule):
            gradOutput,
            self.weight,
            self.gradWeight,
-            optionalTensor(self, 'gradBias'),
+            optionalTensor(
+                self,
+                'gradBias'),
            self.filter_volume,
            torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
        return self.gradInput
@@ -78,7 +84,8 @@ class ValidConvolution(SparseModule):
            self.gradBias = self.gradBias.type(t)
    def __repr__(self):
-        s = 'ValidConvolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
+        s = 'SubmanifoldConvolution ' + \
+            str(self.nIn) + '->' + str(self.nOut) + ' C'
        if self.filter_size.max() == self.filter_size.min():
            s = s + str(self.filter_size[0])
        else:

--- a/PyTorch/sparseconvnet/maxPooling.py
+++ b/PyTorch/sparseconvnet/maxPooling.py
@@ -4,11 +4,12 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
-from torch.autograd import Function, Variable
+from torch.autograd import Function
 from torch.nn import Module
 from .utils import *
 from .sparseConvNetTensor import SparseConvNetTensor
 class MaxPoolingFunction(Function):
    @staticmethod
    def forward(
@@ -21,15 +22,10 @@ class MaxPoolingFunction(Function):
            pool_size,
            pool_stride,
            nFeaturesToDrop):
-        ctx.input_features=input_features
+        ctx.input_metadata = input_metadata
-        ctx.input_metadata=input_metadata
-        ctx.input_spatial_size = input_spatial_size
-        ctx.output_spatial_size = output_spatial_size
        ctx.dimension = dimension
-        ctx.pool_size = pool_size
-        ctx.pool_stride = pool_stride
        ctx.nFeaturesToDrop = nFeaturesToDrop
-        ctx.output_features = input_features.new()
+        output_features = input_features.new()
        dim_typed_fn(dimension, input_features, 'MaxPooling_updateOutput')(
            input_spatial_size,
            output_spatial_size,
@@ -37,27 +33,40 @@ class MaxPoolingFunction(Function):
            pool_stride,
            input_metadata.ffi,
            input_features,
-            ctx.output_features,
+            output_features,
            nFeaturesToDrop,
            torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
-        return ctx.output_features
+        ctx.save_for_backward(
+            input_features,
+            output_features,
+            input_spatial_size,
+            output_spatial_size,
+            pool_size,
+            pool_stride)
+        return output_features
    @staticmethod
    def backward(ctx, grad_output):
-        grad_input=Variable(grad_output.data.new())
+        input_features,\
+            output_features,\
+            input_spatial_size,\
+            output_spatial_size,\
+            pool_size,\
+            pool_stride = ctx.saved_tensors
+        grad_input = grad_output.new()
        dim_typed_fn(
-            ctx.dimension, ctx.input_features, 'MaxPooling_updateGradInput')(
+            ctx.dimension, input_features, 'MaxPooling_updateGradInput')(
-            ctx.input_spatial_size,
+            input_spatial_size,
-            ctx.output_spatial_size,
+            output_spatial_size,
-            ctx.pool_size,
+            pool_size,
-            ctx.pool_stride,
+            pool_stride,
            ctx.input_metadata.ffi,
-            ctx.input_features,
+            input_features,
-            grad_input.data,
+            grad_input,
-            ctx.output_features,
+            output_features,
-            grad_output.data,
+            grad_output,
            ctx.nFeaturesToDrop,
-            torch.cuda.IntTensor() if ctx.input_features.is_cuda else nullptr)
+            torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
        return grad_input, None, None, None, None, None, None, None
@@ -68,29 +77,39 @@ class MaxPooling(Module):
        self.pool_size = toLongTensor(dimension, pool_size)
        self.pool_stride = toLongTensor(dimension, pool_stride)
        self.nFeaturesToDrop = nFeaturesToDrop
    def forward(self, input):
        output = SparseConvNetTensor()
        output.metadata = input.metadata
        output.spatial_size = (
            input.spatial_size - self.pool_size) / self.pool_stride + 1
-        assert ((output.spatial_size-1)*self.pool_stride+self.pool_size==input.spatial_size).all()
+        assert ((output.spatial_size - 1) * self.pool_stride +
-        output.features =  MaxPoolingFunction().apply(
+                self.pool_size == input.spatial_size).all()
-            input.features, input.metadata, input.spatial_size,
+        output.features = MaxPoolingFunction.apply(
-            output.spatial_size, self.dimension,self.pool_size,self.pool_stride,
+            input.features,
+            input.metadata,
+            input.spatial_size,
+            output.spatial_size,
+            self.dimension,
+            self.pool_size,
+            self.pool_stride,
            self.nFeaturesToDrop)
        return output
    def input_spatial_size(self, out_size):
        return (out_size - 1) * self.pool_stride + self.pool_size
    def __repr__(self):
        s = 'MaxPooling'
        if self.pool_size.max() == self.pool_size.min() and\
                self.pool_stride.max() == self.pool_stride.min():
-            s = s + str(self.pool_size[0]) + '/' + str(self.pool_stride[0])
+            s = s + str(self.pool_size[0].item()) + \
+                '/' + str(self.pool_stride[0].item())
        else:
-            s = s + '(' + str(self.pool_size[0])
+            s = s + '(' + str(self.pool_size[0].item())
            for i in self.pool_size[1:]:
                s = s + ',' + str(i)
-            s = s + ')/(' + str(self.pool_stride[0])
+            s = s + ')/(' + str(self.pool_stride[0].item())
            for i in self.pool_stride[1:]:
                s = s + ',' + str(i)
            s = s + ')'

--- a/PyTorch/sparseconvnet/metadata.py
+++ b/PyTorch/sparseconvnet/metadata.py
@@ -6,7 +6,7 @@
 """
 Store Metadata relating to which spatial locations are active at each scale.
-Convolutions, valid convolutions and 'convolution reversing' deconvolutions
+Convolutions, submanifold convolutions and 'convolution reversing' deconvolutions
 all coexist within the same MetaData object as long as each spatial size
 only occurs once.

--- a/PyTorch/sparseconvnet/networkArchitectures.py
+++ b/PyTorch/sparseconvnet/networkArchitectures.py
@@ -21,7 +21,7 @@ from .tables import *
 def SparseVggNet(dimension, nInputPlanes, layers):
    """
    VGG style nets
-    Use valid convolutions
+    Use submanifold convolutions
    Also implements 'Plus'-augmented nets
    """
    nPlanes = nInputPlanes
@@ -216,7 +216,7 @@ def ResNetUNet(dimension, nPlanes, reps, depth=4):
        def __init__(self):
            nn.Module.__init__(self)
            self.sparseModel = scn.Sequential().add(
-               scn.ValidConvolution(3, nInputFeatures, 64, 3, False)).add(
+               scn.SubmanifoldConvolution(3, nInputFeatures, 64, 3, False)).add(
               scn.ResNetUNet(3, 64, 2, 4))
            self.linear = nn.Linear(64, nClasses)
        def forward(self,x):

--- a/PyTorch/sparseconvnet/networkInNetwork.py
+++ b/PyTorch/sparseconvnet/networkInNetwork.py
@@ -5,49 +5,57 @@
 # LICENSE file in the root directory of this source tree.
 import sparseconvnet
-from torch.autograd import Function, Variable
+from torch.autograd import Function
 from torch.nn import Module, Parameter
 from .utils import *
 from .sparseConvNetTensor import SparseConvNetTensor
 class NetworkInNetworkFunction(Function):
    @staticmethod
    def forward(
-        ctx,
+            ctx,
-        input_features,
+            input_features,
-        weight,
+            weight,
-        bias):
+            bias):
-        ctx.input_features=input_features
+        output_features = input_features.new()
-        ctx.weight=weight
+        ctx.save_forbackwards(input_features,
-        ctx.bias=bias
+                              output_features,
-        ctx.output_features=input_features.new()
+                              weight,
+                              bias)
        sparseconvnet.forward_pass_multiplyAdd_count +=\
            typed_fn(input_features, 'NetworkInNetwork_updateOutput')(
                input_features,
-                ctx.output_features,
+                output_features,
                weight,
                bias if bias is not None else nullptr)
-        sparseconvnet.forward_pass_hidden_states += ctx.output_features.nelement()
+        sparseconvnet.forward_pass_hidden_states += output_features.nelement()
-        return ctx.output_features
+        return output_features
    @staticmethod
    def backward(ctx, grad_output):
-        grad_input=Variable(grad_output.data.new())
+        input_features,\
-        grad_weight=Variable(grad_output.data.new().resize_as_(ctx.weight).zero_())
+            output_features,\
-        if ctx.bias is None:
+            weight,\
-            grad_bias=None
+            bias = ctx.saved_tensors
+        grad_input = grad_output.new()
+        grad_weight = grad_output.new().resize_as_(weight).zero_()
+        if bias is None:
+            grad_bias = None
        else:
-            grad_bias = Variable(grad_output.data.new().resize_as_(ctx.bias))
+            grad_bias = grad_output.new().resize_as_(bias)
-        typed_fn(ctx.input_features, 'NetworkInNetwork_updateGradInput')(
+        typed_fn(input_features, 'NetworkInNetwork_updateGradInput')(
-            grad_input.data,
+            grad_input,
-            grad_output.data,
+            grad_output,
-            ctx.weight)
+            weight)
-        typed_fn(ctx.input_features, 'NetworkInNetwork_accGradParameters')(
+        typed_fn(input_features, 'NetworkInNetwork_accGradParameters')(
-            ctx.input_features,
+            input_features,
-            grad_output.data,
+            grad_output,
-            grad_weight.data,
+            grad_weight,
            grad_bias.data if grad_bias is not None else nullptr)
        return grad_input, grad_weight, grad_bias
 class NetworkInNetwork(Module):
    def __init__(self, nIn, nOut, bias=False):
        Module.__init__(self)
@@ -62,13 +70,14 @@ class NetworkInNetwork(Module):
            self.bias = Parameter(torch.Tensor(nOut).
                                  _())
        else:
-            self.bias=None
+            self.bias = None
    def forward(self, input):
-        assert input.features.ndimension()==0 or input.features.size(1) == self.nIn
+        assert input.features.ndimension() == 0 or input.features.size(1) == self.nIn
        output = SparseConvNetTensor()
        output.metadata = input.metadata
        output.spatial_size = input.spatial_size
-        output.features=NetworkInNetworkFunction().apply(
+        output.features = NetworkInNetworkFunction.apply(
            input.features,
            self.weight,
            self.bias)

--- a/PyTorch/sparseconvnet/sequential.py
+++ b/PyTorch/sparseconvnet/sequential.py
@@ -7,11 +7,13 @@
 from torch.nn import Sequential as S
 from .utils import set
 class Sequential(S):
    def input_spatial_size(self, out_size):
        for m in reversed(self._modules):
            out_size = self._modules[m].input_spatial_size(out_size)
        return out_size
    def add(self, module):
-        self._modules[str(len(self._modules))]=module
+        self._modules[str(len(self._modules))] = module
        return self
--- a/PyTorch/sparseconvnet/sparseConvNetTensor.py
+++ b/PyTorch/sparseconvnet/sparseConvNetTensor.py
@@ -22,7 +22,12 @@ class SparseConvNetTensor(object):
            spatial_size = self.spatial_size
        t = torch.LongTensor()
-        dim_fn(self.metadata.dimension, 'getSpatialLocations')(self.metadata.ffi, spatial_size, t)
+        dim_fn(
+            self.metadata.dimension,
+            'getSpatialLocations')(
+            self.metadata.ffi,
+            spatial_size,
+            t)
        return t
    def type(self, t=None):
@@ -50,5 +55,8 @@ class SparseConvNetTensor(object):
    def to_variable(self, requires_grad=False, volatile=False):
        "Convert self.features to a variable for use with modern PyTorch interface."
-        self.features = Variable(self.features, requires_grad=requires_grad, volatile=volatile)
+        self.features = Variable(
+            self.features,
+            requires_grad=requires_grad,
+            volatile=volatile)
        return self
--- a/PyTorch/sparseconvnet/sparseToDense.py
+++ b/PyTorch/sparseconvnet/sparseToDense.py
@@ -15,11 +15,12 @@ Parameters:
 dimension : of the input field,
 """
-from torch.autograd import Function, Variable
+from torch.autograd import Function
 from torch.nn import Module
 from .utils import *
 from .sparseConvNetTensor import SparseConvNetTensor
 class SparseToDenseFunction(Function):
    @staticmethod
    def forward(
@@ -29,12 +30,14 @@ class SparseToDenseFunction(Function):
            spatial_size,
            dimension,
            nPlanes):
-        ctx.input_metadata=input_metadata
+        ctx.input_metadata = input_metadata
-        ctx.spatial_size=spatial_size
+        ctx.dimension = dimension
-        ctx.dimension=dimension
+        ctx.save_for_backward(input_features, spatial_size)
-        ctx.input_features=input_features
        output = input_features.new()
-        dim_typed_fn(ctx.dimension, input_features, 'SparseToDense_updateOutput')(
+        dim_typed_fn(
+            ctx.dimension,
+            input_features,
+            'SparseToDense_updateOutput')(
            spatial_size,
            input_metadata.ffi,
            input_features,
@@ -42,28 +45,41 @@ class SparseToDenseFunction(Function):
            torch.cuda.IntTensor() if input_features.is_cuda else nullptr,
            nPlanes)
        return output
    @staticmethod
    def backward(ctx, grad_output):
-        grad_input=Variable(grad_output.data.new())
+        grad_input = grad_output.new()
-        dim_typed_fn(ctx.dimension, ctx.input_features, 'SparseToDense_updateGradInput')(
+        input_features, spatial_size = ctx.saved_tensors
-            ctx.spatial_size,
+        dim_typed_fn(
+            ctx.dimension,
+            input_features,
+            'SparseToDense_updateGradInput')(
+            spatial_size,
            ctx.input_metadata.ffi,
-            ctx.input_features,
+            input_features,
-            grad_input.data,
+            grad_input,
-            grad_output.data,
+            grad_output,
-            torch.cuda.IntTensor() if ctx.input_features.is_cuda else nullptr)
+            torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
        return grad_input, None, None, None, None
 class SparseToDense(Module):
    def __init__(self, dimension, nPlanes):
        Module.__init__(self)
        self.dimension = dimension
-        self.nPlanes=nPlanes
+        self.nPlanes = nPlanes
    def forward(self, input):
-        return SparseToDenseFunction().apply(input.features,input.metadata,input.spatial_size,self.dimension,self.nPlanes)
+        return SparseToDenseFunction.apply(
+            input.features,
+            input.metadata,
+            input.spatial_size,
+            self.dimension,
+            self.nPlanes)
    def input_spatial_size(self, out_size):
        return out_size
    def __repr__(self):
-        return 'SparseToDense(' + str(self.dimension) + ','+ str(self.nPlanes)+ ')'
+        return 'SparseToDense(' + str(self.dimension) + \
+            ',' + str(self.nPlanes) + ')'
--- a/PyTorch/sparseconvnet/submanifoldConvolution.py
+++ b/PyTorch/sparseconvnet/submanifoldConvolution.py
@@ -4,79 +4,88 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
-# 'SubmanifoldConvolution == ValidConvolution'
+# 'SubmanifoldConvolution == SubmanifoldConvolution'
 import sparseconvnet
-from torch.autograd import Function, Variable
+from torch.autograd import Function
 from torch.nn import Module, Parameter
 from .utils import *
 from .sparseConvNetTensor import SparseConvNetTensor
-class ValidConvolutionFunction(Function):
+class SubmanifoldConvolutionFunction(Function):
    @staticmethod
    def forward(
-        ctx,
+            ctx,
-        input_features,
+            input_features,
-        weight,
+            weight,
-        bias,
+            bias,
-        input_metadata,
+            input_metadata,
-        spatial_size,
+            spatial_size,
-        dimension,
+            dimension,
-        filter_size):
+            filter_size):
-        ctx.input_features=input_features
+        ctx.input_metadata = input_metadata
-        ctx.input_metadata=input_metadata
+        ctx.dimension = dimension
-        ctx.spatial_size=spatial_size
+        # ctx.input_features=input_features
-        ctx.weight=weight
+        # ctx.spatial_size=spatial_size
-        ctx.bias=bias
+        # ctx.weight=weight
-        ctx.output_features=input_features.new()
+        # ctx.bias=bias
-        ctx.dimension=dimension
+        # ctx.filter_size=filter_size
-        ctx.filter_size=filter_size
+        output_features = input_features.new()
+        ctx.save_for_backward(
+            input_features,
+            spatial_size,
+            weight,
+            bias,
+            filter_size)
        sparseconvnet.forward_pass_multiplyAdd_count +=\
            dim_typed_fn(
-                dimension, input_features, 'ValidConvolution_updateOutput')(
+                dimension, input_features, 'SubmanifoldConvolution_updateOutput')(
                spatial_size,
                filter_size,
                input_metadata.ffi,
-                input_features,
+                input_features.data,
-                ctx.output_features,
+                output_features.data,
-                weight,
+                weight.data,
-                bias if bias is not None else nullptr,
+                bias.data if bias is not None else nullptr,
-                0, #remove this parameter!!
+                0,  # remove this parameter!!
                torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
-        sparseconvnet.forward_pass_hidden_states += ctx.output_features.nelement()
+        sparseconvnet.forward_pass_hidden_states += output_features.nelement()
-        return ctx.output_features
+        return output_features
    @staticmethod
    def backward(ctx, grad_output):
-        grad_input=Variable(grad_output.data.new())
+        input_features, spatial_size, weight, bias, filter_size = ctx.saved_tensors
-        grad_weight=Variable(grad_output.data.new().resize_as_(ctx.weight).zero_())
+        grad_input = grad_output.new()
-        if ctx.bias is None:
+        grad_weight = grad_output.new().resize_as_(weight).zero_()
-            grad_bias=None
+        if bias is None:
+            grad_bias = None
        else:
-            grad_bias = Variable(grad_output.data.new().resize_as_(ctx.bias).zero_())
+            grad_bias = grad_output.new().resize_as_(bias).zero_()
        dim_typed_fn(
-            ctx.dimension, ctx.input_features, 'ValidConvolution_backward')(
+            ctx.dimension, input_features, 'SubmanifoldConvolution_backward')(
-            ctx.spatial_size,
+            spatial_size,
-            ctx.filter_size,
+            filter_size,
            ctx.input_metadata.ffi,
-            ctx.input_features,
+            input_features,
-            grad_input.data,
+            grad_input,
-            grad_output.data.contiguous(),
+            grad_output.contiguous(),
-            ctx.weight,
+            weight,
-            grad_weight.data,
+            grad_weight,
            grad_bias.data if grad_bias is not None else nullptr,
-            0, #remove this parameter
+            0,  # remove this parameter
-            torch.cuda.IntTensor() if ctx.input_features.is_cuda else nullptr)
+            torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
        return grad_input, grad_weight, grad_bias, None, None, None, None
-class ValidConvolution(Module):
+class SubmanifoldConvolution(Module):
    def __init__(self, dimension, nIn, nOut, filter_size, bias):
        Module.__init__(self)
        self.dimension = dimension
        self.nIn = nIn
        self.nOut = nOut
        self.filter_size = toLongTensor(dimension, filter_size)
-        self.filter_volume = self.filter_size.prod()
+        self.filter_volume = self.filter_size.prod().item()
        std = (2.0 / nIn / self.filter_volume)**0.5
        self.weight = Parameter(torch.Tensor(
            nIn * self.filter_volume, nOut
@@ -87,11 +96,11 @@ class ValidConvolution(Module):
            self.bias = None
    def forward(self, input):
-        assert input.features.ndimension()==0 or input.features.size(1) == self.nIn
+        assert input.features.ndimension() == 0 or input.features.size(1) == self.nIn
        output = SparseConvNetTensor()
        output.metadata = input.metadata
        output.spatial_size = input.spatial_size
-        output.features=ValidConvolutionFunction.apply(
+        output.features = SubmanifoldConvolutionFunction.apply(
            input.features,
            self.weight,
            self.bias,
@@ -102,11 +111,12 @@ class ValidConvolution(Module):
        return output
    def __repr__(self):
-        s = 'ValidConvolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
+        s = 'SubmanifoldConvolution ' + \
+            str(self.nIn) + '->' + str(self.nOut) + ' C'
        if self.filter_size.max() == self.filter_size.min():
-            s = s + str(self.filter_size[0])
+            s = s + str(self.filter_size[0].item())
        else:
-            s = s + '(' + str(self.filter_size[0])
+            s = s + '(' + str(self.filter_size[0].item())
            for i in self.filter_size[1:]:
                s = s + ',' + str(i)
            s = s + ')'
@@ -115,5 +125,6 @@ class ValidConvolution(Module):
    def input_spatial_size(self, out_size):
        return out_size
-class SubmanifoldConvolution(ValidConvolution):
+class ValidConvolution(SubmanifoldConvolution):
    pass
--- a/PyTorch/sparseconvnet/tables.py
+++ b/PyTorch/sparseconvnet/tables.py
@@ -4,36 +4,43 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
-from torch.autograd import Function, Variable
+from torch.autograd import Function
 from torch.nn import Module
 from .utils import *
 from .sparseConvNetTensor import SparseConvNetTensor
 class JoinTable(Module):
    def forward(self, input):
        output = SparseConvNetTensor()
        output.metadata = input[0].metadata
        output.spatial_size = input[0].spatial_size
-        output.features=torch.cat([i.features for i in input],1)
+        output.features = torch.cat([i.features for i in input], 1)
        return output
-    def input_spatial_size(self,out_size):
+    def input_spatial_size(self, out_size):
        return out_size
 class AddTable(Module):
    def forward(self, input):
        output = SparseConvNetTensor()
        output.metadata = input[0].metadata
        output.spatial_size = input[0].spatial_size
-        output.features=sum([i.features for i in input])
+        output.features = sum([i.features for i in input])
        return output
-    def input_spatial_size(self,out_size):
+    def input_spatial_size(self, out_size):
        return out_size
 class ConcatTable(Module):
    def forward(self, input):
        return [module(input) for module in self._modules.values()]
    def add(self, module):
-        self._modules[str(len(self._modules))]=module
+        self._modules[str(len(self._modules))] = module
        return self
-    def input_spatial_size(self,out_size):
+    def input_spatial_size(self, out_size):
        return self._modules['0'].input_spatial_size(out_size)
--- a/PyTorch/sparseconvnet/utils.py
+++ b/PyTorch/sparseconvnet/utils.py
@@ -10,8 +10,11 @@ from cffi import FFI
 def toLongTensor(dimension, x):
-    if type(x).__name__ == 'LongTensor':
+    if hasattr(x, 'type') and x.type() == 'torch.LongTensor':
        return x
+    elif isinstance(x, (list, tuple)):
+        assert len(x) == dimension
+        return torch.LongTensor(x)
    else:
        return torch.LongTensor(dimension).fill_(x)
@@ -28,16 +31,18 @@ def dim_fn(dimension, name):
 def typed_fn(t, name):
-    # print('typed_fn',t.features.type(),name)
+    # print('typed_fn',t.type(),name)
    return getattr(scn, 'scn_' + typeTable[t.type()] + '_' + name)
 def dim_typed_fn(dimension, t, name):
-    # print('dim_typed_fn',dimension,t.features.type(),name)
+    # print('dim_typed_fn',dimension,t.type(),name)
    return getattr(scn, 'scn_' +
                   typeTable[t.type()] +
                   str(dimension) +
                   name)
 ffi = FFI()
 nullptr = ffi.NULL
@@ -69,6 +74,14 @@ def threadDatasetIterator(d):
    return iterator
+# def threadDatasetIterator(d):
+#     print('not threads!!!')
+#     def iterator():
+#         for x in d:
+#             yield x
+#     return iterator
 def set(obj):
    if hasattr(obj, 'storage_type'):
        obj.set_(obj.storage_type()())

--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ This is the Torch/PyTorch library for training Submanifold Sparse Convolutional
 ## Spatial sparsity
-This library brings [Spatially-sparse convolutional networks](https://github.com/btgraham/SparseConvNet) to Torch/PyTorch. Moreover, it introduces **Submanifold Sparse Convolutions**, that can be used to build computationally efficient sparse VGG/ResNet/DenseNet-style networks.
+This library brings [Spatially-sparse convolutional networks](https://github.com/btgraham/SparseConvNet) to PyTorch and [Torch classic](README_Torch.md). Moreover, it introduces **Submanifold Sparse Convolutions**, which can be used to build computationally efficient sparse VGG/ResNet/DenseNet-style networks.
 With regular 3x3 convolutions, the set of active (non-zero) sites grows rapidly:<br />
 ![submanifold](img/i.gif) <br />
@@ -122,83 +122,9 @@ output = model.forward(input)
 # Output is 2x32x10x10: our minibatch has 2 samples, the network has 32 output
 # feature planes, and 10x10 is the spatial size of the output.
-print(output.size(), output.data.type())
+print(output.size(), output.type())
 ```
-## Hello World - (Lua)Torch
-Convolutional networks are built with SparseConvNet in the same way as with Torch's nn/cunn/cudnn packages.
-```
--Train on the GPU if there is one, otherwise CPU
-scn=require 'sparseconvnet'
-tensorType = scn.cutorch and 'torch.CudaTensor' or 'torch.FloatTensor'
-model = scn.Sequential()
-:add(scn.SparseVggNet(2,1,{ --dimension 2, 1 input plane
-      {'C', 8}, -- 3x3 VSC convolution, 8 output planes, batchnorm, ReLU
-      {'C', 8}, -- and another
-      {'MP', 3, 2}, --max pooling, size 3, stride 2
-      {'C', 16}, -- etc
-      {'C', 16},
-      {'MP', 3, 2},
-      {'C', 24},
-      {'C', 24},
-      {'MP', 3, 2}}))
-:add(scn.Convolution(2,24,32,3,1,false)) --an SC convolution on top
-:add(scn.BatchNormReLU(32))
-:add(scn.SparseToDense(2))
-:type(tensorType)
--[[
-To use the network we must create an scn.InputBatch with right dimensionality.
-If we want the output to have spatial size 10x10, we can find the appropriate
-input size, give that we uses three layers of MP3/2 max-pooling, and finish
-with a SC convoluton
-]]
-inputSpatialSize=model:suggestInputSize(torch.LongTensor{10,10}) --103x103
-input=scn.InputBatch(2,inputSpatialSize)
--Now we build the input batch, sample by sample, and active site by active site.
-msg={
-  " O   O  OOO  O    O    OO     O       O   OO   OOO   O    OOO   ",
-  " O   O  O    O    O   O  O    O       O  O  O  O  O  O    O  O  ",
-  " OOOOO  OO   O    O   O  O    O   O   O  O  O  OOO   O    O   O ",
-  " O   O  O    O    O   O  O     O O O O   O  O  O  O  O    O  O  ",
-  " O   O  OOO  OOO  OOO  OO       O   O     OO   O  O  OOO  OOO   ",
-}
-input:addSample()
-for y,line in ipairs(msg) do
-  for x = 1,string.len(line) do
-    if string.sub(line,x,x) == 'O' then
-      local location = torch.LongTensor{x,y}
-      local featureVector = torch.FloatTensor{1}
-      input:setLocation(location,featureVector,0)
-    end
-  end
-end
--[[
-Optional: allow metadata preprocessing to be done in batch preparation threads
-to improve GPU utilization.
-Parameter:
-3 if using MP3/2 or size-3 stride-2 convolutions for downsizeing,
-2 if using MP2
-]]
-input:precomputeMetadata(3)
-model:evaluate()
-input:type(tensorType)
-output = model:forward(input)
--[[
-Output is 1x32x10x10: our minibatch has 1 sample, the network has 32 output
-feature planes, and 10x10 is the spatial size of the output.
-]]
-print(output:size())
-```
 ## Examples
@@ -209,45 +135,33 @@ Examples in the examples folder include
 Data will be downloaded/preprocessed on the first run, i.e.
 ```
-cd examples/Assamese_handwriting
-th VGGplus.lua
-or
 cd examples/Assamese_handwriting
 python VGGplus.py
 ```
-## Setup
+## PyTorch Setup
+Tested with Ubuntu 16.04, Python 3 in [Miniconda](https://conda.io/miniconda.html) and PyTorch master (v0.4 with merged Tensors/Variables).
-Tested with Ubuntu 16.04. Install [Torch](http://torch.ch/docs/getting-started.html) and/or [PyTorch](http://pytorch.org/) ([Miniconda](https://conda.io/miniconda.html)) then: <br />
 ```
+git clone https://github.com/pytorch/pytorch.git
+cd pytorch
+python setup.py install
+cd ..
 apt-get install libsparsehash-dev
 git clone git@github.com:facebookresearch/SparseConvNet.git
-then
-cd SparseConvNet/Torch/
-luarocks make sparseconvnet-0.1-1.rockspec
-and/or
 cd SparseConvNet/PyTorch/
-python setup.py develop
+python setup.py install
 ```
 To run the examples you may also need to install unrar and TorchNet:
 ```
 apt-get install unrar
-and
-luarocks install torchnet
-or
 pip install git+https://github.com/pytorch/tnt.git@master
 ```
 ### Links
 1. [ICDAR 2013 Chinese Handwriting Recognition Competition 2013](http://www.nlpr.ia.ac.cn/events/CHRcompetition2013/competition/Home.html) First place in task 3, with test error of 2.61%. Human performance on the test set was 4.81%. [Report](http://www.nlpr.ia.ac.cn/events/CHRcompetition2013/competition/ICDAR%202013%20CHR%20competition.pdf)
 2. [Spatially-sparse convolutional neural networks, 2014](http://arxiv.org/abs/1409.6070) SparseConvNets for Chinese handwriting recognition

--- a/README_Torch.md
+++ b/README_Torch.md
+## Hello World - (Lua)Torch
+Convolutional networks are built with SparseConvNet in the same way as with Torch's nn/cunn/cudnn packages.
+```
+--Train on the GPU if there is one, otherwise CPU
+scn=require 'sparseconvnet'
+tensorType = scn.cutorch and 'torch.CudaTensor' or 'torch.FloatTensor'
+model = scn.Sequential()
+:add(scn.SparseVggNet(2,1,{ --dimension 2, 1 input plane
+      {'C', 8}, -- 3x3 VSC convolution, 8 output planes, batchnorm, ReLU
+      {'C', 8}, -- and another
+      {'MP', 3, 2}, --max pooling, size 3, stride 2
+      {'C', 16}, -- etc
+      {'C', 16},
+      {'MP', 3, 2},
+      {'C', 24},
+      {'C', 24},
+      {'MP', 3, 2}}))
+:add(scn.Convolution(2,24,32,3,1,false)) --an SC convolution on top
+:add(scn.BatchNormReLU(32))
+:add(scn.SparseToDense(2))
+:type(tensorType)
+--[[
+To use the network we must create an scn.InputBatch with the right dimensionality.
+If we want the output to have spatial size 10x10, we can find the appropriate
+input size, given that we use three layers of MP3/2 max-pooling, and finish
+with an SC convolution.
+]]
+inputSpatialSize=model:suggestInputSize(torch.LongTensor{10,10}) --103x103
+input=scn.InputBatch(2,inputSpatialSize)
+--Now we build the input batch, sample by sample, and active site by active site.
+msg={
+  " O   O  OOO  O    O    OO     O       O   OO   OOO   O    OOO   ",
+  " O   O  O    O    O   O  O    O       O  O  O  O  O  O    O  O  ",
+  " OOOOO  OO   O    O   O  O    O   O   O  O  O  OOO   O    O   O ",
+  " O   O  O    O    O   O  O     O O O O   O  O  O  O  O    O  O  ",
+  " O   O  OOO  OOO  OOO  OO       O   O     OO   O  O  OOO  OOO   ",
+}
+input:addSample()
+for y,line in ipairs(msg) do
+  for x = 1,string.len(line) do
+    if string.sub(line,x,x) == 'O' then
+      local location = torch.LongTensor{x,y}
+      local featureVector = torch.FloatTensor{1}
+      input:setLocation(location,featureVector,0)
+    end
+  end
+end
+--[[
+Optional: allow metadata preprocessing to be done in batch preparation threads
+to improve GPU utilization.
+Parameter:
+3 if using MP3/2 or size-3 stride-2 convolutions for downsizing,
+2 if using MP2
+]]
+input:precomputeMetadata(3)
+model:evaluate()
+input:type(tensorType)
+output = model:forward(input)
+--[[
+Output is 1x32x10x10: our minibatch has 1 sample, the network has 32 output
+feature planes, and 10x10 is the spatial size of the output.
+]]
+print(output:size())
+```
+## Torch Setup
+Tested with Ubuntu 16.04.
+Install [Torch](http://torch.ch/docs/getting-started.html) then: <br />
+```
+apt-get install libsparsehash-dev
+git clone git@github.com:facebookresearch/SparseConvNet.git
+then
+cd SparseConvNet/Torch/
+luarocks make sparseconvnet-0.1-1.rockspec
+```
+To run the examples you may also need to install unrar and TorchNet:
+```
+apt-get install unrar
+and
+luarocks install torchnet
+```
--- a/Torch/C.lua
+++ b/Torch/C.lua
@@ -227,11 +227,11 @@ void scn_ARCH_REAL_DIMENSIONSparseToDense_updateGradInput(
  THTensor *d_input_features, THTensor *d_output_features,
  THITensor *rulesBuffer);
-double scn_ARCH_REAL_DIMENSIONValidConvolution_updateOutput(
+double scn_ARCH_REAL_DIMENSIONSubmanifoldConvolution_updateOutput(
  THLongTensor *inputSize, THLongTensor *filterSize, void **m,
  THTensor *input_features, THTensor *output_features, THTensor *weight,
  THTensor *bias, long filterVolume, THITensor *rulesBuffer);
-void scn_ARCH_REAL_DIMENSIONValidConvolution_backward(
+void scn_ARCH_REAL_DIMENSIONSubmanifoldConvolution_backward(
  THLongTensor *inputSize, THLongTensor *filterSize, void **m,
  THTensor *input_features, THTensor *d_input_features,
  THTensor *d_output_features, THTensor *weight, THTensor *d_weight,

--- a/Torch/CAddTable.lua
+++ b/Torch/CAddTable.lua
@@ -7,7 +7,7 @@
 --[[
 Assume all the inputs have identical SparseGrids and input[i].nActive
 Assume input[1].nPlanes >= input[i].nPlanes for all i=1,#input
-output.validRules is taken from input[1].validRules (could do set union?)
+output.submanifoldRules is taken from input[1].submanifoldRules (could do set union?)
 (for resnets, make sure the residual link is input[2])
 ]]

--- a/Torch/DenseNetBlock.lua
+++ b/Torch/DenseNetBlock.lua
@@ -37,7 +37,7 @@ return function(sparseconvnet)
      --Module 4*i-1
      self:add(sparseconvnet.BatchNormalization(nFeaturesB,nil,nil,true,0))
      --Module 4*i
-      self:add(sparseconvnet.ValidConvolution(dimension, nFeaturesB, growthRate,
+      self:add(sparseconvnet.SubmanifoldConvolution(dimension, nFeaturesB, growthRate,
          3, false))
      --Module 4*i+1
      self:add(sparseconvnet.BatchNormalizationInTensor(growthRate,nil,nil,