Commit 9c865087 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

non-legacy PyTorch

parent 81d65180
...@@ -8,7 +8,7 @@ import torch ...@@ -8,7 +8,7 @@ import torch
import sparseconvnet import sparseconvnet
from . import SparseModule from . import SparseModule
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr, set from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr, set
from .sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
class JoinTable(SparseModule): class JoinTable(SparseModule):
......
...@@ -8,7 +8,7 @@ import torch ...@@ -8,7 +8,7 @@ import torch
import sparseconvnet import sparseconvnet
from . import SparseModule from . import SparseModule
from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr
from .sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
class LeakyReLU(SparseModule): class LeakyReLU(SparseModule):
......
...@@ -8,7 +8,7 @@ import torch ...@@ -8,7 +8,7 @@ import torch
import sparseconvnet import sparseconvnet
from . import SparseModule from . import SparseModule
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr
from .sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
class MaxPooling(SparseModule): class MaxPooling(SparseModule):
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
import torch.legacy.nn as nn import torch.legacy.nn as nn
from .sequential import Sequential from .sequential import Sequential
from .sparseModule import SparseModule from .sparseModule import SparseModule
from .sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
from .batchNormalization import BatchNormalization from .batchNormalization import BatchNormalization
class Tanh(SparseModule): class Tanh(SparseModule):
......
...@@ -8,7 +8,7 @@ import torch ...@@ -8,7 +8,7 @@ import torch
from . import SparseModule from . import SparseModule
import sparseconvnet as s import sparseconvnet as s
from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr
from .sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
class NetworkInNetwork(SparseModule): class NetworkInNetwork(SparseModule):
......
...@@ -16,7 +16,7 @@ import sparseconvnet ...@@ -16,7 +16,7 @@ import sparseconvnet
from torch.legacy.nn import Module from torch.legacy.nn import Module
from .leakyReLU import LeakyReLU from .leakyReLU import LeakyReLU
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr
from .sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
class ReLU(LeakyReLU): class ReLU(LeakyReLU):
......
...@@ -18,7 +18,7 @@ dimension : of the input field, ...@@ -18,7 +18,7 @@ dimension : of the input field,
import torch import torch
from . import SparseModule from . import SparseModule
from ..utils import dim_typed_fn, nullptr from ..utils import dim_typed_fn, nullptr
from .sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
class SparseToDense(SparseModule): class SparseToDense(SparseModule):
......
...@@ -8,7 +8,7 @@ import torch ...@@ -8,7 +8,7 @@ import torch
import sparseconvnet as s import sparseconvnet as s
from . import SparseModule from . import SparseModule
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr
from .sparseConvNetTensor import SparseConvNetTensor from ..sparseConvNetTensor import SparseConvNetTensor
class ValidConvolution(SparseModule): class ValidConvolution(SparseModule):
......
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from torch.autograd import Function, Variable
from torch.nn import Module
from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor
class MaxPoolingFunction(Function):
    """Autograd function for sparse max pooling.

    Dispatches to the native 'MaxPooling_updateOutput' /
    'MaxPooling_updateGradInput' kernels through the metadata's ffi
    handle; the spatial bookkeeping lives on the C side.
    """
    @staticmethod
    def forward(
        ctx,
        input_features,
        input_metadata,
        input_spatial_size,
        output_spatial_size,
        dimension,
        pool_size,
        pool_stride,
        nFeaturesToDrop):
        # Stash everything backward() will need on the context object.
        ctx.input_features=input_features
        ctx.input_metadata=input_metadata
        ctx.input_spatial_size = input_spatial_size
        ctx.output_spatial_size = output_spatial_size
        ctx.dimension = dimension
        ctx.pool_size = pool_size
        ctx.pool_stride = pool_stride
        ctx.nFeaturesToDrop = nFeaturesToDrop
        # Empty buffer of the same tensor type as the input; the native
        # kernel resizes and fills it.
        ctx.output_features = input_features.new()
        dim_typed_fn(dimension, input_features, 'MaxPooling_updateOutput')(
            input_spatial_size,
            output_spatial_size,
            pool_size,
            pool_stride,
            input_metadata.ffi,
            input_features,
            ctx.output_features,
            nFeaturesToDrop,
            # Extra scratch buffer is only passed on the GPU path.
            torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
        return ctx.output_features
    @staticmethod
    def backward(ctx, grad_output):
        # Gradient buffer; resized and filled by the native kernel.
        grad_input=Variable(grad_output.data.new())
        dim_typed_fn(
            ctx.dimension, ctx.input_features, 'MaxPooling_updateGradInput')(
            ctx.input_spatial_size,
            ctx.output_spatial_size,
            ctx.pool_size,
            ctx.pool_stride,
            ctx.input_metadata.ffi,
            ctx.input_features,
            grad_input.data,
            ctx.output_features,
            grad_output.data,
            ctx.nFeaturesToDrop,
            torch.cuda.IntTensor() if ctx.input_features.is_cuda else nullptr)
        # One gradient slot per forward() argument; only input_features
        # is differentiable.
        return grad_input, None, None, None, None, None, None, None
class MaxPooling(Module):
    """Sparse max pooling layer.

    Parameters:
      dimension       : number of spatial dimensions of the input
      pool_size       : int or per-dimension sizes (expanded by toLongTensor)
      pool_stride     : int or per-dimension strides
      nFeaturesToDrop : number of leading feature planes to skip (default 0)

    The pooling must tile the input exactly:
    (out_size - 1) * stride + size == in_size (asserted in forward).
    """
    def __init__(self, dimension, pool_size, pool_stride, nFeaturesToDrop=0):
        super(MaxPooling, self).__init__()
        self.dimension = dimension
        self.pool_size = toLongTensor(dimension, pool_size)
        self.pool_stride = toLongTensor(dimension, pool_stride)
        self.nFeaturesToDrop = nFeaturesToDrop
    def forward(self, input):
        output = SparseConvNetTensor()
        output.metadata = input.metadata
        output.spatial_size = (
            input.spatial_size - self.pool_size) / self.pool_stride + 1
        # The pooling windows must cover the input exactly.
        assert ((output.spatial_size-1)*self.pool_stride+self.pool_size==input.spatial_size).all()
        # Consistency fix: new-style autograd Functions are applied via the
        # class (cf. ValidConvolutionFunction.apply), not an instance.
        output.features = MaxPoolingFunction.apply(
            input.features, input.metadata, input.spatial_size,
            output.spatial_size, self.dimension, self.pool_size,
            self.pool_stride, self.nFeaturesToDrop)
        return output
    def input_spatial_size(self, out_size):
        # Inverse of the output-size formula used in forward().
        return (out_size - 1) * self.pool_stride + self.pool_size
    def __repr__(self):
        s = 'MaxPooling'
        if self.pool_size.max() == self.pool_size.min() and\
                self.pool_stride.max() == self.pool_stride.min():
            s = s + str(self.pool_size[0]) + '/' + str(self.pool_stride[0])
        else:
            s = s + '(' + str(self.pool_size[0])
            for i in self.pool_size[1:]:
                s = s + ',' + str(i)
            s = s + ')/(' + str(self.pool_stride[0])
            for i in self.pool_stride[1:]:
                s = s + ',' + str(i)
            s = s + ')'
        if self.nFeaturesToDrop > 0:
            # BUG FIX: nFeaturesToDrop is an int; it must be str()'d before
            # concatenation (the original raised TypeError here).
            s = s + ' nFeaturesToDrop = ' + str(self.nFeaturesToDrop)
        return s
...@@ -16,8 +16,8 @@ object must be de-serialized exactly once. ...@@ -16,8 +16,8 @@ object must be de-serialized exactly once.
""" """
import cffi import cffi
from ..utils import dim_fn from .utils import dim_fn
from ..SCN import scn_readPtr, scn_writePtr, scn_3_setInputSpatialSize from .SCN import scn_readPtr, scn_writePtr, scn_3_setInputSpatialSize
ffi = cffi.FFI() ffi = cffi.FFI()
......
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from .averagePooling import AveragePooling
from .batchNormalization import BatchNormalization, BatchNormReLU, BatchNormLeakyReLU
from .convolution import Convolution
from .sequential import Sequential
from .submanifoldConvolution import SubmanifoldConvolution
from .deconvolution import Deconvolution
from .networkInNetwork import NetworkInNetwork
from .maxPooling import MaxPooling
from .identity import Identity
from .sparseToDense import SparseToDense
from .denseToSparse import DenseToSparse
from .tables import *
def SparseVggNet(dimension, nInputPlanes, layers):
    """
    VGG style nets
    Use valid convolutions
    Also implements 'Plus'-augmented nets

    layers is a list of layer specs:
      'MP'                 -> MaxPooling(3, 2)
      ['MP', size, stride] -> MaxPooling(size, stride)
      ['C', a]             -> SubmanifoldConvolution to a planes + BatchNormReLU
      ['C', a, b]          -> 'Plus' block: a submanifold branch (a planes)
                              joined with a down/up-sampled branch (b planes)
      ['C', a, b, c] and ['C', a, b, c, d] -> deeper 'Plus' blocks whose
                              extra branches are pooled 2x / 3x before
                              deconvolving back to the input resolution
    """
    # nPlanes tracks the current number of feature planes through the net.
    nPlanes = nInputPlanes
    m = Sequential()
    for x in layers:
        if x == 'MP':
            # Bare 'MP' string: default max pooling, size 3 stride 2.
            m.add(MaxPooling(dimension, 3, 2))
        elif x[0] == 'MP':
            # ['MP', size, stride]
            m.add(MaxPooling(dimension, x[1], x[2]))
        elif x[0] == 'C' and len(x) == 2:
            # ['C', a]: plain 3^d submanifold convolution + BN/ReLU.
            m.add(SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False))
            nPlanes = x[1]
            m.add(BatchNormReLU(nPlanes))
        elif x[0] == 'C' and len(x) == 3:
            # ['C', a, b]: two branches concatenated by JoinTable.
            m.add(ConcatTable()
                  .add(
                      SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False)
                  ).add(
                      Sequential()
                      .add(Convolution(dimension, nPlanes, x[2], 3, 2, False))
                      .add(BatchNormReLU(x[2]))
                      .add(SubmanifoldConvolution(dimension, x[2], x[2], 3, False))
                      .add(BatchNormReLU(x[2]))
                      .add(Deconvolution(dimension, x[2], x[2], 3, 2, False))
                  )).add(JoinTable())
            nPlanes = x[1] + x[2]
            m.add(BatchNormReLU(nPlanes))
        elif x[0] == 'C' and len(x) == 4:
            # ['C', a, b, c]: three branches — submanifold (a planes),
            # one-level down/up (b planes), two-level down/up (c planes).
            m.add(ConcatTable()
                  .add(
                      SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False)
                  )
                  .add(
                      Sequential()
                      .add(Convolution(dimension, nPlanes, x[2], 3, 2, False))
                      .add(BatchNormReLU(x[2]))
                      .add(SubmanifoldConvolution(dimension, x[2], x[2], 3, False))
                      .add(BatchNormReLU(x[2]))
                      .add(Deconvolution(dimension, x[2], x[2], 3, 2, False))
                  )
                  .add(Sequential()
                       .add(Convolution(dimension, nPlanes, x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Convolution(dimension, x[3], x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                  )).add(JoinTable())
            nPlanes = x[1] + x[2] + x[3]
            m.add(BatchNormReLU(nPlanes))
        elif x[0] == 'C' and len(x) == 5:
            # ['C', a, b, c, d]: four branches; the last one descends three
            # levels before deconvolving back up.
            m.add(ConcatTable()
                  .add(
                      SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False)
                  )
                  .add(
                      Sequential()
                      .add(Convolution(dimension, nPlanes, x[2], 3, 2, False))
                      .add(BatchNormReLU(x[2]))
                      .add(SubmanifoldConvolution(dimension, x[2], x[2], 3, False))
                      .add(BatchNormReLU(x[2]))
                      .add(Deconvolution(dimension, x[2], x[2], 3, 2, False))
                  )
                  .add(Sequential()
                       .add(Convolution(dimension, nPlanes, x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Convolution(dimension, x[3], x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                  )
                  .add(Sequential()
                       .add(Convolution(dimension, nPlanes, x[4], 3, 2, False))
                       .add(BatchNormReLU(x[4]))
                       .add(SubmanifoldConvolution(dimension, x[4], x[4], 3, False))
                       .add(BatchNormReLU(x[4]))
                       .add(Convolution(dimension, x[4], x[4], 3, 2, False))
                       .add(BatchNormReLU(x[4]))
                       .add(SubmanifoldConvolution(dimension, x[4], x[4], 3, False))
                       .add(BatchNormReLU(x[4]))
                       .add(Convolution(dimension, x[4], x[4], 3, 2, False))
                       .add(BatchNormReLU(x[4]))
                       .add(SubmanifoldConvolution(dimension, x[4], x[4], 3, False))
                       .add(BatchNormReLU(x[4]))
                       .add(Deconvolution(dimension, x[4], x[4], 3, 2, False))
                       .add(BatchNormReLU(x[4]))
                       .add(SubmanifoldConvolution(dimension, x[4], x[4], 3, False))
                       .add(BatchNormReLU(x[4]))
                       .add(Deconvolution(dimension, x[4], x[4], 3, 2, False))
                       .add(BatchNormReLU(x[4]))
                       .add(SubmanifoldConvolution(dimension, x[4], x[4], 3, False))
                       .add(BatchNormReLU(x[4]))
                       .add(Deconvolution(dimension, x[4], x[4], 3, 2, False))
                  )).add(JoinTable())
            nPlanes = x[1] + x[2] + x[3] + x[4]
            m.add(BatchNormReLU(nPlanes))
    return m
def SparseResNet(dimension, nInputPlanes, layers):
    """
    pre-activated ResNet
    e.g. layers = {{'basic',16,2,1},{'basic',32,2}}

    Each entry of layers is (blockType, nPlanes, repetitions, stride); only
    'b...' (basic) blocks are handled. The first repetition of a group may
    downsample / change width; later repetitions use an identity shortcut.
    """
    m = Sequential()
    nPlanes = nInputPlanes

    def residual(nIn, nOut, stride):
        # Shortcut branch: strided convolution when downsampling, a 1x1
        # NetworkInNetwork when only the plane count changes, else identity.
        if stride > 1:
            return Convolution(dimension, nIn, nOut, 3, stride, False)
        if nIn != nOut:
            return NetworkInNetwork(nIn, nOut, False)
        return Identity()

    for blockType, n, reps, stride in layers:
        for rep in range(reps):
            if blockType[0] == 'b':  # basic block
                if rep == 0:
                    # First repetition: pre-activation, then a branch that
                    # may downsample (stride > 1) or change the plane count.
                    m.add(BatchNormReLU(nPlanes))
                    first = (SubmanifoldConvolution(dimension, nPlanes, n, 3, False)
                             if stride == 1 else
                             Convolution(dimension, nPlanes, n, 3, stride, False))
                    branch = (Sequential()
                              .add(first)
                              .add(BatchNormReLU(n))
                              .add(SubmanifoldConvolution(dimension, n, n, 3, False)))
                    m.add(ConcatTable()
                          .add(branch)
                          .add(residual(nPlanes, n, stride)))
                else:
                    # Later repetitions: plain pre-activated residual block
                    # with an identity shortcut.
                    branch = (Sequential()
                              .add(BatchNormReLU(nPlanes))
                              .add(SubmanifoldConvolution(dimension, nPlanes, n, 3, False))
                              .add(BatchNormReLU(n))
                              .add(SubmanifoldConvolution(dimension, n, n, 3, False)))
                    m.add(ConcatTable()
                          .add(branch)
                          .add(Identity()))
            nPlanes = n
            # Sum the two branches of the ConcatTable just added.
            m.add(AddTable())
    m.add(BatchNormReLU(nPlanes))
    return m
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import sparseconvnet
from torch.autograd import Function, Variable
from torch.nn import Module, Parameter
from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor
class NetworkInNetworkFunction(Function):
    """Autograd function for a 1x1 'network in network' linear layer
    applied to the feature matrix of a sparse tensor.

    Wraps the native 'NetworkInNetwork_updateOutput' /
    'NetworkInNetwork_updateGradInput' / 'NetworkInNetwork_accGradParameters'
    kernels and updates the package-level multiply-add counters.
    """
    @staticmethod
    def forward(
            ctx,
            input_features,
            weight,
            bias):
        # Stash the tensors backward() needs on the context object.
        ctx.input_features = input_features
        ctx.weight = weight
        ctx.bias = bias
        ctx.output_features = input_features.new()
        # The kernel returns the number of multiply-adds performed.
        sparseconvnet.forward_pass_multiplyAdd_count +=\
            typed_fn(input_features, 'NetworkInNetwork_updateOutput')(
                input_features,
                ctx.output_features,
                weight,
                bias if bias is not None else nullptr)
        sparseconvnet.forward_pass_hidden_states += ctx.output_features.nelement()
        return ctx.output_features
    @staticmethod
    def backward(ctx, grad_output):
        grad_input = Variable(grad_output.data.new())
        # BUG FIX: was .zero(), which is not a tensor method (AttributeError
        # on every backward pass with bias); the in-place op is .zero_(),
        # matching ValidConvolutionFunction.backward.
        grad_weight = Variable(
            grad_output.data.new().resize_as_(ctx.weight).zero_())
        if ctx.bias is None:
            grad_bias = None
        else:
            # Zero-initialise the freshly resized buffer before the native
            # accGradParameters kernel writes into it (consistent with the
            # grad_bias handling in ValidConvolutionFunction.backward).
            grad_bias = Variable(
                grad_output.data.new().resize_as_(ctx.bias).zero_())
        typed_fn(ctx.input_features, 'NetworkInNetwork_updateGradInput')(
            grad_input.data,
            grad_output.data,
            ctx.weight)
        typed_fn(ctx.input_features, 'NetworkInNetwork_accGradParameters')(
            ctx.input_features,
            grad_output.data,
            grad_weight.data,
            grad_bias.data if grad_bias is not None else nullptr)
        return grad_input, grad_weight, grad_bias
class NetworkInNetwork(Module):
    """Per-site linear layer (a 1x1 convolution on the feature planes).

    Parameters:
      nIn  : number of input feature planes
      nOut : number of output feature planes
      bias : add a learnable bias initialised to zero (default False)
    """
    def __init__(self, nIn, nOut, bias=False):
        Module.__init__(self)
        self.nIn = nIn
        self.nOut = nOut
        # He-style initialisation: std = sqrt(2 / fan_in).
        std = (2.0 / nIn)**0.5
        self.weight = Parameter(torch.Tensor(nIn, nOut).normal_(0, std))
        if bias:
            self.bias = Parameter(torch.Tensor(nOut).zero_())
        else:
            self.bias = None
    def forward(self, input):
        # Empty feature tensors (ndimension()==0) are allowed through.
        assert input.features.ndimension() == 0 or input.features.size(1) == self.nIn
        output = SparseConvNetTensor()
        output.metadata = input.metadata
        output.spatial_size = input.spatial_size
        # Consistency fix: new-style autograd Functions are applied via the
        # class (cf. ValidConvolutionFunction.apply), not via an instance.
        output.features = NetworkInNetworkFunction.apply(
            input.features,
            self.weight,
            self.bias)
        return output
    def __repr__(self):
        return 'NetworkInNetwork' + str(self.nIn) + '->' + str(self.nOut)
    def input_spatial_size(self, out_size):
        # A 1x1 operation leaves the spatial size unchanged.
        return out_size
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from torch.nn import Sequential as S
from .utils import set
class Sequential(S):
    """torch.nn.Sequential extended with a fluent Lua-Torch-style `add`
    method and reverse propagation of spatial sizes through the chain."""
    def input_spatial_size(self, out_size):
        # Walk the modules from output back to input, asking each one what
        # input size it needs to produce the given output size.
        size = out_size
        for key in reversed(list(self._modules.keys())):
            size = self._modules[key].input_spatial_size(size)
        return size
    def add(self, module):
        # Register under the next sequential index; return self so calls
        # can be chained.
        self._modules[str(len(self._modules))] = module
        return self
...@@ -6,7 +6,8 @@ ...@@ -6,7 +6,8 @@
import torch import torch
from ..utils import dim_fn from .utils import dim_fn
from torch.autograd import Variable
class SparseConvNetTensor(object): class SparseConvNetTensor(object):
def __init__(self, features=None, metadata=None, spatial_size=None): def __init__(self, features=None, metadata=None, spatial_size=None):
...@@ -15,6 +16,7 @@ class SparseConvNetTensor(object): ...@@ -15,6 +16,7 @@ class SparseConvNetTensor(object):
self.spatial_size = spatial_size self.spatial_size = spatial_size
def getSpatialLocations(self, spatial_size=None): def getSpatialLocations(self, spatial_size=None):
"Coordinates and batch index for the active spatial locations"
if spatial_size is None: if spatial_size is None:
spatial_size = self.spatial_size spatial_size = self.spatial_size
...@@ -25,6 +27,15 @@ class SparseConvNetTensor(object): ...@@ -25,6 +27,15 @@ class SparseConvNetTensor(object):
def type(self, t=None): def type(self, t=None):
if t: if t:
self.features = self.features.type(t) self.features = self.features.type(t)
return self
return self.features.type()
def cuda(self):
self.features = self.features.cuda()
return self
def cpu(self):
self.features = self.features.cpu()
return self return self
def set_(self): def set_(self):
...@@ -35,3 +46,8 @@ class SparseConvNetTensor(object): ...@@ -35,3 +46,8 @@ class SparseConvNetTensor(object):
def __repr__(self): def __repr__(self):
return 'SparseConvNetTensor<<' + \ return 'SparseConvNetTensor<<' + \
repr(self.features) + repr(self.metadata) + repr(self.spatial_size) + '>>' repr(self.features) + repr(self.metadata) + repr(self.spatial_size) + '>>'
def to_variable(self, requires_grad = False, volatile=False):
"Convert self.features to a variable for use with modern PyTorch interface."
self.features=Variable(self.features, requires_grad=requires_grad, volatile=volatile)
return self
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
Function to convert a SparseConvNet hidden layer to a dense convolutional
layer. Put a SparseToDense convolutional layer (or an ActivePooling layer) at
the top of your sparse network. The output can then pass to a dense
convolutional layers or (if the spatial dimensions have become trivial) a
linear classifier.
Parameters:
dimension : of the input field,
"""
from torch.autograd import Function, Variable
from torch.nn import Module
from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor
class SparseToDenseFunction(Function):
    """Autograd function converting a sparse feature matrix to a dense
    tensor.

    Wraps the native 'SparseToDense_updateOutput' /
    'SparseToDense_updateGradInput' kernels via the metadata's ffi handle.
    """
    @staticmethod
    def forward(
        ctx,
        input_features,
        input_metadata,
        spatial_size,
        dimension,
        nPlanes):
        # Keep what backward() will need on the context object.
        ctx.input_metadata=input_metadata
        ctx.spatial_size=spatial_size
        ctx.dimension=dimension
        ctx.input_features=input_features
        # Dense output buffer, same tensor type as the input features;
        # the native kernel resizes and fills it.
        output = input_features.new()
        dim_typed_fn(ctx.dimension, input_features, 'SparseToDense_updateOutput')(
            spatial_size,
            input_metadata.ffi,
            input_features,
            output,
            # Extra scratch buffer is only passed on the GPU path.
            torch.cuda.IntTensor() if input_features.is_cuda else nullptr,
            nPlanes)
        return output
    @staticmethod
    def backward(ctx, grad_output):
        # Gradient buffer; resized and filled by the native kernel.
        grad_input=Variable(grad_output.data.new())
        dim_typed_fn(ctx.dimension, ctx.input_features, 'SparseToDense_updateGradInput')(
            ctx.spatial_size,
            ctx.input_metadata.ffi,
            ctx.input_features,
            grad_input.data,
            grad_output.data,
            torch.cuda.IntTensor() if ctx.input_features.is_cuda else nullptr)
        # One gradient slot per forward() argument; only input_features
        # is differentiable.
        return grad_input, None, None, None, None
class SparseToDense(Module):
    """Convert a SparseConvNetTensor to a dense tensor (see the module
    docstring: use at the top of a sparse network before dense layers).

    Parameters:
      dimension : number of spatial dimensions of the input
      nPlanes   : number of feature planes of the input
    """
    def __init__(self, dimension, nPlanes):
        Module.__init__(self)
        self.dimension = dimension
        self.nPlanes = nPlanes
    def forward(self, input):
        # Consistency fix: new-style autograd Functions are applied via the
        # class (cf. ValidConvolutionFunction.apply), not via an instance.
        return SparseToDenseFunction.apply(
            input.features, input.metadata, input.spatial_size,
            self.dimension, self.nPlanes)
    def input_spatial_size(self, out_size):
        # The conversion does not change the spatial size.
        return out_size
    def __repr__(self):
        return 'SparseToDense(' + str(self.dimension) + ','+ str(self.nPlanes)+ ')'
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# 'SubmanifoldConvolution == ValidConvolution'
import sparseconvnet
from torch.autograd import Function, Variable
from torch.nn import Module, Parameter
from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor
class ValidConvolutionFunction(Function):
    """Autograd function for submanifold ('valid') convolution.

    Wraps the native 'ValidConvolution_updateOutput' and
    'ValidConvolution_backward' kernels; also updates the package-level
    multiply-add / hidden-state counters.
    """
    @staticmethod
    def forward(
        ctx,
        input_features,
        weight,
        bias,
        input_metadata,
        spatial_size,
        dimension,
        filter_size):
        # Stash everything backward() will need on the context object.
        ctx.input_features=input_features
        ctx.input_metadata=input_metadata
        ctx.spatial_size=spatial_size
        ctx.weight=weight
        ctx.bias=bias
        ctx.output_features=input_features.new()
        ctx.dimension=dimension
        ctx.filter_size=filter_size
        # The kernel returns the number of multiply-adds performed.
        sparseconvnet.forward_pass_multiplyAdd_count +=\
            dim_typed_fn(
                dimension, input_features, 'ValidConvolution_updateOutput')(
                spatial_size,
                filter_size,
                input_metadata.ffi,
                input_features,
                ctx.output_features,
                weight,
                bias if bias is not None else nullptr,
                0, #remove this parameter!!
                # Extra scratch buffer is only passed on the GPU path.
                torch.cuda.IntTensor() if input_features.is_cuda else nullptr)
        sparseconvnet.forward_pass_hidden_states += ctx.output_features.nelement()
        return ctx.output_features
    @staticmethod
    def backward(ctx, grad_output):
        grad_input=Variable(grad_output.data.new())
        # Start the parameter-gradient buffers from zero before the native
        # kernel writes into them.
        grad_weight=Variable(grad_output.data.new().resize_as_(ctx.weight).zero_())
        if ctx.bias is None:
            grad_bias=None
        else:
            grad_bias = Variable(grad_output.data.new().resize_as_(ctx.bias).zero_())
        dim_typed_fn(
            ctx.dimension, ctx.input_features, 'ValidConvolution_backward')(
            ctx.spatial_size,
            ctx.filter_size,
            ctx.input_metadata.ffi,
            ctx.input_features,
            grad_input.data,
            grad_output.data.contiguous(),
            ctx.weight,
            grad_weight.data,
            grad_bias.data if grad_bias is not None else nullptr,
            0, #remove this parameter
            torch.cuda.IntTensor() if ctx.input_features.is_cuda else nullptr)
        # One gradient slot per forward() argument; metadata and size
        # arguments are not differentiable.
        return grad_input, grad_weight, grad_bias, None, None, None, None
class ValidConvolution(Module):
def __init__(self, dimension, nIn, nOut, filter_size, bias):
Module.__init__(self)
self.dimension = dimension
self.nIn = nIn
self.nOut = nOut
self.filter_size = toLongTensor(dimension, filter_size)
self.filter_volume = self.filter_size.prod()
std = (2.0 / nIn / self.filter_volume)**0.5
self.weight = Parameter(torch.Tensor(
nIn * self.filter_volume, nOut
).normal_(0, std))
if bias:
self.bias = Parameter(torch.Tensor(nOut).zero_())
else:
self.bias = None
def forward(self, input):
assert input.features.ndimension()==0 or input.features.size(1) == self.nIn
output = SparseConvNetTensor()
output.metadata = input.metadata
output.spatial_size = input.spatial_size
output.features=ValidConvolutionFunction.apply(
input.features,
self.weight,
self.bias,
input.metadata,
input.spatial_size,
self.dimension,
self.filter_size)
return output
def __repr__(self):
s = 'ValidConvolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
if self.filter_size.max() == self.filter_size.min():
s = s + str(self.filter_size[0])
else:
s = s + '(' + str(self.filter_size[0])
for i in self.filter_size[1:]:
s = s + ',' + str(i)
s = s + ')'
return s
def input_spatial_size(self, out_size):
return out_size
class SubmanifoldConvolution(ValidConvolution):
    """Alias for ValidConvolution ('SubmanifoldConvolution == ValidConvolution',
    see the module header comment)."""
    pass
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from torch.autograd import Function, Variable
from torch.nn import Module
from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor
class JoinTable(Module):
    """Concatenate the feature planes of a list of sparse tensors; the
    metadata and spatial size are taken from the first input."""
    def forward(self, input):
        first = input[0]
        out = SparseConvNetTensor()
        out.metadata = first.metadata
        out.spatial_size = first.spatial_size
        # Concatenate along the feature-plane dimension (dim 1).
        out.features = torch.cat([t.features for t in input], 1)
        return out
    def input_spatial_size(self, out_size):
        # Joining does not change the spatial size.
        return out_size
class AddTable(Module):
    """Elementwise sum of the features of a list of sparse tensors; the
    metadata and spatial size are taken from the first input."""
    def forward(self, input):
        first = input[0]
        out = SparseConvNetTensor()
        out.metadata = first.metadata
        out.spatial_size = first.spatial_size
        out.features = sum([t.features for t in input])
        return out
    def input_spatial_size(self, out_size):
        # Addition does not change the spatial size.
        return out_size
class ConcatTable(Module):
    """Apply every child module to the same input and return the list of
    their outputs (Lua-Torch ConcatTable semantics)."""
    def forward(self, input):
        outputs = []
        for module in self._modules.values():
            outputs.append(module(input))
        return outputs
    def add(self, module):
        # Register under the next sequential index; fluent interface.
        self._modules[str(len(self._modules))] = module
        return self
    def input_spatial_size(self, out_size):
        # All branches receive the same input; defer to the first branch.
        return self._modules['0'].input_spatial_size(out_size)
...@@ -44,29 +44,31 @@ In theory, the library supports up to 10 dimensions. In practice, ConvNets with ...@@ -44,29 +44,31 @@ In theory, the library supports up to 10 dimensions. In practice, ConvNets with
## Hello World - PyTorch ## Hello World - PyTorch
The PyTorch interface is similar to the PyTorch's torch.nn.legacy interface SparseConvNets can be built either by [defining a function that inherits from torch.nn.Module](examples/Assamese_handwriting/VGGplus.py) or by stacking modules in a [sparseconvnet.Sequential](PyTorch/sparseconvnet/sequential.py):
``` ```
import torch import torch
import sparseconvnet.legacy as scn import sparseconvnet as scn
# Use the GPU if there is one, otherwise CPU # Use the GPU if there is one, otherwise CPU
tensorType = 'torch.cuda.FloatTensor' if torch.cuda.is_available() else 'torch.FloatTensor' use_gpu = torch.cuda.is_available()
model = scn.Sequential().add( model = scn.Sequential().add(
scn.SparseVggNet(2, 1, scn.SparseVggNet(2, 1,
[['C', 8], ['C', 8], ['MP', 3, 2], [['C', 8], ['C', 8], ['MP', 3, 2],
['C', 16], ['C', 16], ['MP', 3, 2], ['C', 16], ['C', 16], ['MP', 3, 2],
['C', 24], ['C', 24], ['MP', 3, 2]]) ['C', 24], ['C', 24], ['MP', 3, 2]])
).add( ).add(
scn.ValidConvolution(2, 24, 32, 3, False) scn.ValidConvolution(2, 24, 32, 3, False)
).add( ).add(
scn.BatchNormReLU(32) scn.BatchNormReLU(32)
).add( ).add(
scn.SparseToDense(2) scn.SparseToDense(2,32)
).type(tensorType) )
if use_gpu:
model.cuda()
# output will be 10x10 # output will be 10x10
inputSpatialSize = model.suggestInputSize(torch.LongTensor([10, 10])) inputSpatialSize = model.input_spatial_size(torch.LongTensor([10, 10]))
input = scn.InputBatch(2, inputSpatialSize) input = scn.InputBatch(2, inputSpatialSize)
msg = [ msg = [
...@@ -75,6 +77,8 @@ msg = [ ...@@ -75,6 +77,8 @@ msg = [
" XXXXX XX X X X X X X X X X XXX X X X ", " XXXXX XX X X X X X X X X X XXX X X X ",
" X X X X X X X X X X X X X X X X X X ", " X X X X X X X X X X X X X X X X X X ",
" X X XXX XXX XXX XX X X XX X X XXX XXX "] " X X XXX XXX XXX XX X X XX X X XXX XXX "]
#Add a sample using setLocation
input.addSample() input.addSample()
for y, line in enumerate(msg): for y, line in enumerate(msg):
for x, c in enumerate(line): for x, c in enumerate(line):
...@@ -83,6 +87,19 @@ for y, line in enumerate(msg): ...@@ -83,6 +87,19 @@ for y, line in enumerate(msg):
featureVector = torch.FloatTensor([1]) featureVector = torch.FloatTensor([1])
input.setLocation(location, featureVector, 0) input.setLocation(location, featureVector, 0)
#Add a sample using setLocations
input.addSample()
locations = []
features = []
for y, line in enumerate(msg):
for x, c in enumerate(line):
if c == 'X':
locations.append([x,y])
features.append([1])
locations = torch.LongTensor(locations)
features = torch.FloatTensor(features)
input.setLocations(locations, features, 0)
# Optional: allow metadata preprocessing to be done in batch preparation threads # Optional: allow metadata preprocessing to be done in batch preparation threads
# to improve GPU utilization. # to improve GPU utilization.
# #
...@@ -91,13 +108,14 @@ for y, line in enumerate(msg): ...@@ -91,13 +108,14 @@ for y, line in enumerate(msg):
# 2 if using MP2 pooling for downsizing. # 2 if using MP2 pooling for downsizing.
input.precomputeMetadata(3) input.precomputeMetadata(3)
model.evaluate() model.train()
input.type(tensorType) if use_gpu:
input.cuda()
output = model.forward(input) output = model.forward(input)
# Output is 1x32x10x10: our minibatch has 1 sample, the network has 32 output # Output is 2x32x10x10: our minibatch has 2 samples, the network has 32 output
# feature planes, and 10x10 is the spatial size of the output. # feature planes, and 10x10 is the spatial size of the output.
print(output.size()) print(output.size(), output.data.type())
``` ```
## Hello World - (Lua)Torch ## Hello World - (Lua)Torch
......
...@@ -5,37 +5,41 @@ ...@@ -5,37 +5,41 @@
# LICENSE file in the root directory of this source tree. # LICENSE file in the root directory of this source tree.
import torch import torch
import torch.legacy.nn as nn import torch.nn as nn
import sparseconvnet.legacy as scn import sparseconvnet as scn
from data import getIterators from data import getIterators
# Use the GPU if there is one, otherwise CPU
dtype = 'torch.cuda.FloatTensor' if torch.cuda.is_available() else 'torch.FloatTensor'
# two-dimensional SparseConvNet # two-dimensional SparseConvNet
model = nn.Sequential() class Model(nn.Module):
sparseModel = scn.Sequential() def __init__(self):
denseModel = nn.Sequential() nn.Module.__init__(self)
model.add(sparseModel).add(denseModel) self.sparseModel=scn.Sequential(
sparseModel.add(scn.ValidConvolution(2, 3, 8, 3, False)) ).add(scn.ValidConvolution(2, 3, 8, 3, False)
sparseModel.add(scn.MaxPooling(2, 3, 2)) ).add(scn.MaxPooling(2, 3, 2)
sparseModel.add(scn.SparseResNet(2, 8, [ ).add(scn.SparseResNet(2, 8, [
['b', 8, 2, 1], ['b', 8, 2, 1],
['b', 16, 2, 2], ['b', 16, 2, 2],
['b', 24, 2, 2], ['b', 24, 2, 2],
['b', 32, 2, 2]])) ['b', 32, 2, 2]])
sparseModel.add(scn.Convolution(2, 32, 64, 5, 1, False)) ).add(scn.Convolution(2, 32, 64, 5, 1, False)
sparseModel.add(scn.BatchNormReLU(64)) ).add(scn.BatchNormReLU(64)
sparseModel.add(scn.SparseToDense(2)) ).add(scn.SparseToDense(2,64))
denseModel.add(nn.View(-1, 64)) self.linear = nn.Linear(64, 183)
denseModel.add(nn.Linear(64, 183)) def forward(self, x):
model.type(dtype) x = self.sparseModel(x)
print(len(model.parameters()[0])) x = x.view(-1,64)
print([x.size() for x in model.parameters()[0]]) x = self.linear(x)
return x
spatial_size = sparseModel.suggestInputSize(torch.LongTensor([1, 1])) model=Model()
print('input spatial size', spatial_size) spatial_size = model.sparseModel.input_spatial_size(torch.LongTensor([1, 1]))
print('Input spatial size:', spatial_size)
dataset = getIterators(spatial_size, 63, 3) dataset = getIterators(spatial_size, 63, 3)
scn.ClassificationTrainValidate( scn.ClassificationTrainValidate(
model, dataset, model, dataset,
{'nEpochs': 100, 'initial_LR': 0.1, 'LR_decay': 0.05, 'weightDecay': 1e-4}) {'n_epochs': 100,
'initial_lr': 0.1,
'lr_decay': 0.05,
'weight_decay': 1e-4,
'use_gpu': torch.cuda.is_available(),
'check_point': True,})
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import torch.legacy.nn as nn
import sparseconvnet.legacy as scn
from data import getIterators
# Use the GPU if there is one, otherwise CPU
dtype = 'torch.cuda.FloatTensor' if torch.cuda.is_available() else 'torch.FloatTensor'
# two-dimensional SparseConvNet
# Overall model: a sparse feature extractor feeding a dense classifier head.
model = nn.Sequential()
sparseModel = scn.Sequential()
denseModel = nn.Sequential()
model.add(sparseModel).add(denseModel)
# Sparse part: conv -> max pool -> ResNet trunk -> conv -> BN/ReLU -> dense.
sparseModel.add(scn.ValidConvolution(2, 3, 8, 3, False))
sparseModel.add(scn.MaxPooling(2, 3, 2))
sparseModel.add(scn.SparseResNet(2, 8, [
    ['b', 8, 2, 1],
    ['b', 16, 2, 2],
    ['b', 24, 2, 2],
    ['b', 32, 2, 2]]))
sparseModel.add(scn.Convolution(2, 32, 64, 5, 1, False))
sparseModel.add(scn.BatchNormReLU(64))
sparseModel.add(scn.SparseToDense(2))
# Dense part: flatten the 64 feature planes, then a linear layer with
# 183 outputs.
denseModel.add(nn.View(-1, 64))
denseModel.add(nn.Linear(64, 183))
model.type(dtype)
print(len(model.parameters()[0]))
print([x.size() for x in model.parameters()[0]])
# Ask the network what input size produces a 1x1 spatial output.
spatial_size = sparseModel.suggestInputSize(torch.LongTensor([1, 1]))
print('input spatial size', spatial_size)
dataset = getIterators(spatial_size, 63, 3)
scn.ClassificationTrainValidate(
    model, dataset,
    {'nEpochs': 100, 'initial_LR': 0.1, 'LR_decay': 0.05, 'weightDecay': 1e-4})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment