Commit 1df7b845 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

3d segmentation

parent f2e3800b
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
Functions to build some networks
DeepCNet
VggNet - deep
ResNet - deeper
"""
import torch.legacy.nn
import math
from .affineReLUTrivialConvolution import AffineReLUTrivialConvolution
from .averagePooling import AveragePooling
from .cAddTable import CAddTable
from .convolution import Convolution
from .deconvolution import Deconvolution
from .denseNetBlock import DenseNetBlock
from .submanifoldConvolution import SubmanifoldConvolution
from .networkInNetwork import NetworkInNetwork
from .batchNormalization import BatchNormReLU, BatchNormalizationInTensor
from .maxPooling import MaxPooling
from .concatTable import ConcatTable
from .joinTable import JoinTable
from .sequential import Sequential
from .reLU import ReLU
from .identity import Identity
def DeepCNet(dimension, nInputPlanes, nPlanes, bn=True):
    """
    Build a DeepCNet: a size-3 convolution followed by alternating 2x2
    max-pooling and size-2 convolutions.

    i.e. sparseconvnet(2,nInputPlanes,{16,32,48,64,80},4,32) maps
    (batchSize,nInputPlanes,16n+32,16n+32)->(batchSize,80,n,n)
    Regular (i.e. not 'valid') convolutions
    https://arxiv.org/abs/1409.6070
    Based on "Multi-column Deep Neural Networks for Image Classification",
    Dan Ciresan, Ueli Meier, Jonathan Masci and Jurgen Schmidhuber

    Parameters:
      dimension    : spatial dimensionality of the input field
      nInputPlanes : number of input feature planes
      nPlanes      : list of output plane counts, one per convolution block
      bn           : if True, follow each convolution with BatchNorm+ReLU;
                     otherwise use a plain in-place ReLU
    Returns a Sequential with .nOutputPlanes set to nPlanes[-1].

    Fixes vs. the original: the Lua literal `false` (a Python NameError) is
    replaced with `False`, and a stray Lua `end` statement is removed.
    """
    m = Sequential()

    def c(nIn, nOut, size):
        # One convolution (no bias) followed by the chosen non-linearity.
        m.add(Convolution(dimension, nIn, nOut, size, 1, False))
        if bn:
            m.add(BatchNormReLU(nOut))
        else:
            m.add(ReLU(True))

    c(nInputPlanes, nPlanes[0], 3)
    for i in range(1, len(nPlanes)):
        m.add(MaxPooling(dimension, 2, 2))
        c(nPlanes[i - 1], nPlanes[i], 2)
    m.nOutputPlanes = nPlanes[-1]
    return m
def SparseVggNet(dimension, nInputPlanes, layers):
    """
    VGG style nets
    Use submanifold convolutions
    Also implements 'Plus'-augmented nets

    Each element of `layers` is one of:
      'MP'           : MaxPooling, size 3, stride 2
      ('MP', sz, st) : MaxPooling with explicit size and stride
      ('C', a)       : SubmanifoldConvolution to `a` planes + BatchNormReLU
      ('C', a, b)    : 'Plus' block: full-resolution submanifold branch
                       (a planes) joined with a stride-2 down/up branch
                       (b planes)
      ('C', a, b, c) : as above, plus a third branch downsampled twice
    Returns a Sequential; `nPlanes` tracks the running feature-plane count.
    """
    nPlanes = nInputPlanes
    m = Sequential()
    for x in layers:
        if x == 'MP':
            m.add(MaxPooling(dimension, 3, 2))
        elif x[0] == 'MP':
            m.add(MaxPooling(dimension, x[1], x[2]))
        elif x[0] == 'C' and len(x) == 2:
            # Plain VGG-style convolution block.
            m.add(SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False))
            nPlanes = x[1]
            m.add(BatchNormReLU(nPlanes))
        elif x[0] == 'C' and len(x) == 3:
            # 'Plus' block: the two branches are concatenated plane-wise.
            m.add(ConcatTable()
                  .add(
                      SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False)
                  )
                  .add(
                      # Downsample, convolve, then upsample back.
                      Sequential()
                      .add(Convolution(dimension, nPlanes, x[2], 3, 2, False))
                      .add(BatchNormReLU(x[2]))
                      .add(SubmanifoldConvolution(dimension, x[2], x[2], 3, False))
                      .add(BatchNormReLU(x[2]))
                      .add(Deconvolution(dimension, x[2], x[2], 3, 2, False))
                  )).add(JoinTable([x[1], x[2]]))
            nPlanes = x[1] + x[2]
            m.add(BatchNormReLU(nPlanes))
        elif x[0] == 'C' and len(x) == 4:
            # 'Plus' block with an extra branch downsampled twice (two
            # stride-2 convolutions, two stride-2 deconvolutions).
            m.add(ConcatTable()
                  .add(
                      SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False)
                  )
                  .add(
                      Sequential()
                      .add(Convolution(dimension, nPlanes, x[2], 3, 2, False))
                      .add(BatchNormReLU(x[2]))
                      .add(SubmanifoldConvolution(dimension, x[2], x[2], 3, False))
                      .add(BatchNormReLU(x[2]))
                      .add(Deconvolution(dimension, x[2], x[2], 3, 2, False))
                  )
                  .add(Sequential()
                       .add(Convolution(dimension, nPlanes, x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Convolution(dimension, x[3], x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                       )).add(JoinTable([x[1], x[2], x[3]]))
            nPlanes = x[1] + x[2] + x[3]
            m.add(BatchNormReLU(nPlanes))
    return m
def SparseResNet(dimension, nInputPlanes, layers):
    """
    pre-activated ResNet
    e.g. layers = {{'basic',16,2,1},{'basic',32,2}}

    Each entry of `layers` is (blockType, n, reps, stride): `reps` residual
    blocks with `n` output planes; the first block of a group applies
    `stride` and adapts the plane count.
    """
    nPlanes = nInputPlanes
    m = Sequential()

    def residual(nIn, nOut, stride):
        # Shortcut branch: strided convolution when downsampling, a 1x1
        # NetworkInNetwork when only the plane count changes, else identity.
        if stride > 1:
            return Convolution(dimension, nIn, nOut, 3, stride, False)
        elif nIn != nOut:
            return NetworkInNetwork(nIn, nOut, False)
        else:
            return Identity()

    for blockType, n, reps, stride in layers:
        for rep in range(reps):
            if blockType[0] == 'b':  # basic block
                if rep == 0:
                    # First block of the group: the pre-activation BN/ReLU is
                    # shared by both branches; the main branch may be strided.
                    m.add(BatchNormReLU(nPlanes))
                    m.add(
                        ConcatTable().add(
                            Sequential().add(
                                SubmanifoldConvolution(
                                    dimension, nPlanes, n, 3, False)
                                if stride == 1 else Convolution(
                                    dimension, nPlanes, n, 3, stride, False))
                            .add(BatchNormReLU(n))
                            .add(SubmanifoldConvolution(
                                dimension, n, n, 3, False)))
                        .add(residual(nPlanes, n, stride)))
                else:
                    # Later blocks: standard pre-activated residual block with
                    # an identity shortcut.
                    m.add(
                        ConcatTable().add(
                            Sequential()
                            .add(BatchNormReLU(nPlanes))
                            .add(SubmanifoldConvolution(
                                dimension, nPlanes, n, 3, False))
                            .add(BatchNormReLU(n))
                            .add(SubmanifoldConvolution(
                                dimension, n, n, 3, False)))
                        .add(Identity()))
            nPlanes = n
            # Sum the two branches of the ConcatTable (in-place variant).
            m.add(CAddTable(True))
    m.add(BatchNormReLU(nPlanes))
    return m
def SparseDenseNet(dimension, nInputPlanes, layers):
    """
    SparseConvNet meets DenseNets using submanifold convolutions
    Could do with a less confusing name

    `layers` is a list of dicts.  A dict containing key 'pool' describes a
    transition layer, with 'pool' one of 'MP', 'AP', 'BN-R-C-AP' or 'C-AP'
    and optional keys 'size' (default 2), 'stride' (default 2),
    'base' (default 16) and 'compression' (default 0).  Any other dict
    describes a DenseNet block with optional keys 'nExtraLayers' (default 2)
    and 'growthRate' (default 16).
    Returns a Sequential with .nOutputPlanes set.

    Fix vs. the original: the pooling dispatch mixed a standalone `if` for
    'MP' with a separate `if`/`elif` chain for the other modes; since the
    modes are mutually exclusive it is now a single `if`/`elif` chain.
    """
    nPlanes = nInputPlanes
    m = Sequential()
    for x in layers:
        if 'pool' in x:
            # Transition layer: fill in defaults, compute how many planes to
            # drop (a multiple of 'base', proportional to 'compression').
            x.setdefault('size', 2)
            x.setdefault('stride', 2)
            x.setdefault('base', 16)
            x.setdefault('compression', 0)
            nDrop = x['base'] * \
                math.floor(nPlanes * x['compression'] / x['base'])
            if x['pool'] == 'MP':
                m.add(MaxPooling(dimension, x['size'], x['stride'], nDrop))
                nPlanes -= nDrop
            elif x['pool'] == 'AP':
                m.add(AveragePooling(dimension, x['size'], x['stride'], nDrop))
                nPlanes -= nDrop
            elif x['pool'] == 'BN-R-C-AP':
                # BatchNorm/ReLU, 1x1 convolution to compress, then pool.
                m.add(BatchNormReLU(nPlanes))
                m.add(NetworkInNetwork(nPlanes, nPlanes - nDrop))
                nPlanes = nPlanes - nDrop
                m.add(AveragePooling(dimension, x['size'], x['stride']))
            elif x['pool'] == 'C-AP':
                # 1x1 convolution to compress, then pool.
                m.add(NetworkInNetwork(nPlanes, nPlanes - nDrop))
                nPlanes = nPlanes - nDrop
                m.add(AveragePooling(dimension, x['size'], x['stride']))
        else:
            # DenseNet block: adds nExtraLayers * growthRate new planes.
            x.setdefault('nExtraLayers', 2)
            x.setdefault('growthRate', 16)
            m.add(
                DenseNetBlock(
                    dimension,
                    nPlanes,
                    x['nExtraLayers'],
                    x['growthRate']))
            nPlanes = nPlanes + x['nExtraLayers'] * x['growthRate']
    m.nOutputPlanes = nPlanes
    return m
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
from . import SparseModule
import sparseconvnet as s
from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
class NetworkInNetwork(SparseModule):
    """1x1 'network in network' layer: a per-active-site linear map from
    nIn to nOut feature planes.  The sparsity pattern (metadata and spatial
    size) is passed through unchanged; only the features are transformed.
    """

    def __init__(self, nIn, nOut, bias=True):
        SparseModule.__init__(self)
        self.nIn = nIn
        self.nOut = nOut
        # Weight init std = sqrt(2 / fan-in).
        std = (2.0 / self.nIn)**0.5
        self.weight = torch.Tensor(nIn, nOut).normal_(0, std)
        self.gradWeight = torch.Tensor(nIn, nOut).fill_(std)
        if bias:
            self.bias = torch.Tensor(nOut).zero_()
            self.gradBias = torch.Tensor(nOut).zero_()
        self.output = SparseConvNetTensor(torch.Tensor())
        self.gradInput = torch.Tensor()

    def updateOutput(self, input):
        # Reuse the input's sparsity metadata; the C/CUDA backend fills in
        # output.features and returns the multiply-add count for profiling.
        self.output.metadata = input.metadata
        self.output.spatial_size = input.spatial_size
        s.forward_pass_multiplyAdd_count +=\
            typed_fn(input.features, 'NetworkInNetwork_updateOutput')(
                input.features,
                self.output.features,
                self.weight,
                optionalTensor(self, 'bias'))
        s.forward_pass_hidden_states += self.output.features.nelement()
        return self.output

    def updateGradInput(self, input, gradOutput):
        typed_fn(input.features, 'NetworkInNetwork_updateGradInput')(
            self.gradInput,
            gradOutput,
            self.weight)
        return self.gradInput

    def accGradParameters(self, input, gradOutput, scale=1):
        # The backend only supports unit scale.
        assert scale == 1
        typed_fn(input.features, 'NetworkInNetwork_accGradParameters')(
            input.features,
            gradOutput,
            self.gradWeight,
            optionalTensor(self, 'gradBias'))

    def __repr__(self):
        # NOTE(review): local `s` shadows the `sparseconvnet as s` module
        # alias inside this method only.
        s = 'NetworkInNetwork' + str(self.nIn) + '->' + str(self.nOut)
        return s

    def type(self, t=None, tensorCache=None):
        # With no argument, act as a getter (legacy nn convention).
        if t is None:
            return self._type
        self._type = t
        self.weight = self.weight.type(t)
        self.gradWeight = self.gradWeight.type(t)
        self.output.type(t)
        self.gradInput = self.gradInput.type(t)
        if hasattr(self, 'bias'):
            self.bias = self.bias.type(t)
            self.gradBias = self.gradBias.type(t)
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
If a LeakyReLU has leakiness zero, what is it?
Parameters
ip : operate in place (default true)
"""
import torch
import sparseconvnet
from torch.legacy.nn import Module
from .leakyReLU import LeakyReLU
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
class ReLU(LeakyReLU):
    """A LeakyReLU with leakiness zero, i.e. a standard rectified linear unit.

    Parameters:
      ip : operate in place (default True)

    Fix vs. the original: the module docstring documents `ip` as defaulting
    to true, but the parameter had no default; `ip=True` makes the signature
    match the documented contract while remaining backward compatible.
    """

    def __init__(self, ip=True):
        # Leakiness 0 turns the LeakyReLU into a plain ReLU.
        LeakyReLU.__init__(self, 0, ip)
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from torch.legacy.nn import Sequential as S
from ..utils import set
class Sequential(S):
    """Sparse-aware Sequential container: extends the legacy nn Sequential
    with input-size back-propagation and recursive state clearing."""

    def __init__(self):
        S.__init__(self)

    def suggestInputSize(self, out_size):
        # Walk the children back-to-front, letting each module translate the
        # desired output size into the input size it needs.
        size = out_size
        for module in reversed(self.modules):
            size = module.suggestInputSize(size)
        return size

    def clearState(self):
        # Release the container's cached tensors, then recurse into children.
        set(self.output)
        set(self.gradInput)
        for module in self.modules:
            module.clearState()
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from torch.legacy.nn import Module
from ..utils import set
class SparseModule(Module):
    """Common base class for sparseconvnet layers, built on the legacy
    nn Module."""

    def __init__(self):
        Module.__init__(self)

    def suggestInputSize(self, out_size):
        # Default: spatial-size preserving; layers that resample override this.
        return out_size

    def clearState(self):
        # Release cached output and gradient tensors.
        set(self.output)
        set(self.gradInput)
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
Function to convert a SparseConvNet hidden layer to a dense convolutional
layer. Put a SparseToDense convolutional layer (or an ActivePooling layer) at
the top of your sparse network. The output can then pass to a dense
convolutional layers or (if the spatial dimensions have become trivial) a
linear classifier.
Parameters:
dimension : of the input field,
"""
import torch
from . import SparseModule
from ..utils import dim_typed_fn, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
class SparseToDense(SparseModule):
    """Convert a sparse hidden layer into a dense tensor (see module
    docstring above for usage).

    Parameters:
      dimension : spatial dimensionality of the input field
      nPlanes   : number of feature planes; inferred from the first input
                  when None
    """

    def __init__(self, dimension, nPlanes=None):
        SparseModule.__init__(self)
        self.dimension = dimension
        self.output = torch.Tensor()
        self.gradInput = torch.FloatTensor()
        self.nPlanes = nPlanes

    def updateOutput(self, input):
        # Lazily infer the plane count from the incoming features.
        if not self.nPlanes:
            self.nPlanes = input.features.size(1)
        dim_typed_fn(
            self.dimension,
            input.features,
            'SparseToDense_updateOutput')(
            input.spatial_size,
            input.metadata.ffi,
            input.features,
            self.output,
            # Presumably a CUDA scratch buffer (unused on CPU) -- TODO
            # confirm against the C backend.
            torch.cuda.IntTensor() if input.features.is_cuda else nullptr,
            self.nPlanes)
        return self.output

    def updateGradInput(self, input, gradOutput):
        dim_typed_fn(
            self.dimension,
            input.features,
            'SparseToDense_updateGradInput')(
            input.spatial_size,
            input.metadata.ffi,
            input.features,
            self.gradInput,
            gradOutput,
            torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
        return self.gradInput

    def __repr__(self):
        return 'SparseToDense(' + str(self.dimension) + ')'
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import sparseconvnet as s
from . import SparseModule
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
class SubmanifoldConvolution(SparseModule):
    """Submanifold (sparsity-preserving) convolution: the output reuses the
    input's metadata and spatial size, so the set of active sites is
    unchanged.

    Parameters:
      dimension   : spatial dimensionality
      nIn, nOut   : input / output feature planes
      filter_size : scalar or per-dimension filter sizes
      bias        : whether to learn an additive bias
    """

    def __init__(self, dimension, nIn, nOut, filter_size, bias):
        SparseModule.__init__(self)
        self.dimension = dimension
        self.nIn = nIn
        self.nOut = nOut
        # A scalar filter size is broadcast to every dimension.
        self.filter_size = toLongTensor(dimension, filter_size)
        self.filter_volume = self.filter_size.prod().item()
        # Weight init std = sqrt(2 / fan-in), fan-in = nIn * filter volume.
        std = (2.0 / nIn / self.filter_volume)**0.5
        self.weight = torch.Tensor(
            nIn * self.filter_volume, nOut
        ).normal_(0, std)
        self.gradWeight = torch.Tensor(
            nIn * self.filter_volume, nOut).fill_(std)
        if bias:
            self.bias = torch.Tensor(nOut).zero_()
            self.gradBias = torch.Tensor(nOut).zero_()
        self.output = SparseConvNetTensor(torch.Tensor())
        self.gradInput = torch.Tensor()

    def updateOutput(self, input):
        # ndimension()==0 permits an empty, not-yet-initialised feature tensor.
        assert input.features.ndimension() == 0 or input.features.size(1) == self.nIn
        self.output.metadata = input.metadata
        self.output.spatial_size = input.spatial_size
        # The backend returns the multiply-add count for profiling.
        s.forward_pass_multiplyAdd_count +=\
            dim_typed_fn(self.dimension, input.features, 'SubmanifoldConvolution_updateOutput')(
                input.spatial_size,
                self.filter_size,
                input.metadata.ffi,
                input.features,
                self.output.features,
                self.weight,
                optionalTensor(self, 'bias'),
                self.filter_volume,
                # Presumably a CUDA scratch buffer -- TODO confirm.
                torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
        s.forward_pass_hidden_states += self.output.features.nelement()
        return self.output

    def backward(self, input, gradOutput, scale=1):
        # Fused updateGradInput + accGradParameters; backend supports
        # scale == 1 only.
        assert scale == 1
        dim_typed_fn(
            self.dimension,
            input.features,
            'SubmanifoldConvolution_backward')(
            input.spatial_size,
            self.filter_size,
            input.metadata.ffi,
            input.features,
            self.gradInput,
            gradOutput,
            self.weight,
            self.gradWeight,
            optionalTensor(
                self,
                'gradBias'),
            self.filter_volume,
            torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
        return self.gradInput

    def type(self, t=None, tensorCache=None):
        # With no argument, act as a getter (legacy nn convention).
        if t is None:
            return self._type
        self._type = t
        self.weight = self.weight.type(t)
        self.gradWeight = self.gradWeight.type(t)
        self.output.type(t)
        self.gradInput = self.gradInput.type(t)
        if hasattr(self, 'bias'):
            self.bias = self.bias.type(t)
            self.gradBias = self.gradBias.type(t)

    def __repr__(self):
        # e.g. 'SubmanifoldConvolution 16->32 C3' or '... C(3,5)' when the
        # filter is anisotropic.
        s = 'SubmanifoldConvolution ' + \
            str(self.nIn) + '->' + str(self.nOut) + ' C'
        if self.filter_size.max() == self.filter_size.min():
            s = s + str(self.filter_size[0].item())
        else:
            s = s + '(' + str(self.filter_size[0].item())
            for i in self.filter_size[1:]:
                s = s + ',' + str(i.item())
            s = s + ')'
        return s
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from .averagePooling import AveragePooling
from .batchNormalization import BatchNormalization, BatchNormReLU, BatchNormLeakyReLU
from .convolution import Convolution
from .sequential import Sequential
from .submanifoldConvolution import SubmanifoldConvolution
from .deconvolution import Deconvolution
from .networkInNetwork import NetworkInNetwork
from .maxPooling import MaxPooling
from .identity import Identity
from .sparseToDense import SparseToDense
from .denseToSparse import DenseToSparse
from .tables import *
def SparseVggNet(dimension, nInputPlanes, layers):
    """
    VGG style nets
    Use submanifold convolutions
    Also implements 'Plus'-augmented nets

    Each element of `layers` is one of:
      'MP'              : MaxPooling, size 3, stride 2
      ('MP', sz, st)    : MaxPooling with explicit size and stride
      ('C', a)          : SubmanifoldConvolution to `a` planes + BatchNormReLU
      ('C', a, b, ...)  : 'Plus' block: a full-resolution submanifold branch
                          joined with one, two or three progressively more
                          downsampled conv/deconv branches
    Returns a Sequential; `nPlanes` tracks the running feature-plane count.
    """
    nPlanes = nInputPlanes
    m = Sequential()
    for x in layers:
        if x == 'MP':
            m.add(MaxPooling(dimension, 3, 2))
        elif x[0] == 'MP':
            m.add(MaxPooling(dimension, x[1], x[2]))
        elif x[0] == 'C' and len(x) == 2:
            # Plain VGG-style convolution block.
            m.add(SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False))
            nPlanes = x[1]
            m.add(BatchNormReLU(nPlanes))
        elif x[0] == 'C' and len(x) == 3:
            # 'Plus' block with one stride-2 down/up branch.
            m.add(ConcatTable()
                  .add(
                      SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False)
                  ).add(
                      Sequential()
                      .add(Convolution(dimension, nPlanes, x[2], 3, 2, False))
                      .add(BatchNormReLU(x[2]))
                      .add(SubmanifoldConvolution(dimension, x[2], x[2], 3, False))
                      .add(BatchNormReLU(x[2]))
                      .add(Deconvolution(dimension, x[2], x[2], 3, 2, False))
                  )).add(JoinTable())
            nPlanes = x[1] + x[2]
            m.add(BatchNormReLU(nPlanes))
        elif x[0] == 'C' and len(x) == 4:
            # Adds a third branch downsampled twice, then upsampled twice.
            m.add(ConcatTable()
                  .add(
                      SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False)
                  )
                  .add(
                      Sequential()
                      .add(Convolution(dimension, nPlanes, x[2], 3, 2, False))
                      .add(BatchNormReLU(x[2]))
                      .add(SubmanifoldConvolution(dimension, x[2], x[2], 3, False))
                      .add(BatchNormReLU(x[2]))
                      .add(Deconvolution(dimension, x[2], x[2], 3, 2, False))
                  )
                  .add(Sequential()
                       .add(Convolution(dimension, nPlanes, x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Convolution(dimension, x[3], x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                       )).add(JoinTable())
            nPlanes = x[1] + x[2] + x[3]
            m.add(BatchNormReLU(nPlanes))
        elif x[0] == 'C' and len(x) == 5:
            # Adds a fourth branch downsampled three times.
            m.add(ConcatTable()
                  .add(
                      SubmanifoldConvolution(dimension, nPlanes, x[1], 3, False)
                  )
                  .add(
                      Sequential()
                      .add(Convolution(dimension, nPlanes, x[2], 3, 2, False))
                      .add(BatchNormReLU(x[2]))
                      .add(SubmanifoldConvolution(dimension, x[2], x[2], 3, False))
                      .add(BatchNormReLU(x[2]))
                      .add(Deconvolution(dimension, x[2], x[2], 3, 2, False))
                  )
                  .add(Sequential()
                       .add(Convolution(dimension, nPlanes, x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Convolution(dimension, x[3], x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                       .add(BatchNormReLU(x[3]))
                       .add(SubmanifoldConvolution(dimension, x[3], x[3], 3, False))
                       .add(BatchNormReLU(x[3]))
                       .add(Deconvolution(dimension, x[3], x[3], 3, 2, False))
                       )
                  .add(Sequential()
                       .add(Convolution(dimension, nPlanes, x[4], 3, 2, False))
                       .add(BatchNormReLU(x[4]))
                       .add(SubmanifoldConvolution(dimension, x[4], x[4], 3, False))
                       .add(BatchNormReLU(x[4]))
                       .add(Convolution(dimension, x[4], x[4], 3, 2, False))
                       .add(BatchNormReLU(x[4]))
                       .add(SubmanifoldConvolution(dimension, x[4], x[4], 3, False))
                       .add(BatchNormReLU(x[4]))
                       .add(Convolution(dimension, x[4], x[4], 3, 2, False))
                       .add(BatchNormReLU(x[4]))
                       .add(SubmanifoldConvolution(dimension, x[4], x[4], 3, False))
                       .add(BatchNormReLU(x[4]))
                       .add(Deconvolution(dimension, x[4], x[4], 3, 2, False))
                       .add(BatchNormReLU(x[4]))
                       .add(SubmanifoldConvolution(dimension, x[4], x[4], 3, False))
                       .add(BatchNormReLU(x[4]))
                       .add(Deconvolution(dimension, x[4], x[4], 3, 2, False))
                       .add(BatchNormReLU(x[4]))
                       .add(SubmanifoldConvolution(dimension, x[4], x[4], 3, False))
                       .add(BatchNormReLU(x[4]))
                       .add(Deconvolution(dimension, x[4], x[4], 3, 2, False))
                       )).add(JoinTable())
            nPlanes = x[1] + x[2] + x[3] + x[4]
            m.add(BatchNormReLU(nPlanes))
    return m
def SparseResNet(dimension, nInputPlanes, layers):
    """
    pre-activated ResNet
    e.g. layers = {{'basic',16,2,1},{'basic',32,2}}

    Each entry of `layers` is (blockType, n, reps, stride): `reps` residual
    blocks with `n` output planes; the first block of a group applies
    `stride` and adapts the plane count.
    """
    nPlanes = nInputPlanes
    m = Sequential()

    def residual(nIn, nOut, stride):
        # Shortcut branch: strided convolution when downsampling, a 1x1
        # NetworkInNetwork when only the plane count changes, else identity.
        if stride > 1:
            return Convolution(dimension, nIn, nOut, 3, stride, False)
        elif nIn != nOut:
            return NetworkInNetwork(nIn, nOut, False)
        else:
            return Identity()

    for blockType, n, reps, stride in layers:
        for rep in range(reps):
            if blockType[0] == 'b':  # basic block
                if rep == 0:
                    # First block of the group: the pre-activation BN/ReLU is
                    # shared by both branches; the main branch may be strided.
                    m.add(BatchNormReLU(nPlanes))
                    m.add(
                        ConcatTable().add(
                            Sequential().add(
                                SubmanifoldConvolution(
                                    dimension, nPlanes, n, 3, False)
                                if stride == 1 else Convolution(
                                    dimension, nPlanes, n, 3, stride, False))
                            .add(BatchNormReLU(n))
                            .add(SubmanifoldConvolution(
                                dimension, n, n, 3, False)))
                        .add(residual(nPlanes, n, stride)))
                else:
                    # Later blocks: standard pre-activated residual block with
                    # an identity shortcut.
                    m.add(
                        ConcatTable().add(
                            Sequential()
                            .add(BatchNormReLU(nPlanes))
                            .add(SubmanifoldConvolution(
                                dimension, nPlanes, n, 3, False))
                            .add(BatchNormReLU(n))
                            .add(SubmanifoldConvolution(
                                dimension, n, n, 3, False)))
                        .add(Identity()))
            nPlanes = n
            # Sum the two branches of the ConcatTable.
            m.add(AddTable())
    m.add(BatchNormReLU(nPlanes))
    return m
def ResNetUNet(dimension, nPlanes, reps, depth=4):
    """
    U-Net style network with ResNet-style blocks.
    For voxel level prediction:
    import sparseconvnet as scn
    import torch.nn
    class Model(nn.Module):
    def __init__(self):
    nn.Module.__init__(self)
    self.sparseModel = scn.Sequential().add(
    scn.SubmanifoldConvolution(3, nInputFeatures, 64, 3, False)).add(
    scn.ResNetUNet(3, 64, 2, 4))
    self.linear = nn.Linear(64, nClasses)
    def forward(self,x):
    x=self.sparseModel(x).features
    x=self.linear(x)
    return x

    Parameters:
      dimension : spatial dimensionality
      nPlanes   : feature planes used throughout the U
      reps      : residual blocks per level, before and after the inner U
      depth     : number of resolution levels (default 4)
    """
    def res(m, a, b):
        # Append one pre-activated residual block mapping a -> b planes to m;
        # the shortcut is identity when a == b, else a 1x1 NetworkInNetwork.
        m.add(ConcatTable()
              .add(Identity() if a == b else NetworkInNetwork(a, b, False))
              .add(Sequential()
                   .add(BatchNormReLU(a))
                   .add(SubmanifoldConvolution(dimension, a, b, 3, False))
                   .add(BatchNormReLU(b))
                   .add(SubmanifoldConvolution(dimension, b, b, 3, False))))\
         .add(AddTable())

    def v(depth, nPlanes):
        # Recursively build one level of the U: residual blocks, then (above
        # the bottom level) an identity branch joined with a
        # downsample -> v(depth-1) -> upsample branch, then more residual
        # blocks to merge the concatenated planes back down to nPlanes.
        m = Sequential()
        if depth == 1:
            for _ in range(reps):
                res(m, nPlanes, nPlanes)
        else:
            m = Sequential()
            for _ in range(reps):
                res(m, nPlanes, nPlanes)
            m.add(
                ConcatTable()
                .add(Identity())
                .add(Sequential()
                     .add(BatchNormReLU(nPlanes))
                     .add(Convolution(dimension, nPlanes, nPlanes,
                                      2, 2, False))
                     .add(v(depth - 1, nPlanes))
                     .add(BatchNormReLU(nPlanes))
                     .add(Deconvolution(dimension, nPlanes, nPlanes,
                                        2, 2, False))))
            m.add(JoinTable())
            for i in range(reps):
                # After the join the first block sees 2*nPlanes input planes.
                res(m, 2 * nPlanes if i == 0 else nPlanes, nPlanes)
        return m

    m = v(depth, nPlanes)
    m.add(BatchNormReLU(nPlanes))
    return m
......@@ -17,7 +17,7 @@ This is the Torch/PyTorch library for training Submanifold Sparse Convolutional
## Spatial sparsity
This library brings [Spatially-sparse convolutional networks](https://github.com/btgraham/SparseConvNet) to PyTorch and [Torch classic](README_Torch.md). Moreover, it introduces **Submanifold Sparse Convolutions**, that can be used to build computationally efficient sparse VGG/ResNet/DenseNet-style networks.
This library brings [Spatially-sparse convolutional networks](https://github.com/btgraham/SparseConvNet) to PyTorch. Moreover, it introduces **Submanifold Sparse Convolutions**, that can be used to build computationally efficient sparse VGG/ResNet/DenseNet-style networks.
With regular 3x3 convolutions, the set of active (non-zero) sites grows rapidly:<br />
![submanifold](img/i.gif) <br />
......@@ -50,7 +50,7 @@ In theory, the library supports up to 10 dimensions. In practice, ConvNets with
## Hello World - PyTorch
## Hello World
SparseConvNets can be built either by [defining a function that inherits from torch.nn.Module](examples/Assamese_handwriting/VGGplus.py) or by stacking modules in a [sparseconvnet.Sequential](PyTorch/sparseconvnet/sequential.py):
```
import torch
......@@ -139,17 +139,13 @@ cd examples/Assamese_handwriting
python VGGplus.py
```
## PyTorch Setup
## Setup
Tested with Ubuntu 16.04, Python 3 in [Miniconda](https://conda.io/miniconda.html) and PyTorch master (v0.4 with merged Tensors/Variables).
Tested with Ubuntu 16.04, Python 3 in [Miniconda](https://conda.io/miniconda.html) and PyTorch v0.4 (with merged Tensors/Variables).
```
git clone https://github.com/pytorch/pytorch.git
cd pytorch
python setup.py install
cd ..
apt-get install libsparsehash-dev
conda install -c pytorch # OR git clone https://github.com/pytorch/pytorch.git; cd pytorch; python setup.py install; cd ..
conda install -c bioconda google-sparsehash # OR apt-get install libsparsehash-dev
git clone git@github.com:facebookresearch/SparseConvNet.git
cd SparseConvNet/PyTorch/
python setup.py install
......
## Hello World - (Lua)Torch
Convolutional networks are built with SparseConvNet in the same way as with Torch's nn/cunn/cudnn packages.
```
--Train on the GPU if there is one, otherwise CPU
scn=require 'sparseconvnet'
tensorType = scn.cutorch and 'torch.CudaTensor' or 'torch.FloatTensor'
model = scn.Sequential()
:add(scn.SparseVggNet(2,1,{ --dimension 2, 1 input plane
{'C', 8}, -- 3x3 VSC convolution, 8 output planes, batchnorm, ReLU
{'C', 8}, -- and another
{'MP', 3, 2}, --max pooling, size 3, stride 2
{'C', 16}, -- etc
{'C', 16},
{'MP', 3, 2},
{'C', 24},
{'C', 24},
{'MP', 3, 2}}))
:add(scn.Convolution(2,24,32,3,1,false)) --an SC convolution on top
:add(scn.BatchNormReLU(32))
:add(scn.SparseToDense(2))
:type(tensorType)
--[[
To use the network we must create an scn.InputBatch with right dimensionality.
If we want the output to have spatial size 10x10, we can find the appropriate
input size, given that we use three layers of MP3/2 max-pooling, and finish
with a SC convolution
]]
inputSpatialSize=model:suggestInputSize(torch.LongTensor{10,10}) --103x103
input=scn.InputBatch(2,inputSpatialSize)
--Now we build the input batch, sample by sample, and active site by active site.
msg={
" O O OOO O O OO O O OO OOO O OOO ",
" O O O O O O O O O O O O O O O O ",
" OOOOO OO O O O O O O O O O OOO O O O ",
" O O O O O O O O O O O O O O O O O O ",
" O O OOO OOO OOO OO O O OO O O OOO OOO ",
}
input:addSample()
for y,line in ipairs(msg) do
for x = 1,string.len(line) do
if string.sub(line,x,x) == 'O' then
local location = torch.LongTensor{x,y}
local featureVector = torch.FloatTensor{1}
input:setLocation(location,featureVector,0)
end
end
end
--[[
Optional: allow metadata preprocessing to be done in batch preparation threads
to improve GPU utilization.
Parameter:
3 if using MP3/2 or size-3 stride-2 convolutions for downsizing,
2 if using MP2
]]
input:precomputeMetadata(3)
model:evaluate()
input:type(tensorType)
output = model:forward(input)
--[[
Output is 1x32x10x10: our minibatch has 1 sample, the network has 32 output
feature planes, and 10x10 is the spatial size of the output.
]]
print(output:size())
```
## Torch Setup
Tested with Ubuntu 16.04.
Install [Torch](http://torch.ch/docs/getting-started.html) then: <br />
```
apt-get install libsparsehash-dev
git clone git@github.com:facebookresearch/SparseConvNet.git
cd SparseConvNet/Torch/
luarocks make sparseconvnet-0.1-1.rockspec
```
To run the examples you may also need to install unrar and TorchNet:
```
apt-get install unrar
luarocks install torchnet
```
-- Copyright 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the license found in the
-- LICENSE file in the root directory of this source tree.
return function(sparseconvnet)
  local C = sparseconvnet.C
  -- Fused layer: per-plane affine transform (scale + shift), ReLU, then a
  -- 1x1 "trivial" convolution (an nIn x nOut linear map on the features).
  -- The sparsity pattern is passed through unchanged.
  local AF,parent = torch.class(
    'sparseconvnet.AffineReluTrivialConvolution', 'nn.Module',sparseconvnet)

  --- additiveGrad: if true, passed to the C backward kernel; presumably it
  -- accumulates into gradInput rather than overwriting -- TODO confirm
  -- against the C implementation.
  function AF:__init(nInputPlanes, nOutputPlanes,additiveGrad)
    parent.__init(self)
    self.nInputPlanes=nInputPlanes
    self.nOutputPlanes=nOutputPlanes
    self.affineWeight = torch.Tensor(nInputPlanes)
    self.affineBias = torch.Tensor(nInputPlanes)
    self.convWeight = torch.Tensor(nInputPlanes,nOutputPlanes)
    self.gradAffineWeight = torch.Tensor(nInputPlanes):fill(0)
    self.gradAffineBias = torch.Tensor(nInputPlanes):zero()
    self.gradConvWeight = torch.Tensor(nInputPlanes,nOutputPlanes):zero()
    self.additiveGrad=additiveGrad or false --boolean
    self.output={
      features=torch.Tensor(),
    }
    self:reset()
  end

  function AF:reset()
    -- Identity affine part; conv weights drawn with std sqrt(2/nInputPlanes).
    self.affineWeight:fill(1)
    self.affineBias:zero()
    self.convWeight:normal(0,math.sqrt(2/self.nInputPlanes)) --not 2/self.nOutputPlanes?
  end

  function AF:parameters()
    -- Learnable parameters and their gradient buffers, in matching order.
    return {self.affineWeight, self.affineBias, self.convWeight},
    {self.gradAffineWeight, self.gradAffineBias, self.gradConvWeight}
  end

  function AF:updateOutput(input)
    -- Metadata and spatial size pass through; only features are transformed.
    self.output.metadata = input.metadata
    self.output.spatialSize = input.spatialSize
    C.typedFn(self._type,'AffineReluTrivialConvolution_updateOutput')(
      input.features:cdata(),
      self.output.features:cdata(),
      self.affineWeight:cdata(),
      self.affineBias:cdata(),
      self.convWeight:cdata())
    -- Profiling counters: multiply-adds and hidden-state sizes.
    self.shared.forwardPassMultiplyAddCount=
    self.shared.forwardPassMultiplyAddCount+
    input.features:size(1)*self.nInputPlanes*self.nOutputPlanes
    self.shared.forwardPassHiddenStates=
    self.shared.forwardPassHiddenStates+self.output.features:nElement()
    return self.output
  end

  function AF:backward(input, gradOutput)
    -- Single fused backward pass: computes gradInput and accumulates all
    -- parameter gradients in one C call.
    C.typedFn(self._type,'AffineReluTrivialConvolution_backward')(
      input.features:cdata(),
      self.gradInput.features:cdata(),
      gradOutput.features:cdata(),
      self.affineWeight:cdata(),
      self.gradAffineWeight:cdata(),
      self.affineBias:cdata(),
      self.gradAffineBias:cdata(),
      self.convWeight:cdata(),
      self.gradConvWeight:cdata(),
      self.additiveGrad)
    return self.gradInput
  end

  function AF:updateGradInput(input, gradOutput)
    assert(false) --just call backward
  end

  function AF:accGradParameters(input, gradOutput, scale)
    assert(false) --just call backward
  end

  function AF:__tostring()
    local s = 'AffineReluTrivialConvolution(' ..
    self.nInputPlanes..'->' .. self.nOutputPlanes .. ')'
    return s
  end

  function AF:clearState()
    -- set() releases the underlying storage but keeps the tensor objects.
    self.output={features=self.output.features:set()}
    self.gradInput={features=self.gradInput.features:set()}
    self.rules=nil
  end

  function AF:suggestInputSize(nOut)
    -- 1x1 convolution: spatial size is unchanged.
    return nOut
  end
end
-- Copyright 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the license found in the
-- LICENSE file in the root directory of this source tree.
return function(sparseconvnet)
  local C = sparseconvnet.C
  -- Sparse average pooling layer.
  local AveragePooling, parent = torch.class(
    'sparseconvnet.AveragePooling', 'nn.Module', sparseconvnet)

  --- nFeaturesToDrop: number of feature planes dropped by the pooling kernel
  -- (default 0) -- exact semantics live in the C backend; TODO confirm.
  function AveragePooling:__init(
      dimension, poolSize, poolStride, nFeaturesToDrop)
    parent.__init(self)
    self.dimension = dimension
    -- Scalars are broadcast to one entry per spatial dimension.
    self.poolSize = sparseconvnet.toLongTensor(poolSize,dimension)
    self.poolStride = sparseconvnet.toLongTensor(poolStride,dimension)
    self.poolVolume = self.poolSize:prod()
    self.nFeaturesToDrop = nFeaturesToDrop or 0
    self.output = {
      features = torch.FloatTensor(),
    }
    self.gradInput = {
      features = torch.Tensor()
    }
  end

  function AveragePooling:updateOutput(input)
    self.output.metadata=input.metadata
    -- Output spatial size: (in - size) / stride + 1, elementwise.
    self.output.spatialSize =
    torch.cdiv(input.spatialSize-self.poolSize,self.poolStride)+1
    C.dimTypedFn(self.dimension, self._type, 'AveragePooling_updateOutput')(
      input.spatialSize:cdata(),
      self.output.spatialSize:cdata(),
      self.poolSize:cdata(),
      self.poolStride:cdata(),
      input.metadata.ffi,
      input.features:cdata(),
      self.output.features:cdata(),
      self.nFeaturesToDrop,
      -- Optional shared rules buffer; `and` makes this nil-safe.
      self.shared.rulesBuffer and self.shared.rulesBuffer:cdata())
    return self.output
  end

  function AveragePooling:updateGradInput(input, gradOutput)
    C.dimTypedFn(self.dimension, self._type, 'AveragePooling_updateGradInput')(
      input.spatialSize:cdata(),
      self.output.spatialSize:cdata(),
      self.poolSize:cdata(),
      self.poolStride:cdata(),
      input.metadata.ffi,
      input.features:cdata(),
      self.gradInput.features:cdata(),
      gradOutput.features:cdata(),
      self.nFeaturesToDrop,
      self.shared.rulesBuffer and self.shared.rulesBuffer:cdata())
    return self.gradInput
  end

  function AveragePooling:type(type,tensorCache)
    -- With no argument, act as a getter (legacy nn convention).
    if type==nil then
      return self._type
    end
    self._type=type
    self.output.features=self.output.features:type(type)
    self.gradInput.features=self.gradInput.features:type(type)
  end

  function AveragePooling:__tostring()
    -- e.g. 'AveragePooling3/2' or 'AveragePooling(3,2)/(2,1)'.
    local s = 'AveragePooling'
    if self.poolSize:max()==self.poolSize:min()
    and self.poolStride:max()==self.poolStride:min() then
      s=s..self.poolSize[1] ..(self.poolStride[1]==1
        and '' or '/'..self.poolStride[1])
    else
      s=s..'('..self.poolSize[1]
      for i=2,self.dimension do
        s=s..','..self.poolSize[i]
      end
      s=s..')/('..self.poolStride[1]
      for i=2,self.dimension do
        s=s..','..self.poolStride[i]
      end
      s=s..')'
    end
    if self.nFeaturesToDrop>0 then
      s=s .. ' nFeaturesToDrop = ' .. self.nFeaturesToDrop
    end
    return s
  end

  function AveragePooling:clearState()
    -- set() releases the underlying storage but keeps the tensor objects.
    self.output={features=self.output.features:set()}
    self.gradInput={features=self.gradInput.features:set()}
  end

  function AveragePooling:suggestInputSize(nOut)
    -- Inverse of the size formula in updateOutput.
    return torch.cmul(nOut-1,self.poolStride)+self.poolSize
  end
end
-- Copyright 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the license found in the
-- LICENSE file in the root directory of this source tree.
--[[
Parameters:
nPlanes : number of input planes
eps : small number used to stabilise standard deviation calculation
momentum : for calculating running average for testing (default 0.9)
affine : only 'true' is supported at present (default 'true')
noise : add multiplicative and additive noise during training if >0.
leakiness : Apply activation function inplace: 0<=leakiness<=1.
0 for ReLU, values in (0,1) for LeakyReLU, 1 for no activation function.
]]
return function(sparseconvnet)
  local C = sparseconvnet.C
  local BN,parent = torch.class(
    'sparseconvnet.BatchNormalization', 'nn.Module', sparseconvnet)
  -- Batch normalization over the nPlanes feature columns of a sparse input,
  -- optionally fused with an in-place (Leaky)ReLU controlled by 'leakiness':
  -- 0 = ReLU, values in (0,1) = LeakyReLU, 1 = no activation (default).
  function BN:__init(nPlanes, eps, momentum, affine, leakiness)
    parent.__init(self)
    -- NOTE(review): looks like the backing C/CUDA kernels require a plane
    -- count divisible by 4 -- confirm against the SCN kernel sources.
    assert(nPlanes%4==0)
    self.nPlanes=nPlanes
    self.leakiness=leakiness or 1
    if affine ~= nil then
      assert(type(affine) == 'boolean', 'affine has to be true/false')
      self.affine = affine
    else
      self.affine = true
    end
    self.eps = eps or 1e-5
    -- Per-batch statistics written by updateOutput and reused in backward.
    self.saveMean = torch.Tensor(nPlanes)
    self.saveInvStd = torch.Tensor(nPlanes)
    self.momentum = momentum or 0.9
    -- Exponential running estimates used at evaluation time.
    self.runningMean = torch.Tensor(nPlanes)
    self.runningVar = torch.Tensor(nPlanes)
    if self.affine then
      -- Learnable per-plane scale (weight) and shift (bias).
      self.weight = torch.Tensor(nPlanes)
      self.bias = torch.Tensor(nPlanes)
      self.gradWeight = torch.Tensor(nPlanes)
      self.gradBias = torch.Tensor(nPlanes)
    end
    self.output = {
      features = torch.Tensor()
    }
    self.gradInput = {
      features = torch.Tensor()
    }
    self:reset()
  end
  -- Reinitialize parameters and statistics to an identity transform.
  function BN:reset()
    if self.affine then
      self.weight:fill(1)
      self.bias:zero()
    end
    self.runningMean:zero()
    self.runningVar:fill(1)
    self.saveMean:zero()
    self.saveInvStd:fill(1)
  end
  function BN:updateOutput(input)
    assert(input.features:size(2)==self.nPlanes)
    -- Spatial structure passes through unchanged; only features change.
    self.output.metadata = input.metadata
    self.output.spatialSize=input.spatialSize
    C.typedFn(self._type,'BatchNormalization_updateOutput')(
      input.features:cdata(),
      self.output.features:cdata(),
      self.saveMean:cdata(),
      self.saveInvStd:cdata(),
      self.runningMean:cdata(),
      self.runningVar:cdata(),
      self.weight and self.weight:cdata(), -- nil when affine == false
      self.bias and self.bias:cdata(),
      self.eps,
      self.momentum,
      self.train,
      self.leakiness)
    return self.output
  end
  -- Fused backward: computes gradInput and the parameter gradients in one
  -- C call. Train mode only -- backward needs the saved batch statistics.
  function BN:backward(input, gradOutput)
    assert(self.train)
    C.typedFn(self._type,'BatchNormalization_backward')(
      input.features:cdata(),
      self.gradInput.features:cdata(),
      self.output.features:cdata(),
      gradOutput.features:cdata(),
      self.saveMean:cdata(),
      self.saveInvStd:cdata(),
      self.runningMean:cdata(),
      self.runningVar:cdata(),
      self.weight and self.weight:cdata(),
      self.bias and self.bias:cdata(),
      self.gradWeight and self.gradWeight:cdata(),
      self.gradBias and self.gradBias:cdata(),
      self.leakiness)
    return self.gradInput
  end
  -- The two-phase nn.Module API is intentionally disabled in favour of the
  -- fused :backward() above.
  function BN:updateGradInput(input, gradOutput)
    assert(false) --just call backward
  end
  function BN:accGradParameters(input, gradOutput, scale)
    assert(false) --just call backward
  end
  function BN:__tostring()
    local l
    if self.leakiness==0 then
      l=',ReLU'
    elseif self.leakiness==1/3 then
      l=',LeakyReLU(0.333..)'
    elseif self.leakiness<1 then
      l=',LeakyReLU('..self.leakiness..')'
    else
      l='' -- leakiness == 1: no activation suffix
    end
    local s = 'BatchNormalization(' ..
    'nPlanes=' .. self.nPlanes..',' ..
    'eps=' .. self.eps .. ',' ..
    'momentum=' .. self.momentum .. l .. ')'
    return s
  end
  -- Release cached output/gradInput storage (tensors stay typed but empty).
  function BN:clearState()
    self.output={features=self.output.features:set()}
    self.gradInput={features=self.gradInput.features:set()}
  end
  -- 1:1 spatial mapping: the input size equals the requested output size.
  function BN:suggestInputSize(nOut)
    return nOut
  end
  -- Convenience subclass: BatchNormalization with affine=true and fused ReLU.
  local BN,parent = torch.class('sparseconvnet.BatchNormReLU',
    'sparseconvnet.BatchNormalization', sparseconvnet)
  function BN:__init(nPlanes, eps, momentum)
    parent.__init(self, nPlanes, eps, momentum, true, 0)
  end
end
-- Copyright 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the license found in the
-- LICENSE file in the root directory of this source tree.
--[[
Parameters:
nPlanes : number of input planes
eps : small number used to stabilise standard deviation calculation
momentum : for calculating running average for testing (default 0.9)
outputColumnOffset : column offset at which the normalized result is written
into the pre-existing output feature tensor (the affine transform and the
activation function are disabled for this module)
]]
return function(sparseconvnet)
  local C = sparseconvnet.C
  -- Variant of BatchNormalization that writes its result into a column
  -- slice [outputColumnOffset+1 .. outputColumnOffset+nPlanes] of an
  -- existing output feature tensor (affine=false, leakiness=1 i.e. no
  -- activation), so several modules can share one output tensor.
  local BN,parent = torch.class(
    'sparseconvnet.BatchNormalizationInTensor', 'sparseconvnet.BatchNormalization', sparseconvnet)
  function BN:__init(nPlanes, eps, momentum, outputColumnOffset)
    parent.__init(self,nPlanes,eps,momentum, false, 1)
    self.outputColumnOffset=outputColumnOffset
  end
  function BN:updateOutput(input)
    -- NOTE(review): assumes self.output.features has already been sized by
    -- an enclosing module so this column slice exists -- confirm callers.
    local o = self.output.features:narrow(2,1+self.outputColumnOffset,self.nPlanes)
    self.output.metadata = input.metadata
    self.output.spatialSize=input.spatialSize
    C.typedFn(self._type,'BatchNormalizationInTensor_updateOutput')(
      input.features:cdata(),
      o:cdata(),
      self.saveMean:cdata(),
      self.saveInvStd:cdata(),
      self.runningMean:cdata(),
      self.runningVar:cdata(),
      self.weight and self.weight:cdata(), -- nil: affine is disabled here
      self.bias and self.bias:cdata(),
      self.eps,
      self.momentum,
      self.train,
      self.leakiness)
    return self.output
  end
  -- Fused backward over the same column slices; train mode only.
  function BN:backward(input, gradOutput)
    assert(self.train)
    local o = self.output.features:narrow(2,1+self.outputColumnOffset,self.nPlanes)
    local d_o = gradOutput.features:narrow(2,1+self.outputColumnOffset,self.nPlanes)
    C.typedFn(self._type,'BatchNormalization_backward')(
      input.features:cdata(),
      self.gradInput.features:cdata(),
      o:cdata(),
      d_o:cdata(),
      self.saveMean:cdata(),
      self.saveInvStd:cdata(),
      self.runningMean:cdata(),
      self.runningVar:cdata(),
      self.weight and self.weight:cdata(),
      self.bias and self.bias:cdata(),
      self.gradWeight and self.gradWeight:cdata(),
      self.gradBias and self.gradBias:cdata(),
      self.leakiness)
    return self.gradInput
  end
  -- Two-phase nn.Module API disabled; use the fused :backward() instead.
  function BN:updateGradInput(input, gradOutput)
    assert(false) --just call backward
  end
  function BN:accGradParameters(input, gradOutput, scale)
    assert(false) --just call backward
  end
  function BN:__tostring()
    local l
    if self.leakiness==0 then
      l=',ReLU'
    elseif self.leakiness==1/3 then
      l=',LeakyReLU(0.333..)'
    elseif self.leakiness<1 then
      l=',LeakyReLU('..self.leakiness..')'
    else
      l='' -- leakiness == 1: no activation suffix
    end
    local s = 'BatchNormalizationInTensor(' ..
    'nPlanes=' .. self.nPlanes..',' ..
    'eps=' .. self.eps .. ',' ..
    'momentum=' .. self.momentum .. l .. ')'
    return s
  end
  -- Release cached output/gradInput storage.
  function BN:clearState()
    self.output={features=self.output.features:set()}
    self.gradInput={features=self.gradInput.features:set()}
  end
  -- 1:1 spatial mapping.
  function BN:suggestInputSize(nOut)
    return nOut
  end
end
-- Copyright 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the license found in the
-- LICENSE file in the root directory of this source tree.
--[[
Implementation of batchwise dropout, optionally followed by LeakyReLU
Parameters:
p : dropout probability in the range [0,1]
ip : perform dropout inplace (default true)
leaky : in the range [0,1]. Set to zero to do ReLU after the dropout. Set to one
just to do dropout. Set to 1/3 for LeakyReLU after the dropout, etc. (default 1)
]]
return function(sparseconvnet)
  local C = sparseconvnet.C
  local math = require 'math'
  -- Batchwise (per-plane) multiplicative dropout, optionally fused with a
  -- (Leaky)ReLU. leakiness: 0 = ReLU, (0,1) = LeakyReLU, 1 = no activation.
  local BatchwiseDropout, parent = torch.class(
    'sparseconvnet.BatchwiseDropout', 'nn.Module', sparseconvnet)
  -- nPlanes : number of feature planes
  -- p       : dropout probability in [0,1]
  -- ip      : operate in place (default true)
  -- leaky   : leakiness of the fused activation (default 1, i.e. none)
  function BatchwiseDropout:__init(nPlanes,p,ip,leaky)
    parent.__init(self)
    self.inplace = (type(ip)~='boolean') or ip -- default true unless ip==false
    self.p = p
    self.leakiness=leaky or 1
    self.noise=torch.Tensor(nPlanes) -- per-plane keep mask / scale factors
    self.nPlanes=nPlanes
    -- FIX: previously tested the raw 'ip' argument instead of the computed
    -- self.inplace flag, so the default case (ip==nil -> inplace) still
    -- allocated output/gradInput tensors that were never used.
    self.output = self.inplace and "Recycle" or {
      features = torch.Tensor()
    }
    self.gradInput = self.inplace and "Recycle" or {
      features = torch.Tensor()
    }
  end
  function BatchwiseDropout:updateOutput(input)
    if self.train then
      self.noise:bernoulli(1-self.p) -- fresh random keep-mask each forward
    else
      self.noise:fill(1-self.p) -- expectation at evaluation time
    end
    if self.inplace then
      self.output = input
    else
      self.output.metadata = input.metadata
      self.output.spatialSize = input.spatialSize
    end
    C.typedFn(self._type,'BatchwiseMultiplicativeDropout_updateOutput')(
      input.features:cdata(),
      self.output.features:cdata(),
      self.noise:cdata(),
      self.leakiness)
    return self.output
  end
  function BatchwiseDropout:updateGradInput(input, gradOutput)
    if self.inplace then
      self.gradInput = gradOutput
    end
    C.typedFn(self._type,'BatchwiseMultiplicativeDropout_updateGradInput')(
      input.features:cdata(),
      self.gradInput.features:cdata(),
      gradOutput.features:cdata(),
      self.noise:cdata(),
      self.leakiness)
    return self.gradInput
  end
  function BatchwiseDropout:type(type)
    self._type=type
    self.noise=self.noise:type(type)
    -- When inplace, output/gradInput hold the placeholder string "Recycle"
    -- (no .features field), so these conversions are skipped.
    if self.output.features then
      self.output.features=self.output.features:type(type)
    end
    if self.gradInput.features then
      self.gradInput.features=self.gradInput.features:type(type)
    end
  end
  function BatchwiseDropout:__tostring()
    local s = 'BatchwiseDropout('..self.p .. ", " .. self.leakiness..')'
    return s
  end
  -- Drop cached references (inplace) or empty the cached tensors.
  function BatchwiseDropout:clearState()
    if self.inplace then
      self.output=nil
      self.gradInput=nil
    else
      self.output={features=self.output.features:set()}
      self.gradInput={features=self.gradInput.features:set()}
    end
  end
  -- 1:1 spatial mapping.
  function BatchwiseDropout:suggestInputSize(nOut)
    return nOut
  end
end
-- Copyright 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the license found in the
-- LICENSE file in the root directory of this source tree.
return function (sparseconvnet)
  -- Loads the compiled SCN shared library (CUDA build preferred, CPU-only
  -- fallback) and declares its C API via LuaJIT FFI. The declarations are
  -- generated from string templates by substituting ARCH (cpu/gpu), REAL
  -- (float/double) and DIMENSION (1..10) placeholders.
  local ffi = require 'ffi'
  local libpath, ok -- 'ok' is declared but unused
  libpath = package.searchpath('libcusparseconvnet', package.cpath)
  if not libpath then
    libpath = package.searchpath('libsparseconvnet', package.cpath)
  end
  assert(libpath)
  local F = ffi.load(libpath)
  -- Debug aids: uncomment to dump the generated headers to disk. The 'fc'
  -- and 'fg' globals below are nil unless these are enabled.
  --local fc=io.open('header_cpu.h','w')
  --local fg=io.open('header_gpu.h','w')
  local cdef = [[
void scn_set_THCState(void *state);
]]
  ffi.cdef(cdef)
  -- Hand the cutorch state to the library when running with CUDA.
  if cutorch then
    F['scn_set_THCState'](cutorch.getState())
  end
  -- Dimension-independent helpers.
  cdef = [[
void scn_ptrCopyA(long *dst, void **src);
void scn_ptrCopyB(void **dst, long *src);
double scn_ruleBookBits();
void scn_2_drawCurve(void **m, THFloatTensor *features, THFloatTensor *stroke);
]]
  if fc then fc:write(cdef) end
  ffi.cdef(cdef)
  -- 32 or 64: decides which CUDA integer tensor type backs rule books.
  sparseconvnet.ruleBookBits=F['scn_ruleBookBits']()
  -- Metadata functions, one set per spatial dimension; 'DIMENSION' in the
  -- names is textually replaced by the dimension number below.
  cdef = [[
double scn_DIMENSION_addSampleFromThresholdedTensor(
  void **m, THFloatTensor *features_, THFloatTensor *tensor_,
  THLongTensor *offset_, THLongTensor *spatialSize_, float threshold);
void scn_DIMENSION_batchAddSample(void **m);
void scn_DIMENSION_createMetadataForDenseToSparse(
  void **m, THLongTensor *spatialSize_, THLongTensor *pad, THLongTensor *nz,
  long batchSize);
void scn_DIMENSION_freeMetadata(void **metadata);
void scn_DIMENSION_generateRuleBooks3s2(void **m);
void scn_DIMENSION_generateRuleBooks2s2(void **m);
void scn_DIMENSION_setInputSpatialSize(void **m, THLongTensor *spatialSize);
void scn_DIMENSION_setInputSpatialLocation(void **m, THFloatTensor *features,
  THLongTensor *location, THFloatTensor *vec, bool overwrite);
void scn_DIMENSION_setInputSpatialLocations(void **m, THFloatTensor *features,
  THLongTensor *locations, THFloatTensor *vecs, bool overwrite);
void scn_DIMENSION_getSpatialLocations(void **m, THLongTensor *spatialSize,
  THLongTensor *locations);
]]
  for DIMENSION = 1,10 do
    local def = string.gsub(cdef, 'DIMENSION', DIMENSION)
    ffi.cdef(def)
    if fc then
      def=string.gsub(def,'bool','_Bool') -- plain C headers need _Bool
      fc:write(def)
    end
  end
  --types CPU float, double;
  --type GPU half, float, double; int_cpu and int_gpu
  -- Dimension-independent layer kernels, templated on ARCH and REAL.
  cdef = [[
void scn_ARCH_REAL_AffineReluTrivialConvolution_updateOutput(
  THTensor *input_features, THTensor *output_features,
  THTensor *affineWeight, THTensor *affineBias, THTensor *convWeight);
void scn_ARCH_REAL_AffineReluTrivialConvolution_backward(
  THTensor *input_features, THTensor *d_input_features,
  THTensor *d_output_features, THTensor *affineWeight,
  THTensor *d_affineWeight, THTensor *affineBias, THTensor *d_affineBias,
  THTensor *convWeight, THTensor *d_convWeight, bool additiveGrad);
// BatchwiseMultiplicativeDropout
void scn_ARCH_REAL_BatchwiseMultiplicativeDropout_updateOutput(
  THTensor *input_features, THTensor *output_features,
  THTensor *noise, long nPlanes, long input_stride, long output_stride,
  float alpha);
void scn_ARCH_REAL_BatchwiseMultiplicativeDropout_updateGradInput(
  THTensor *input_features, THTensor *d_input_features,
  THTensor *d_output_features, THTensor *noise, long nPlanes,
  long input_stride, long output_stride, float alpha);
// BatchNormalization
void scn_ARCH_REAL_BatchNormalization_updateOutput(
  THTensor *input_features, THTensor *output_features,
  THTensor *saveMean, THTensor *saveInvStd, THTensor *runningMean,
  THTensor *runningVar, THTensor *weight, THTensor *bias, REAL eps,
  REAL momentum, bool train, REAL leakiness);
void scn_ARCH_REAL_BatchNormalization_backward(
  THTensor *input_features, THTensor *d_input_features,
  THTensor *output_features, THTensor *d_output_features, THTensor *saveMean,
  THTensor *saveInvStd, THTensor *runningMean, THTensor *runningVar,
  THTensor *weight, THTensor *bias, THTensor *d_weight, THTensor *d_bias,
  REAL leakiness);
// BatchNormalizationInTensor
void scn_ARCH_REAL_BatchNormalizationInTensor_updateOutput(
  THTensor *input_features, THTensor *output_features,
  THTensor *saveMean, THTensor *saveInvStd, THTensor *runningMean,
  THTensor *runningVar, THTensor *weight, THTensor *bias, REAL eps,
  REAL momentum, bool train, REAL leakiness);
// LeakyReLU
void scn_ARCH_REAL_LeakyReLU_updateOutput(
  THTensor *input_features, THTensor *output_features,
  float alpha);
void scn_ARCH_REAL_LeakyReLU_updateGradInput(
  THTensor *input_features, THTensor *d_input_features,
  THTensor *d_output_features, float alpha);
// NetworkInNetwork
double scn_ARCH_REAL_NetworkInNetwork_updateOutput(
  THTensor *input_features, THTensor *output_features,
  THTensor *weight, THTensor *bias);
void scn_ARCH_REAL_NetworkInNetwork_updateGradInput(
  THTensor *d_input_features, THTensor *d_output_features,
  THTensor *weight);
void scn_ARCH_REAL_NetworkInNetwork_accGradParameters(
  THTensor *input_features, THTensor *d_output_features,
  THTensor *d_weight, THTensor *d_bias);
]]
  -- CPU instantiations: ARCH=cpu, REAL in {float,double}.
  for _,v in ipairs({{'float', 'THFloatTensor'}, {'double','THDoubleTensor'}}) do
    local def = cdef
    def = string.gsub(def, 'ARCH', 'cpu')
    def = string.gsub(def, 'THITensor', 'void')
    def = string.gsub(def, 'REAL', v[1])
    def = string.gsub(def, 'THTensor', v[2])
    ffi.cdef(def)
    if fc then
      def=string.gsub(def,'bool','_Bool')
      fc:write(def)
    end
  end
  -- GPU instantiations: ARCH=gpu; rule books use int or long CUDA tensors
  -- depending on how the library was compiled (scn_ruleBookBits above).
  if sparseconvnet.cutorch then
    for k,v in ipairs({
      {'float', 'THCudaTensor'},
      --{'double', 'THCudaDoubleTensor'}
    })
    do
      local def = cdef
      def = string.gsub(def, 'ARCH', 'gpu')
      def = string.gsub(def, 'THITensor', sparseconvnet.ruleBookBits==64 and
        'THCudaLongTensor' or 'THCudaIntTensor')
      def = string.gsub(def, 'REAL', v[1])
      def = string.gsub(def, 'THTensor', v[2])
      ffi.cdef(def)
      if fg then
        def=string.gsub(def,'bool','_Bool')
        fg:write(def)
      end
    end
  end
  -- Dimension-dependent layer kernels. Note the '_DIMENSION' placeholder
  -- (with leading underscore) so the substituted number abuts the layer
  -- name, e.g. scn_cpu_float3AveragePooling_updateOutput.
  cdef = [[
// ActivePooling
void scn_ARCH_REAL_DIMENSIONActivePooling_updateOutput(
  THLongTensor *inputSize, void **m, THTensor *input_features,
  THTensor *output_features, THITensor *rulesBuffer, bool average);
void scn_ARCH_REAL_DIMENSIONActivePooling_updateGradInput(
  THLongTensor *inputSize, void **m,
  THTensor *d_input_features, THTensor *d_output_features,
  THITensor *rulesBuffer, bool average);
// Average Pooling
void scn_ARCH_REAL_DIMENSIONAveragePooling_updateOutput(
  THLongTensor *inputSize, THLongTensor *outputSize,
  THLongTensor *poolSize, THLongTensor *poolStride, void **m,
  THTensor *input_features, THTensor *output_features, long nFeaturesToDrop,
  THITensor *rulesBuffer);
void scn_ARCH_REAL_DIMENSIONAveragePooling_updateGradInput(
  THLongTensor * inputSize, THLongTensor * outputSize,
  THLongTensor * poolSize, THLongTensor * poolStride, void **m,
  THTensor *input_features, THTensor *d_input_features,
  THTensor *d_output_features, long nFeaturesToDrop,
  THITensor *rulesBuffer);
double scn_ARCH_REAL_DIMENSIONConvolution_updateOutput(
  THLongTensor *inputSize, THLongTensor *outputSize,
  THLongTensor *filterSize, THLongTensor *filterStride, void **m,
  THTensor *input_features, THTensor *output_features, THTensor *weight,
  THTensor *bias, long filterVolume, THITensor *rulesBuffer);
void scn_ARCH_REAL_DIMENSIONConvolution_backward(
  THLongTensor *inputSize, THLongTensor *outputSize,
  THLongTensor *filterSize, THLongTensor *filterStride, void **m,
  THTensor *input_features, THTensor *d_input_features,
  THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
  THTensor *d_bias, long filterVolume, THITensor *rulesBuffer);
double scn_ARCH_REAL_DIMENSIONDeconvolution_updateOutput(
  THLongTensor *inputSize, THLongTensor *outputSize,
  THLongTensor *filterSize, THLongTensor *filterStride, void **m,
  THTensor *input_features, THTensor *output_features, THTensor *weight,
  THTensor *bias, long filterVolume, THITensor *rulesBuffer);
void scn_ARCH_REAL_DIMENSIONDeconvolution_backward(
  THLongTensor *inputSize, THLongTensor *outputSize,
  THLongTensor *filterSize, THLongTensor *filterStride, void **m,
  THTensor *input_features, THTensor *d_input_features,
  THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
  THTensor *d_bias, long filterVolume, THITensor *rulesBuffer);
// Max Pooling
void scn_ARCH_REAL_DIMENSIONMaxPooling_updateOutput(
  THLongTensor *inputSize, THLongTensor *outputSize,
  THLongTensor *poolSize, THLongTensor *poolStride, void **m,
  THTensor *input_features, THTensor *output_features, long nFeaturesToDrop,
  THITensor *rulesBuffer);
void scn_ARCH_REAL_DIMENSIONMaxPooling_updateGradInput(
  THLongTensor * inputSize, THLongTensor * outputSize,
  THLongTensor * poolSize, THLongTensor * poolStride, void **m,
  THTensor *input_features, THTensor *d_input_features,
  THTensor *output_features, THTensor *d_output_features,
  long nFeaturesToDrop, THITensor *rulesBuffer);
// SparseToDense
void scn_ARCH_REAL_DIMENSIONSparseToDense_updateOutput(
  THLongTensor *inputSize, void **m, THTensor *input_features,
  THTensor *output_features, THITensor *rulesBuffer, long nPlanes);
void scn_ARCH_REAL_DIMENSIONSparseToDense_updateGradInput(
  THLongTensor *inputSize, void **m, THTensor *input_features,
  THTensor *d_input_features, THTensor *d_output_features,
  THITensor *rulesBuffer);
double scn_ARCH_REAL_DIMENSIONSubmanifoldConvolution_updateOutput(
  THLongTensor *inputSize, THLongTensor *filterSize, void **m,
  THTensor *input_features, THTensor *output_features, THTensor *weight,
  THTensor *bias, long filterVolume, THITensor *rulesBuffer);
void scn_ARCH_REAL_DIMENSIONSubmanifoldConvolution_backward(
  THLongTensor *inputSize, THLongTensor *filterSize, void **m,
  THTensor *input_features, THTensor *d_input_features,
  THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
  THTensor *d_bias, long filterVolume, THITensor *rulesBuffer);
]]
  -- CPU instantiations for each (REAL, DIMENSION) pair.
  for _,v in ipairs({{'float', 'THFloatTensor'}, {'double', 'THDoubleTensor'}}) do
    for DIMENSION = 1,10 do
      local def = cdef
      def = string.gsub(def, 'ARCH', 'cpu')
      def = string.gsub(def, '_DIMENSION', DIMENSION)
      def = string.gsub(def, 'THITensor', 'void')
      def = string.gsub(def, 'REAL', v[1])
      def = string.gsub(def, 'THTensor', v[2])
      ffi.cdef(def)
      if fc then
        def=string.gsub(def,'bool','_Bool')
        fc:write(def)
      end
    end
  end
  -- GPU instantiations for each (REAL, DIMENSION) pair.
  if sparseconvnet.cutorch then
    for k,v in ipairs({
      {'float', 'THCudaTensor'},
      --{'double', 'THCudaDoubleTensor'}
    }) do
      for DIMENSION = 1,10 do
        local def = cdef
        def = string.gsub(def, 'ARCH', 'gpu')
        def = string.gsub(def, '_DIMENSION', DIMENSION)
        def = string.gsub(def, 'THITensor', sparseconvnet.ruleBookBits==64 and
          'THCudaLongTensor' or 'THCudaIntTensor')
        def = string.gsub(def, 'REAL', v[1])
        def = string.gsub(def, 'THTensor', v[2])
        ffi.cdef(def)
        if fg then
          def=string.gsub(def,'bool','_Bool')
          fg:write(def)
        end
      end
    end
  end
  if fc then
    fc:close()
    fg:close()
  end
  -- Lookup helpers that map a torch tensor type string to the matching
  -- ARCH_REAL prefix and fetch the corresponding C function.
  sparseconvnet.C = {}
  local C = sparseconvnet.C
  local typeTable={}
  typeTable['torch.FloatTensor'] = 'cpu_float'
  typeTable['torch.DoubleTensor'] = 'cpu_double'
  typeTable['torch.CudaHalfTensor'] = 'gpu_half' --todo
  typeTable['torch.CudaTensor'] = 'gpu_float'
  typeTable['torch.CudaDoubleTensor'] = 'gpu_double'
  -- Untemplated function, e.g. C.fn('ruleBookBits').
  function C.fn(name)
    return F['scn_' .. name]
  end
  -- ARCH_REAL-templated function, e.g. scn_cpu_float_<name>.
  function C.typedFn(type,name)
    return F['scn_' .. typeTable[type] .. '_' .. name]
  end
  -- DIMENSION-templated metadata function, e.g. scn_3_<name>.
  function C.dimensionFn(dimension,name)
    return F['scn_' .. dimension .. '_' .. name]
  end
  -- ARCH_REAL + DIMENSION-templated kernel; no separator before 'name'
  -- because the cdef template uses '_DIMENSION' (see above).
  function C.dimTypedFn(dimension,type,name)
    return F['scn_' .. typeTable[type] .. dimension .. name]
  end
  function C.copyFfiPtrToLong(dst,src)
    F['scn_ptrCopyA'](dst:data(), src)
  end
  function C.copyLongToFfiPtr(dst,src)
    F['scn_ptrCopyB'](dst, src:data())
  end
end
-- Copyright 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the license found in the
-- LICENSE file in the root directory of this source tree.
--[[
Assume all the inputs have identical SparseGrids and input[i].nActive
Assume input[1].nPlanes >= input[i].nPlanes for all i=1,#input
output.submanifoldRules is taken from input[1].submanifoldRules (could do set union?)
(for resnets, make sure the residual link is input[2])
]]
return function(sparseconvnet)
  -- Element-wise addition of a table of sparse inputs that share one
  -- SparseGrid. input[1] must have the most feature planes; narrower
  -- inputs are added into the leading columns (useful for ResNet links).
  local CAddTable, parent = torch.class(
    'sparseconvnet.CAddTable', 'nn.Module', sparseconvnet)
  function CAddTable:__init(ip)
    parent.__init(self)
    -- inplace only when explicitly requested with boolean true
    self.inplace = type(ip)=='boolean' and ip
    -- FIX: self.modules was never initialized (nn.Module does not create
    -- it), so CAddTable:add() crashed with table.insert on nil.
    self.modules = {}
    self.gradInput = {}
    self.output = self.inplace and 'recycle' or {
      features = torch.Tensor()
    }
    sparseconvnet.shareShared(self)
  end
  function CAddTable:add(module)
    table.insert(self.modules,module)
    sparseconvnet.shareShared(self)
    return self
  end
  function CAddTable:updateOutput(input)
    if self.inplace then
      self.output=input[1]
    else
      self.output.features:resizeAs(input[1].features):copy(input[1].features)
      self.output.metadata=input[1].metadata
      self.output.spatialSize=input[1].spatialSize
    end
    -- Accumulate the remaining inputs into the leading feature columns.
    for i=2,#input do
      assert(input[i].nActive==input[1].nActive)
      self.output.features:narrow(2,1,input[i].features:size(2)):add(input[i].features)
    end
    return self.output
  end
  -- Shared implementation for updateGradInput/backwards: route gradOutput
  -- back to each addend, narrowed to that addend's plane count. When
  -- inplace and the plane counts match, the gradient tensor is shared.
  local function computeGradInput(self, input, gradOutput)
    for i=1,#input do
      if self.inplace and input[1].features:size(2) == input[i].features:size(2) then
        self.gradInput[i]=self.gradInput[i] or {}
        self.gradInput[i].features=gradOutput.features
      else
        self.gradInput[i]=self.gradInput[i] or {features=input[i].features.new()}
        self.gradInput[i].features:resizeAs(input[i].features)
        self.gradInput[i].features:copy(
          gradOutput.features:narrow(2,1,input[i].features:size(2)))
      end
    end
    -- Drop stale entries if the input table shrank since the last call.
    for i=#input+1,#self.gradInput do
      self.gradInput[i]=nil
    end
    return self.gradInput
  end
  function CAddTable:updateGradInput(input, gradOutput)
    return computeGradInput(self, input, gradOutput)
  end
  -- NOTE(review): the name looks like a typo for 'backward'; kept as-is
  -- for compatibility with any existing callers. Body was a byte-for-byte
  -- duplicate of updateGradInput and now shares its implementation.
  function CAddTable:backwards(input, gradOutput)
    return computeGradInput(self, input, gradOutput)
  end
  function CAddTable:clearState()
    self.gradInput = {}
    self.output = self.inplace and 'recycle' or {
      features = self.output.features:set()
    }
  end
  function CAddTable:suggestInputSize(nOut)
    return nOut -- 1:1 spatial mapping
  end
end
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# Build configuration for the Torch sparseconvnet package: always builds the
# CPU library from SCN/init.cpp; additionally builds the CUDA library
# (libcusparseconvnet) from SCN/init.cu when CUDA >= 7.5 is found.
set(CMAKE_VERBOSE_MAKEFILE on)
CMAKE_MINIMUM_REQUIRED(VERSION 2.8 FATAL_ERROR)
CMAKE_POLICY(VERSION 2.8)
SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}")
FIND_PACKAGE(Torch REQUIRED)
# C++11 plus OpenMP for the multi-threaded CPU kernels.
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fopenmp")
FILE(GLOB luasrc *.lua)
SET(src SCN/init.cpp)
ADD_TORCH_PACKAGE(sparseconvnet "${src}" "${luasrc}")
LINK_DIRECTORIES("${Torch_INSTALL_LIB}")
TARGET_LINK_LIBRARIES(sparseconvnet TH)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# The C++/CUDA sources are shared with the PyTorch build of the package.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../PyTorch/sparseconvnet/SCN)
FIND_PACKAGE(CUDA 7.5)
IF(CUDA_FOUND)
  # Detect CUDA architecture and get best NVCC flags
  INCLUDE(${CMAKE_CURRENT_SOURCE_DIR}/SCN/cmake/FindCudaArch.cmake)
  SELECT_NVCC_ARCH_FLAGS(NVCC_FLAGS_EXTRA)
  LIST(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
  INCLUDE_DIRECTORIES("${Torch_INSTALL_INCLUDE}/THC")
  LINK_DIRECTORIES("${Torch_INSTALL_LIB}")
  FILE(GLOB src-cuda SCN/init.cu)
  # MODULE: loaded at runtime via ffi.load, never linked against directly.
  CUDA_ADD_LIBRARY(cusparseconvnet MODULE ${src-cuda})
  TARGET_LINK_LIBRARIES(cusparseconvnet THC TH)
  IF(APPLE)
    # Allow symbols (e.g. THC state) to be resolved at load time on macOS.
    SET_TARGET_PROPERTIES(cusparseconvnet PROPERTIES
      LINK_FLAGS "-undefined dynamic_lookup")
  ENDIF()
  ### Torch packages supposes libraries prefix is "lib"
  SET_TARGET_PROPERTIES(cusparseconvnet PROPERTIES
    PREFIX "lib"
    IMPORT_PREFIX "lib")
  INSTALL(TARGETS cusparseconvnet
    RUNTIME DESTINATION "${Torch_INSTALL_LUA_CPATH_SUBDIR}"
    LIBRARY DESTINATION "${Torch_INSTALL_LUA_CPATH_SUBDIR}")
ENDIF(CUDA_FOUND)
-- Copyright 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the license found in the
-- LICENSE file in the root directory of this source tree.
return function(sparseconvnet)
  -- Accumulate running top-1/top-5 error counts and summed NLL for one
  -- batch of classifier outputs.
  local function updateStats(stats, output, target, loss)
    local batchSize = output:size(1)
    stats.n = stats.n + batchSize
    stats.nll = stats.nll + loss*batchSize
    -- Rank classes per sample (descending score).
    local _ , predictions = output:float():sort(2, true)
    local correct = predictions:eq(
      target:long():view(batchSize, 1):expandAs(output))
    -- Top-1 score
    stats.top1 = stats.top1 + correct:narrow(2, 1, 1):sum()
    -- Top-5 score
    local len = math.min(5, correct:size(2))
    stats.top5 = stats.top5 + correct:narrow(2, 1, len):sum()
  end
  -- SGD training loop with exponential learning-rate decay, per-epoch
  -- train/validation statistics, and optional checkpointing to model.t7 /
  -- epoch.t7 (training resumes automatically if epoch.t7 exists).
  -- p: table of hyperparameters (nEpochs, initial_LR, LR_decay,
  -- weightDecay, momentum, checkPoint), all optional.
  function sparseconvnet.ClassificationTrainValidate(model,dataset,p)
    local t = model:type()
    p.nEpochs=p.nEpochs or 100
    p.initial_LR = p.initial_LR or 1e-2
    p.LR_decay=p.LR_decay or 4e-2
    p.weightDecay=p.weightDecay or 1e-4
    p.momentum=p.momentum or 0.9
    local optimState = {
      learningRate=p.initial_LR,
      learningRateDecay = 0.0,
      momentum = p.momentum,
      nesterov = true,
      dampening = 0.0,
      weightDecay = p.weightDecay,
      epoch=1
    }
    if paths.filep('epoch.t7') then
      model=torch.load('model.t7')
      optimState.epoch=torch.load('epoch.t7')+1
      print('Restarting at epoch '.. optimState.epoch ..' from model.t7 ..')
    end
    print(p)
    local criterion = nn.CrossEntropyCriterion()
    criterion:type(model:type())
    local params, gradParams = model:getParameters()
    print('#parameters', params:nElement())
    local timer=torch.Timer()
    for epoch = optimState.epoch,p.nEpochs do
      model:training()
      timer:reset()
      local stats={top1=0, top5=0, n=0, nll=0}
      -- Exponential LR schedule: LR = initial_LR * exp((1-epoch)*decay).
      optimState.learningRate = p.initial_LR*math.exp((1-epoch)*p.LR_decay)
      for batch in dataset.train(epoch) do
        -- NOTE(review): return value discarded -- assumes batch.input:type
        -- converts the sparse input in place (unlike plain tensors, whose
        -- :type returns a converted copy); confirm against the input class.
        batch.input:type(t)
        batch.target=batch.target:type(t)
        model:forward(batch.input)
        criterion:forward(model.output, batch.target)
        updateStats(stats,model.output,batch.target,criterion.output)
        gradParams:zero() -- model:zeroGradParameters()
        criterion:backward(model.output, batch.target)
        model:backward(batch.input, criterion.gradInput)
        -- Gradients are already accumulated above; feval only reports them.
        local function feval()
          return criterion.output, gradParams
        end
        optim.sgd(feval, params, optimState)
      end
      print(epoch,'train:',
        string.format('top1=%.2f%%', 100*(1-stats.top1/stats.n)),
        string.format('top5=%.2f%%', 100*(1-stats.top5/stats.n)),
        string.format('nll: %.2f', stats.nll/stats.n),
        string.format('%.1fs', timer:time().real))
      if p.checkPoint then
        model:clearState()
        torch.save('model.t7',model)
        torch.save('epoch.t7',epoch)
      end
      model:evaluate()
      -- Reset the per-epoch FLOP/state counters kept in the shared table.
      model.modules[1].shared.forwardPassMultiplyAddCount=0
      model.modules[1].shared.forwardPassHiddenStates=0
      timer:reset()
      local stats={top1=0, top5=0, n=0, nll=0} -- fresh stats for validation
      for batch in dataset.val() do
        batch.input:type(t)
        batch.target=batch.target:type(t)
        model:forward(batch.input)
        criterion:forward(model.output, batch.target)
        updateStats(stats,model.output,batch.target,criterion.output)
      end
      print(epoch,'test:',
        string.format('top1=%.2f%%', 100*(1-stats.top1/stats.n)),
        string.format('top5=%.2f%%', 100*(1-stats.top5/stats.n)),
        string.format('nll: %.2f', stats.nll/stats.n),
        string.format('%.1fs', timer:time().real))
      print(string.format('%.3e MultiplyAdds/sample %.3e HiddenStates/sample',
        model.modules[1].shared.forwardPassMultiplyAddCount/stats.n,
        model.modules[1].shared.forwardPassHiddenStates/stats.n))
    end
  end
end
-- Copyright 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the license found in the
-- LICENSE file in the root directory of this source tree.
return function(sparseconvnet)
  -- Applies every child module to the same sparse input, collecting the
  -- outputs in a table; gradients are summed on the way back.
  local ConcatTable, parent = torch.class(
    'sparseconvnet.ConcatTable', 'nn.ConcatTable', sparseconvnet)
  function ConcatTable:__init()
    parent.__init(self)
    self.modules={}
    self.output={}
    self.gradInput={
      features=torch.Tensor()
    }
    sparseconvnet.shareShared(self)
  end
  function ConcatTable:add(module)
    table.insert(self.modules,module)
    sparseconvnet.shareShared(self)
    return self
  end
  function ConcatTable:updateOutput(input)
    for i = 1,#self.modules do
      self.output[i]=self.modules[i]:forward(input)
    end
    -- Drop stale entries if the module list shrank since the last forward.
    for i = #self.modules+1,#self.output do
      self.output[i]=nil
    end
    return self.output
  end
  -- FIX: 'scale' was read as an undeclared global (always nil); it is now
  -- an explicit parameter per the nn.Module backward(input, gradOutput,
  -- scale) convention. Existing two-argument callers behave as before.
  function ConcatTable:backward(input, gradOutput, scale)
    local gradInputs={}
    for i = 1,#self.modules do
      gradInputs[i]=self.modules[i]:backward(input,gradOutput[i],scale)
    end
    -- Sum the children's input gradients into a single gradInput.
    self.gradInput.features:resizeAs(
      gradInputs[1].features):copy(gradInputs[1].features)
    for i=2,#self.modules do
      self.gradInput.features:add(gradInputs[i].features)
    end
    return self.gradInput
  end
  function ConcatTable:clearState()
    for _,m in ipairs(self.modules) do
      m:clearState()
    end
    self.output={}
    self.gradInput={features=self.gradInput.features:set()}
  end
  -- Defer to the first branch; all branches are assumed to agree.
  function ConcatTable:suggestInputSize(nOut)
    return self.modules[1]:suggestInputSize(nOut)
  end
end
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment