Commit 1df7b845 authored by Benjamin Thomas Graham

3d segmentation

parent f2e3800b
@@ -10,4 +10,4 @@ pickle
*.pyc
PyTorch/sparseconvnet.egg-info/
PyTorch/sparseconvnet/SCN/__init__.py
sparseconvnet.egg-info
# Code of Conduct
Facebook has adopted a Code of Conduct that we expect project participants to adhere to. Please read the [full text](https://code.facebook.com/pages/876921332402685/open-source-code-of-conduct) so that you can understand what actions will and will not be tolerated.
# Contributing to SparseConvNet
We want to make contributing to this project as easy and transparent as
possible.
## Pull Requests
We actively welcome your pull requests.
1. Fork the repo and create your branch from `master`.
2. Ensure the examples still run.
3. If you haven't already, complete the Contributor License Agreement ("CLA").
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
## Coding Style
We try to follow the PEP 8 style guidelines and encourage you to as well.
## License
By contributing to SparseConvNet, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from ..utils import *
from ..metadata import Metadata
from ..inputBatch import InputBatch
from ..sparseConvNetTensor import SparseConvNetTensor
from .sparseModule import SparseModule
from .averagePooling import AveragePooling
from .batchNormalization import BatchNormalization, BatchNormReLU, BatchNormLeakyReLU, BatchNormalizationInTensor
from .batchwiseDropout import BatchwiseDropout, BatchwiseDropoutInTensor
from .concatTable import ConcatTable
from .convolution import Convolution
from .cAddTable import CAddTable
from .deconvolution import Deconvolution
from .denseToSparse import DenseToSparse
from .identity import Identity
from .joinTable import JoinTable
from .leakyReLU import LeakyReLU
from .maxPooling import MaxPooling
from .networkInNetwork import NetworkInNetwork
from .reLU import ReLU
from .sequential import Sequential
from .sparseToDense import SparseToDense
from .submanifoldConvolution import SubmanifoldConvolution
from .networkArchitectures import *
from .classificationTrainValidate import ClassificationTrainValidate
from .misc import *
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
Affine transformation (i.e. the second half of a typical batchnormalization layer)
Parameters:
nPlanes : number of input planes
noise : add multiplicative and additive noise during training if >0.
leakiness : Apply activation function inplace: 0<=leakiness<=1.
0 for ReLU, values in (0,1) for LeakyReLU, 1 for no activation function.
"""
import torch
import sparseconvnet as s
from . import SparseModule
from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
import math
class AffineReLUTrivialConvolution(SparseModule):
def __init__(self, nIn, nOut, additiveGrad=False):
SparseModule.__init__(self)
self.nIn = nIn
self.nOut = nOut
self.affineWeight = torch.Tensor(nIn).fill_(1)
self.affineBias = torch.Tensor(nIn).zero_()
std = math.sqrt(2.0 / nIn)
self.convWeight = torch.Tensor(nIn, nOut).normal_(0, std)
self.gradAffineWeight = torch.Tensor(nIn).fill_(0)
        self.gradAffineBias = torch.Tensor(nIn).zero_()
self.gradConvWeight = torch.Tensor(nIn, nOut).fill_(std)
self.additiveGrad = additiveGrad
self.output = SparseConvNetTensor(torch.Tensor())
self.gradInput = torch.Tensor()
def parameters(self):
return [self.affineWeight, self.affineBias, self.convWeight], [
self.gradAffineWeight, self.gradAffineBias, self.gradConvWeight]
def updateOutput(self, input):
self.output.metadata = input.metadata
self.output.spatial_size = input.spatial_size
typed_fn(input.features, 'AffineReluTrivialConvolution_updateOutput')(
input.features,
self.output.features,
self.affineWeight,
self.affineBias,
self.convWeight)
        s.forward_pass_multiplyAdd_count += \
            input.features.size(0) * self.nIn * self.nOut
s.forward_pass_hidden_states += self.output.features.nelement()
return self.output
def backward(self, input, gradOutput, scale=1):
assert scale == 1
typed_fn(input.features, 'AffineReluTrivialConvolution_backward')(
input.features,
self.gradInput,
gradOutput,
self.affineWeight,
self.gradAffineWeight,
self.affineBias,
self.gradAffineBias,
self.convWeight,
self.gradConvWeight,
self.additiveGrad)
return self.gradInput
    def updateGradInput(self, input, gradOutput):
        assert False  # just call backward
    def accGradParameters(self, input, gradOutput, scale):
        assert False  # just call backward
def __repr__(self):
s = 'AffineReluTrivialConvolution ' + \
str(self.nIn) + '->' + str(self.nOut)
return s
def type(self, t=None, tensorCache=None):
if t is None:
return self._type
self._type = t
self.affineWeight = self.affineWeight.type(t)
self.affineBias = self.affineBias.type(t)
self.convWeight = self.convWeight.type(t)
self.gradAffineWeight = self.gradAffineWeight.type(t)
self.gradAffineBias = self.gradAffineBias.type(t)
self.gradConvWeight = self.gradConvWeight.type(t)
self.gradInput = self.gradInput.type(t)
self.output.type(t)
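# Illustrative sketch (not part of the library): on a dense feature matrix,
# the fused affine + ReLU + 1x1 ("trivial") convolution above is equivalent
# to the plain computation below; names and sizes here are made up.
def _example_affine_relu_trivial_convolution():
    import torch
    nIn, nOut, nActive = 8, 16, 100
    x = torch.randn(nActive, nIn)        # one row of features per active site
    affineWeight = torch.ones(nIn)       # per-plane scale
    affineBias = torch.zeros(nIn)        # per-plane shift
    convWeight = torch.randn(nIn, nOut)  # the 1x1 convolution weight
    # scale/shift each plane, apply ReLU, then the trivial convolution
    return (x * affineWeight + affineBias).clamp(min=0).mm(convWeight)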
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import sparseconvnet
from . import SparseModule
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
class AveragePooling(SparseModule):
def __init__(self, dimension, pool_size, pool_stride, nFeaturesToDrop=0):
SparseModule.__init__(self)
self.dimension = dimension
self.pool_size = toLongTensor(dimension, pool_size)
self.pool_stride = toLongTensor(dimension, pool_stride)
self.nFeaturesToDrop = nFeaturesToDrop
self.output = SparseConvNetTensor(torch.Tensor())
self.gradInput = torch.Tensor()
def updateOutput(self, input):
self.output.metadata = input.metadata
self.output.spatial_size =\
(input.spatial_size - self.pool_size) / self.pool_stride + 1
dim_typed_fn(
self.dimension,
input.features,
'AveragePooling_updateOutput')(
input.spatial_size,
self.output.spatial_size,
self.pool_size,
self.pool_stride,
input.metadata.ffi,
input.features,
self.output.features,
self.nFeaturesToDrop,
torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
return self.output
def updateGradInput(self, input, gradOutput):
dim_typed_fn(
self.dimension, input.features, 'AveragePooling_updateGradInput')(
input.spatial_size,
self.output.spatial_size,
self.pool_size,
self.pool_stride,
input.metadata.ffi,
input.features,
self.gradInput,
gradOutput,
self.nFeaturesToDrop,
torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
return self.gradInput
def type(self, t=None, tensorCache=None):
if t is None:
return self._type
self.output.type(t)
self.gradInput = self.gradInput.type(t)
def __repr__(self):
s = 'AveragePooling'
if self.pool_size.max() == self.pool_size.min() and\
self.pool_stride.max() == self.pool_stride.min():
s = s + str(self.pool_size[0]) + '/' + str(self.pool_stride[0])
else:
s = s + '(' + str(self.pool_size[0])
for i in self.pool_size[1:]:
s = s + ',' + str(i)
s = s + ')/(' + str(self.pool_stride[0])
for i in self.pool_stride[1:]:
s = s + ',' + str(i)
s = s + ')'
if self.nFeaturesToDrop > 0:
            s = s + ' nFeaturesToDrop = ' + str(self.nFeaturesToDrop)
return s
def suggestInputSize(self, out_size):
return (out_size - 1) * self.pool_stride + self.pool_size
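# Illustrative sketch (not part of the library): the spatial-size arithmetic
# used by updateOutput and suggestInputSize above, for a single dimension.
def _example_pooling_sizes(in_size=34, pool_size=2, pool_stride=2):
    out_size = (in_size - pool_size) // pool_stride + 1   # updateOutput rule
    recovered = (out_size - 1) * pool_stride + pool_size  # suggestInputSize rule
    return out_size, recovered                            # (17, 34)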
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
Parameters:
nPlanes : number of input planes
eps : small number used to stabilise standard deviation calculation
momentum : for calculating running average for testing (default 0.9)
affine : only True is supported at present (default True)
noise : add multiplicative and additive noise during training if >0.
leakiness : Apply activation function in place: 0<=leakiness<=1.
    0 for ReLU, values in (0,1) for LeakyReLU, 1 for no activation function.
"""
import torch
import sparseconvnet
from . import SparseModule
from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
class BatchNormalization(SparseModule):
def __init__(
self,
nPlanes,
eps=1e-4,
momentum=0.9,
affine=True,
leakiness=1):
SparseModule.__init__(self)
self.nPlanes = nPlanes
self.eps = eps
self.momentum = momentum
self.affine = affine
self.leakiness = leakiness
self.saveMean = torch.Tensor(nPlanes).fill_(0)
self.saveInvStd = torch.Tensor(nPlanes).fill_(1)
self.runningMean = torch.Tensor(nPlanes).fill_(0)
self.runningVar = torch.Tensor(nPlanes).fill_(1)
if affine:
self.weight = torch.Tensor(nPlanes).fill_(1)
self.bias = torch.Tensor(nPlanes).fill_(0)
self.gradWeight = torch.Tensor(nPlanes).fill_(0)
            self.gradBias = torch.Tensor(nPlanes).fill_(0)
self.output = SparseConvNetTensor(torch.Tensor())
self.gradInput = torch.Tensor()
def updateOutput(self, input):
assert input.features.ndimension() == 0 or input.features.size(1) == self.nPlanes
self.output.metadata = input.metadata
self.output.spatial_size = input.spatial_size
typed_fn(input.features, 'BatchNormalization_updateOutput')(
input.features,
self.output.features,
self.saveMean,
self.saveInvStd,
self.runningMean,
self.runningVar,
optionalTensor(self, 'weight'),
optionalTensor(self, 'bias'),
self.eps,
self.momentum,
self.train,
self.leakiness)
return self.output
def backward(self, input, gradOutput, scale=1):
assert scale == 1
assert self.train
typed_fn(input.features, 'BatchNormalization_backward')(
input.features,
self.gradInput,
self.output.features,
gradOutput,
self.saveMean,
self.saveInvStd,
self.runningMean,
self.runningVar,
optionalTensor(self, 'weight'),
optionalTensor(self, 'bias'),
optionalTensor(self, 'gradWeight'),
optionalTensor(self, 'gradBias'),
self.leakiness)
return self.gradInput
    def updateGradInput(self, input, gradOutput):
        assert False  # just call backward
    def accGradParameters(self, input, gradOutput, scale):
        assert False  # just call backward
def type(self, t=None, tensorCache=None):
self.output.type(t)
SparseModule.type(self, t, tensorCache)
def __repr__(self):
s = 'BatchNorm(' + str(self.nPlanes) + ',eps=' + str(self.eps) + \
',momentum=' + str(self.momentum) + ',affine=' + str(self.affine)
if self.leakiness > 0:
s = s + ',leakiness=' + str(self.leakiness)
s = s + ')'
return s
class BatchNormReLU(BatchNormalization):
def __init__(self, nPlanes, eps=1e-4, momentum=0.9):
BatchNormalization.__init__(self, nPlanes, eps, momentum, True, 0)
def __repr__(self):
s = 'BatchNormReLU(' + str(self.nPlanes) + ',eps=' + str(self.eps) + \
',momentum=' + str(self.momentum) + ',affine=' + str(self.affine) + ')'
return s
class BatchNormLeakyReLU(BatchNormalization):
def __init__(self, nPlanes, eps=1e-4, momentum=0.9):
BatchNormalization.__init__(self, nPlanes, eps, momentum, True, 0.333)
def __repr__(self):
        s = 'BatchNormLeakyReLU(' + str(self.nPlanes) + ',eps=' + str(self.eps) + \
            ',momentum=' + str(self.momentum) + ',affine=' + str(self.affine) + ')'
return s
class BatchNormalizationInTensor(BatchNormalization):
def __init__(
self,
nPlanes,
eps=1e-4,
momentum=0.9,
output_column_offset=0):
BatchNormalization.__init__(self, nPlanes, eps, momentum, False, 1)
self.output_column_offset = output_column_offset
def updateOutput(self, input):
o = self.output.features.narrow(
1, self.output_column_offset, self.nPlanes)
self.output.metadata = input.metadata
self.output.spatial_size = input.spatial_size
typed_fn(input.features, 'BatchNormalizationInTensor_updateOutput')(
input.features,
o,
self.saveMean,
self.saveInvStd,
self.runningMean,
self.runningVar,
optionalTensor(self, 'weight'),
optionalTensor(self, 'bias'),
self.eps,
self.momentum,
self.train,
self.leakiness)
return self.output
def backward(self, input, gradOutput, scale=1):
assert scale == 1
assert self.train
o = self.output.features.narrow(
1, self.output_column_offset, self.nPlanes)
d_o = gradOutput.narrow(1, self.output_column_offset, self.nPlanes)
typed_fn(input.features, 'BatchNormalization_backward')(
input.features,
self.gradInput,
o,
d_o,
self.saveMean,
self.saveInvStd,
self.runningMean,
self.runningVar,
optionalTensor(self, 'weight'),
optionalTensor(self, 'bias'),
optionalTensor(self, 'gradWeight'),
optionalTensor(self, 'gradBias'),
self.leakiness)
return self.gradInput
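# Illustrative sketch (not part of the library): training-mode batch
# normalization with the fused "leakiness" activation described in the
# docstring, computed on dense rows with plain torch ops.
def _example_batchnorm_leaky(x, eps=1e-4, leakiness=0.333):
    import torch
    mean = x.mean(0)                        # per-plane mean
    inv_std = 1.0 / (x.var(0) + eps).sqrt() # per-plane inverse std
    y = (x - mean) * inv_std                # normalise each plane
    # 0 -> ReLU, values in (0,1) -> LeakyReLU, 1 -> no activation
    return torch.max(y, y * leakiness)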
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
Implementation of batchwise dropout, optionally followed by LeakyReLU
Parameters:
nPlanes: number of input planes
p : dropout probability in the range [0,1]
ip : perform dropout in place (default True)
leaky : in the range [0,1]. Set to zero to do ReLU after the dropout. Set to one
just to do dropout. Set to 1/3 for LeakyReLU after the dropout, etc. (default 1)
"""
import torch
import sparseconvnet
from . import SparseModule
from ..utils import toLongTensor, typed_fn
from ..sparseConvNetTensor import SparseConvNetTensor
class BatchwiseDropout(SparseModule):
def __init__(
self,
nPlanes,
p,
ip=True,
leaky=1):
        SparseModule.__init__(self)
        self.inplace = ip
self.p = p
self.leakiness = leaky
self.noise = torch.Tensor(nPlanes)
self.nPlanes = nPlanes
self.output = None if ip else SparseConvNetTensor(torch.Tensor())
self.gradInput = None if ip else torch.Tensor()
def updateOutput(self, input):
if self.train:
self.noise.bernoulli_(1 - self.p)
else:
self.noise.fill_(1 - self.p)
if self.inplace:
self.output = input
else:
self.output.metadata = input.metadata
            self.output.spatial_size = input.spatial_size
typed_fn(input.features, 'BatchwiseMultiplicativeDropout_updateOutput')(
input.features, self.output.features, self.noise, self.leakiness)
return self.output
def updateGradInput(self, input, gradOutput):
if self.inplace:
self.gradInput = gradOutput
typed_fn(input.features, 'BatchwiseMultiplicativeDropout_updateGradInput')(
input.features,
self.gradInput,
gradOutput,
self.noise,
self.leakiness
)
return self.gradInput
    def type(self, t, tensorCache=None):
        self.noise = self.noise.type(t)
        if not self.inplace:
            self.output.features = self.output.features.type(t)
            self.gradInput = self.gradInput.type(t)
        SparseModule.type(self, t, tensorCache)
    def clearState(self):
        if self.inplace:
            self.output = None
            self.gradInput = None
        else:
            SparseModule.clearState(self)
def __repr__(self):
s = 'BatchwiseDropout(' + str(self.nPlanes) + ',p=' + str(self.p) + \
',ip=' + str(self.inplace)
if self.leakiness > 0:
s = s + ',leakiness=' + str(self.leakiness)
s = s + ')'
return s
class BatchwiseDropoutInTensor(BatchwiseDropout):
def __init__(
self,
nPlanes,
p,
output_column_offset=0,
leaky=1):
BatchwiseDropout.__init__(self, nPlanes, p, False, leaky)
self.output_column_offset = output_column_offset
def updateOutput(self, input):
if self.train:
self.noise.bernoulli_(1 - self.p)
else:
self.noise.fill_(1 - self.p)
self.output.metadata = input.metadata
self.output.spatial_size = input.spatial_size
o = self.output.features.narrow(
1, self.output_column_offset, self.nPlanes)
typed_fn(
input.features,
'BatchwiseMultiplicativeDropout_updateOutput')(
input.features,
o,
self.noise,
self.leakiness)
return self.output
def updateGradInput(self, input, gradOutput):
assert self.train
d_o = gradOutput.narrow(1, self.output_column_offset, self.nPlanes)
typed_fn(input.features, 'BatchwiseMultiplicativeDropout_updateGradInput')(
input.features, self.gradInput, d_o, self.noise, self.leakiness)
return self.gradInput
def __repr__(self):
s = 'BatchwiseDropoutInTensor(' + str(self.nPlanes) + ',p=' + str(
self.p) + ',column_offset=' + str(self.output_column_offset)
if self.leakiness > 0:
s = s + ',leakiness=' + str(self.leakiness)
s = s + ')'
return s
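# Illustrative sketch (not part of the library): batchwise dropout draws one
# Bernoulli mask per feature plane and shares it across the whole batch,
# unlike elementwise dropout.
def _example_batchwise_dropout(x, p=0.5, train=True):
    import torch
    noise = torch.Tensor(x.size(1))
    if train:
        noise.bernoulli_(1 - p)   # keep each plane with probability 1-p
    else:
        noise.fill_(1 - p)        # rescale at test time instead of sampling
    return x * noise              # broadcast one mask over all rows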
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
Assume all the inputs have identical SparseGrids and the same nActive.
Assume input[0].nPlanes >= input[i].nPlanes for all the other inputs i.
output.submanifoldRules is taken from input[0].submanifoldRules (could do set union?)
(for ResNets, make sure the residual link is input[1])
"""
import torch
import sparseconvnet
from . import SparseModule
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr, set
from ..sparseConvNetTensor import SparseConvNetTensor
class CAddTable(SparseModule):
def __init__(self, ip=False):
SparseModule.__init__(self)
self.inplace = ip
if ip:
self.output = None
else:
self.output = SparseConvNetTensor(torch.Tensor())
def updateOutput(self, input):
if self.inplace:
self.output = input[0]
else:
self.output.features.resize_as_(
input[0].features).copy_(
input[0].features)
self.output.metadata = input[0].metadata
self.output.spatial_size = input[0].spatial_size
for i in input[1:]:
self.output.features.narrow(
1, 0, i.features.size(1)).add_(
i.features)
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput = []
n = input[0].features.size(1)
for i in input:
n_ = i.features.size(1)
if self.inplace and n_ == n:
self.gradInput.append(gradOutput)
else:
self.gradInput.append(gradOutput.narrow(1, 0, n_).clone())
return self.gradInput
def type(self, t, tensorCache=None):
if t and not self.inplace:
self.output.type(t)
def clearState(self):
if self.inplace:
self.output = None
else:
set(self.output)
self.gradInput = None
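# Illustrative sketch (not part of the library): CAddTable adds narrower
# inputs into the leading columns of the widest input, as in a widening
# residual connection; sizes here are made up.
def _example_cadd_table():
    import torch
    a = torch.randn(10, 8)               # input[0]: widest
    b = torch.randn(10, 4)               # input[1]: e.g. a residual link
    out = a.clone()
    out.narrow(1, 0, b.size(1)).add_(b)  # add b into the first 4 planes
    return out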
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from torch.legacy import nn, optim
import sparseconvnet as s
import time
import os
import torch
import math
def updateStats(stats, output, target, loss):
batchSize = output.size(0)
stats['n'] = stats['n'] + batchSize
stats['nll'] = stats['nll'] + loss * batchSize
_, predictions = output.float().sort(1, True)
correct = predictions.eq(
target.long().view(batchSize, 1).expand_as(output))
# Top-1 score
stats['top1'] += correct.narrow(1, 0, 1).sum()
# Top-5 score
l = min(5, correct.size(1))
stats['top5'] += correct.narrow(1, 0, l).sum()
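# Illustrative sketch (not part of the library): the top-k bookkeeping done by
# updateStats above, shown on a one-sample batch with three classes.
def _example_top1_count():
    import torch
    output = torch.Tensor([[0.1, 0.7, 0.2]])  # scores for one sample
    target = torch.LongTensor([1])            # true class
    _, predictions = output.sort(1, True)     # classes by decreasing score
    correct = predictions.eq(target.view(1, 1).expand_as(output))
    return correct.narrow(1, 0, 1).sum()      # 1: the top-1 prediction is correct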
def ClassificationTrainValidate(model, dataset, p):
t = model.type()
if 'nEpochs' not in p:
p['nEpochs'] = 100
if 'initial_LR' not in p:
p['initial_LR'] = 1e-1
if 'LR_decay' not in p:
p['LR_decay'] = 4e-2
if 'weightDecay' not in p:
p['weightDecay'] = 1e-4
if 'momentum' not in p:
p['momentum'] = 0.9
if 'checkPoint' not in p:
p['checkPoint'] = False
optimState = {
'learningRate': p['initial_LR'],
'learningRateDecay': 0.0,
'momentum': p['momentum'],
'nesterov': True,
'dampening': 0.0,
'weightDecay': p['weightDecay'],
'epoch': 1
}
if p['checkPoint'] and os.path.isfile('epoch.pth'):
optimState['epoch'] = torch.load('epoch.pth') + 1
print('Restarting at epoch ' +
str(optimState['epoch']) +
' from model.pth ..')
model = torch.load('model.pth')
print(p)
criterion = nn.CrossEntropyCriterion()
criterion.type(model.type())
params, gradParams = model.flattenParameters()
print('#parameters', params.nelement())
for epoch in range(optimState['epoch'], p['nEpochs'] + 1):
model.training()
stats = {'top1': 0, 'top5': 0, 'n': 0, 'nll': 0}
optimState['learningRate'] = p['initial_LR'] * \
math.exp((1 - epoch) * p['LR_decay'])
start = time.time()
for batch in dataset['train']():
batch['input'].type(t)
batch['target'] = batch['target'].type(t)
model.forward(batch['input'])
criterion.forward(model.output, batch['target'])
updateStats(stats, model.output, batch['target'], criterion.output)
gradParams.zero_() # model:zeroGradParameters()
criterion.backward(model.output, batch['target'])
model.backward(batch['input'], criterion.gradInput)
def feval(x):
return criterion.output, gradParams
optim.sgd(feval, params, optimState)
        print(epoch, 'train: top1=%.2f%% top5=%.2f%% nll:%.2f time:%.1fs' %
              (100 * (1 - 1.0 * stats['top1'] / stats['n']),
               100 * (1 - 1.0 * stats['top5'] / stats['n']),
               stats['nll'] / stats['n'],
               time.time() - start))
if p['checkPoint']:
model.modules[0].clearState()
torch.save(model, 'model.pth')
torch.save(epoch, 'epoch.pth')
model.evaluate()
s.forward_pass_multiplyAdd_count = 0
s.forward_pass_hidden_states = 0
stats = {'top1': 0, 'top5': 0, 'n': 0, 'nll': 0}
start = time.time()
for batch in dataset['val']():
batch['input'].type(t)
batch['target'] = batch['target'].type(t)
model.forward(batch['input'])
criterion.forward(model.output, batch['target'])
updateStats(stats, model.output, batch['target'], criterion.output)
        print(epoch, 'test: top1=%.2f%% top5=%.2f%% nll:%.2f time:%.1fs' %
              (100 * (1 - 1.0 * stats['top1'] / stats['n']),
               100 * (1 - 1.0 * stats['top5'] / stats['n']),
               stats['nll'] / stats['n'],
               time.time() - start))
        print('%.3e MultiplyAdds/sample %.3e HiddenStates/sample' %
              (s.forward_pass_multiplyAdd_count / stats['n'],
               s.forward_pass_hidden_states / stats['n']))
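# Illustrative sketch (not part of the library): the exponentially decaying
# learning-rate schedule used above, lr(epoch) = initial_LR * exp((1-epoch)*LR_decay).
def _example_lr_schedule(initial_LR=1e-1, LR_decay=4e-2, nEpochs=100):
    import math
    # epoch 1 gives exactly initial_LR; later epochs decay geometrically
    return [initial_LR * math.exp((1 - epoch) * LR_decay)
            for epoch in range(1, nEpochs + 1)]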
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import sparseconvnet
from torch.legacy.nn import ConcatTable as C
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr, set
from ..sparseConvNetTensor import SparseConvNetTensor
class ConcatTable(C):
def __init__(self):
C.__init__(self)
self.gradInput = torch.Tensor()
def updateOutput(self, input):
self.output = []
for m in self.modules:
self.output.append(m.forward(input))
return self.output
def backward(self, input, gradOutput, scale=1):
self.gradInput.resize_as_(input.features).zero_()
for m, g in zip(self.modules, gradOutput):
self.gradInput.add_(m.backward(input, g, scale))
return self.gradInput
def clearState(self):
self.output = None
set(self.gradInput)
for m in self.modules:
m.clearState()
def suggestInputSize(self, nOut):
return self.modules[0].suggestInputSize(nOut)
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
from . import SparseModule
import sparseconvnet as s
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
class Convolution(SparseModule):
def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias):
SparseModule.__init__(self)
self.dimension = dimension
self.nIn = nIn
self.nOut = nOut
self.filter_size = toLongTensor(dimension, filter_size)
self.filter_volume = self.filter_size.prod().item()
self.filter_stride = toLongTensor(dimension, filter_stride)
std = (2.0 / nIn / self.filter_volume)**0.5
        self.weight = torch.Tensor(
            nIn * self.filter_volume, nOut).normal_(0, std)
        self.gradWeight = torch.Tensor(
            nIn * self.filter_volume, nOut).fill_(std)
if bias:
self.bias = torch.Tensor(nOut).zero_()
self.gradBias = torch.Tensor(nOut).zero_()
self.output = SparseConvNetTensor(torch.Tensor())
self.gradInput = torch.Tensor()
def updateOutput(self, input):
assert input.features.ndimension() == 0 or input.features.size(1) == self.nIn
self.output.metadata = input.metadata
self.output.spatial_size =\
(input.spatial_size - self.filter_size) / self.filter_stride + 1
s.forward_pass_multiplyAdd_count +=\
dim_typed_fn(
self.dimension, input.features, 'Convolution_updateOutput')(
input.spatial_size,
self.output.spatial_size,
self.filter_size,
self.filter_stride,
input.metadata.ffi,
input.features,
self.output.features,
self.weight,
optionalTensor(self, 'bias'),
self.filter_volume,
torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
s.forward_pass_hidden_states += self.output.features.nelement()
return self.output
def backward(self, input, gradOutput, scale=1):
assert scale == 1
dim_typed_fn(
self.dimension, input.features, 'Convolution_backward')(
input.spatial_size,
self.output.spatial_size,
self.filter_size,
self.filter_stride,
input.metadata.ffi,
input.features,
self.gradInput,
gradOutput,
self.weight,
self.gradWeight,
optionalTensor(self, 'gradBias'),
self.filter_volume,
torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
return self.gradInput
def type(self, t=None, tensorCache=None):
if t is None:
return self._type
self._type = t
self.weight = self.weight.type(t)
self.gradWeight = self.gradWeight.type(t)
self.output.type(t)
self.gradInput = self.gradInput.type(t)
if hasattr(self, 'bias'):
self.bias = self.bias.type(t)
self.gradBias = self.gradBias.type(t)
def __repr__(self):
s = 'Convolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
if self.filter_size.max() == self.filter_size.min() and\
self.filter_stride.max() == self.filter_stride.min():
s = s + str(self.filter_size[0].item()) + '/' + str(self.filter_stride[0].item())
else:
s = s + '(' + str(self.filter_size[0].item())
for i in self.filter_size[1:]:
s = s + ',' + str(i.item())
s = s + ')/(' + str(self.filter_stride[0].item())
for i in self.filter_stride[1:]:
s = s + ',' + str(i.item())
s = s + ')'
return s
def suggestInputSize(self, out_size):
return (out_size - 1) * self.filter_stride + self.filter_size
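# Illustrative sketch (not part of the library): the convolution weight is
# stored as an (nIn * filter_volume) x nOut matrix. Assuming rows are grouped
# by filter offset (an assumption for this sketch), each output site gathers
# its active input sites and multiplies by the matching nIn x nOut slice.
def _example_convolution_weight_layout(dimension=3, nIn=8, nOut=16, filter_size=3):
    import torch
    filter_volume = filter_size ** dimension  # e.g. 27 offsets for a 3^3 filter
    weight = torch.randn(nIn * filter_volume, nOut)
    x = torch.randn(1, nIn)                   # features at one active input site
    offset = 5                                # hypothetical filter offset index
    w = weight.narrow(0, offset * nIn, nIn)   # nIn x nOut slice for that offset
    return x.mm(w)                            # contribution to one output site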
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
from torch.legacy.nn import Module
import sparseconvnet as s
from . import SparseModule
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
class Deconvolution(SparseModule):
def __init__(self, dimension, nIn, nOut,
filter_size, filter_stride, bias):
SparseModule.__init__(self)
self.dimension = dimension
self.nIn = nIn
self.nOut = nOut
self.filter_size = toLongTensor(dimension, filter_size)
self.filter_stride = toLongTensor(dimension, filter_stride)
self.filter_volume = self.filter_size.prod().item()
std = (2.0 / nIn / self.filter_volume)**0.5
self.weight = torch.Tensor(
nIn * self.filter_volume, nOut
).normal_(0, std)
self.gradWeight = torch.Tensor(
nIn * self.filter_volume, nOut).fill_(std)
if bias:
self.bias = torch.Tensor(nOut).zero_()
self.gradBias = torch.Tensor(nOut).zero_()
self.output = SparseConvNetTensor(torch.Tensor())
self.gradInput = torch.Tensor()
def updateOutput(self, input):
assert input.features.ndimension() == 0 or input.features.size(1) == self.nIn
self.output.metadata = input.metadata
self.output.spatial_size =\
(input.spatial_size - 1) * self.filter_stride + self.filter_size
s.forward_pass_multiplyAdd_count +=\
dim_typed_fn(
self.dimension, input.features, 'Deconvolution_updateOutput')(
input.spatial_size,
self.output.spatial_size,
self.filter_size,
self.filter_stride,
input.metadata.ffi,
input.features,
self.output.features,
self.weight,
optionalTensor(self, 'bias'),
self.filter_volume,
torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
s.forward_pass_hidden_states += self.output.features.nelement()
return self.output
def backward(self, input, gradOutput, scale=1):
assert scale == 1
dim_typed_fn(
self.dimension, input.features, 'Deconvolution_backward')(
input.spatial_size,
self.output.spatial_size,
self.filter_size,
self.filter_stride,
input.metadata.ffi,
input.features,
self.gradInput,
gradOutput,
self.weight,
self.gradWeight,
optionalTensor(self, 'gradBias'),
self.filter_volume,
torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
return self.gradInput
def type(self, t=None, tensorCache=None):
if t is None:
return self._type
self._type = t
self.weight = self.weight.type(t)
self.gradWeight = self.gradWeight.type(t)
self.output.type(t)
self.gradInput = self.gradInput.type(t)
if hasattr(self, 'bias'):
self.bias = self.bias.type(t)
self.gradBias = self.gradBias.type(t)
def __repr__(self):
s = 'Deconvolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
if self.filter_size.max() == self.filter_size.min() and\
self.filter_stride.max() == self.filter_stride.min():
s = s + str(self.filter_size[0].item()) + '/' + str(self.filter_stride[0].item())
else:
s = s + '(' + str(self.filter_size[0].item())
for i in self.filter_size[1:]:
s = s + ',' + str(i.item())
s = s + ')/(' + str(self.filter_stride[0].item())
for i in self.filter_stride[1:]:
s = s + ',' + str(i.item())
s = s + ')'
return s
def suggestInputSize(self, out_size):
return (out_size - self.filter_size) / self.filter_stride + 1
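# Illustrative sketch (not part of the library): Deconvolution inverts the
# Convolution size rule, so a matching conv/deconv pair round-trips the
# spatial size (when the input size and stride are compatible).
def _example_deconvolution_sizes(in_size=16, filter_size=2, filter_stride=2):
    conv_out = (in_size - filter_size) // filter_stride + 1    # Convolution: 8
    deconv_out = (conv_out - 1) * filter_stride + filter_size  # Deconvolution: 16
    return conv_out, deconv_out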
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import sparseconvnet as s
from torch.legacy.nn import Container
from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr, set
from ..sparseConvNetTensor import SparseConvNetTensor
from .batchNormalization import *
from .affineReLUTrivialConvolution import AffineReLUTrivialConvolution
from .submanifoldConvolution import SubmanifoldConvolution
import math
class DenseNetBlock(Container):
def __init__(self, dimension, nInputPlanes, nExtraLayers=2, growthRate=16):
Container.__init__(self)
self.dimension = dimension
self.nInputPlanes = nInputPlanes
self.nExtraLayers = nExtraLayers
self.growthRate = growthRate
assert(self.nExtraLayers >= 1)
self.nOutputPlanes = nInputPlanes + nExtraLayers * growthRate
self.output = SparseConvNetTensor(torch.Tensor())
# Module 1: Batchnorm the input into the start of self.output
self.add(
BatchNormalizationInTensor(
nInputPlanes,
output_column_offset=0))
self.modules[0].output = self.output
self.gradInput = self.modules[0].gradInput
for i in range(nExtraLayers):
nFeatures = self.nInputPlanes + i * growthRate
nFeaturesB = 4 * growthRate
# Modules 4*i+1
self.add(AffineReLUTrivialConvolution(nFeatures, nFeaturesB, True))
# Module 4*i+2
self.add(BatchNormalization(nFeaturesB))
# Module 4*i+3
self.add(
SubmanifoldConvolution(
dimension,
nFeaturesB,
growthRate,
3,
False))
# Module 4*i+4
            self.add(BatchNormalizationInTensor(
                growthRate,
                output_column_offset=self.nInputPlanes + i * growthRate))
self.modules[4 * i + 4].output = self.output
def updateOutput(self, input):
assert input.features.size(1) == self.nInputPlanes
self.output.spatial_size = input.spatial_size
self.output.metadata = input.metadata
self.output.features.resize_(
input.features.size(0), self.nOutputPlanes)
i = input
for m in self.modules:
i = m.updateOutput(i)
return self.output
def backward(self, input, gradOutput, scale=1):
assert scale == 1
g = gradOutput
for i in range(self.nExtraLayers):
self.modules[4 * i + 1].gradInput = gradOutput
for m, m_ in zip(self.modules[:0:-1],
self.modules[len(self.modules) - 2::-1]):
g = m.backward(m_.output, g)
self.modules[0].backward(input, g)
return self.gradInput
def type(self, type, tensorCache=None):
self._type = type
self.output.features = self.output.features.type(type)
for x in self.modules:
x.type(type)
self.gradInput = self.modules[0].gradInput
def __repr__(self):
s = 'DenseNetBlock(' + str(self.nInputPlanes) + '->' + str(self.nInputPlanes) + '+' + str(
self.nExtraLayers) + '*' + str(self.growthRate) + '=' + str(self.nOutputPlanes) + ')'
return s
def clearState(self):
        for m in self.modules:
            m.clearState()
set(self.output)
set(self.gradInput)
def suggestInputSize(self, out_size):
return out_size
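# Illustrative sketch (not part of the library): the channel bookkeeping of a
# DenseNetBlock; each extra layer appends growthRate planes to the output.
def _example_densenet_planes(nInputPlanes=32, nExtraLayers=2, growthRate=16):
    widths = [nInputPlanes + i * growthRate for i in range(nExtraLayers + 1)]
    return widths  # [32, 48, 64]; nOutputPlanes == widths[-1]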
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
Function to convert a Dense Input into a sparse input.
If possible, avoid using this module; build the hidden layer using InputBatch.
Parameters:
dimension : of the input field
"""
import torch
from . import SparseModule
from ..utils import dim_fn, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
from ..metadata import Metadata
class DenseToSparse(SparseModule):
def __init__(self, dimension):
SparseModule.__init__(self)
self.dimension = dimension
self.output = SparseConvNetTensor(torch.Tensor(), Metadata(dimension))
self.gradInput = torch.Tensor()
def updateOutput(self, input):
a = input
aa = a.permute(
*([0, ] + list(range(2, 2 + self.dimension)) + [1, ])).clone()
self.aas = aa.size()
nz = aa.abs().sum(self.dimension + 1).view(aa.size()[0:-1])
s = torch.LongTensor(nz.stride()).view(1, self.dimension + 1)
nz = nz.nonzero()
s = s.type_as(nz)
aa = aa.view(-1, a.size(1))
self.aas2 = aa.size()
self.r = (nz * s.expand_as(nz)).sum(1).view(-1)
self.output.features = aa.index_select(0, self.r)
self.output.spatial_size = torch.LongTensor(list(input.size()[2:]))
dim_fn(self.dimension, 'createMetadataForDenseToSparse')(
self.output.metadata.ffi,
self.output.spatial_size,
nz.cpu(),
input.size(0))
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput.resize_(self.aas2).zero_()
self.gradInput.index_copy_(0, self.r, gradOutput)
self.gradInput = self.gradInput.view(self.aas).permute(
*([0, self.dimension + 1] + list(range(1, self.dimension + 1))))
return self.gradInput
def clearState(self):
SparseModule.clearState(self)
self.aas = None
self.r = None
def __repr__(self):
return 'DenseToSparse(' + str(self.dimension) + ')'
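# Illustrative sketch (not part of the library): locating the active (nonzero)
# sites of a dense batch, mirroring the permute + nonzero logic above for a
# 2-dimensional input.
def _example_dense_to_sparse_sites(dimension=2):
    import torch
    a = torch.zeros(1, 3, 4, 4)                 # batch x planes x spatial
    a[0, :, 1, 2] = 1.0                         # one active site
    aa = a.permute(0, 2, 3, 1)                  # move planes last
    nz = aa.abs().sum(dimension + 1).nonzero()  # rows of (batch, y, x)
    return nz                                   # tensor([[0, 1, 2]])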
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from torch.legacy.nn import Identity as I
from .sparseModule import SparseModule
class Identity(SparseModule):
def forward(self, input):
self.output = input
return self.output
def backward(self, input, gradOutput, scale=1):
self.gradInput = gradOutput
return self.gradInput
def clearState(self):
self.output = None
self.gradInput = None
def suggestInputSize(self, out_size):
return out_size
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import sparseconvnet
from . import SparseModule
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr, set
from ..sparseConvNetTensor import SparseConvNetTensor
class JoinTable(SparseModule):
def __init__(self, nPlanes):
SparseModule.__init__(self)
self.nPlanes = nPlanes
self.gradInput = [torch.Tensor() for p in nPlanes]
self.nOutputPlanes = sum(nPlanes)
self.output = SparseConvNetTensor(torch.Tensor())
def updateOutput(self, input):
self.output.features.resize_(
input[0].features.size(0),
self.nOutputPlanes)
self.output.metadata = input[0].metadata
self.output.spatial_size = input[0].spatial_size
offset = 0
for i, n in zip(input, self.nPlanes):
self.output.features.narrow(1, offset, n).copy_(i.features)
offset += n
return self.output
def updateGradInput(self, input, gradOutput):
offset = 0
a = input[0].features.size(0)
for g, n in zip(self.gradInput, self.nPlanes):
g.resize_(a, n).copy_(gradOutput.narrow(1, offset, n))
offset += n
return self.gradInput
def type(self, t, tensorCache=None):
if t:
self.output.type(t)
self.gradInput = [g.type(t) for g in self.gradInput]
def clearState(self):
set(self.output)
for g in self.gradInput:
set(g)
def __repr__(self):
s = 'JoinTable: ' + str(self.nPlanes[0])
for n in self.nPlanes[1:]:
s = s + ' + ' + str(n)
s = s + ' -> ' + str(self.nOutputPlanes)
return s
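# Illustrative sketch (not part of the library): JoinTable concatenates
# per-site features along the plane dimension, e.g. for U-Net style skip
# connections; sizes here are made up.
def _example_join_table():
    import torch
    a = torch.randn(10, 8)
    b = torch.randn(10, 4)
    return torch.cat([a, b], 1)  # 10 x 12, matching nOutputPlanes = 8 + 4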
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import sparseconvnet
from . import SparseModule
from ..utils import toLongTensor, typed_fn, optionalTensor, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
class LeakyReLU(SparseModule):
def __init__(self, leakage=0.333, ip=True):
SparseModule.__init__(self)
self.inplace = ip
self.leakage = leakage
self.output = SparseConvNetTensor(torch.Tensor())
self.gradInput = None if ip else torch.Tensor()
def updateOutput(self, input):
self.output.metadata = input.metadata
self.output.spatial_size = input.spatial_size
typed_fn(input.features, 'LeakyReLU_updateOutput')(
input.features,
self.output.features,
self.leakage)
return self.output
def updateGradInput(self, input, gradOutput):
if self.inplace:
self.gradInput = gradOutput
typed_fn(input.features, 'LeakyReLU_updateGradInput')(
input.features,
self.gradInput,
gradOutput,
self.leakage)
return self.gradInput
    def type(self, t, tensorCache=None):
        if t:
            self.output.type(t)
            if self.gradInput is not None:
                self.gradInput = self.gradInput.type(t)
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import sparseconvnet
from . import SparseModule
from ..utils import toLongTensor, dim_typed_fn, optionalTensor, nullptr
from ..sparseConvNetTensor import SparseConvNetTensor
class MaxPooling(SparseModule):
def __init__(self, dimension, pool_size, pool_stride, nFeaturesToDrop=0):
SparseModule.__init__(self)
self.dimension = dimension
self.pool_size = toLongTensor(dimension, pool_size)
self.pool_stride = toLongTensor(dimension, pool_stride)
self.pool_volume = self.pool_size.prod()
self.nFeaturesToDrop = nFeaturesToDrop or 0
self.output = SparseConvNetTensor(torch.Tensor())
self.gradInput = torch.Tensor()
def updateOutput(self, input):
self.output.metadata = input.metadata
self.output.spatial_size =\
(input.spatial_size - self.pool_size) / self.pool_stride + 1
dim_typed_fn(
self.dimension,
input.features,
'MaxPooling_updateOutput')(
input.spatial_size,
self.output.spatial_size,
self.pool_size,
self.pool_stride,
input.metadata.ffi,
input.features,
self.output.features,
self.nFeaturesToDrop,
torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
return self.output
def updateGradInput(self, input, gradOutput):
dim_typed_fn(
self.dimension,
input.features,
'MaxPooling_updateGradInput')(
input.spatial_size,
self.output.spatial_size,
self.pool_size,
self.pool_stride,
input.metadata.ffi,
input.features,
self.gradInput,
self.output.features,
gradOutput,
self.nFeaturesToDrop,
torch.cuda.IntTensor() if input.features.is_cuda else nullptr)
return self.gradInput
def type(self, t=None, tensorCache=None):
if t is None:
return self._type
self.output.type(t)
self.gradInput = self.gradInput.type(t)
def __repr__(self):
s = 'MaxPooling'
if self.pool_size.max() == self.pool_size.min() and\
self.pool_stride.max() == self.pool_stride.min():
s = s + str(self.pool_size[0]) + '/' + str(self.pool_stride[0])
else:
s = s + '(' + str(self.pool_size[0])
for i in self.pool_size[1:]:
s = s + ',' + str(i)
s = s + ')/(' + str(self.pool_stride[0])
for i in self.pool_stride[1:]:
s = s + ',' + str(i)
s = s + ')'
if self.nFeaturesToDrop > 0:
            s = s + ' nFeaturesToDrop = ' + str(self.nFeaturesToDrop)
return s
def suggestInputSize(self, out_size):
return (out_size - 1) * self.pool_stride + self.pool_size
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch.legacy.nn as nn
from .sequential import Sequential
from .sparseModule import SparseModule
from ..sparseConvNetTensor import SparseConvNetTensor
from .batchNormalization import BatchNormalization
class Tanh(SparseModule):
def __init__(self):
SparseModule.__init__(self)
self.module = nn.Tanh()
self.output = SparseConvNetTensor()
self.output.features = self.module.output
self.gradInput = self.module.gradInput
def updateOutput(self, input):
self.output.metadata = input.metadata
self.output.spatial_size = input.spatial_size
self.module.forward(input.features)
return self.output
def updateGradInput(self, input, gradOutput):
self.module.updateGradInput(input.features, gradOutput)
return self.gradInput
def type(self, t, tensorCache=None):
if t:
self.module.type(t, tensorCache)
self.output.features = self.module.output
self.gradInput = self.module.gradInput
class ELU(SparseModule):
def __init__(self):
SparseModule.__init__(self)
self.module = nn.ELU()
self.output = SparseConvNetTensor()
self.gradInput = self.module.gradInput
def updateOutput(self, input):
self.output.metadata = input.metadata
self.output.spatial_size = input.spatial_size
self.module.forward(input.features)
return self.output
def updateGradInput(self, input, gradOutput):
self.module.updateGradInput(input.features, gradOutput)
return self.gradInput
def type(self, t, tensorCache=None):
if t:
self.module.type(t, tensorCache)
self.output.features = self.module.output
self.gradInput = self.module.gradInput
def BatchNormELU(nPlanes, eps=1e-4, momentum=0.9):
    return Sequential().add(
        BatchNormalization(nPlanes, eps, momentum)).add(ELU())