Factor out CUDA code

de3743f6 · Benjamin Thomas Graham · f0407b36 · de3743f6 · de3743f6 · f0407b36
Commit de3743f6 authored Jul 13, 2018 by Benjamin Thomas Graham
20 changed files
--- a/sparseconvnet/SCN/CUDA/UnPooling.cpp
+++ b/sparseconvnet/SCN/CUDA/UnPooling.cpp
+// Copyright 2016-present, Facebook, Inc.
+// All rights reserved.
+//
+// This source code is licensed under the license found in the
+// LICENSE file in the root directory of this source tree.
+
+template <typename T>
+void cuda_UnPooling_ForwardPass(T *input_features, T *output_features,
+                                Int nPlanes, Int input_stride,
+                                Int output_stride, RuleBook _rules); // declaration only; the body lives in CUDA/UnPooling.cu
+template <typename T>
+void cuda_UnPooling_BackwardPass(T *d_input_features, T *d_output_features,
+                                 Int nPlanes, Int input_stride,
+                                 Int output_stride, RuleBook _rules); // declaration only; the body lives in CUDA/UnPooling.cu
+
+template <typename T, Int Dimension>
+void cuda_UnPooling_updateOutput(
+    /*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
+    /*long*/ at::Tensor poolSize,
+    /*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
+    /*cuda float*/ at::Tensor input_features,
+    /*cuda float*/ at::Tensor output_features, long nFeaturesToDrop) {
+  // Forward entry point: sizes and zeroes output_features, then passes raw pointers and the rule book to cuda_UnPooling_ForwardPass.
+  Int nPlanes = input_features.size(1) - nFeaturesToDrop; // number of columns copied per row
+  auto _rules =
+      m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true); // note the order: outputSize first, inputSize second
+  Int nActive = m.getNActive(outputSize);
+  output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
+  output_features.zero_(); // the pass in UnPooling.cu accumulates with +=, so the output starts at zero
+  // Offsetting the base pointer skips the first nFeaturesToDrop columns of every input row.
+  auto iF = input_features.data<T>() + nFeaturesToDrop;
+  auto oF = output_features.data<T>();
+  // The two strides are the full row widths of the input and output tensors.
+  cuda_UnPooling_ForwardPass<T>(iF, oF, nPlanes, input_features.size(1),
+                                output_features.size(1), _rules);
+}
+
+template <typename T, Int Dimension>
+void cuda_UnPooling_updateGradInput(
+    /*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
+    /*long*/ at::Tensor poolSize,
+    /*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
+    /*cuda float*/ at::Tensor input_features,
+    /*cuda float*/ at::Tensor d_input_features,
+    /*cuda float*/ at::Tensor d_output_features, long nFeaturesToDrop) {
+  // Backward entry point: zero-fills d_input_features (shaped like input_features), then calls cuda_UnPooling_BackwardPass.
+  Int nPlanes = input_features.size(1) - nFeaturesToDrop; // number of columns that receive gradient
+  auto _rules =
+      m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true); // note the order: outputSize first, inputSize second
+  d_input_features.resize_as_(input_features);
+  d_input_features.zero_(); // the first nFeaturesToDrop columns are never written afterwards and stay zero
+  // Offsetting the base pointer makes gradients land after the first nFeaturesToDrop columns.
+  auto diF = d_input_features.data<T>() + nFeaturesToDrop;
+  auto doF = d_output_features.data<T>();
+  // input_stride is the full input row width; output_stride is the row width of d_output_features.
+  cuda_UnPooling_BackwardPass<T>(diF, doF, nPlanes, input_features.size(1),
+                                 d_output_features.size(1), _rules);
+}
--- a/sparseconvnet/SCN/CUDA/UnPooling.cu
+++ b/sparseconvnet/SCN/CUDA/UnPooling.cu
@@ -5,50 +5,67 @@
 // LICENSE file in the root directory of this source tree.

 #include "RuleBookIterator.h"
-#include "UnPooling.h"

-template <typename T, Int Dimension>
-void cuda_UnPooling_updateOutput(
-    /*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
-    /*long*/ at::Tensor poolSize,
-    /*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
-    /*cuda float*/ at::Tensor input_features,
-    /*cuda float*/ at::Tensor output_features, long nFeaturesToDrop) {
-
-  Int nPlanes = input_features.size(1) - nFeaturesToDrop;
-  auto _rules =
-      m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
-  Int nActive = m.getNActive(outputSize);
-  output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
-  output_features.zero_();
+// NTX must be >=2 so r is filled properly (r has 2 * NTY entries and each is written by a distinct thread)
+template <typename T, Int NTX, Int NTY>
+__global__ void UnPooling_fp(T *input_features, T *output_features, Int nPlanes,
+                             Int input_stride, Int output_stride, Int *rules,
+                             Int nHot) {
+  __shared__ Int r[NTY * 2]; // per-block staging buffer for NTY rules, two Ints per rule
+  for (Int n = blockIdx.x * NTY; n < nHot; n += gridDim.x * NTY) { // each block handles NTY rules per pass
+    {
+      Int i = threadIdx.x + NTX * threadIdx.y; // flat thread index; assumes blockDim.x == NTX
+      if (i < NTY * 2 and i < 2 * (nHot - n)) // stay inside r and inside the rules that remain
+        r[i] = rules[2 * n + i];
+    }
+    __syncthreads(); // r must be completely written before any thread reads it
+    if (n + threadIdx.y < nHot) { // thread row y handles rule n + y, if it exists
+      Int i = r[2 * threadIdx.y + 1] * input_stride; // second Int of the rule selects the input row
+      Int o = r[2 * threadIdx.y] * output_stride; // first Int of the rule selects the output row
+      for (Int plane = threadIdx.x; plane < nPlanes; plane += NTX) // threads along x share the planes
+        output_features[o + plane] += input_features[i + plane];
+    }
+    __syncthreads(); // nobody may overwrite r for the next pass while others still read it
+  }
+}

-  auto iF = input_features.data<T>() + nFeaturesToDrop;
-  auto oF = output_features.data<T>();
-  RULEBOOKITERATOR(
-      cuda_UnPooling_ForwardPass<T>(iF, oF, nPlanes, input_features.size(1),
-                                    output_features.size(1), rbB, nHotB);
+template <typename T>
+void cuda_UnPooling_ForwardPass(T *input_features, T *output_features,
+                                Int nPlanes, Int input_stride,
+                                Int output_stride, RuleBook _rules) { // launches UnPooling_fp with 32 blocks of 32x32 threads (NTX = NTY = 32)
+  RULEBOOKITERATOR((UnPooling_fp<T, 32, 32><<<32, dim3(32, 32)>>>(
+      input_features, output_features, nPlanes, input_stride, output_stride,
+      rbB, nHotB)); // rbB and nHotB are not declared in this function; presumably RULEBOOKITERATOR provides them from _rules - confirm in RuleBookIterator.h
                   , )
 }
+template <typename T, Int NTX, Int NTY> // backward counterpart of UnPooling_fp: same rule staging, gradient flows from output rows to input rows
+__global__ void UnPooling_bp(T *d_input_features, T *d_output_features,
+                             Int nPlanes, Int input_stride, Int output_stride,
+                             Int *rules, Int nHot) {
+  __shared__ Int r[NTY * 2]; // per-block staging buffer for NTY rules, two Ints per rule
+  for (Int n = blockIdx.x * NTY; n < nHot; n += gridDim.x * NTY) { // each block handles NTY rules per pass
+    {
+      Int i = threadIdx.x + NTX * threadIdx.y; // flat thread index; assumes blockDim.x == NTX
+      if (i < NTY * 2 and i < 2 * (nHot - n)) // stay inside r and inside the rules that remain
+        r[i] = rules[2 * n + i];
+    }
+    __syncthreads(); // r must be completely written before any thread reads it
+    if (n + threadIdx.y < nHot) { // thread row y handles rule n + y, if it exists
+      Int i = r[2 * threadIdx.y + 1] * input_stride; // second Int of the rule selects the input-gradient row
+      Int o = r[2 * threadIdx.y] * output_stride; // first Int of the rule selects the output-gradient row
+      for (Int plane = threadIdx.x; plane < nPlanes; plane += NTX) // threads along x share the planes
+        d_input_features[i + plane] += d_output_features[o + plane]; // plain (non-atomic) accumulate; NOTE(review): relies on an input row not repeating within one launch - confirm
+    }
+    __syncthreads(); // nobody may overwrite r for the next pass while others still read it
+  }
+}

-template <typename T, Int Dimension>
-void cuda_UnPooling_updateGradInput(
-    /*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
-    /*long*/ at::Tensor poolSize,
-    /*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
-    /*cuda float*/ at::Tensor input_features,
-    /*cuda float*/ at::Tensor d_input_features,
-    /*cuda float*/ at::Tensor d_output_features, long nFeaturesToDrop) {
-
-  Int nPlanes = input_features.size(1) - nFeaturesToDrop;
-  auto _rules =
-      m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
-  d_input_features.resize_as_(input_features);
-  d_input_features.zero_();
-
-  auto diF = d_input_features.data<T>() + nFeaturesToDrop;
-  auto doF = d_output_features.data<T>();
-  RULEBOOKITERATOR(
-      cuda_UnPooling_BackwardPass<T>(diF, doF, nPlanes, input_features.size(1),
-                                     d_output_features.size(1), rbB, nHotB);
+template <typename T>
+void cuda_UnPooling_BackwardPass(T *d_input_features, T *d_output_features,
+                                 Int nPlanes, Int input_stride,
+                                 Int output_stride, RuleBook _rules) { // launches UnPooling_bp with 32 blocks of 32x32 threads (NTX = NTY = 32)
+  RULEBOOKITERATOR((UnPooling_bp<T, 32, 32><<<32, dim3(32, 32)>>>(
+      d_input_features, d_output_features, nPlanes, input_stride, output_stride,
+      rbB, nHotB)); // rbB and nHotB are not declared in this function; presumably RULEBOOKITERATOR provides them from _rules - confirm in RuleBookIterator.h
                   , )
 }
--- a/sparseconvnet/SCN/CUDA/UnPooling.h
+++ b/sparseconvnet/SCN/CUDA/UnPooling.h
-// Copyright 2016-present, Facebook, Inc.
-// All rights reserved.
-//
-// This source code is licensed under the license found in the
-// LICENSE file in the root directory of this source tree.
-
-#ifndef CUDA_UNPOOLING_H
-#define CUDA_UNPOOLING_H
-
-// NTX must be >=2 so r is filled properly
-template <typename T, Int NTX, Int NTY>
-__global__ void UnPooling_fp(T *input_features, T *output_features,
-                             Int nPlanes, Int input_stride,
-                             Int output_stride, Int *rules, Int nHot) {
-  __shared__ Int r[NTY * 2];
-  for (Int n = blockIdx.x * NTY; n < nHot; n += gridDim.x * NTY) {
-    {
-      Int i = threadIdx.x + NTX * threadIdx.y;
-      if (i < NTY * 2 and i < 2 * (nHot - n))
-        r[i] = rules[2 * n + i];
-    }
-    __syncthreads();
-    if (n + threadIdx.y < nHot) {
-      Int i = r[2 * threadIdx.y + 1] * input_stride;
-      Int o = r[2 * threadIdx.y] * output_stride;
-      for (Int plane = threadIdx.x; plane < nPlanes; plane += NTX)
-        output_features[o + plane] += input_features[i + plane];
-    }
-    __syncthreads();
-  }
-}
-
-template <typename T>
-void cuda_UnPooling_ForwardPass(T *input_features, T *output_features,
-                                Int nPlanes, Int input_stride,
-                                Int output_stride, Int *rules, Int nHot) {
-  UnPooling_fp<T, 32, 32><<<32, dim3(32, 32)>>>(input_features, output_features,
-                                                nPlanes, input_stride,
-                                                output_stride, rules, nHot);
-}
-template <typename T, Int NTX, Int NTY>
-__global__ void UnPooling_bp(T *d_input_features, T *d_output_features,
-                             Int nPlanes, Int input_stride,
-                             Int output_stride, Int *rules, Int nHot) {
-  __shared__ Int r[NTY * 2];
-  for (Int n = blockIdx.x * NTY; n < nHot; n += gridDim.x * NTY) {
-    {
-      Int i = threadIdx.x + NTX * threadIdx.y;
-      if (i < NTY * 2 and i < 2 * (nHot - n))
-        r[i] = rules[2 * n + i];
-    }
-    __syncthreads();
-    if (n + threadIdx.y < nHot) {
-      Int i = r[2 * threadIdx.y + 1] * input_stride;
-      Int o = r[2 * threadIdx.y] * output_stride;
-      for (Int plane = threadIdx.x; plane < nPlanes; plane += NTX)
-        d_input_features[i + plane] += d_output_features[o + plane];
-    }
-    __syncthreads();
-  }
-}
-
-template <typename T>
-void cuda_UnPooling_BackwardPass(T *d_input_features, T *d_output_features,
-                                 Int nPlanes, Int input_stride,
-                                 Int output_stride, Int *rules, Int nHot) {
-  UnPooling_bp<T, 32, 32><<<32, dim3(32, 32)>>>(
-      d_input_features, d_output_features, nPlanes, input_stride, output_stride,
-      rules, nHot);
-}
-#endif /* CUDA_UNPOOLING_H */
--- a/sparseconvnet/SCN/Metadata/Metadata.cpp
+++ b/sparseconvnet/SCN/Metadata/Metadata.cpp
@@ -55,7 +55,7 @@ template <Int dimension> void Metadata<dimension>::clear() {
  inputLayerRuleBook.clear();
  validRuleBooks.clear();
  ruleBooks.clear();
-  fullConvolutionRuleBooks.clear();
+  fullConvolutionRuleBook.clear();
  sparseToDenseRuleBooks.clear();
  inputSGs = nullptr;
  inputSG = nullptr;
@@ -238,6 +238,53 @@ void Metadata<dimension>::sparsifyMetadata(Metadata<dimension> &mOut,
  }
 }

+template <Int dimension>
+void Metadata<dimension>::appendMetadata(Metadata<dimension> &mAdd,
+                                         /*long*/ at::Tensor spatialSize) { // appends mAdd's grids for spatialSize to this object's grids
+  auto p = LongTensorToPoint<dimension>(spatialSize);
+  auto &sgs1 = grids[p];
+  auto &sgs2 = mAdd.grids[p];
+  auto &nActive1 = nActive[p];
+  auto &nActive2 = mAdd.nActive[p];
+  Int bs1 = sgs1.size(); // number of grids held here before the append
+  Int bs2 = sgs2.size(); // number of grids being appended
+  sgs1.insert(sgs1.end(), sgs2.begin(), sgs2.end()); // copies mAdd's grids onto the end of this list
+  for (Int i = bs1; i < bs1 + bs2; ++i)
+    sgs1[i].ctr += nActive1; // shift each appended grid's ctr by the active count that was here before the append
+  nActive1 += nActive2;
+}
+
+template <Int dimension>
+at::Tensor
+Metadata<dimension>::sparsifyCompare(Metadata<dimension> &mReference,
+                                     Metadata<dimension> &mSparsified,
+                                     /*long*/ at::Tensor spatialSize) { // returns a tensor holding -1, 0 or +1 per entry of this object's grids
+  auto p = LongTensorToPoint<dimension>(spatialSize);
+  at::Tensor delta = torch::CPU(at::kFloat).zeros(nActive[p]); // CPU float tensor of nActive[p] zeros
+  float *deltaPtr = delta.data<float>();
+  auto &sgsReference = mReference.grids[p];
+  auto &sgsFull = grids[p];
+  auto &sgsSparsified = mSparsified.grids[p];
+  Int batchSize = sgsFull.size(); // NOTE(review): the reference and sparsified lists are indexed with the same sample index below - presumably they have at least this many grids; confirm
+  Int sample;
+  // The loop below is marked for OpenMP parallel execution over samples.
+#pragma omp parallel for private(sample)
+  for (sample = 0; sample < (Int)batchSize; ++sample) {
+    auto &sgReference = sgsReference[sample];
+    auto &sgFull = sgsFull[sample];
+    auto &sgSparsified = sgsSparsified[sample];
+    for (auto const &iter : sgFull.mp) { // visit every entry of this object's grid for the sample
+      bool gt = sgReference.mp.find(iter.first) != sgReference.mp.end(); // key is present in the reference grid
+      bool hot = sgSparsified.mp.find(iter.first) != sgSparsified.mp.end(); // key is present in the sparsified grid
+      if (gt and not hot)
+        deltaPtr[iter.second + sgFull.ctr] = -1; // in the reference but missing from the sparsified grid
+      if (hot and not gt)
+        deltaPtr[iter.second + sgFull.ctr] = +1; // in the sparsified grid but not in the reference
+    }
+  }
+  return delta;
+}
+
 // tensor is size[0] x .. x size[dimension-1] x size[dimension]
 // size[0] x .. x size[dimension-1] == spatial volume
 // size[dimension] == #feature planes
@@ -383,10 +430,9 @@ void Metadata<dimension>::blLayer(/*long*/ at::Tensor spatialSize,
                     coords.size(0), coords.size(1), mode, *inputNActive);
 }
 template <Int dimension>
-RuleBook &
-Metadata<dimension>::getSubmanifoldRuleBook(/*long*/ at::Tensor spatialSize,
-                                            /*long*/ at::Tensor size,
-                                            bool openMP) {
+RuleBook &Metadata<dimension>::getSubmanifoldRuleBook(
+    /*long*/ at::Tensor spatialSize,
+    /*long*/ at::Tensor size, bool openMP) {
  auto p = TwoLongTensorsToPoint<dimension>(spatialSize, size);
  auto &rb = validRuleBooks[p];
  if (rb.empty()) {
@@ -399,8 +445,8 @@ Metadata<dimension>::getSubmanifoldRuleBook(/*long*/ at::Tensor spatialSize,
  return rb;
 }
 template <Int dimension>
-RuleBook &
-Metadata<dimension>::getActivePoolingRuleBook(/*long*/ at::Tensor spatialSize) {
+RuleBook &Metadata<dimension>::getActivePoolingRuleBook(
+    /*long*/ at::Tensor spatialSize) {
  auto spatialSz = LongTensorToPoint<dimension>(spatialSize);
  auto &SGs = grids[spatialSz];
  auto &rb = activePoolingRuleBooks[spatialSz];
@@ -409,9 +455,8 @@ Metadata<dimension>::getActivePoolingRuleBook(/*long*/ at::Tensor spatialSize) {
  return rb;
 }
 template <Int dimension>
-RuleBook &
-Metadata<dimension>::getSparseToDenseRuleBook(/*long*/ at::Tensor spatialSize,
-                                              bool openMP) {
+RuleBook &Metadata<dimension>::getSparseToDenseRuleBook(
+    /*long*/ at::Tensor spatialSize, bool openMP) {
  auto ss = LongTensorToPoint<dimension>(spatialSize);
  auto &SGs = grids[ss];
  auto &rb = sparseToDenseRuleBooks[ss];
@@ -426,8 +471,8 @@ Metadata<dimension>::getSparseToDenseRuleBook(/*long*/ at::Tensor spatialSize,
  return rb;
 }
 template <Int dimension>
-RuleBook &
-Metadata<dimension>::getRuleBook(/*long*/ at::Tensor inputSpatialSize,
+RuleBook &Metadata<dimension>::getRuleBook(
+    /*long*/ at::Tensor inputSpatialSize,
    /*long*/ at::Tensor outputSpatialSize,
    /*long*/ at::Tensor size,
    /*long*/ at::Tensor stride, bool openMP) {
@@ -458,8 +503,7 @@ RuleBook &Metadata<dimension>::getFullConvolutionRuleBook(
    /*long*/ at::Tensor outputSpatialSize,
    /*long*/ at::Tensor size,
    /*long*/ at::Tensor stride, Metadata<dimension> &newM) {
-  auto p = ThreeLongTensorsToPoint<dimension>(inputSpatialSize, size, stride);
-  auto &rb = fullConvolutionRuleBooks[p];
+  auto &rb = newM.fullConvolutionRuleBook;
  if (rb.empty()) {
    newM.clear();
    auto iS = LongTensorToPoint<dimension>(inputSpatialSize);

--- a/sparseconvnet/SCN/Metadata/Metadata.h
+++ b/sparseconvnet/SCN/Metadata/Metadata.h
@@ -7,11 +7,13 @@
 #ifndef Metadata_H
 #define Metadata_H
 #include "32bits.h"
+#include <algorithm>
 #include <array>
 #include <chrono>
 #include <cstdint>
 #include <google/dense_hash_map>
 #include <iostream>
+#include <numeric>
 #include <random>
 #include <string>
 #include <unordered_map>
@@ -61,9 +63,7 @@ public:
                     IntArrayHash<3 * dimension>>
      ruleBooks;

-  std::unordered_map<Point<3 * dimension>, RuleBook,
-                     IntArrayHash<3 * dimension>>
-      fullConvolutionRuleBooks;
+  RuleBook fullConvolutionRuleBook;

  std::unordered_map<Point<dimension>, RuleBook, IntArrayHash<dimension>>
      sparseToDenseRuleBooks;
@@ -97,6 +97,13 @@ public:
                        /*byte*/ at::Tensor filter,
                        /*long*/ at::Tensor cuSum);

+  void appendMetadata(Metadata<dimension> &mAdd,
+                      /*long*/ at::Tensor spatialSize);
+
+  at::Tensor sparsifyCompare(Metadata<dimension> &mReference,
+                             Metadata<dimension> &mSparsified,
+                             /*long*/ at::Tensor spatialSize);
+
  // tensor is size[0] x .. x size[dimension-1] x size[dimension]
  // size[0] x .. x size[dimension-1] == spatial volume
  // size[dimension] == #feature planes

--- a/sparseconvnet/SCN/cuda.cu
+++ b/sparseconvnet/SCN/cuda.cu
+#include <ATen/ATen.h>
+#include <Metadata/Metadata.h>
+
+#include "CUDA/ActivePooling.cu"
+#include "CUDA/AffineReluTrivialConvolution.cu"
+#include "CUDA/AveragePooling.cu"
+#include "CUDA/BatchNormalization.cu"
+#include "CUDA/BatchwiseMultiplicativeDropout.cu"
+#include "CUDA/Convolution.cu"
+#include "CUDA/Deconvolution.cu"
+#include "CUDA/IOLayers.cu"
+#include "CUDA/LeakyReLU.cu"
+#include "CUDA/MaxPooling.cu"
+#include "CUDA/SparseToDense.cu"
+#include "CUDA/UnPooling.cu"
+// Explicit instantiations for T = float of the ActivePooling pass templates (presumably defined in CUDA/ActivePooling.cu, included above).
+template void ActivePooling_ForwardPass<float>(float *input_features,
+                                               float *output_features,
+                                               Int batchSize, Int maxActive,
+                                               Int nPlanes, Int *rules,
+                                               bool average);
+template void ActivePooling_BackwardPass<float>(float *d_input_features,
+                                                float *d_output_features,
+                                                Int batchSize, Int maxActive,
+                                                Int nPlanes, Int *rules,
+                                                bool average);
+// Explicit instantiations for T = float of the dAffineReluTrivialConvolution_* templates (presumably from CUDA/AffineReluTrivialConvolution.cu).
+template void dAffineReluTrivialConvolution_forward<float>(
+    float *inFeatures, float *outFeatures, float *affineWeight,
+    float *affineBias, float *convWeight, Int input_nPlanes, Int input_stride,
+    Int output_nPlanes, Int output_stride, Int nActive);
+template void dAffineReluTrivialConvolution_backward_dW<float>(
+    float *inFeatures, float *dInFeatures, float *dOutFeatures,
+    float *affineWeight, float *dAffineWeight, float *affineBias,
+    float *dAffineBias, float *convWeight, float *dConvWeight,
+    Int input_nPlanes, Int input_stride, Int output_nPlanes, Int output_stride,
+    Int nActive, bool additiveGrad);
+// Explicit instantiations for T = float of the AveragePooling pass templates (presumably from CUDA/AveragePooling.cu).
+template void cuda_AveragePooling_ForwardPass<float>(
+    float *input_features, float *output_features, Int nPlanes,
+    Int input_stride, Int output_stride, RuleBook _rules, Int filterVolume);
+template void cuda_AveragePooling_BackwardPass<float>(
+    float *d_input_features, float *d_output_features, Int nPlanes,
+    Int input_stride, Int output_stride, RuleBook _rules, Int filterVolume);
+// Explicit instantiations for T = float of the convolution templates (presumably from CUDA/Convolution.cu).
+template void Convolution_fp_bias<float>(float *of, float *b, Int op,
+                                         Int nActive);
+template void Convolution_bp_bias<float>(float *matrix, float *target,
+                                         Int nRows, Int nColumns, Int nCOLUMNS);
+template double dConvolution_forward2<float>(
+    float *inFeatures, float *outFeatures, float *w, RuleBook _rules,
+    Int input_nPlanes, Int input_stride, Int output_nPlanes, Int output_stride);
+// Backward counterpart of dConvolution_forward2 (takes dInFeatures, dOutFeatures and dw).
+template void dConvolution_backward_dW2<float>(
+    float *inFeatures, float *dInFeatures, float *dOutFeatures, float *w,
+    float *dw, RuleBook _rules, Int input_nPlanes, Int input_stride,
+    Int output_nPlanes, Int output_stride);
+// Explicit instantiations for T = float of the deconvolution templates (presumably from CUDA/Deconvolution.cu).
+template double dDeconvolution_forward2<float>(
+    float *inFeatures, float *outFeatures, float *w, RuleBook _rules,
+    Int input_nPlanes, Int input_stride, Int output_nPlanes, Int output_stride);
+// Backward counterpart of dDeconvolution_forward2 (takes dInFeatures, dOutFeatures and dw).
+template void dDeconvolution_backward_dW2<float>(
+    float *inFeatures, float *dInFeatures, float *dOutFeatures, float *w,
+    float *dw, RuleBook _rules, Int input_nPlanes, Int input_stride,
+    Int output_nPlanes, Int output_stride);
+// Explicit instantiations for T = float of the InputLayer templates (presumably from CUDA/IOLayers.cu).
+template void InputLayer_fp<float>(float *input_features,
+                                   float *output_features, Int nRows,
+                                   Int maxActive, Int nPlanes, Int *rules_cpu,
+                                   Int *rules_gpu, bool average);
+template void InputLayer_bp<float>(float *d_input_features,
+                                   float *d_output_features, Int nRows,
+                                   Int maxActive, Int nPlanes, Int *rules_cpu,
+                                   Int *rules_gpu, bool average);
+// Explicit instantiations for T = float: LeakyReLU, MaxPooling, SparseToDense and UnPooling templates, in that order.
+template void LeakyReLU_fp<float>(float *input_features, float *output_features,
+                                  Int n, float alpha);
+template void LeakyReLU_bp<float>(float *input_features,
+                                  float *d_input_features,
+                                  float *output_features, Int n, float alpha);
+template void cuda_MaxPooling_ForwardPass<float>(float *input_features,
+                                                 float *output_features,
+                                                 Int nPlanes, Int input_stride,
+                                                 Int output_stride,
+                                                 RuleBook _rules);
+template void cuda_MaxPooling_BackwardPass<float>(
+    float *input_features, float *d_input_features, float *output_features,
+    float *d_output_features, Int nPlanes, Int input_stride, Int output_stride,
+    RuleBook _rules);
+template void cuda_SparseToDense_ForwardPass<float>(float *input_features,
+                                                    float *output_features,
+                                                    Int nPlanes,
+                                                    Int spatialVolume,
+                                                    RuleBook _rules);
+template void cuda_SparseToDense_BackwardPass<float>(float *d_input_features,
+                                                     float *d_output_features,
+                                                     Int nPlanes,
+                                                     Int spatialVolume,
+                                                     RuleBook _rules);
+template void cuda_UnPooling_ForwardPass<float>(float *input_features,
+                                                float *output_features,
+                                                Int nPlanes, Int input_stride,
+                                                Int output_stride,
+                                                RuleBook _rules); // template body is in CUDA/UnPooling.cu
+template void cuda_UnPooling_BackwardPass<float>(float *d_input_features,
+                                                 float *d_output_features,
+                                                 Int nPlanes, Int input_stride,
+                                                 Int output_stride,
+                                                 RuleBook _rules); // template body is in CUDA/UnPooling.cu
+// Explicit instantiations for T = float of bn_f / bn_b (presumably the batch-normalization passes from CUDA/BatchNormalization.cu).
+template void bn_f<float>(float *iF, float *oF, Int nPlanes, Int input_stride,
+                          Int output_stride, Int nActive, float *saveMean,
+                          float *saveInvStd, float *runningMean,
+                          float *runningVar, float *weight, float *bias,
+                          float eps, float momentum, bool train,
+                          float leakiness);
+template void bn_b<float>(float *input_features, float *d_input_features,
+                          float *output_features, float *d_output_features,
+                          Int nPlanes, Int input_stride, Int output_stride,
+                          Int nActive, float *saveMean, float *saveInvStd,
+                          float *runningMean, float *runningVar, float *weight,
+                          float *bias, float *d_weight, float *d_bias,
+                          float leakiness);
+// Explicit instantiations for T = float of bmd_f / bmd_b (presumably from CUDA/BatchwiseMultiplicativeDropout.cu).
+template void bmd_f<float>(float *input_features, float *output_features,
+                           float *noise, Int nActive, Int nPlanes, float alpha);
+template void bmd_b<float>(float *input_features, float *d_input_features,
+                           float *d_output_features, float *noise, Int nActive,
+                           Int nPlanes, float alpha);
--- a/sparseconvnet/SCN/instantiate_cpu.cpp
+++ b/sparseconvnet/SCN/instantiate_cpu.cpp
-
-// Copyright 2016-present, Facebook, Inc.
-// All rights reserved.
-//
-// This source code is licensed under the license found in the
-// LICENSE file in the root directory of this source tree.
-
-#define ENABLE_OPENMP YES
-#if defined(ENABLE_OPENMP)
-#include <omp.h>
-#endif
-
-#include <torch/torch.h>
-
-#include "Metadata/Metadata.cpp"
-template class Metadata<1>;
-template class Metadata<2>;
-template class Metadata<3>;
-template class Metadata<4>;
-//template class Metadata<5>;
-//template class Metadata<6>;
-//template class Metadata<7>;
-//template class Metadata<8>;
-//template class Metadata<9>;
-//template class Metadata<10>;
-#include "CPU/ActivePooling.cpp"
-#include "CPU/AffineReluTrivialConvolution.cpp"
-#include "CPU/AveragePooling.cpp"
-#include "CPU/BatchNormalization.cpp"
-#include "CPU/BatchwiseMultiplicativeDropout.cpp"
-#include "CPU/Convolution.cpp"
-#include "CPU/Deconvolution.cpp"
-#include "CPU/IOLayers.cpp"
-#include "CPU/LeakyReLU.cpp"
-#include "CPU/MaxPooling.cpp"
-#include "CPU/NetworkInNetwork.cpp"
-#include "CPU/SparseToDense.cpp"
-#include "CPU/UnPooling.cpp"
-//#include "misc/drawCurve.cpp"
-
-
-template
-double cpu_AffineReluTrivialConvolution_updateOutput<float>(at::Tensor input_features,
-                                                   at::Tensor output_features,
-                                                   at::Tensor affineWeight,
-                                                   at::Tensor affineBias,
-                                                   at::Tensor convWeight);
-template
-void cpu_AffineReluTrivialConvolution_backward<float>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor affineWeight,
-    at::Tensor d_affineWeight, at::Tensor affineBias, at::Tensor d_affineBias,
-    at::Tensor convWeight, at::Tensor d_convWeight, bool additiveGrad);
-template
-void cpu_BatchNormalization_updateOutput<float>(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, float eps, float momentum, bool train,
-    float leakiness);
-template
-void cpu_BatchNormalizationInTensor_updateOutput<float>(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, float eps, float momentum, bool train,
-    float leakiness);
-template
-void cpu_BatchNormalization_backward<float>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor output_features, at::Tensor d_output_features,
-    at::Tensor saveMean, at::Tensor saveInvStd, at::Tensor runningMean,
-    at::Tensor runningVar, at::Tensor weight, at::Tensor bias,
-    at::Tensor d_weight, at::Tensor d_bias, float leakiness);
-template
-void cpu_BatchwiseMultiplicativeDropout_updateOutput<float>(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor noise,
-                                                     float alpha);
-template
-void cpu_BatchwiseMultiplicativeDropout_updateGradInput<float>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor noise, float alpha);
-template
-void cpu_LeakyReLU_updateOutput<float>(at::Tensor input_features,
-                                at::Tensor output_features, float alpha);
-template
-void cpu_LeakyReLU_updateGradInput<float>(at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features, float alpha);
-template
-double cpu_NetworkInNetwork_updateOutput<float>(at::Tensor input_features,
-                                         at::Tensor output_features,
-                                         at::Tensor weight, at::Tensor bias);
-template
-void cpu_NetworkInNetwork_updateGradInput<float>(at::Tensor d_input_features,
-                                          at::Tensor d_output_features,
-                                          at::Tensor weight);
-template
-void cpu_NetworkInNetwork_accGradParameters<float>(at::Tensor input_features,
-                                            at::Tensor d_output_features,
-                                            at::Tensor d_weight,
-                                            at::Tensor d_bias);
-template
-double cpu_AffineReluTrivialConvolution_updateOutput<double>(at::Tensor input_features,
-                                                   at::Tensor output_features,
-                                                   at::Tensor affineWeight,
-                                                   at::Tensor affineBias,
-                                                   at::Tensor convWeight);
-template
-void cpu_AffineReluTrivialConvolution_backward<double>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor affineWeight,
-    at::Tensor d_affineWeight, at::Tensor affineBias, at::Tensor d_affineBias,
-    at::Tensor convWeight, at::Tensor d_convWeight, bool additiveGrad);
-template
-void cpu_BatchNormalization_updateOutput<double>(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, double eps, double momentum, bool train,
-    double leakiness);
-template
-void cpu_BatchNormalizationInTensor_updateOutput<double>(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, double eps, double momentum, bool train,
-    double leakiness);
-template
-void cpu_BatchNormalization_backward<double>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor output_features, at::Tensor d_output_features,
-    at::Tensor saveMean, at::Tensor saveInvStd, at::Tensor runningMean,
-    at::Tensor runningVar, at::Tensor weight, at::Tensor bias,
-    at::Tensor d_weight, at::Tensor d_bias, double leakiness);
-template
-void cpu_BatchwiseMultiplicativeDropout_updateOutput<double>(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor noise,
-                                                     float alpha);
-template
-void cpu_BatchwiseMultiplicativeDropout_updateGradInput<double>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor noise, float alpha);
-template
-void cpu_LeakyReLU_updateOutput<double>(at::Tensor input_features,
-                                at::Tensor output_features, float alpha);
-template
-void cpu_LeakyReLU_updateGradInput<double>(at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features, float alpha);
-template
-double cpu_NetworkInNetwork_updateOutput<double>(at::Tensor input_features,
-                                         at::Tensor output_features,
-                                         at::Tensor weight, at::Tensor bias);
-template
-void cpu_NetworkInNetwork_updateGradInput<double>(at::Tensor d_input_features,
-                                          at::Tensor d_output_features,
-                                          at::Tensor weight);
-template
-void cpu_NetworkInNetwork_accGradParameters<double>(at::Tensor input_features,
-                                            at::Tensor d_output_features,
-                                            at::Tensor d_weight,
-                                            at::Tensor d_bias);
-
-template
-void cpu_ActivePooling_updateOutput<float,1>(at::Tensor inputSize,
-                                    Metadata<1> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cpu_ActivePooling_updateGradInput<float,1>(
-    at::Tensor inputSize, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cpu_AveragePooling_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_AveragePooling_updateGradInput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cpu_Convolution_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Convolution_backward<float,1>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<1> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_SubmanifoldConvolution_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<1> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cpu_SubmanifoldConvolution_backward<float,1>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<1> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cpu_FullConvolution_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &mIn,
-    Metadata<1> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_FullConvolution_backward<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &mIn,
-    Metadata<1> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_RandomizedStrideConvolution_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_RandomizedStrideConvolution_backward<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_Deconvolution_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Deconvolution_backward<float,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<1> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cpu_InputLayer_updateOutput<float,1>(Metadata<1> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cpu_InputLayer_updateGradInput<float,1>(Metadata<1> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cpu_OutputLayer_updateOutput<float,1>(Metadata<1> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cpu_OutputLayer_updateGradInput<float,1>(Metadata<1> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cpu_BLInputLayer_updateOutput<float,1>(Metadata<1> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cpu_BLInputLayer_updateGradInput<float,1>(Metadata<1> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cpu_BLOutputLayer_updateOutput<float,1>(Metadata<1> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cpu_BLOutputLayer_updateGradInput<float,1>(Metadata<1> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_MaxPooling_updateOutput<float,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<1> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cpu_MaxPooling_updateGradInput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateGradInput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_SparseToDense_updateOutput<float,1>(at::Tensor inputSize,
-                                    Metadata<1> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cpu_SparseToDense_updateGradInput<float,1>(at::Tensor inputSize,
-                                       Metadata<1> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_UnPooling_updateOutput<float,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<1> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cpu_UnPooling_updateGradInput<float,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<1> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-template
-void cpu_ActivePooling_updateOutput<double,1>(at::Tensor inputSize,
-                                    Metadata<1> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cpu_ActivePooling_updateGradInput<double,1>(
-    at::Tensor inputSize, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cpu_AveragePooling_updateOutput<double,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_AveragePooling_updateGradInput<double,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cpu_Convolution_updateOutput<double,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Convolution_backward<double,1>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<1> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_SubmanifoldConvolution_updateOutput<double,1>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<1> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cpu_SubmanifoldConvolution_backward<double,1>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<1> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cpu_FullConvolution_updateOutput<double,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &mIn,
-    Metadata<1> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_FullConvolution_backward<double,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &mIn,
-    Metadata<1> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_RandomizedStrideConvolution_updateOutput<double,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_RandomizedStrideConvolution_backward<double,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_Deconvolution_updateOutput<double,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Deconvolution_backward<double,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<1> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cpu_InputLayer_updateOutput<double,1>(Metadata<1> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cpu_InputLayer_updateGradInput<double,1>(Metadata<1> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cpu_OutputLayer_updateOutput<double,1>(Metadata<1> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cpu_OutputLayer_updateGradInput<double,1>(Metadata<1> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cpu_BLInputLayer_updateOutput<double,1>(Metadata<1> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cpu_BLInputLayer_updateGradInput<double,1>(Metadata<1> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cpu_BLOutputLayer_updateOutput<double,1>(Metadata<1> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cpu_BLOutputLayer_updateGradInput<double,1>(Metadata<1> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_MaxPooling_updateOutput<double,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<1> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cpu_MaxPooling_updateGradInput<double,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateOutput<double,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateGradInput<double,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_SparseToDense_updateOutput<double,1>(at::Tensor inputSize,
-                                    Metadata<1> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cpu_SparseToDense_updateGradInput<double,1>(at::Tensor inputSize,
-                                       Metadata<1> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_UnPooling_updateOutput<double,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<1> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cpu_UnPooling_updateGradInput<double,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<1> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-template
-void cpu_ActivePooling_updateOutput<float,2>(at::Tensor inputSize,
-                                    Metadata<2> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cpu_ActivePooling_updateGradInput<float,2>(
-    at::Tensor inputSize, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cpu_AveragePooling_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_AveragePooling_updateGradInput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cpu_Convolution_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Convolution_backward<float,2>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<2> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_SubmanifoldConvolution_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<2> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cpu_SubmanifoldConvolution_backward<float,2>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<2> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cpu_FullConvolution_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &mIn,
-    Metadata<2> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_FullConvolution_backward<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &mIn,
-    Metadata<2> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_RandomizedStrideConvolution_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_RandomizedStrideConvolution_backward<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_Deconvolution_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Deconvolution_backward<float,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<2> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cpu_InputLayer_updateOutput<float,2>(Metadata<2> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cpu_InputLayer_updateGradInput<float,2>(Metadata<2> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cpu_OutputLayer_updateOutput<float,2>(Metadata<2> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cpu_OutputLayer_updateGradInput<float,2>(Metadata<2> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cpu_BLInputLayer_updateOutput<float,2>(Metadata<2> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cpu_BLInputLayer_updateGradInput<float,2>(Metadata<2> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cpu_BLOutputLayer_updateOutput<float,2>(Metadata<2> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cpu_BLOutputLayer_updateGradInput<float,2>(Metadata<2> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_MaxPooling_updateOutput<float,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<2> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cpu_MaxPooling_updateGradInput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateGradInput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_SparseToDense_updateOutput<float,2>(at::Tensor inputSize,
-                                    Metadata<2> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cpu_SparseToDense_updateGradInput<float,2>(at::Tensor inputSize,
-                                       Metadata<2> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_UnPooling_updateOutput<float,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<2> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cpu_UnPooling_updateGradInput<float,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<2> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-template
-void cpu_ActivePooling_updateOutput<double,2>(at::Tensor inputSize,
-                                    Metadata<2> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cpu_ActivePooling_updateGradInput<double,2>(
-    at::Tensor inputSize, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cpu_AveragePooling_updateOutput<double,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_AveragePooling_updateGradInput<double,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cpu_Convolution_updateOutput<double,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Convolution_backward<double,2>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<2> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_SubmanifoldConvolution_updateOutput<double,2>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<2> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cpu_SubmanifoldConvolution_backward<double,2>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<2> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cpu_FullConvolution_updateOutput<double,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &mIn,
-    Metadata<2> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_FullConvolution_backward<double,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &mIn,
-    Metadata<2> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_RandomizedStrideConvolution_updateOutput<double,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_RandomizedStrideConvolution_backward<double,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_Deconvolution_updateOutput<double,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Deconvolution_backward<double,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<2> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cpu_InputLayer_updateOutput<double,2>(Metadata<2> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cpu_InputLayer_updateGradInput<double,2>(Metadata<2> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cpu_OutputLayer_updateOutput<double,2>(Metadata<2> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cpu_OutputLayer_updateGradInput<double,2>(Metadata<2> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cpu_BLInputLayer_updateOutput<double,2>(Metadata<2> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cpu_BLInputLayer_updateGradInput<double,2>(Metadata<2> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cpu_BLOutputLayer_updateOutput<double,2>(Metadata<2> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cpu_BLOutputLayer_updateGradInput<double,2>(Metadata<2> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_MaxPooling_updateOutput<double,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<2> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cpu_MaxPooling_updateGradInput<double,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateOutput<double,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateGradInput<double,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_SparseToDense_updateOutput<double,2>(at::Tensor inputSize,
-                                    Metadata<2> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cpu_SparseToDense_updateGradInput<double,2>(at::Tensor inputSize,
-                                       Metadata<2> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_UnPooling_updateOutput<double,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<2> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cpu_UnPooling_updateGradInput<double,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<2> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-template
-void cpu_ActivePooling_updateOutput<float,3>(at::Tensor inputSize,
-                                    Metadata<3> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cpu_ActivePooling_updateGradInput<float,3>(
-    at::Tensor inputSize, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cpu_AveragePooling_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_AveragePooling_updateGradInput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cpu_Convolution_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Convolution_backward<float,3>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<3> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_SubmanifoldConvolution_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<3> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cpu_SubmanifoldConvolution_backward<float,3>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<3> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cpu_FullConvolution_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &mIn,
-    Metadata<3> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_FullConvolution_backward<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &mIn,
-    Metadata<3> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_RandomizedStrideConvolution_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_RandomizedStrideConvolution_backward<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_Deconvolution_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Deconvolution_backward<float,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<3> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cpu_InputLayer_updateOutput<float,3>(Metadata<3> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cpu_InputLayer_updateGradInput<float,3>(Metadata<3> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cpu_OutputLayer_updateOutput<float,3>(Metadata<3> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cpu_OutputLayer_updateGradInput<float,3>(Metadata<3> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cpu_BLInputLayer_updateOutput<float,3>(Metadata<3> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cpu_BLInputLayer_updateGradInput<float,3>(Metadata<3> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cpu_BLOutputLayer_updateOutput<float,3>(Metadata<3> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cpu_BLOutputLayer_updateGradInput<float,3>(Metadata<3> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_MaxPooling_updateOutput<float,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<3> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cpu_MaxPooling_updateGradInput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateGradInput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_SparseToDense_updateOutput<float,3>(at::Tensor inputSize,
-                                    Metadata<3> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cpu_SparseToDense_updateGradInput<float,3>(at::Tensor inputSize,
-                                       Metadata<3> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_UnPooling_updateOutput<float,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<3> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cpu_UnPooling_updateGradInput<float,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<3> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-template
-void cpu_ActivePooling_updateOutput<double,3>(at::Tensor inputSize,
-                                    Metadata<3> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cpu_ActivePooling_updateGradInput<double,3>(
-    at::Tensor inputSize, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cpu_AveragePooling_updateOutput<double,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_AveragePooling_updateGradInput<double,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cpu_Convolution_updateOutput<double,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Convolution_backward<double,3>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<3> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_SubmanifoldConvolution_updateOutput<double,3>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<3> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cpu_SubmanifoldConvolution_backward<double,3>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<3> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cpu_FullConvolution_updateOutput<double,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &mIn,
-    Metadata<3> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_FullConvolution_backward<double,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &mIn,
-    Metadata<3> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_RandomizedStrideConvolution_updateOutput<double,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_RandomizedStrideConvolution_backward<double,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_Deconvolution_updateOutput<double,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Deconvolution_backward<double,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<3> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cpu_InputLayer_updateOutput<double,3>(Metadata<3> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cpu_InputLayer_updateGradInput<double,3>(Metadata<3> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cpu_OutputLayer_updateOutput<double,3>(Metadata<3> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cpu_OutputLayer_updateGradInput<double,3>(Metadata<3> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cpu_BLInputLayer_updateOutput<double,3>(Metadata<3> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cpu_BLInputLayer_updateGradInput<double,3>(Metadata<3> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cpu_BLOutputLayer_updateOutput<double,3>(Metadata<3> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cpu_BLOutputLayer_updateGradInput<double,3>(Metadata<3> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_MaxPooling_updateOutput<double,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<3> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cpu_MaxPooling_updateGradInput<double,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateOutput<double,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateGradInput<double,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_SparseToDense_updateOutput<double,3>(at::Tensor inputSize,
-                                    Metadata<3> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cpu_SparseToDense_updateGradInput<double,3>(at::Tensor inputSize,
-                                       Metadata<3> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_UnPooling_updateOutput<double,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<3> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cpu_UnPooling_updateGradInput<double,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<3> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-template
-void cpu_ActivePooling_updateOutput<float,4>(at::Tensor inputSize,
-                                    Metadata<4> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cpu_ActivePooling_updateGradInput<float,4>(
-    at::Tensor inputSize, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cpu_AveragePooling_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_AveragePooling_updateGradInput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cpu_Convolution_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Convolution_backward<float,4>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<4> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_SubmanifoldConvolution_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<4> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cpu_SubmanifoldConvolution_backward<float,4>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<4> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cpu_FullConvolution_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &mIn,
-    Metadata<4> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_FullConvolution_backward<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &mIn,
-    Metadata<4> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_RandomizedStrideConvolution_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_RandomizedStrideConvolution_backward<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_Deconvolution_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Deconvolution_backward<float,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<4> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cpu_InputLayer_updateOutput<float,4>(Metadata<4> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cpu_InputLayer_updateGradInput<float,4>(Metadata<4> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cpu_OutputLayer_updateOutput<float,4>(Metadata<4> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cpu_OutputLayer_updateGradInput<float,4>(Metadata<4> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cpu_BLInputLayer_updateOutput<float,4>(Metadata<4> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cpu_BLInputLayer_updateGradInput<float,4>(Metadata<4> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cpu_BLOutputLayer_updateOutput<float,4>(Metadata<4> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cpu_BLOutputLayer_updateGradInput<float,4>(Metadata<4> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_MaxPooling_updateOutput<float,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<4> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cpu_MaxPooling_updateGradInput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateGradInput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_SparseToDense_updateOutput<float,4>(at::Tensor inputSize,
-                                    Metadata<4> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cpu_SparseToDense_updateGradInput<float,4>(at::Tensor inputSize,
-                                       Metadata<4> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_UnPooling_updateOutput<float,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<4> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cpu_UnPooling_updateGradInput<float,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<4> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-template
-void cpu_ActivePooling_updateOutput<double,4>(at::Tensor inputSize,
-                                    Metadata<4> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cpu_ActivePooling_updateGradInput<double,4>(
-    at::Tensor inputSize, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cpu_AveragePooling_updateOutput<double,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_AveragePooling_updateGradInput<double,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cpu_Convolution_updateOutput<double,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Convolution_backward<double,4>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<4> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_SubmanifoldConvolution_updateOutput<double,4>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<4> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cpu_SubmanifoldConvolution_backward<double,4>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<4> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cpu_FullConvolution_updateOutput<double,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &mIn,
-    Metadata<4> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_FullConvolution_backward<double,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &mIn,
-    Metadata<4> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_RandomizedStrideConvolution_updateOutput<double,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_RandomizedStrideConvolution_backward<double,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cpu_Deconvolution_updateOutput<double,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cpu_Deconvolution_backward<double,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<4> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cpu_InputLayer_updateOutput<double,4>(Metadata<4> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cpu_InputLayer_updateGradInput<double,4>(Metadata<4> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cpu_OutputLayer_updateOutput<double,4>(Metadata<4> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cpu_OutputLayer_updateGradInput<double,4>(Metadata<4> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cpu_BLInputLayer_updateOutput<double,4>(Metadata<4> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cpu_BLInputLayer_updateGradInput<double,4>(Metadata<4> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cpu_BLOutputLayer_updateOutput<double,4>(Metadata<4> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cpu_BLOutputLayer_updateGradInput<double,4>(Metadata<4> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_MaxPooling_updateOutput<double,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<4> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cpu_MaxPooling_updateGradInput<double,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateOutput<double,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cpu_RandomizedStrideMaxPooling_updateGradInput<double,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cpu_SparseToDense_updateOutput<double,4>(at::Tensor inputSize,
-                                    Metadata<4> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cpu_SparseToDense_updateGradInput<double,4>(at::Tensor inputSize,
-                                       Metadata<4> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cpu_UnPooling_updateOutput<double,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<4> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cpu_UnPooling_updateGradInput<double,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<4> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
--- a/sparseconvnet/SCN/instantiate_cuda.cu
+++ b/sparseconvnet/SCN/instantiate_cuda.cu
-
-// Copyright 2016-present, Facebook, Inc.
-// All rights reserved.
-//
-// This source code is licensed under the license found in the
-// LICENSE file in the root directory of this source tree.
-
-#define ENABLE_OPENMP YES
-#if defined(ENABLE_OPENMP)
-#include <omp.h>
-#endif
-
-#include <ATen/ATen.h>
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-#include "Metadata/Metadata.h"
-#include "CUDA/ActivePooling.cu"
-#include "CUDA/AffineReluTrivialConvolution.cu"
-#include "CUDA/AveragePooling.cu"
-#include "CUDA/BatchNormalization.cu"
-#include "CUDA/BatchwiseMultiplicativeDropout.cu"
-#include "CUDA/Convolution.cu"
-#include "CUDA/Deconvolution.cu"
-#include "CUDA/IOLayers.cu"
-#include "CUDA/LeakyReLU.cu"
-#include "CUDA/MaxPooling.cu"
-#include "CUDA/NetworkInNetwork.cu"
-#include "CUDA/SparseToDense.cu"
-#include "CUDA/UnPooling.cu"
-template
-double cuda_AffineReluTrivialConvolution_updateOutput<float>(at::Tensor input_features,
-                                                   at::Tensor output_features,
-                                                   at::Tensor affineWeight,
-                                                   at::Tensor affineBias,
-                                                   at::Tensor convWeight);
-template
-void cuda_AffineReluTrivialConvolution_backward<float>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor affineWeight,
-    at::Tensor d_affineWeight, at::Tensor affineBias, at::Tensor d_affineBias,
-    at::Tensor convWeight, at::Tensor d_convWeight, bool additiveGrad);
-template
-void cuda_BatchNormalization_updateOutput<float>(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, float eps, float momentum, bool train,
-    float leakiness);
-template
-void cuda_BatchNormalizationInTensor_updateOutput<float>(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, float eps, float momentum, bool train,
-    float leakiness);
-template
-void cuda_BatchNormalization_backward<float>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor output_features, at::Tensor d_output_features,
-    at::Tensor saveMean, at::Tensor saveInvStd, at::Tensor runningMean,
-    at::Tensor runningVar, at::Tensor weight, at::Tensor bias,
-    at::Tensor d_weight, at::Tensor d_bias, float leakiness);
-template
-void cuda_BatchwiseMultiplicativeDropout_updateOutput<float>(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor noise,
-                                                     float alpha);
-template
-void cuda_BatchwiseMultiplicativeDropout_updateGradInput<float>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor noise, float alpha);
-template
-void cuda_LeakyReLU_updateOutput<float>(at::Tensor input_features,
-                                at::Tensor output_features, float alpha);
-template
-void cuda_LeakyReLU_updateGradInput<float>(at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features, float alpha);
-template
-double cuda_NetworkInNetwork_updateOutput<float>(at::Tensor input_features,
-                                         at::Tensor output_features,
-                                         at::Tensor weight, at::Tensor bias);
-template
-void cuda_NetworkInNetwork_updateGradInput<float>(at::Tensor d_input_features,
-                                          at::Tensor d_output_features,
-                                          at::Tensor weight);
-template
-void cuda_NetworkInNetwork_accGradParameters<float>(at::Tensor input_features,
-                                            at::Tensor d_output_features,
-                                            at::Tensor d_weight,
-                                            at::Tensor d_bias);
-
-template
-void cuda_ActivePooling_updateOutput<float,1>(at::Tensor inputSize,
-                                    Metadata<1> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cuda_ActivePooling_updateGradInput<float,1>(
-    at::Tensor inputSize, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cuda_AveragePooling_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cuda_AveragePooling_updateGradInput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cuda_Convolution_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_Convolution_backward<float,1>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<1> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_SubmanifoldConvolution_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<1> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cuda_SubmanifoldConvolution_backward<float,1>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<1> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cuda_FullConvolution_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &mIn,
-    Metadata<1> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_FullConvolution_backward<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &mIn,
-    Metadata<1> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_RandomizedStrideConvolution_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_RandomizedStrideConvolution_backward<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_Deconvolution_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_Deconvolution_backward<float,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<1> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cuda_InputLayer_updateOutput<float,1>(Metadata<1> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cuda_InputLayer_updateGradInput<float,1>(Metadata<1> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cuda_OutputLayer_updateOutput<float,1>(Metadata<1> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cuda_OutputLayer_updateGradInput<float,1>(Metadata<1> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cuda_BLInputLayer_updateOutput<float,1>(Metadata<1> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cuda_BLInputLayer_updateGradInput<float,1>(Metadata<1> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cuda_BLOutputLayer_updateOutput<float,1>(Metadata<1> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cuda_BLOutputLayer_updateGradInput<float,1>(Metadata<1> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cuda_MaxPooling_updateOutput<float,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<1> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cuda_MaxPooling_updateGradInput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cuda_RandomizedStrideMaxPooling_updateOutput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cuda_RandomizedStrideMaxPooling_updateGradInput<float,1>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<1> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cuda_SparseToDense_updateOutput<float,1>(at::Tensor inputSize,
-                                    Metadata<1> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cuda_SparseToDense_updateGradInput<float,1>(at::Tensor inputSize,
-                                       Metadata<1> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cuda_UnPooling_updateOutput<float,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<1> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cuda_UnPooling_updateGradInput<float,1>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<1> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-template
-void cuda_ActivePooling_updateOutput<float,2>(at::Tensor inputSize,
-                                    Metadata<2> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cuda_ActivePooling_updateGradInput<float,2>(
-    at::Tensor inputSize, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cuda_AveragePooling_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cuda_AveragePooling_updateGradInput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cuda_Convolution_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_Convolution_backward<float,2>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<2> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_SubmanifoldConvolution_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<2> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cuda_SubmanifoldConvolution_backward<float,2>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<2> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cuda_FullConvolution_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &mIn,
-    Metadata<2> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_FullConvolution_backward<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &mIn,
-    Metadata<2> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_RandomizedStrideConvolution_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_RandomizedStrideConvolution_backward<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_Deconvolution_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_Deconvolution_backward<float,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<2> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cuda_InputLayer_updateOutput<float,2>(Metadata<2> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cuda_InputLayer_updateGradInput<float,2>(Metadata<2> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cuda_OutputLayer_updateOutput<float,2>(Metadata<2> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cuda_OutputLayer_updateGradInput<float,2>(Metadata<2> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cuda_BLInputLayer_updateOutput<float,2>(Metadata<2> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cuda_BLInputLayer_updateGradInput<float,2>(Metadata<2> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cuda_BLOutputLayer_updateOutput<float,2>(Metadata<2> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cuda_BLOutputLayer_updateGradInput<float,2>(Metadata<2> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cuda_MaxPooling_updateOutput<float,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<2> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cuda_MaxPooling_updateGradInput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cuda_RandomizedStrideMaxPooling_updateOutput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cuda_RandomizedStrideMaxPooling_updateGradInput<float,2>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<2> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cuda_SparseToDense_updateOutput<float,2>(at::Tensor inputSize,
-                                    Metadata<2> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cuda_SparseToDense_updateGradInput<float,2>(at::Tensor inputSize,
-                                       Metadata<2> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cuda_UnPooling_updateOutput<float,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<2> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cuda_UnPooling_updateGradInput<float,2>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<2> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-template
-void cuda_ActivePooling_updateOutput<float,3>(at::Tensor inputSize,
-                                    Metadata<3> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cuda_ActivePooling_updateGradInput<float,3>(
-    at::Tensor inputSize, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cuda_AveragePooling_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cuda_AveragePooling_updateGradInput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cuda_Convolution_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_Convolution_backward<float,3>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<3> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_SubmanifoldConvolution_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<3> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cuda_SubmanifoldConvolution_backward<float,3>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<3> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cuda_FullConvolution_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &mIn,
-    Metadata<3> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_FullConvolution_backward<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &mIn,
-    Metadata<3> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_RandomizedStrideConvolution_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_RandomizedStrideConvolution_backward<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_Deconvolution_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_Deconvolution_backward<float,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<3> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cuda_InputLayer_updateOutput<float,3>(Metadata<3> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cuda_InputLayer_updateGradInput<float,3>(Metadata<3> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cuda_OutputLayer_updateOutput<float,3>(Metadata<3> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cuda_OutputLayer_updateGradInput<float,3>(Metadata<3> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cuda_BLInputLayer_updateOutput<float,3>(Metadata<3> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cuda_BLInputLayer_updateGradInput<float,3>(Metadata<3> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cuda_BLOutputLayer_updateOutput<float,3>(Metadata<3> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cuda_BLOutputLayer_updateGradInput<float,3>(Metadata<3> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cuda_MaxPooling_updateOutput<float,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<3> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cuda_MaxPooling_updateGradInput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cuda_RandomizedStrideMaxPooling_updateOutput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cuda_RandomizedStrideMaxPooling_updateGradInput<float,3>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<3> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cuda_SparseToDense_updateOutput<float,3>(at::Tensor inputSize,
-                                    Metadata<3> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cuda_SparseToDense_updateGradInput<float,3>(at::Tensor inputSize,
-                                       Metadata<3> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cuda_UnPooling_updateOutput<float,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<3> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cuda_UnPooling_updateGradInput<float,3>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<3> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-template
-void cuda_ActivePooling_updateOutput<float,4>(at::Tensor inputSize,
-                                    Metadata<4> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void cuda_ActivePooling_updateGradInput<float,4>(
-    at::Tensor inputSize, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void cuda_AveragePooling_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cuda_AveragePooling_updateGradInput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double cuda_Convolution_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_Convolution_backward<float,4>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<4> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_SubmanifoldConvolution_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<4> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void cuda_SubmanifoldConvolution_backward<float,4>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<4> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double cuda_FullConvolution_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &mIn,
-    Metadata<4> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_FullConvolution_backward<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &mIn,
-    Metadata<4> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_RandomizedStrideConvolution_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_RandomizedStrideConvolution_backward<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double cuda_Deconvolution_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void cuda_Deconvolution_backward<float,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<4> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void cuda_InputLayer_updateOutput<float,4>(Metadata<4> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void cuda_InputLayer_updateGradInput<float,4>(Metadata<4> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void cuda_OutputLayer_updateOutput<float,4>(Metadata<4> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void cuda_OutputLayer_updateGradInput<float,4>(Metadata<4> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void cuda_BLInputLayer_updateOutput<float,4>(Metadata<4> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void cuda_BLInputLayer_updateGradInput<float,4>(Metadata<4> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void cuda_BLOutputLayer_updateOutput<float,4>(Metadata<4> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void cuda_BLOutputLayer_updateGradInput<float,4>(Metadata<4> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cuda_MaxPooling_updateOutput<float,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<4> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void cuda_MaxPooling_updateGradInput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cuda_RandomizedStrideMaxPooling_updateOutput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void cuda_RandomizedStrideMaxPooling_updateGradInput<float,4>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<4> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void cuda_SparseToDense_updateOutput<float,4>(at::Tensor inputSize,
-                                    Metadata<4> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void cuda_SparseToDense_updateGradInput<float,4>(at::Tensor inputSize,
-                                       Metadata<4> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void cuda_UnPooling_updateOutput<float,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<4> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void cuda_UnPooling_updateGradInput<float,4>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<4> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
--- a/sparseconvnet/SCN/pybind.cpp
+++ b/sparseconvnet/SCN/pybind.cpp
+// Copyright 2016-present, Facebook, Inc.
+// All rights reserved.
+//
+// This source code is licensed under the license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <torch/torch.h>
+
+#include "sparseconvnet.h"
+
+// Registers everything that is specific to one spatial dimension: the
+// Metadata<Dimension> class, exposed to Python under `name`, followed by one
+// module-level function per layer operation. Each operation is selected by
+// casting its name to the exact function-pointer type that takes
+// Metadata<Dimension>.
+template <Int Dimension> void dimension(py::module &m, const char *name) {
+  using Meta = Metadata<Dimension>;
+  using Tensor = at::Tensor;
+
+  // Function-pointer shapes; several operations share the same one.
+  using ActivePoolFwd = void (*)(Tensor, Meta &, Tensor, Tensor, bool);
+  using ActivePoolBwd = void (*)(Tensor, Meta &, Tensor, Tensor, Tensor, bool);
+  using PoolFwd = void (*)(Tensor, Tensor, Tensor, Tensor, Meta &, Tensor,
+                           Tensor, long);
+  using PoolBwd = void (*)(Tensor, Tensor, Tensor, Tensor, Meta &, Tensor,
+                           Tensor, Tensor, long);
+  using MaxPoolBwd = void (*)(Tensor, Tensor, Tensor, Tensor, Meta &, Tensor,
+                              Tensor, Tensor, Tensor, long);
+  using ConvFwd = double (*)(Tensor, Tensor, Tensor, Tensor, Meta &, Tensor,
+                             Tensor, Tensor, Tensor);
+  using ConvBwd = void (*)(Tensor, Tensor, Tensor, Tensor, Meta &, Tensor,
+                           Tensor, Tensor, Tensor, Tensor, Tensor);
+  using FullConvFwd = double (*)(Tensor, Tensor, Tensor, Tensor, Meta &,
+                                 Meta &, Tensor, Tensor, Tensor, Tensor);
+  using FullConvBwd = void (*)(Tensor, Tensor, Tensor, Tensor, Meta &, Meta &,
+                               Tensor, Tensor, Tensor, Tensor, Tensor, Tensor);
+  using SubmConvFwd = double (*)(Tensor, Tensor, Meta &, Tensor, Tensor,
+                                 Tensor, Tensor);
+  using SubmConvBwd = void (*)(Tensor, Tensor, Meta &, Tensor, Tensor, Tensor,
+                               Tensor, Tensor, Tensor);
+  using DenseFwd = void (*)(Tensor, Meta &, Tensor, Tensor, long);
+  using DenseBwd = void (*)(Tensor, Meta &, Tensor, Tensor, Tensor);
+  using InputFwd = void (*)(Meta &, Tensor, Tensor, Tensor, Tensor, long, long);
+  using BLInputFwd = void (*)(Meta &, Tensor, Tensor, Tensor, Tensor, long);
+  using MetaTwoTensors = void (*)(Meta &, Tensor, Tensor);
+
+  // The metadata class itself, with its member functions.
+  pybind11::class_<Meta> metadata(m, name);
+  metadata.def(pybind11::init<>());
+  metadata.def("clear", &Meta::clear);
+  metadata.def("setInputSpatialSize", &Meta::setInputSpatialSize);
+  metadata.def("batchAddSample", &Meta::batchAddSample);
+  metadata.def("setInputSpatialLocation", &Meta::setInputSpatialLocation);
+  metadata.def("setInputSpatialLocations", &Meta::setInputSpatialLocations);
+  metadata.def("getSpatialLocations", &Meta::getSpatialLocations);
+  metadata.def("createMetadataForDenseToSparse",
+               &Meta::createMetadataForDenseToSparse);
+  metadata.def("sparsifyMetadata", &Meta::sparsifyMetadata);
+  metadata.def("appendMetadata", &Meta::appendMetadata);
+  metadata.def("sparsifyCompare", &Meta::sparsifyCompare);
+  metadata.def("addSampleFromThresholdedTensor",
+               &Meta::addSampleFromThresholdedTensor);
+  metadata.def("generateRuleBooks3s2", &Meta::generateRuleBooks3s2);
+  metadata.def("generateRuleBooks2s2", &Meta::generateRuleBooks2s2);
+
+  // Module-level layer functions, registered in the same order as before.
+  m.def("ActivePooling_updateOutput",
+        static_cast<ActivePoolFwd>(&ActivePooling_updateOutput), "");
+  m.def("ActivePooling_updateGradInput",
+        static_cast<ActivePoolBwd>(&ActivePooling_updateGradInput), "");
+  m.def("AveragePooling_updateOutput",
+        static_cast<PoolFwd>(&AveragePooling_updateOutput), "");
+  m.def("AveragePooling_updateGradInput",
+        static_cast<PoolBwd>(&AveragePooling_updateGradInput), "");
+  m.def("Convolution_updateOutput",
+        static_cast<ConvFwd>(&Convolution_updateOutput), "");
+  m.def("Convolution_backward", static_cast<ConvBwd>(&Convolution_backward),
+        "");
+  m.def("RandomizedStrideConvolution_updateOutput",
+        static_cast<ConvFwd>(&RandomizedStrideConvolution_updateOutput), "");
+  m.def("RandomizedStrideConvolution_backward",
+        static_cast<ConvBwd>(&RandomizedStrideConvolution_backward), "");
+  m.def("Deconvolution_updateOutput",
+        static_cast<ConvFwd>(&Deconvolution_updateOutput), "");
+  m.def("Deconvolution_backward",
+        static_cast<ConvBwd>(&Deconvolution_backward), "");
+  m.def("FullConvolution_updateOutput",
+        static_cast<FullConvFwd>(&FullConvolution_updateOutput), "");
+  m.def("FullConvolution_backward",
+        static_cast<FullConvBwd>(&FullConvolution_backward), "");
+  m.def("MaxPooling_updateOutput",
+        static_cast<PoolFwd>(&MaxPooling_updateOutput), "");
+  m.def("MaxPooling_updateGradInput",
+        static_cast<MaxPoolBwd>(&MaxPooling_updateGradInput), "");
+  m.def("RandomizedStrideMaxPooling_updateOutput",
+        static_cast<PoolFwd>(&RandomizedStrideMaxPooling_updateOutput), "");
+  m.def("RandomizedStrideMaxPooling_updateGradInput",
+        static_cast<MaxPoolBwd>(&RandomizedStrideMaxPooling_updateGradInput),
+        "");
+  m.def("SparseToDense_updateOutput",
+        static_cast<DenseFwd>(&SparseToDense_updateOutput), "");
+  m.def("SparseToDense_updateGradInput",
+        static_cast<DenseBwd>(&SparseToDense_updateGradInput), "");
+  m.def("SubmanifoldConvolution_updateOutput",
+        static_cast<SubmConvFwd>(&SubmanifoldConvolution_updateOutput), "");
+  m.def("SubmanifoldConvolution_backward",
+        static_cast<SubmConvBwd>(&SubmanifoldConvolution_backward), "");
+  m.def("InputLayer_updateOutput",
+        static_cast<InputFwd>(&InputLayer_updateOutput), "");
+  m.def("InputLayer_updateGradInput",
+        static_cast<MetaTwoTensors>(&InputLayer_updateGradInput), "");
+  m.def("OutputLayer_updateOutput",
+        static_cast<MetaTwoTensors>(&OutputLayer_updateOutput), "");
+  m.def("OutputLayer_updateGradInput",
+        static_cast<MetaTwoTensors>(&OutputLayer_updateGradInput), "");
+  m.def("BLInputLayer_updateOutput",
+        static_cast<BLInputFwd>(&BLInputLayer_updateOutput), "");
+  m.def("BLInputLayer_updateGradInput",
+        static_cast<MetaTwoTensors>(&BLInputLayer_updateGradInput), "");
+  m.def("BLOutputLayer_updateOutput",
+        static_cast<MetaTwoTensors>(&BLOutputLayer_updateOutput), "");
+  m.def("BLOutputLayer_updateGradInput",
+        static_cast<MetaTwoTensors>(&BLOutputLayer_updateGradInput), "");
+  m.def("UnPooling_updateOutput",
+        static_cast<PoolFwd>(&UnPooling_updateOutput), "");
+  m.def("UnPooling_updateGradInput",
+        static_cast<PoolBwd>(&UnPooling_updateGradInput), "");
+}
+
+// Python module entry point: registers the per-dimension bindings for
+// dimensions 1 through 6, then the functions that take no Metadata argument,
+// and finally a helper reporting the bit width of Int.
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  // Every binding is registered with an empty docstring.
+  const char *const kNoDoc = "";
+
+  // dimension specific functions
+  dimension<1>(m, "Metadata_1");
+  dimension<2>(m, "Metadata_2");
+  dimension<3>(m, "Metadata_3");
+  dimension<4>(m, "Metadata_4");
+  dimension<5>(m, "Metadata_5");
+  dimension<6>(m, "Metadata_6");
+
+  // arbitrary dimension functions
+  m.def("AffineReluTrivialConvolution_updateOutput",
+        &AffineReluTrivialConvolution_updateOutput, kNoDoc);
+  m.def("AffineReluTrivialConvolution_backward",
+        &AffineReluTrivialConvolution_backward, kNoDoc);
+  m.def("BatchwiseMultiplicativeDropout_updateOutput",
+        &BatchwiseMultiplicativeDropout_updateOutput, kNoDoc);
+  m.def("BatchwiseMultiplicativeDropout_updateGradInput",
+        &BatchwiseMultiplicativeDropout_updateGradInput, kNoDoc);
+  m.def("BatchNormalization_updateOutput", &BatchNormalization_updateOutput,
+        kNoDoc);
+  m.def("BatchNormalization_backward", &BatchNormalization_backward, kNoDoc);
+  m.def("LeakyReLU_updateOutput", &LeakyReLU_updateOutput, kNoDoc);
+  m.def("LeakyReLU_updateGradInput", &LeakyReLU_updateGradInput, kNoDoc);
+  m.def("NetworkInNetwork_updateOutput", &NetworkInNetwork_updateOutput,
+        kNoDoc);
+  m.def("NetworkInNetwork_updateGradInput", &NetworkInNetwork_updateGradInput,
+        kNoDoc);
+  m.def("NetworkInNetwork_accGradParameters",
+        &NetworkInNetwork_accGradParameters, kNoDoc);
+
+  // Number of bits in the Int type.
+  m.def("n_rulebook_bits", [] { return sizeof(Int) * 8; }, kNoDoc);
+}
--- a/sparseconvnet/SCN/pybind.py
+++ b/sparseconvnet/SCN/pybind.py
-# Copyright 2016-present, Facebook, Inc.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-f_cpu = open('instantiate_cpu.cpp', 'w')
-f_cuda = open('instantiate_cuda.cu', 'w')
-
-f_cpu.write("""
-// Copyright 2016-present, Facebook, Inc.
-// All rights reserved.
-//
-// This source code is licensed under the license found in the
-// LICENSE file in the root directory of this source tree.
-
-#define ENABLE_OPENMP YES
-#if defined(ENABLE_OPENMP)
-#include <omp.h>
-#endif
-
-#include <torch/torch.h>
-
-#include "Metadata/Metadata.cpp"
-template class Metadata<1>;
-template class Metadata<2>;
-template class Metadata<3>;
-template class Metadata<4>;
-//template class Metadata<5>;
-//template class Metadata<6>;
-//template class Metadata<7>;
-//template class Metadata<8>;
-//template class Metadata<9>;
-//template class Metadata<10>;
-#include "CPU/ActivePooling.cpp"
-#include "CPU/AffineReluTrivialConvolution.cpp"
-#include "CPU/AveragePooling.cpp"
-#include "CPU/BatchNormalization.cpp"
-#include "CPU/BatchwiseMultiplicativeDropout.cpp"
-#include "CPU/Convolution.cpp"
-#include "CPU/Deconvolution.cpp"
-#include "CPU/IOLayers.cpp"
-#include "CPU/LeakyReLU.cpp"
-#include "CPU/MaxPooling.cpp"
-#include "CPU/NetworkInNetwork.cpp"
-#include "CPU/SparseToDense.cpp"
-#include "CPU/UnPooling.cpp"
-//#include "misc/drawCurve.cpp"
-
-
-""")
-
-f_cuda.write("""
-// Copyright 2016-present, Facebook, Inc.
-// All rights reserved.
-//
-// This source code is licensed under the license found in the
-// LICENSE file in the root directory of this source tree.
-
-#define ENABLE_OPENMP YES
-#if defined(ENABLE_OPENMP)
-#include <omp.h>
-#endif
-
-#include <ATen/ATen.h>
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-#include "Metadata/Metadata.h"
-#include "CUDA/ActivePooling.cu"
-#include "CUDA/AffineReluTrivialConvolution.cu"
-#include "CUDA/AveragePooling.cu"
-#include "CUDA/BatchNormalization.cu"
-#include "CUDA/BatchwiseMultiplicativeDropout.cu"
-#include "CUDA/Convolution.cu"
-#include "CUDA/Deconvolution.cu"
-#include "CUDA/IOLayers.cu"
-#include "CUDA/LeakyReLU.cu"
-#include "CUDA/MaxPooling.cu"
-#include "CUDA/NetworkInNetwork.cu"
-#include "CUDA/SparseToDense.cu"
-#include "CUDA/UnPooling.cu"
-""")
-
-# f_cpu.write("""void cpu_float_DrawCurve_2(Metadata<2> &m, at::Tensor features,
-#   at::Tensor stroke);""")
-
-
-code="""template
-double ARCH_AffineReluTrivialConvolution_updateOutput<REAL>(at::Tensor input_features,
-                                                   at::Tensor output_features,
-                                                   at::Tensor affineWeight,
-                                                   at::Tensor affineBias,
-                                                   at::Tensor convWeight);
-template
-void ARCH_AffineReluTrivialConvolution_backward<REAL>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor affineWeight,
-    at::Tensor d_affineWeight, at::Tensor affineBias, at::Tensor d_affineBias,
-    at::Tensor convWeight, at::Tensor d_convWeight, bool additiveGrad);
-template
-void ARCH_BatchNormalization_updateOutput<REAL>(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, REAL eps, REAL momentum, bool train,
-    REAL leakiness);
-template
-void ARCH_BatchNormalizationInTensor_updateOutput<REAL>(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, REAL eps, REAL momentum, bool train,
-    REAL leakiness);
-template
-void ARCH_BatchNormalization_backward<REAL>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor output_features, at::Tensor d_output_features,
-    at::Tensor saveMean, at::Tensor saveInvStd, at::Tensor runningMean,
-    at::Tensor runningVar, at::Tensor weight, at::Tensor bias,
-    at::Tensor d_weight, at::Tensor d_bias, REAL leakiness);
-template
-void ARCH_BatchwiseMultiplicativeDropout_updateOutput<REAL>(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor noise,
-                                                     float alpha);
-template
-void ARCH_BatchwiseMultiplicativeDropout_updateGradInput<REAL>(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor noise, float alpha);
-template
-void ARCH_LeakyReLU_updateOutput<REAL>(at::Tensor input_features,
-                                at::Tensor output_features, float alpha);
-template
-void ARCH_LeakyReLU_updateGradInput<REAL>(at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features, float alpha);
-template
-double ARCH_NetworkInNetwork_updateOutput<REAL>(at::Tensor input_features,
-                                         at::Tensor output_features,
-                                         at::Tensor weight, at::Tensor bias);
-template
-void ARCH_NetworkInNetwork_updateGradInput<REAL>(at::Tensor d_input_features,
-                                          at::Tensor d_output_features,
-                                          at::Tensor weight);
-template
-void ARCH_NetworkInNetwork_accGradParameters<REAL>(at::Tensor input_features,
-                                            at::Tensor d_output_features,
-                                            at::Tensor d_weight,
-                                            at::Tensor d_bias);
-"""
-f_cpu.write(code.replace('ARCH', 'cpu').replace('REAL', 'float'))
-f_cpu.write(code.replace('ARCH', 'cpu').replace('REAL', 'double'))
-f_cuda.write(code.replace('ARCH', 'cuda').replace('REAL', 'float'))
-
-code="""
-template
-void ARCH_ActivePooling_updateOutput<REAL,DIMENSION>(at::Tensor inputSize,
-                                    Metadata<DIMENSION> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template
-void ARCH_ActivePooling_updateGradInput<REAL,DIMENSION>(
-    at::Tensor inputSize, Metadata<DIMENSION> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template
-void ARCH_AveragePooling_updateOutput<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<DIMENSION> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void ARCH_AveragePooling_updateGradInput<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<DIMENSION> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template
-double ARCH_Convolution_updateOutput<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<DIMENSION> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void ARCH_Convolution_backward<REAL,DIMENSION>(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<DIMENSION> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template
-double ARCH_SubmanifoldConvolution_updateOutput<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<DIMENSION> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template
-void ARCH_SubmanifoldConvolution_backward<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<DIMENSION> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template
-double ARCH_FullConvolution_updateOutput<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<DIMENSION> &mIn,
-    Metadata<DIMENSION> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void ARCH_FullConvolution_backward<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<DIMENSION> &mIn,
-    Metadata<DIMENSION> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double ARCH_RandomizedStrideConvolution_updateOutput<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<DIMENSION> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void ARCH_RandomizedStrideConvolution_backward<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<DIMENSION> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template
-double ARCH_Deconvolution_updateOutput<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<DIMENSION> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template
-void ARCH_Deconvolution_backward<REAL,DIMENSION>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<DIMENSION> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template
-void ARCH_InputLayer_updateOutput<REAL,DIMENSION>(Metadata<DIMENSION> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template
-void ARCH_InputLayer_updateGradInput<REAL,DIMENSION>(Metadata<DIMENSION> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template
-void ARCH_OutputLayer_updateOutput<REAL,DIMENSION>(Metadata<DIMENSION> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template
-void ARCH_OutputLayer_updateGradInput<REAL,DIMENSION>(Metadata<DIMENSION> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template
-void ARCH_BLInputLayer_updateOutput<REAL,DIMENSION>(Metadata<DIMENSION> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template
-void ARCH_BLInputLayer_updateGradInput<REAL,DIMENSION>(Metadata<DIMENSION> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template
-void ARCH_BLOutputLayer_updateOutput<REAL,DIMENSION>(Metadata<DIMENSION> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template
-void ARCH_BLOutputLayer_updateGradInput<REAL,DIMENSION>(Metadata<DIMENSION> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void ARCH_MaxPooling_updateOutput<REAL,DIMENSION>(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<DIMENSION> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template
-void ARCH_MaxPooling_updateGradInput<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<DIMENSION> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void ARCH_RandomizedStrideMaxPooling_updateOutput<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<DIMENSION> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template
-void ARCH_RandomizedStrideMaxPooling_updateGradInput<REAL,DIMENSION>(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<DIMENSION> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template
-void ARCH_SparseToDense_updateOutput<REAL,DIMENSION>(at::Tensor inputSize,
-                                    Metadata<DIMENSION> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template
-void ARCH_SparseToDense_updateGradInput<REAL,DIMENSION>(at::Tensor inputSize,
-                                       Metadata<DIMENSION> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template
-void ARCH_UnPooling_updateOutput<REAL,DIMENSION>(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<DIMENSION> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template
-void ARCH_UnPooling_updateGradInput<REAL,DIMENSION>(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<DIMENSION> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-"""
-for dimension in range(1,5):
-    f_cpu.write(code.replace('ARCH', 'cpu').replace('REAL', 'float').replace('DIMENSION', str(dimension)))
-    f_cpu.write(code.replace('ARCH', 'cpu').replace('REAL', 'double').replace('DIMENSION', str(dimension)))
-    f_cuda.write(code.replace('ARCH', 'cuda').replace('REAL', 'float').replace('DIMENSION', str(dimension)))
-
-f_cpu.close()
-f_cuda.close()
--- a/sparseconvnet/SCN/pybind2.py
+++ b/sparseconvnet/SCN/pybind2.py
-# Copyright 2016-present, Facebook, Inc.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-f_cpu = open('pybind_cpu.cpp', 'w')  # generated C++ source; mode 'w' overwrites any previous file
-f_cuda = open('pybind_cuda.cpp', 'w')  # second generated C++ source; later also receives the cuda_* entries
-
-txt="""
-// Copyright 2016-present, Facebook, Inc.
-// All rights reserved.
-//
-// This source code is licensed under the license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <torch/torch.h>
-
-#include "Metadata/Metadata.h"
-"""
-f_cpu.write(txt)  # identical license header and includes start both generated files
-f_cuda.write(txt)
-
-txt="""
-template <typename T>
-double cpu_AffineReluTrivialConvolution_updateOutput(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor affineWeight,
-                                                     at::Tensor affineBias,
-                                                     at::Tensor convWeight);
-template <typename T>
-void cpu_AffineReluTrivialConvolution_backward(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor affineWeight,
-    at::Tensor d_affineWeight, at::Tensor affineBias, at::Tensor d_affineBias,
-    at::Tensor convWeight, at::Tensor d_convWeight, bool additiveGrad);
-template <typename T>
-void cpu_BatchNormalization_updateOutput(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, T eps, T momentum, bool train,
-    T leakiness);
-template <typename T>
-void cpu_BatchNormalizationInTensor_updateOutput(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, T eps, T momentum, bool train,
-    T leakiness);
-template <typename T>
-void cpu_BatchNormalization_backward(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor output_features, at::Tensor d_output_features,
-    at::Tensor saveMean, at::Tensor saveInvStd, at::Tensor runningMean,
-    at::Tensor runningVar, at::Tensor weight, at::Tensor bias,
-    at::Tensor d_weight, at::Tensor d_bias, T leakiness);
-template <typename T>
-void cpu_BatchwiseMultiplicativeDropout_updateOutput(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor noise,
-                                                     float alpha);
-template <typename T>
-void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor noise, float alpha);
-template <typename T>
-void cpu_LeakyReLU_updateOutput(at::Tensor input_features,
-                                at::Tensor output_features, float alpha);
-template <typename T>
-void cpu_LeakyReLU_updateGradInput(at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features, float alpha);
-template <typename T>
-double cpu_NetworkInNetwork_updateOutput(at::Tensor input_features,
-                                         at::Tensor output_features,
-                                         at::Tensor weight, at::Tensor bias);
-template <typename T>
-void cpu_NetworkInNetwork_updateGradInput(at::Tensor d_input_features,
-                                          at::Tensor d_output_features,
-                                          at::Tensor weight);
-template <typename T>
-void cpu_NetworkInNetwork_accGradParameters(at::Tensor input_features,
-                                            at::Tensor d_output_features,
-                                            at::Tensor d_weight,
-                                            at::Tensor d_bias);
-template <typename T, Int Dimension>
-void cpu_ActivePooling_updateOutput(at::Tensor inputSize,
-                                    Metadata<Dimension> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template <typename T, Int Dimension>
-void cpu_ActivePooling_updateGradInput(
-    at::Tensor inputSize, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template <typename T, Int Dimension>
-void cpu_AveragePooling_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_AveragePooling_updateGradInput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template <typename T, Int Dimension>
-double cpu_Convolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_Convolution_backward(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<Dimension> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_SubmanifoldConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<Dimension> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_SubmanifoldConvolution_backward(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<Dimension> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_FullConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &mIn,
-    Metadata<Dimension> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_FullConvolution_backward(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &mIn,
-    Metadata<Dimension> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_RandomizedStrideConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_RandomizedStrideConvolution_backward(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_Deconvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_Deconvolution_backward(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<Dimension> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-void cpu_InputLayer_updateOutput(Metadata<Dimension> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template <typename T, Int Dimension>
-void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template <typename T, Int Dimension>
-void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template <typename T, Int Dimension>
-void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template <typename T, Int Dimension>
-void cpu_BLOutputLayer_updateGradInput(Metadata<Dimension> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_MaxPooling_updateOutput(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<Dimension> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_MaxPooling_updateGradInput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_RandomizedStrideMaxPooling_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_RandomizedStrideMaxPooling_updateGradInput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_SparseToDense_updateOutput(at::Tensor inputSize,
-                                    Metadata<Dimension> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template <typename T, Int Dimension>
-void cpu_SparseToDense_updateGradInput(at::Tensor inputSize,
-                                       Metadata<Dimension> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_UnPooling_updateOutput(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<Dimension> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_UnPooling_updateGradInput(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<Dimension> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-"""
-f_cpu.write(txt)  # cpu_* template declarations for the CPU file
-f_cuda.write(txt)  # the CUDA file gets the cpu_* declarations as well
-f_cuda.write(txt.replace('cpu','cuda'))  # same declarations again, renamed to cuda_* by plain text substitution
-
-
-# txt="""
-# void cpu_float_DrawCurve_2(Metadata<2> &m, at::Tensor features,
-#                            at::Tensor stroke);
-# """
-# f_cpu.write(txt)
-# f_cuda.write(txt)
-
-for f in [f_cpu, f_cuda]:  # open the pybind11 module body in both generated files
-    f.write("""
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-""")  # the opening brace is closed by the final write of this script
-
-for f in [f_cpu, f_cuda]:  # both files bind the Metadata class
-    for DIMENSION in range(1,5):  # one class binding per dimension 1..4
-        f.write("""
-pybind11::class_<Metadata<DIMENSION>>(m, "Metadata_DIMENSION")
-  .def(pybind11::init<>())
-  .def("clear", &Metadata<DIMENSION>::clear)
-  .def("setInputSpatialSize", &Metadata<DIMENSION>::setInputSpatialSize)
-  .def("batchAddSample", &Metadata<DIMENSION>::batchAddSample)
-  .def("setInputSpatialLocation", &Metadata<DIMENSION>::setInputSpatialLocation)
-  .def("setInputSpatialLocations", &Metadata<DIMENSION>::setInputSpatialLocations)
-  .def("getSpatialLocations", &Metadata<DIMENSION>::getSpatialLocations)
-  .def("createMetadataForDenseToSparse", &Metadata<DIMENSION>::createMetadataForDenseToSparse)
-  .def("sparsifyMetadata", &Metadata<DIMENSION>::sparsifyMetadata)
-  .def("addSampleFromThresholdedTensor", &Metadata<DIMENSION>::addSampleFromThresholdedTensor)
-  .def("generateRuleBooks3s2", &Metadata<DIMENSION>::generateRuleBooks3s2)
-  .def("generateRuleBooks2s2", &Metadata<DIMENSION>::generateRuleBooks2s2);
-""".replace('DIMENSION', str(DIMENSION)))  # substitutes the template argument and the "Metadata_<n>" Python name alike
-
-def typed_fn(st):
-    st='m.def("ARCH_REAL_'+st+'", &ARCH_'+st+'<REAL>, "");\n'
-    for f in [f_cpu, f_cuda]:
-        f.write(st.replace('ARCH', 'cpu').replace('REAL', 'float'))
-        f.write(st.replace('ARCH', 'cpu').replace('REAL', 'double'))
-    f_cuda.write(st.replace('ARCH', 'cuda').replace('REAL', 'float'))
-
-def dim_typed_fn(st):
-    st='m.def("ARCH_REAL_'+st+'_DIMENSION", &ARCH_'+st+'<REAL,DIMENSION>, "");\n'
-    for DIMENSION in range(1,5):
-        for f in [f_cpu, f_cuda]:
-            f.write(st.replace('DIMENSION', str(DIMENSION)).replace('ARCH', 'cpu').replace('REAL', 'float'))
-            f.write(st.replace('DIMENSION', str(DIMENSION)).replace('ARCH', 'cpu').replace('REAL', 'double'))
-        f_cuda.write(st.replace('DIMENSION', str(DIMENSION)).replace('ARCH', 'cuda').replace('REAL', 'float'))
-
-typed_fn("AffineReluTrivialConvolution_updateOutput")
-typed_fn("AffineReluTrivialConvolution_backward")
-typed_fn("BatchwiseMultiplicativeDropout_updateOutput")
-typed_fn("BatchwiseMultiplicativeDropout_updateGradInput")
-typed_fn("BatchNormalization_updateOutput")
-typed_fn("BatchNormalization_backward")
-typed_fn("LeakyReLU_updateOutput")
-typed_fn("LeakyReLU_updateGradInput")
-typed_fn("NetworkInNetwork_updateOutput")
-typed_fn("NetworkInNetwork_updateGradInput")
-typed_fn("NetworkInNetwork_accGradParameters")
-dim_typed_fn("ActivePooling_updateOutput")
-dim_typed_fn("ActivePooling_updateGradInput")
-dim_typed_fn("AveragePooling_updateOutput")
-dim_typed_fn("AveragePooling_updateGradInput")
-dim_typed_fn("Convolution_updateOutput")
-dim_typed_fn("Convolution_backward")
-dim_typed_fn("RandomizedStrideConvolution_updateOutput")
-dim_typed_fn("RandomizedStrideConvolution_backward")
-dim_typed_fn("Deconvolution_updateOutput")
-dim_typed_fn("Deconvolution_backward")
-dim_typed_fn("FullConvolution_updateOutput")
-dim_typed_fn("FullConvolution_backward")
-dim_typed_fn("MaxPooling_updateOutput")
-dim_typed_fn("MaxPooling_updateGradInput")
-dim_typed_fn("RandomizedStrideMaxPooling_updateOutput")
-dim_typed_fn("RandomizedStrideMaxPooling_updateGradInput")
-dim_typed_fn("SparseToDense_updateOutput")
-dim_typed_fn("SparseToDense_updateGradInput")
-dim_typed_fn("SubmanifoldConvolution_updateOutput")
-dim_typed_fn("SubmanifoldConvolution_backward")
-dim_typed_fn("InputLayer_updateOutput")
-dim_typed_fn("InputLayer_updateGradInput")
-dim_typed_fn("OutputLayer_updateOutput")
-dim_typed_fn("OutputLayer_updateGradInput")
-dim_typed_fn("BLInputLayer_updateOutput")
-dim_typed_fn("BLInputLayer_updateGradInput")
-dim_typed_fn("BLOutputLayer_updateOutput")
-dim_typed_fn("BLOutputLayer_updateGradInput")
-dim_typed_fn("UnPooling_updateOutput")
-dim_typed_fn("UnPooling_updateGradInput")
-
-for f in [f_cpu, f_cuda]:  # finish the module body in both generated files
-    f.write(  # adds the n_rulebook_bits binding (8*sizeof(Int)) and the closing brace of PYBIND11_MODULE
-"""
-m.def("n_rulebook_bits", []() {return 8*sizeof(Int);}, "");
-}
-""")
-
-f_cpu.close()  # both generated sources are complete
-f_cuda.close()
--- a/sparseconvnet/SCN/pybind_cpu.cpp
+++ b/sparseconvnet/SCN/pybind_cpu.cpp
-
-// Copyright 2016-present, Facebook, Inc.
-// All rights reserved.
-//
-// This source code is licensed under the license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <torch/torch.h>
-
-#include "Metadata/Metadata.h"
-
-template <typename T>
-double cpu_AffineReluTrivialConvolution_updateOutput(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor affineWeight,
-                                                     at::Tensor affineBias,
-                                                     at::Tensor convWeight);
-template <typename T>
-void cpu_AffineReluTrivialConvolution_backward(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor affineWeight,
-    at::Tensor d_affineWeight, at::Tensor affineBias, at::Tensor d_affineBias,
-    at::Tensor convWeight, at::Tensor d_convWeight, bool additiveGrad);
-template <typename T>
-void cpu_BatchNormalization_updateOutput(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, T eps, T momentum, bool train,
-    T leakiness);
-template <typename T>
-void cpu_BatchNormalizationInTensor_updateOutput(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, T eps, T momentum, bool train,
-    T leakiness);
-template <typename T>
-void cpu_BatchNormalization_backward(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor output_features, at::Tensor d_output_features,
-    at::Tensor saveMean, at::Tensor saveInvStd, at::Tensor runningMean,
-    at::Tensor runningVar, at::Tensor weight, at::Tensor bias,
-    at::Tensor d_weight, at::Tensor d_bias, T leakiness);
-template <typename T>
-void cpu_BatchwiseMultiplicativeDropout_updateOutput(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor noise,
-                                                     float alpha);
-template <typename T>
-void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor noise, float alpha);
-template <typename T>
-void cpu_LeakyReLU_updateOutput(at::Tensor input_features,
-                                at::Tensor output_features, float alpha);
-template <typename T>
-void cpu_LeakyReLU_updateGradInput(at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features, float alpha);
-template <typename T>
-double cpu_NetworkInNetwork_updateOutput(at::Tensor input_features,
-                                         at::Tensor output_features,
-                                         at::Tensor weight, at::Tensor bias);
-template <typename T>
-void cpu_NetworkInNetwork_updateGradInput(at::Tensor d_input_features,
-                                          at::Tensor d_output_features,
-                                          at::Tensor weight);
-template <typename T>
-void cpu_NetworkInNetwork_accGradParameters(at::Tensor input_features,
-                                            at::Tensor d_output_features,
-                                            at::Tensor d_weight,
-                                            at::Tensor d_bias);
-template <typename T, Int Dimension>
-void cpu_ActivePooling_updateOutput(at::Tensor inputSize,
-                                    Metadata<Dimension> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template <typename T, Int Dimension>
-void cpu_ActivePooling_updateGradInput(
-    at::Tensor inputSize, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template <typename T, Int Dimension>
-void cpu_AveragePooling_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_AveragePooling_updateGradInput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template <typename T, Int Dimension>
-double cpu_Convolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_Convolution_backward(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<Dimension> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_SubmanifoldConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<Dimension> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_SubmanifoldConvolution_backward(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<Dimension> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_FullConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &mIn,
-    Metadata<Dimension> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_FullConvolution_backward(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &mIn,
-    Metadata<Dimension> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_RandomizedStrideConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_RandomizedStrideConvolution_backward(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_Deconvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_Deconvolution_backward(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<Dimension> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-void cpu_InputLayer_updateOutput(Metadata<Dimension> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template <typename T, Int Dimension>
-void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template <typename T, Int Dimension>
-void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template <typename T, Int Dimension>
-void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template <typename T, Int Dimension>
-void cpu_BLOutputLayer_updateGradInput(Metadata<Dimension> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_MaxPooling_updateOutput(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<Dimension> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_MaxPooling_updateGradInput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_RandomizedStrideMaxPooling_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_RandomizedStrideMaxPooling_updateGradInput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_SparseToDense_updateOutput(at::Tensor inputSize,
-                                    Metadata<Dimension> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template <typename T, Int Dimension>
-void cpu_SparseToDense_updateGradInput(at::Tensor inputSize,
-                                       Metadata<Dimension> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_UnPooling_updateOutput(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<Dimension> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_UnPooling_updateGradInput(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<Dimension> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-
-pybind11::class_<Metadata<1>>(m, "Metadata_1")
-  .def(pybind11::init<>())
-  .def("clear", &Metadata<1>::clear)
-  .def("setInputSpatialSize", &Metadata<1>::setInputSpatialSize)
-  .def("batchAddSample", &Metadata<1>::batchAddSample)
-  .def("setInputSpatialLocation", &Metadata<1>::setInputSpatialLocation)
-  .def("setInputSpatialLocations", &Metadata<1>::setInputSpatialLocations)
-  .def("getSpatialLocations", &Metadata<1>::getSpatialLocations)
-  .def("createMetadataForDenseToSparse", &Metadata<1>::createMetadataForDenseToSparse)
-  .def("sparsifyMetadata", &Metadata<1>::sparsifyMetadata)
-  .def("addSampleFromThresholdedTensor", &Metadata<1>::addSampleFromThresholdedTensor)
-  .def("generateRuleBooks3s2", &Metadata<1>::generateRuleBooks3s2)
-  .def("generateRuleBooks2s2", &Metadata<1>::generateRuleBooks2s2);
-
-pybind11::class_<Metadata<2>>(m, "Metadata_2")
-  .def(pybind11::init<>())
-  .def("clear", &Metadata<2>::clear)
-  .def("setInputSpatialSize", &Metadata<2>::setInputSpatialSize)
-  .def("batchAddSample", &Metadata<2>::batchAddSample)
-  .def("setInputSpatialLocation", &Metadata<2>::setInputSpatialLocation)
-  .def("setInputSpatialLocations", &Metadata<2>::setInputSpatialLocations)
-  .def("getSpatialLocations", &Metadata<2>::getSpatialLocations)
-  .def("createMetadataForDenseToSparse", &Metadata<2>::createMetadataForDenseToSparse)
-  .def("sparsifyMetadata", &Metadata<2>::sparsifyMetadata)
-  .def("addSampleFromThresholdedTensor", &Metadata<2>::addSampleFromThresholdedTensor)
-  .def("generateRuleBooks3s2", &Metadata<2>::generateRuleBooks3s2)
-  .def("generateRuleBooks2s2", &Metadata<2>::generateRuleBooks2s2);
-
-pybind11::class_<Metadata<3>>(m, "Metadata_3")
-  .def(pybind11::init<>())
-  .def("clear", &Metadata<3>::clear)
-  .def("setInputSpatialSize", &Metadata<3>::setInputSpatialSize)
-  .def("batchAddSample", &Metadata<3>::batchAddSample)
-  .def("setInputSpatialLocation", &Metadata<3>::setInputSpatialLocation)
-  .def("setInputSpatialLocations", &Metadata<3>::setInputSpatialLocations)
-  .def("getSpatialLocations", &Metadata<3>::getSpatialLocations)
-  .def("createMetadataForDenseToSparse", &Metadata<3>::createMetadataForDenseToSparse)
-  .def("sparsifyMetadata", &Metadata<3>::sparsifyMetadata)
-  .def("addSampleFromThresholdedTensor", &Metadata<3>::addSampleFromThresholdedTensor)
-  .def("generateRuleBooks3s2", &Metadata<3>::generateRuleBooks3s2)
-  .def("generateRuleBooks2s2", &Metadata<3>::generateRuleBooks2s2);
-
-pybind11::class_<Metadata<4>>(m, "Metadata_4")
-  .def(pybind11::init<>())
-  .def("clear", &Metadata<4>::clear)
-  .def("setInputSpatialSize", &Metadata<4>::setInputSpatialSize)
-  .def("batchAddSample", &Metadata<4>::batchAddSample)
-  .def("setInputSpatialLocation", &Metadata<4>::setInputSpatialLocation)
-  .def("setInputSpatialLocations", &Metadata<4>::setInputSpatialLocations)
-  .def("getSpatialLocations", &Metadata<4>::getSpatialLocations)
-  .def("createMetadataForDenseToSparse", &Metadata<4>::createMetadataForDenseToSparse)
-  .def("sparsifyMetadata", &Metadata<4>::sparsifyMetadata)
-  .def("addSampleFromThresholdedTensor", &Metadata<4>::addSampleFromThresholdedTensor)
-  .def("generateRuleBooks3s2", &Metadata<4>::generateRuleBooks3s2)
-  .def("generateRuleBooks2s2", &Metadata<4>::generateRuleBooks2s2);
-m.def("cpu_float_AffineReluTrivialConvolution_updateOutput", &cpu_AffineReluTrivialConvolution_updateOutput<float>, "");
-m.def("cpu_double_AffineReluTrivialConvolution_updateOutput", &cpu_AffineReluTrivialConvolution_updateOutput<double>, "");
-m.def("cpu_float_AffineReluTrivialConvolution_backward", &cpu_AffineReluTrivialConvolution_backward<float>, "");
-m.def("cpu_double_AffineReluTrivialConvolution_backward", &cpu_AffineReluTrivialConvolution_backward<double>, "");
-m.def("cpu_float_BatchwiseMultiplicativeDropout_updateOutput", &cpu_BatchwiseMultiplicativeDropout_updateOutput<float>, "");
-m.def("cpu_double_BatchwiseMultiplicativeDropout_updateOutput", &cpu_BatchwiseMultiplicativeDropout_updateOutput<double>, "");
-m.def("cpu_float_BatchwiseMultiplicativeDropout_updateGradInput", &cpu_BatchwiseMultiplicativeDropout_updateGradInput<float>, "");
-m.def("cpu_double_BatchwiseMultiplicativeDropout_updateGradInput", &cpu_BatchwiseMultiplicativeDropout_updateGradInput<double>, "");
-m.def("cpu_float_BatchNormalization_updateOutput", &cpu_BatchNormalization_updateOutput<float>, "");
-m.def("cpu_double_BatchNormalization_updateOutput", &cpu_BatchNormalization_updateOutput<double>, "");
-m.def("cpu_float_BatchNormalization_backward", &cpu_BatchNormalization_backward<float>, "");
-m.def("cpu_double_BatchNormalization_backward", &cpu_BatchNormalization_backward<double>, "");
-m.def("cpu_float_LeakyReLU_updateOutput", &cpu_LeakyReLU_updateOutput<float>, "");
-m.def("cpu_double_LeakyReLU_updateOutput", &cpu_LeakyReLU_updateOutput<double>, "");
-m.def("cpu_float_LeakyReLU_updateGradInput", &cpu_LeakyReLU_updateGradInput<float>, "");
-m.def("cpu_double_LeakyReLU_updateGradInput", &cpu_LeakyReLU_updateGradInput<double>, "");
-m.def("cpu_float_NetworkInNetwork_updateOutput", &cpu_NetworkInNetwork_updateOutput<float>, "");
-m.def("cpu_double_NetworkInNetwork_updateOutput", &cpu_NetworkInNetwork_updateOutput<double>, "");
-m.def("cpu_float_NetworkInNetwork_updateGradInput", &cpu_NetworkInNetwork_updateGradInput<float>, "");
-m.def("cpu_double_NetworkInNetwork_updateGradInput", &cpu_NetworkInNetwork_updateGradInput<double>, "");
-m.def("cpu_float_NetworkInNetwork_accGradParameters", &cpu_NetworkInNetwork_accGradParameters<float>, "");
-m.def("cpu_double_NetworkInNetwork_accGradParameters", &cpu_NetworkInNetwork_accGradParameters<double>, "");
-m.def("cpu_float_ActivePooling_updateOutput_1", &cpu_ActivePooling_updateOutput<float,1>, "");
-m.def("cpu_double_ActivePooling_updateOutput_1", &cpu_ActivePooling_updateOutput<double,1>, "");
-m.def("cpu_float_ActivePooling_updateOutput_2", &cpu_ActivePooling_updateOutput<float,2>, "");
-m.def("cpu_double_ActivePooling_updateOutput_2", &cpu_ActivePooling_updateOutput<double,2>, "");
-m.def("cpu_float_ActivePooling_updateOutput_3", &cpu_ActivePooling_updateOutput<float,3>, "");
-m.def("cpu_double_ActivePooling_updateOutput_3", &cpu_ActivePooling_updateOutput<double,3>, "");
-m.def("cpu_float_ActivePooling_updateOutput_4", &cpu_ActivePooling_updateOutput<float,4>, "");
-m.def("cpu_double_ActivePooling_updateOutput_4", &cpu_ActivePooling_updateOutput<double,4>, "");
-m.def("cpu_float_ActivePooling_updateGradInput_1", &cpu_ActivePooling_updateGradInput<float,1>, "");
-m.def("cpu_double_ActivePooling_updateGradInput_1", &cpu_ActivePooling_updateGradInput<double,1>, "");
-m.def("cpu_float_ActivePooling_updateGradInput_2", &cpu_ActivePooling_updateGradInput<float,2>, "");
-m.def("cpu_double_ActivePooling_updateGradInput_2", &cpu_ActivePooling_updateGradInput<double,2>, "");
-m.def("cpu_float_ActivePooling_updateGradInput_3", &cpu_ActivePooling_updateGradInput<float,3>, "");
-m.def("cpu_double_ActivePooling_updateGradInput_3", &cpu_ActivePooling_updateGradInput<double,3>, "");
-m.def("cpu_float_ActivePooling_updateGradInput_4", &cpu_ActivePooling_updateGradInput<float,4>, "");
-m.def("cpu_double_ActivePooling_updateGradInput_4", &cpu_ActivePooling_updateGradInput<double,4>, "");
-m.def("cpu_float_AveragePooling_updateOutput_1", &cpu_AveragePooling_updateOutput<float,1>, "");
-m.def("cpu_double_AveragePooling_updateOutput_1", &cpu_AveragePooling_updateOutput<double,1>, "");
-m.def("cpu_float_AveragePooling_updateOutput_2", &cpu_AveragePooling_updateOutput<float,2>, "");
-m.def("cpu_double_AveragePooling_updateOutput_2", &cpu_AveragePooling_updateOutput<double,2>, "");
-m.def("cpu_float_AveragePooling_updateOutput_3", &cpu_AveragePooling_updateOutput<float,3>, "");
-m.def("cpu_double_AveragePooling_updateOutput_3", &cpu_AveragePooling_updateOutput<double,3>, "");
-m.def("cpu_float_AveragePooling_updateOutput_4", &cpu_AveragePooling_updateOutput<float,4>, "");
-m.def("cpu_double_AveragePooling_updateOutput_4", &cpu_AveragePooling_updateOutput<double,4>, "");
-m.def("cpu_float_AveragePooling_updateGradInput_1", &cpu_AveragePooling_updateGradInput<float,1>, "");
-m.def("cpu_double_AveragePooling_updateGradInput_1", &cpu_AveragePooling_updateGradInput<double,1>, "");
-m.def("cpu_float_AveragePooling_updateGradInput_2", &cpu_AveragePooling_updateGradInput<float,2>, "");
-m.def("cpu_double_AveragePooling_updateGradInput_2", &cpu_AveragePooling_updateGradInput<double,2>, "");
-m.def("cpu_float_AveragePooling_updateGradInput_3", &cpu_AveragePooling_updateGradInput<float,3>, "");
-m.def("cpu_double_AveragePooling_updateGradInput_3", &cpu_AveragePooling_updateGradInput<double,3>, "");
-m.def("cpu_float_AveragePooling_updateGradInput_4", &cpu_AveragePooling_updateGradInput<float,4>, "");
-m.def("cpu_double_AveragePooling_updateGradInput_4", &cpu_AveragePooling_updateGradInput<double,4>, "");
-m.def("cpu_float_Convolution_updateOutput_1", &cpu_Convolution_updateOutput<float,1>, "");
-m.def("cpu_double_Convolution_updateOutput_1", &cpu_Convolution_updateOutput<double,1>, "");
-m.def("cpu_float_Convolution_updateOutput_2", &cpu_Convolution_updateOutput<float,2>, "");
-m.def("cpu_double_Convolution_updateOutput_2", &cpu_Convolution_updateOutput<double,2>, "");
-m.def("cpu_float_Convolution_updateOutput_3", &cpu_Convolution_updateOutput<float,3>, "");
-m.def("cpu_double_Convolution_updateOutput_3", &cpu_Convolution_updateOutput<double,3>, "");
-m.def("cpu_float_Convolution_updateOutput_4", &cpu_Convolution_updateOutput<float,4>, "");
-m.def("cpu_double_Convolution_updateOutput_4", &cpu_Convolution_updateOutput<double,4>, "");
-m.def("cpu_float_Convolution_backward_1", &cpu_Convolution_backward<float,1>, "");
-m.def("cpu_double_Convolution_backward_1", &cpu_Convolution_backward<double,1>, "");
-m.def("cpu_float_Convolution_backward_2", &cpu_Convolution_backward<float,2>, "");
-m.def("cpu_double_Convolution_backward_2", &cpu_Convolution_backward<double,2>, "");
-m.def("cpu_float_Convolution_backward_3", &cpu_Convolution_backward<float,3>, "");
-m.def("cpu_double_Convolution_backward_3", &cpu_Convolution_backward<double,3>, "");
-m.def("cpu_float_Convolution_backward_4", &cpu_Convolution_backward<float,4>, "");
-m.def("cpu_double_Convolution_backward_4", &cpu_Convolution_backward<double,4>, "");
-m.def("cpu_float_RandomizedStrideConvolution_updateOutput_1", &cpu_RandomizedStrideConvolution_updateOutput<float,1>, "");
-m.def("cpu_double_RandomizedStrideConvolution_updateOutput_1", &cpu_RandomizedStrideConvolution_updateOutput<double,1>, "");
-m.def("cpu_float_RandomizedStrideConvolution_updateOutput_2", &cpu_RandomizedStrideConvolution_updateOutput<float,2>, "");
-m.def("cpu_double_RandomizedStrideConvolution_updateOutput_2", &cpu_RandomizedStrideConvolution_updateOutput<double,2>, "");
-m.def("cpu_float_RandomizedStrideConvolution_updateOutput_3", &cpu_RandomizedStrideConvolution_updateOutput<float,3>, "");
-m.def("cpu_double_RandomizedStrideConvolution_updateOutput_3", &cpu_RandomizedStrideConvolution_updateOutput<double,3>, "");
-m.def("cpu_float_RandomizedStrideConvolution_updateOutput_4", &cpu_RandomizedStrideConvolution_updateOutput<float,4>, "");
-m.def("cpu_double_RandomizedStrideConvolution_updateOutput_4", &cpu_RandomizedStrideConvolution_updateOutput<double,4>, "");
-m.def("cpu_float_RandomizedStrideConvolution_backward_1", &cpu_RandomizedStrideConvolution_backward<float,1>, "");
-m.def("cpu_double_RandomizedStrideConvolution_backward_1", &cpu_RandomizedStrideConvolution_backward<double,1>, "");
-m.def("cpu_float_RandomizedStrideConvolution_backward_2", &cpu_RandomizedStrideConvolution_backward<float,2>, "");
-m.def("cpu_double_RandomizedStrideConvolution_backward_2", &cpu_RandomizedStrideConvolution_backward<double,2>, "");
-m.def("cpu_float_RandomizedStrideConvolution_backward_3", &cpu_RandomizedStrideConvolution_backward<float,3>, "");
-m.def("cpu_double_RandomizedStrideConvolution_backward_3", &cpu_RandomizedStrideConvolution_backward<double,3>, "");
-m.def("cpu_float_RandomizedStrideConvolution_backward_4", &cpu_RandomizedStrideConvolution_backward<float,4>, "");
-m.def("cpu_double_RandomizedStrideConvolution_backward_4", &cpu_RandomizedStrideConvolution_backward<double,4>, "");
-m.def("cpu_float_Deconvolution_updateOutput_1", &cpu_Deconvolution_updateOutput<float,1>, "");
-m.def("cpu_double_Deconvolution_updateOutput_1", &cpu_Deconvolution_updateOutput<double,1>, "");
-m.def("cpu_float_Deconvolution_updateOutput_2", &cpu_Deconvolution_updateOutput<float,2>, "");
-m.def("cpu_double_Deconvolution_updateOutput_2", &cpu_Deconvolution_updateOutput<double,2>, "");
-m.def("cpu_float_Deconvolution_updateOutput_3", &cpu_Deconvolution_updateOutput<float,3>, "");
-m.def("cpu_double_Deconvolution_updateOutput_3", &cpu_Deconvolution_updateOutput<double,3>, "");
-m.def("cpu_float_Deconvolution_updateOutput_4", &cpu_Deconvolution_updateOutput<float,4>, "");
-m.def("cpu_double_Deconvolution_updateOutput_4", &cpu_Deconvolution_updateOutput<double,4>, "");
-m.def("cpu_float_Deconvolution_backward_1", &cpu_Deconvolution_backward<float,1>, "");
-m.def("cpu_double_Deconvolution_backward_1", &cpu_Deconvolution_backward<double,1>, "");
-m.def("cpu_float_Deconvolution_backward_2", &cpu_Deconvolution_backward<float,2>, "");
-m.def("cpu_double_Deconvolution_backward_2", &cpu_Deconvolution_backward<double,2>, "");
-m.def("cpu_float_Deconvolution_backward_3", &cpu_Deconvolution_backward<float,3>, "");
-m.def("cpu_double_Deconvolution_backward_3", &cpu_Deconvolution_backward<double,3>, "");
-m.def("cpu_float_Deconvolution_backward_4", &cpu_Deconvolution_backward<float,4>, "");
-m.def("cpu_double_Deconvolution_backward_4", &cpu_Deconvolution_backward<double,4>, "");
-m.def("cpu_float_FullConvolution_updateOutput_1", &cpu_FullConvolution_updateOutput<float,1>, "");
-m.def("cpu_double_FullConvolution_updateOutput_1", &cpu_FullConvolution_updateOutput<double,1>, "");
-m.def("cpu_float_FullConvolution_updateOutput_2", &cpu_FullConvolution_updateOutput<float,2>, "");
-m.def("cpu_double_FullConvolution_updateOutput_2", &cpu_FullConvolution_updateOutput<double,2>, "");
-m.def("cpu_float_FullConvolution_updateOutput_3", &cpu_FullConvolution_updateOutput<float,3>, "");
-m.def("cpu_double_FullConvolution_updateOutput_3", &cpu_FullConvolution_updateOutput<double,3>, "");
-m.def("cpu_float_FullConvolution_updateOutput_4", &cpu_FullConvolution_updateOutput<float,4>, "");
-m.def("cpu_double_FullConvolution_updateOutput_4", &cpu_FullConvolution_updateOutput<double,4>, "");
-m.def("cpu_float_FullConvolution_backward_1", &cpu_FullConvolution_backward<float,1>, "");
-m.def("cpu_double_FullConvolution_backward_1", &cpu_FullConvolution_backward<double,1>, "");
-m.def("cpu_float_FullConvolution_backward_2", &cpu_FullConvolution_backward<float,2>, "");
-m.def("cpu_double_FullConvolution_backward_2", &cpu_FullConvolution_backward<double,2>, "");
-m.def("cpu_float_FullConvolution_backward_3", &cpu_FullConvolution_backward<float,3>, "");
-m.def("cpu_double_FullConvolution_backward_3", &cpu_FullConvolution_backward<double,3>, "");
-m.def("cpu_float_FullConvolution_backward_4", &cpu_FullConvolution_backward<float,4>, "");
-m.def("cpu_double_FullConvolution_backward_4", &cpu_FullConvolution_backward<double,4>, "");
-m.def("cpu_float_MaxPooling_updateOutput_1", &cpu_MaxPooling_updateOutput<float,1>, "");
-m.def("cpu_double_MaxPooling_updateOutput_1", &cpu_MaxPooling_updateOutput<double,1>, "");
-m.def("cpu_float_MaxPooling_updateOutput_2", &cpu_MaxPooling_updateOutput<float,2>, "");
-m.def("cpu_double_MaxPooling_updateOutput_2", &cpu_MaxPooling_updateOutput<double,2>, "");
-m.def("cpu_float_MaxPooling_updateOutput_3", &cpu_MaxPooling_updateOutput<float,3>, "");
-m.def("cpu_double_MaxPooling_updateOutput_3", &cpu_MaxPooling_updateOutput<double,3>, "");
-m.def("cpu_float_MaxPooling_updateOutput_4", &cpu_MaxPooling_updateOutput<float,4>, "");
-m.def("cpu_double_MaxPooling_updateOutput_4", &cpu_MaxPooling_updateOutput<double,4>, "");
-m.def("cpu_float_MaxPooling_updateGradInput_1", &cpu_MaxPooling_updateGradInput<float,1>, "");
-m.def("cpu_double_MaxPooling_updateGradInput_1", &cpu_MaxPooling_updateGradInput<double,1>, "");
-m.def("cpu_float_MaxPooling_updateGradInput_2", &cpu_MaxPooling_updateGradInput<float,2>, "");
-m.def("cpu_double_MaxPooling_updateGradInput_2", &cpu_MaxPooling_updateGradInput<double,2>, "");
-m.def("cpu_float_MaxPooling_updateGradInput_3", &cpu_MaxPooling_updateGradInput<float,3>, "");
-m.def("cpu_double_MaxPooling_updateGradInput_3", &cpu_MaxPooling_updateGradInput<double,3>, "");
-m.def("cpu_float_MaxPooling_updateGradInput_4", &cpu_MaxPooling_updateGradInput<float,4>, "");
-m.def("cpu_double_MaxPooling_updateGradInput_4", &cpu_MaxPooling_updateGradInput<double,4>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateOutput_1", &cpu_RandomizedStrideMaxPooling_updateOutput<float,1>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateOutput_1", &cpu_RandomizedStrideMaxPooling_updateOutput<double,1>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateOutput_2", &cpu_RandomizedStrideMaxPooling_updateOutput<float,2>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateOutput_2", &cpu_RandomizedStrideMaxPooling_updateOutput<double,2>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateOutput_3", &cpu_RandomizedStrideMaxPooling_updateOutput<float,3>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateOutput_3", &cpu_RandomizedStrideMaxPooling_updateOutput<double,3>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateOutput_4", &cpu_RandomizedStrideMaxPooling_updateOutput<float,4>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateOutput_4", &cpu_RandomizedStrideMaxPooling_updateOutput<double,4>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateGradInput_1", &cpu_RandomizedStrideMaxPooling_updateGradInput<float,1>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateGradInput_1", &cpu_RandomizedStrideMaxPooling_updateGradInput<double,1>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateGradInput_2", &cpu_RandomizedStrideMaxPooling_updateGradInput<float,2>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateGradInput_2", &cpu_RandomizedStrideMaxPooling_updateGradInput<double,2>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateGradInput_3", &cpu_RandomizedStrideMaxPooling_updateGradInput<float,3>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateGradInput_3", &cpu_RandomizedStrideMaxPooling_updateGradInput<double,3>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateGradInput_4", &cpu_RandomizedStrideMaxPooling_updateGradInput<float,4>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateGradInput_4", &cpu_RandomizedStrideMaxPooling_updateGradInput<double,4>, "");
-m.def("cpu_float_SparseToDense_updateOutput_1", &cpu_SparseToDense_updateOutput<float,1>, "");
-m.def("cpu_double_SparseToDense_updateOutput_1", &cpu_SparseToDense_updateOutput<double,1>, "");
-m.def("cpu_float_SparseToDense_updateOutput_2", &cpu_SparseToDense_updateOutput<float,2>, "");
-m.def("cpu_double_SparseToDense_updateOutput_2", &cpu_SparseToDense_updateOutput<double,2>, "");
-m.def("cpu_float_SparseToDense_updateOutput_3", &cpu_SparseToDense_updateOutput<float,3>, "");
-m.def("cpu_double_SparseToDense_updateOutput_3", &cpu_SparseToDense_updateOutput<double,3>, "");
-m.def("cpu_float_SparseToDense_updateOutput_4", &cpu_SparseToDense_updateOutput<float,4>, "");
-m.def("cpu_double_SparseToDense_updateOutput_4", &cpu_SparseToDense_updateOutput<double,4>, "");
-m.def("cpu_float_SparseToDense_updateGradInput_1", &cpu_SparseToDense_updateGradInput<float,1>, "");
-m.def("cpu_double_SparseToDense_updateGradInput_1", &cpu_SparseToDense_updateGradInput<double,1>, "");
-m.def("cpu_float_SparseToDense_updateGradInput_2", &cpu_SparseToDense_updateGradInput<float,2>, "");
-m.def("cpu_double_SparseToDense_updateGradInput_2", &cpu_SparseToDense_updateGradInput<double,2>, "");
-m.def("cpu_float_SparseToDense_updateGradInput_3", &cpu_SparseToDense_updateGradInput<float,3>, "");
-m.def("cpu_double_SparseToDense_updateGradInput_3", &cpu_SparseToDense_updateGradInput<double,3>, "");
-m.def("cpu_float_SparseToDense_updateGradInput_4", &cpu_SparseToDense_updateGradInput<float,4>, "");
-m.def("cpu_double_SparseToDense_updateGradInput_4", &cpu_SparseToDense_updateGradInput<double,4>, "");
-m.def("cpu_float_SubmanifoldConvolution_updateOutput_1", &cpu_SubmanifoldConvolution_updateOutput<float,1>, "");
-m.def("cpu_double_SubmanifoldConvolution_updateOutput_1", &cpu_SubmanifoldConvolution_updateOutput<double,1>, "");
-m.def("cpu_float_SubmanifoldConvolution_updateOutput_2", &cpu_SubmanifoldConvolution_updateOutput<float,2>, "");
-m.def("cpu_double_SubmanifoldConvolution_updateOutput_2", &cpu_SubmanifoldConvolution_updateOutput<double,2>, "");
-m.def("cpu_float_SubmanifoldConvolution_updateOutput_3", &cpu_SubmanifoldConvolution_updateOutput<float,3>, "");
-m.def("cpu_double_SubmanifoldConvolution_updateOutput_3", &cpu_SubmanifoldConvolution_updateOutput<double,3>, "");
-m.def("cpu_float_SubmanifoldConvolution_updateOutput_4", &cpu_SubmanifoldConvolution_updateOutput<float,4>, "");
-m.def("cpu_double_SubmanifoldConvolution_updateOutput_4", &cpu_SubmanifoldConvolution_updateOutput<double,4>, "");
-m.def("cpu_float_SubmanifoldConvolution_backward_1", &cpu_SubmanifoldConvolution_backward<float,1>, "");
-m.def("cpu_double_SubmanifoldConvolution_backward_1", &cpu_SubmanifoldConvolution_backward<double,1>, "");
-m.def("cpu_float_SubmanifoldConvolution_backward_2", &cpu_SubmanifoldConvolution_backward<float,2>, "");
-m.def("cpu_double_SubmanifoldConvolution_backward_2", &cpu_SubmanifoldConvolution_backward<double,2>, "");
-m.def("cpu_float_SubmanifoldConvolution_backward_3", &cpu_SubmanifoldConvolution_backward<float,3>, "");
-m.def("cpu_double_SubmanifoldConvolution_backward_3", &cpu_SubmanifoldConvolution_backward<double,3>, "");
-m.def("cpu_float_SubmanifoldConvolution_backward_4", &cpu_SubmanifoldConvolution_backward<float,4>, "");
-m.def("cpu_double_SubmanifoldConvolution_backward_4", &cpu_SubmanifoldConvolution_backward<double,4>, "");
-m.def("cpu_float_InputLayer_updateOutput_1", &cpu_InputLayer_updateOutput<float,1>, "");
-m.def("cpu_double_InputLayer_updateOutput_1", &cpu_InputLayer_updateOutput<double,1>, "");
-m.def("cpu_float_InputLayer_updateOutput_2", &cpu_InputLayer_updateOutput<float,2>, "");
-m.def("cpu_double_InputLayer_updateOutput_2", &cpu_InputLayer_updateOutput<double,2>, "");
-m.def("cpu_float_InputLayer_updateOutput_3", &cpu_InputLayer_updateOutput<float,3>, "");
-m.def("cpu_double_InputLayer_updateOutput_3", &cpu_InputLayer_updateOutput<double,3>, "");
-m.def("cpu_float_InputLayer_updateOutput_4", &cpu_InputLayer_updateOutput<float,4>, "");
-m.def("cpu_double_InputLayer_updateOutput_4", &cpu_InputLayer_updateOutput<double,4>, "");
-m.def("cpu_float_InputLayer_updateGradInput_1", &cpu_InputLayer_updateGradInput<float,1>, "");
-m.def("cpu_double_InputLayer_updateGradInput_1", &cpu_InputLayer_updateGradInput<double,1>, "");
-m.def("cpu_float_InputLayer_updateGradInput_2", &cpu_InputLayer_updateGradInput<float,2>, "");
-m.def("cpu_double_InputLayer_updateGradInput_2", &cpu_InputLayer_updateGradInput<double,2>, "");
-m.def("cpu_float_InputLayer_updateGradInput_3", &cpu_InputLayer_updateGradInput<float,3>, "");
-m.def("cpu_double_InputLayer_updateGradInput_3", &cpu_InputLayer_updateGradInput<double,3>, "");
-m.def("cpu_float_InputLayer_updateGradInput_4", &cpu_InputLayer_updateGradInput<float,4>, "");
-m.def("cpu_double_InputLayer_updateGradInput_4", &cpu_InputLayer_updateGradInput<double,4>, "");
-m.def("cpu_float_OutputLayer_updateOutput_1", &cpu_OutputLayer_updateOutput<float,1>, "");
-m.def("cpu_double_OutputLayer_updateOutput_1", &cpu_OutputLayer_updateOutput<double,1>, "");
-m.def("cpu_float_OutputLayer_updateOutput_2", &cpu_OutputLayer_updateOutput<float,2>, "");
-m.def("cpu_double_OutputLayer_updateOutput_2", &cpu_OutputLayer_updateOutput<double,2>, "");
-m.def("cpu_float_OutputLayer_updateOutput_3", &cpu_OutputLayer_updateOutput<float,3>, "");
-m.def("cpu_double_OutputLayer_updateOutput_3", &cpu_OutputLayer_updateOutput<double,3>, "");
-m.def("cpu_float_OutputLayer_updateOutput_4", &cpu_OutputLayer_updateOutput<float,4>, "");
-m.def("cpu_double_OutputLayer_updateOutput_4", &cpu_OutputLayer_updateOutput<double,4>, "");
-m.def("cpu_float_OutputLayer_updateGradInput_1", &cpu_OutputLayer_updateGradInput<float,1>, "");
-m.def("cpu_double_OutputLayer_updateGradInput_1", &cpu_OutputLayer_updateGradInput<double,1>, "");
-m.def("cpu_float_OutputLayer_updateGradInput_2", &cpu_OutputLayer_updateGradInput<float,2>, "");
-m.def("cpu_double_OutputLayer_updateGradInput_2", &cpu_OutputLayer_updateGradInput<double,2>, "");
-m.def("cpu_float_OutputLayer_updateGradInput_3", &cpu_OutputLayer_updateGradInput<float,3>, "");
-m.def("cpu_double_OutputLayer_updateGradInput_3", &cpu_OutputLayer_updateGradInput<double,3>, "");
-m.def("cpu_float_OutputLayer_updateGradInput_4", &cpu_OutputLayer_updateGradInput<float,4>, "");
-m.def("cpu_double_OutputLayer_updateGradInput_4", &cpu_OutputLayer_updateGradInput<double,4>, "");
-m.def("cpu_float_BLInputLayer_updateOutput_1", &cpu_BLInputLayer_updateOutput<float,1>, "");
-m.def("cpu_double_BLInputLayer_updateOutput_1", &cpu_BLInputLayer_updateOutput<double,1>, "");
-m.def("cpu_float_BLInputLayer_updateOutput_2", &cpu_BLInputLayer_updateOutput<float,2>, "");
-m.def("cpu_double_BLInputLayer_updateOutput_2", &cpu_BLInputLayer_updateOutput<double,2>, "");
-m.def("cpu_float_BLInputLayer_updateOutput_3", &cpu_BLInputLayer_updateOutput<float,3>, "");
-m.def("cpu_double_BLInputLayer_updateOutput_3", &cpu_BLInputLayer_updateOutput<double,3>, "");
-m.def("cpu_float_BLInputLayer_updateOutput_4", &cpu_BLInputLayer_updateOutput<float,4>, "");
-m.def("cpu_double_BLInputLayer_updateOutput_4", &cpu_BLInputLayer_updateOutput<double,4>, "");
-m.def("cpu_float_BLInputLayer_updateGradInput_1", &cpu_BLInputLayer_updateGradInput<float,1>, "");
-m.def("cpu_double_BLInputLayer_updateGradInput_1", &cpu_BLInputLayer_updateGradInput<double,1>, "");
-m.def("cpu_float_BLInputLayer_updateGradInput_2", &cpu_BLInputLayer_updateGradInput<float,2>, "");
-m.def("cpu_double_BLInputLayer_updateGradInput_2", &cpu_BLInputLayer_updateGradInput<double,2>, "");
-m.def("cpu_float_BLInputLayer_updateGradInput_3", &cpu_BLInputLayer_updateGradInput<float,3>, "");
-m.def("cpu_double_BLInputLayer_updateGradInput_3", &cpu_BLInputLayer_updateGradInput<double,3>, "");
-m.def("cpu_float_BLInputLayer_updateGradInput_4", &cpu_BLInputLayer_updateGradInput<float,4>, "");
-m.def("cpu_double_BLInputLayer_updateGradInput_4", &cpu_BLInputLayer_updateGradInput<double,4>, "");
-m.def("cpu_float_BLOutputLayer_updateOutput_1", &cpu_BLOutputLayer_updateOutput<float,1>, "");
-m.def("cpu_double_BLOutputLayer_updateOutput_1", &cpu_BLOutputLayer_updateOutput<double,1>, "");
-m.def("cpu_float_BLOutputLayer_updateOutput_2", &cpu_BLOutputLayer_updateOutput<float,2>, "");
-m.def("cpu_double_BLOutputLayer_updateOutput_2", &cpu_BLOutputLayer_updateOutput<double,2>, "");
-m.def("cpu_float_BLOutputLayer_updateOutput_3", &cpu_BLOutputLayer_updateOutput<float,3>, "");
-m.def("cpu_double_BLOutputLayer_updateOutput_3", &cpu_BLOutputLayer_updateOutput<double,3>, "");
-m.def("cpu_float_BLOutputLayer_updateOutput_4", &cpu_BLOutputLayer_updateOutput<float,4>, "");
-m.def("cpu_double_BLOutputLayer_updateOutput_4", &cpu_BLOutputLayer_updateOutput<double,4>, "");
-m.def("cpu_float_BLOutputLayer_updateGradInput_1", &cpu_BLOutputLayer_updateGradInput<float,1>, "");
-m.def("cpu_double_BLOutputLayer_updateGradInput_1", &cpu_BLOutputLayer_updateGradInput<double,1>, "");
-m.def("cpu_float_BLOutputLayer_updateGradInput_2", &cpu_BLOutputLayer_updateGradInput<float,2>, "");
-m.def("cpu_double_BLOutputLayer_updateGradInput_2", &cpu_BLOutputLayer_updateGradInput<double,2>, "");
-m.def("cpu_float_BLOutputLayer_updateGradInput_3", &cpu_BLOutputLayer_updateGradInput<float,3>, "");
-m.def("cpu_double_BLOutputLayer_updateGradInput_3", &cpu_BLOutputLayer_updateGradInput<double,3>, "");
-m.def("cpu_float_BLOutputLayer_updateGradInput_4", &cpu_BLOutputLayer_updateGradInput<float,4>, "");
-m.def("cpu_double_BLOutputLayer_updateGradInput_4", &cpu_BLOutputLayer_updateGradInput<double,4>, "");
-m.def("cpu_float_UnPooling_updateOutput_1", &cpu_UnPooling_updateOutput<float,1>, "");
-m.def("cpu_double_UnPooling_updateOutput_1", &cpu_UnPooling_updateOutput<double,1>, "");
-m.def("cpu_float_UnPooling_updateOutput_2", &cpu_UnPooling_updateOutput<float,2>, "");
-m.def("cpu_double_UnPooling_updateOutput_2", &cpu_UnPooling_updateOutput<double,2>, "");
-m.def("cpu_float_UnPooling_updateOutput_3", &cpu_UnPooling_updateOutput<float,3>, "");
-m.def("cpu_double_UnPooling_updateOutput_3", &cpu_UnPooling_updateOutput<double,3>, "");
-m.def("cpu_float_UnPooling_updateOutput_4", &cpu_UnPooling_updateOutput<float,4>, "");
-m.def("cpu_double_UnPooling_updateOutput_4", &cpu_UnPooling_updateOutput<double,4>, "");
-m.def("cpu_float_UnPooling_updateGradInput_1", &cpu_UnPooling_updateGradInput<float,1>, "");
-m.def("cpu_double_UnPooling_updateGradInput_1", &cpu_UnPooling_updateGradInput<double,1>, "");
-m.def("cpu_float_UnPooling_updateGradInput_2", &cpu_UnPooling_updateGradInput<float,2>, "");
-m.def("cpu_double_UnPooling_updateGradInput_2", &cpu_UnPooling_updateGradInput<double,2>, "");
-m.def("cpu_float_UnPooling_updateGradInput_3", &cpu_UnPooling_updateGradInput<float,3>, "");
-m.def("cpu_double_UnPooling_updateGradInput_3", &cpu_UnPooling_updateGradInput<double,3>, "");
-m.def("cpu_float_UnPooling_updateGradInput_4", &cpu_UnPooling_updateGradInput<float,4>, "");
-m.def("cpu_double_UnPooling_updateGradInput_4", &cpu_UnPooling_updateGradInput<double,4>, "");
-
-m.def("n_rulebook_bits", []() {return 8*sizeof(Int);}, "");
-}
--- a/sparseconvnet/SCN/pybind_cuda.cpp
+++ b/sparseconvnet/SCN/pybind_cuda.cpp
-
-// Copyright 2016-present, Facebook, Inc.
-// All rights reserved.
-//
-// This source code is licensed under the license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <torch/torch.h>
-
-#include "Metadata/Metadata.h"
-
-template <typename T>
-double cpu_AffineReluTrivialConvolution_updateOutput(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor affineWeight,
-                                                     at::Tensor affineBias,
-                                                     at::Tensor convWeight);
-template <typename T>
-void cpu_AffineReluTrivialConvolution_backward(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor affineWeight,
-    at::Tensor d_affineWeight, at::Tensor affineBias, at::Tensor d_affineBias,
-    at::Tensor convWeight, at::Tensor d_convWeight, bool additiveGrad);
-template <typename T>
-void cpu_BatchNormalization_updateOutput(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, T eps, T momentum, bool train,
-    T leakiness);
-template <typename T>
-void cpu_BatchNormalizationInTensor_updateOutput(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, T eps, T momentum, bool train,
-    T leakiness);
-template <typename T>
-void cpu_BatchNormalization_backward(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor output_features, at::Tensor d_output_features,
-    at::Tensor saveMean, at::Tensor saveInvStd, at::Tensor runningMean,
-    at::Tensor runningVar, at::Tensor weight, at::Tensor bias,
-    at::Tensor d_weight, at::Tensor d_bias, T leakiness);
-template <typename T>
-void cpu_BatchwiseMultiplicativeDropout_updateOutput(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor noise,
-                                                     float alpha);
-template <typename T>
-void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor noise, float alpha);
-template <typename T>
-void cpu_LeakyReLU_updateOutput(at::Tensor input_features,
-                                at::Tensor output_features, float alpha);
-template <typename T>
-void cpu_LeakyReLU_updateGradInput(at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features, float alpha);
-template <typename T>
-double cpu_NetworkInNetwork_updateOutput(at::Tensor input_features,
-                                         at::Tensor output_features,
-                                         at::Tensor weight, at::Tensor bias);
-template <typename T>
-void cpu_NetworkInNetwork_updateGradInput(at::Tensor d_input_features,
-                                          at::Tensor d_output_features,
-                                          at::Tensor weight);
-template <typename T>
-void cpu_NetworkInNetwork_accGradParameters(at::Tensor input_features,
-                                            at::Tensor d_output_features,
-                                            at::Tensor d_weight,
-                                            at::Tensor d_bias);
-template <typename T, Int Dimension>
-void cpu_ActivePooling_updateOutput(at::Tensor inputSize,
-                                    Metadata<Dimension> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template <typename T, Int Dimension>
-void cpu_ActivePooling_updateGradInput(
-    at::Tensor inputSize, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template <typename T, Int Dimension>
-void cpu_AveragePooling_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_AveragePooling_updateGradInput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template <typename T, Int Dimension>
-double cpu_Convolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_Convolution_backward(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<Dimension> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_SubmanifoldConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<Dimension> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_SubmanifoldConvolution_backward(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<Dimension> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_FullConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &mIn,
-    Metadata<Dimension> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_FullConvolution_backward(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &mIn,
-    Metadata<Dimension> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_RandomizedStrideConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_RandomizedStrideConvolution_backward(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cpu_Deconvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cpu_Deconvolution_backward(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<Dimension> &m,
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-void cpu_InputLayer_updateOutput(Metadata<Dimension> &m, at::Tensor spatialSize,
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode);
-template <typename T, Int Dimension>
-void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
-                                  at::Tensor input_features,
-                                  at::Tensor output_features);
-template <typename T, Int Dimension>
-void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode);
-template <typename T, Int Dimension>
-void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features);
-template <typename T, Int Dimension>
-void cpu_BLOutputLayer_updateGradInput(Metadata<Dimension> &m,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_MaxPooling_updateOutput(at::Tensor inputSize, at::Tensor outputSize,
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<Dimension> &m,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_MaxPooling_updateGradInput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_RandomizedStrideMaxPooling_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_RandomizedStrideMaxPooling_updateGradInput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_SparseToDense_updateOutput(at::Tensor inputSize,
-                                    Metadata<Dimension> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes);
-template <typename T, Int Dimension>
-void cpu_SparseToDense_updateGradInput(at::Tensor inputSize,
-                                       Metadata<Dimension> &m,
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features);
-template <typename T, Int Dimension>
-void cpu_UnPooling_updateOutput(at::Tensor inputSize, at::Tensor outputSize,
-                                at::Tensor poolSize, at::Tensor poolStride,
-                                Metadata<Dimension> &m,
-                                at::Tensor input_features,
-                                at::Tensor output_features,
-                                long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cpu_UnPooling_updateGradInput(at::Tensor inputSize, at::Tensor outputSize,
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<Dimension> &m,
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop);
-
-template <typename T>
-double cuda_AffineReluTrivialConvolution_updateOutput(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor affineWeight,
-                                                     at::Tensor affineBias,
-                                                     at::Tensor convWeight);
-template <typename T>
-void cuda_AffineReluTrivialConvolution_backward(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor affineWeight,
-    at::Tensor d_affineWeight, at::Tensor affineBias, at::Tensor d_affineBias,
-    at::Tensor convWeight, at::Tensor d_convWeight, bool additiveGrad);
-template <typename T>
-void cuda_BatchNormalization_updateOutput(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, T eps, T momentum, bool train,
-    T leakiness);
-template <typename T>
-void cuda_BatchNormalizationInTensor_updateOutput(
-    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
-    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
-    at::Tensor weight, at::Tensor bias, T eps, T momentum, bool train,
-    T leakiness);
-template <typename T>
-void cuda_BatchNormalization_backward(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor output_features, at::Tensor d_output_features,
-    at::Tensor saveMean, at::Tensor saveInvStd, at::Tensor runningMean,
-    at::Tensor runningVar, at::Tensor weight, at::Tensor bias,
-    at::Tensor d_weight, at::Tensor d_bias, T leakiness);
-template <typename T>
-void cuda_BatchwiseMultiplicativeDropout_updateOutput(at::Tensor input_features,
-                                                     at::Tensor output_features,
-                                                     at::Tensor noise,
-                                                     float alpha);
-template <typename T>
-void cuda_BatchwiseMultiplicativeDropout_updateGradInput(
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor noise, float alpha);
-template <typename T>
-void cuda_LeakyReLU_updateOutput(at::Tensor input_features,
-                                at::Tensor output_features, float alpha);
-template <typename T>
-void cuda_LeakyReLU_updateGradInput(at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features, float alpha);
-template <typename T>
-double cuda_NetworkInNetwork_updateOutput(at::Tensor input_features,
-                                         at::Tensor output_features,
-                                         at::Tensor weight, at::Tensor bias);
-template <typename T>
-void cuda_NetworkInNetwork_updateGradInput(at::Tensor d_input_features,
-                                          at::Tensor d_output_features,
-                                          at::Tensor weight);
-template <typename T>
-void cuda_NetworkInNetwork_accGradParameters(at::Tensor input_features,
-                                            at::Tensor d_output_features,
-                                            at::Tensor d_weight,
-                                            at::Tensor d_bias);
-template <typename T, Int Dimension>
-void cuda_ActivePooling_updateOutput(at::Tensor inputSize,
-                                    Metadata<Dimension> &m,
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, bool average);
-template <typename T, Int Dimension>
-void cuda_ActivePooling_updateGradInput(
-    at::Tensor inputSize, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
-template <typename T, Int Dimension>
-void cuda_AveragePooling_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, long nFeaturesToDrop);
-template <typename T, Int Dimension>
-void cuda_AveragePooling_updateGradInput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    long nFeaturesToDrop);
-template <typename T, Int Dimension>
-double cuda_Convolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cuda_Convolution_backward(at::Tensor inputSize, at::Tensor outputSize,
-                              at::Tensor filterSize, at::Tensor filterStride,
-                              Metadata<Dimension> &m, at::Tensor input_features,
-                              at::Tensor d_input_features,
-                              at::Tensor d_output_features, at::Tensor weight,
-                              at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cuda_SubmanifoldConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<Dimension> &m,
-    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
-    at::Tensor bias);
-template <typename T, Int Dimension>
-void cuda_SubmanifoldConvolution_backward(
-    at::Tensor inputSize, at::Tensor filterSize, Metadata<Dimension> &m,
-    at::Tensor input_features, at::Tensor d_input_features,
-    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
-    at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cuda_FullConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &mIn,
-    Metadata<Dimension> &mOut, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cuda_FullConvolution_backward(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &mIn,
-    Metadata<Dimension> &mOut, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension>
-double cuda_RandomizedStrideConvolution_updateOutput(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
-template <typename T, Int Dimension>
-void cuda_RandomizedStrideConvolution_backward(
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
-    at::Tensor d_input_features, at::Tensor d_output_features,
-    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
-template <typename T, Int Dimension> // Declaration only; the module below instantiates it with T=float, Dimension=1..4.
-double cuda_Deconvolution_updateOutput( // Registered with Python as "cuda_float_Deconvolution_updateOutput_<Dimension>".
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
-    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features, // m is taken by non-const reference.
-    at::Tensor output_features, at::Tensor weight, at::Tensor bias); // NOTE(review): meaning of the double result is not shown here — confirm in the definition.
-template <typename T, Int Dimension> // Declaration only; the module below instantiates it with T=float, Dimension=1..4.
-void cuda_Deconvolution_backward(at::Tensor inputSize, at::Tensor outputSize, // Registered as "cuda_float_Deconvolution_backward_<Dimension>".
-                                at::Tensor filterSize, at::Tensor filterStride,
-                                Metadata<Dimension> &m, // Taken by non-const reference.
-                                at::Tensor input_features,
-                                at::Tensor d_input_features,
-                                at::Tensor d_output_features, at::Tensor weight,
-                                at::Tensor d_weight, at::Tensor d_bias); // NOTE(review): d_* are presumably gradient tensors — confirm in the definition.
-template <typename T, Int Dimension> // Declaration only: the statement ends in ';' and no body is given in this file.
-void cuda_InputLayer_updateOutput(Metadata<Dimension> &m, at::Tensor spatialSize, // Metadata is the first parameter and is taken by non-const reference.
-                                 at::Tensor input_coords,
-                                 at::Tensor input_features,
-                                 at::Tensor output_features, long batchSize,
-                                 long mode); // NOTE(review): the valid values of mode are not visible here — check the definition.
-template <typename T, Int Dimension> // Declaration only: the statement ends in ';' and no body is given in this file.
-void cuda_InputLayer_updateGradInput(Metadata<Dimension> &m, // Metadata is taken by non-const reference.
-                                    at::Tensor d_input_features,
-                                    at::Tensor d_output_features); // NOTE(review): presumably maps d_output_features back to d_input_features — confirm.
-template <typename T, Int Dimension> // Declaration only: the statement ends in ';' and no body is given in this file.
-void cuda_OutputLayer_updateOutput(Metadata<Dimension> &m, // Metadata is taken by non-const reference.
-                                  at::Tensor input_features,
-                                  at::Tensor output_features); // Returns void; NOTE(review): output_features is presumably the result tensor — confirm.
-template <typename T, Int Dimension> // Declaration only: the statement ends in ';' and no body is given in this file.
-void cuda_OutputLayer_updateGradInput(Metadata<Dimension> &m, // Metadata is taken by non-const reference.
-                                     at::Tensor d_input_features,
-                                     at::Tensor d_output_features); // NOTE(review): presumably maps d_output_features back to d_input_features — confirm.
-template <typename T, Int Dimension> // Declaration only: the statement ends in ';' and no body is given in this file.
-void cuda_BLInputLayer_updateOutput(Metadata<Dimension> &m, // Metadata is taken by non-const reference.
-                                   at::Tensor spatialSize,
-                                   at::Tensor input_coords,
-                                   at::Tensor input_features,
-                                   at::Tensor output_features, long mode); // Unlike cuda_InputLayer_updateOutput there is no batchSize parameter; mode values are not visible here.
-template <typename T, Int Dimension> // Declaration only: the statement ends in ';' and no body is given in this file.
-void cuda_BLInputLayer_updateGradInput(Metadata<Dimension> &m, // Metadata is taken by non-const reference.
-                                      at::Tensor d_input_features,
-                                      at::Tensor d_output_features); // NOTE(review): presumably maps d_output_features back to d_input_features — confirm.
-template <typename T, Int Dimension> // Declaration only: the statement ends in ';' and no body is given in this file.
-void cuda_BLOutputLayer_updateOutput(Metadata<Dimension> &m, // Metadata is taken by non-const reference.
-                                    at::Tensor input_features,
-                                    at::Tensor output_features); // Returns void; NOTE(review): output_features is presumably the result tensor — confirm.
-template <typename T, Int Dimension> // Declaration only: the statement ends in ';' and no body is given in this file.
-void cuda_BLOutputLayer_updateGradInput(Metadata<Dimension> &m, // Metadata is taken by non-const reference.
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features); // NOTE(review): presumably maps d_output_features back to d_input_features — confirm.
-template <typename T, Int Dimension> // Declaration only; the module below instantiates it with T=float, Dimension=1..4.
-void cuda_MaxPooling_updateOutput(at::Tensor inputSize, at::Tensor outputSize, // Registered as "cuda_float_MaxPooling_updateOutput_<Dimension>".
-                                 at::Tensor poolSize, at::Tensor poolStride,
-                                 Metadata<Dimension> &m, // Taken by non-const reference.
-                                 at::Tensor input_features,
-                                 at::Tensor output_features,
-                                 long nFeaturesToDrop); // NOTE(review): presumably a count of feature columns to skip — confirm in the definition.
-template <typename T, Int Dimension> // Declaration only; the module below instantiates it with T=float, Dimension=1..4.
-void cuda_MaxPooling_updateGradInput( // Registered with Python as "cuda_float_MaxPooling_updateGradInput_<Dimension>".
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features, // m is taken by non-const reference.
-    at::Tensor d_input_features, at::Tensor output_features, // Takes the forward input_features and output_features as well as the d_* tensors.
-    at::Tensor d_output_features, long nFeaturesToDrop); // NOTE(review): nFeaturesToDrop is presumably a count of skipped feature columns — confirm.
-template <typename T, Int Dimension> // Declaration only; the module below instantiates it with T=float, Dimension=1..4.
-void cuda_RandomizedStrideMaxPooling_updateOutput( // Registered as "cuda_float_RandomizedStrideMaxPooling_updateOutput_<Dimension>".
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features, // Parameter list is identical to cuda_MaxPooling_updateOutput.
-    at::Tensor output_features, long nFeaturesToDrop); // NOTE(review): nFeaturesToDrop is presumably a count of skipped feature columns — confirm.
-template <typename T, Int Dimension> // Declaration only; the module below instantiates it with T=float, Dimension=1..4.
-void cuda_RandomizedStrideMaxPooling_updateGradInput( // Registered as "cuda_float_RandomizedStrideMaxPooling_updateGradInput_<Dimension>".
-    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
-    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features, // Parameter list is identical to cuda_MaxPooling_updateGradInput.
-    at::Tensor d_input_features, at::Tensor output_features,
-    at::Tensor d_output_features, long nFeaturesToDrop); // NOTE(review): nFeaturesToDrop is presumably a count of skipped feature columns — confirm.
-template <typename T, Int Dimension> // Declaration only; the module below instantiates it with T=float, Dimension=1..4.
-void cuda_SparseToDense_updateOutput(at::Tensor inputSize, // Registered as "cuda_float_SparseToDense_updateOutput_<Dimension>".
-                                    Metadata<Dimension> &m, // Taken by non-const reference.
-                                    at::Tensor input_features,
-                                    at::Tensor output_features, long nPlanes); // NOTE(review): nPlanes is presumably the feature-plane count of the dense output — confirm.
-template <typename T, Int Dimension> // Declaration only; the module below registers at least the <float,1> instantiation.
-void cuda_SparseToDense_updateGradInput(at::Tensor inputSize, // Registered as "cuda_float_SparseToDense_updateGradInput_<Dimension>".
-                                       Metadata<Dimension> &m, // Taken by non-const reference.
-                                       at::Tensor input_features,
-                                       at::Tensor d_input_features,
-                                       at::Tensor d_output_features); // No nPlanes parameter here, unlike cuda_SparseToDense_updateOutput.
-template <typename T, Int Dimension> // Declaration only; this commit adds the definition in sparseconvnet/SCN/CUDA/UnPooling.cpp.
-void cuda_UnPooling_updateOutput(at::Tensor inputSize, at::Tensor outputSize, // That definition annotates the four size/stride tensors as long
-                                at::Tensor poolSize, at::Tensor poolStride, // and the two feature tensors as cuda float.
-                                Metadata<Dimension> &m, // Queried there via m.getRuleBook(...) and m.getNActive(outputSize).
-                                at::Tensor input_features,
-                                at::Tensor output_features, // Resized there to {nActive, input_features.size(1) - nFeaturesToDrop}, then zeroed.
-                                long nFeaturesToDrop); // The definition offsets the input data pointer by this many elements.
-template <typename T, Int Dimension> // Declaration only; this commit starts a definition with this name in sparseconvnet/SCN/CUDA/UnPooling.cpp.
-void cuda_UnPooling_updateGradInput(at::Tensor inputSize, at::Tensor outputSize, // That definition annotates inputSize/outputSize/poolSize as long tensors.
-                                   at::Tensor poolSize, at::Tensor poolStride,
-                                   Metadata<Dimension> &m, // Taken by non-const reference.
-                                   at::Tensor input_features,
-                                   at::Tensor d_input_features,
-                                   at::Tensor d_output_features,
-                                   long nFeaturesToDrop); // NOTE(review): presumably the same column offset as in cuda_UnPooling_updateOutput — confirm.
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-
-pybind11::class_<Metadata<1>>(m, "Metadata_1")
-  .def(pybind11::init<>())
-  .def("clear", &Metadata<1>::clear)
-  .def("setInputSpatialSize", &Metadata<1>::setInputSpatialSize)
-  .def("batchAddSample", &Metadata<1>::batchAddSample)
-  .def("setInputSpatialLocation", &Metadata<1>::setInputSpatialLocation)
-  .def("setInputSpatialLocations", &Metadata<1>::setInputSpatialLocations)
-  .def("getSpatialLocations", &Metadata<1>::getSpatialLocations)
-  .def("createMetadataForDenseToSparse", &Metadata<1>::createMetadataForDenseToSparse)
-  .def("sparsifyMetadata", &Metadata<1>::sparsifyMetadata)
-  .def("addSampleFromThresholdedTensor", &Metadata<1>::addSampleFromThresholdedTensor)
-  .def("generateRuleBooks3s2", &Metadata<1>::generateRuleBooks3s2)
-  .def("generateRuleBooks2s2", &Metadata<1>::generateRuleBooks2s2);
-
-pybind11::class_<Metadata<2>>(m, "Metadata_2")
-  .def(pybind11::init<>())
-  .def("clear", &Metadata<2>::clear)
-  .def("setInputSpatialSize", &Metadata<2>::setInputSpatialSize)
-  .def("batchAddSample", &Metadata<2>::batchAddSample)
-  .def("setInputSpatialLocation", &Metadata<2>::setInputSpatialLocation)
-  .def("setInputSpatialLocations", &Metadata<2>::setInputSpatialLocations)
-  .def("getSpatialLocations", &Metadata<2>::getSpatialLocations)
-  .def("createMetadataForDenseToSparse", &Metadata<2>::createMetadataForDenseToSparse)
-  .def("sparsifyMetadata", &Metadata<2>::sparsifyMetadata)
-  .def("addSampleFromThresholdedTensor", &Metadata<2>::addSampleFromThresholdedTensor)
-  .def("generateRuleBooks3s2", &Metadata<2>::generateRuleBooks3s2)
-  .def("generateRuleBooks2s2", &Metadata<2>::generateRuleBooks2s2);
-
-pybind11::class_<Metadata<3>>(m, "Metadata_3")
-  .def(pybind11::init<>())
-  .def("clear", &Metadata<3>::clear)
-  .def("setInputSpatialSize", &Metadata<3>::setInputSpatialSize)
-  .def("batchAddSample", &Metadata<3>::batchAddSample)
-  .def("setInputSpatialLocation", &Metadata<3>::setInputSpatialLocation)
-  .def("setInputSpatialLocations", &Metadata<3>::setInputSpatialLocations)
-  .def("getSpatialLocations", &Metadata<3>::getSpatialLocations)
-  .def("createMetadataForDenseToSparse", &Metadata<3>::createMetadataForDenseToSparse)
-  .def("sparsifyMetadata", &Metadata<3>::sparsifyMetadata)
-  .def("addSampleFromThresholdedTensor", &Metadata<3>::addSampleFromThresholdedTensor)
-  .def("generateRuleBooks3s2", &Metadata<3>::generateRuleBooks3s2)
-  .def("generateRuleBooks2s2", &Metadata<3>::generateRuleBooks2s2);
-
-pybind11::class_<Metadata<4>>(m, "Metadata_4")
-  .def(pybind11::init<>())
-  .def("clear", &Metadata<4>::clear)
-  .def("setInputSpatialSize", &Metadata<4>::setInputSpatialSize)
-  .def("batchAddSample", &Metadata<4>::batchAddSample)
-  .def("setInputSpatialLocation", &Metadata<4>::setInputSpatialLocation)
-  .def("setInputSpatialLocations", &Metadata<4>::setInputSpatialLocations)
-  .def("getSpatialLocations", &Metadata<4>::getSpatialLocations)
-  .def("createMetadataForDenseToSparse", &Metadata<4>::createMetadataForDenseToSparse)
-  .def("sparsifyMetadata", &Metadata<4>::sparsifyMetadata)
-  .def("addSampleFromThresholdedTensor", &Metadata<4>::addSampleFromThresholdedTensor)
-  .def("generateRuleBooks3s2", &Metadata<4>::generateRuleBooks3s2)
-  .def("generateRuleBooks2s2", &Metadata<4>::generateRuleBooks2s2);
-m.def("cpu_float_AffineReluTrivialConvolution_updateOutput", &cpu_AffineReluTrivialConvolution_updateOutput<float>, "");
-m.def("cpu_double_AffineReluTrivialConvolution_updateOutput", &cpu_AffineReluTrivialConvolution_updateOutput<double>, "");
-m.def("cuda_float_AffineReluTrivialConvolution_updateOutput", &cuda_AffineReluTrivialConvolution_updateOutput<float>, "");
-m.def("cpu_float_AffineReluTrivialConvolution_backward", &cpu_AffineReluTrivialConvolution_backward<float>, "");
-m.def("cpu_double_AffineReluTrivialConvolution_backward", &cpu_AffineReluTrivialConvolution_backward<double>, "");
-m.def("cuda_float_AffineReluTrivialConvolution_backward", &cuda_AffineReluTrivialConvolution_backward<float>, "");
-m.def("cpu_float_BatchwiseMultiplicativeDropout_updateOutput", &cpu_BatchwiseMultiplicativeDropout_updateOutput<float>, "");
-m.def("cpu_double_BatchwiseMultiplicativeDropout_updateOutput", &cpu_BatchwiseMultiplicativeDropout_updateOutput<double>, "");
-m.def("cuda_float_BatchwiseMultiplicativeDropout_updateOutput", &cuda_BatchwiseMultiplicativeDropout_updateOutput<float>, "");
-m.def("cpu_float_BatchwiseMultiplicativeDropout_updateGradInput", &cpu_BatchwiseMultiplicativeDropout_updateGradInput<float>, "");
-m.def("cpu_double_BatchwiseMultiplicativeDropout_updateGradInput", &cpu_BatchwiseMultiplicativeDropout_updateGradInput<double>, "");
-m.def("cuda_float_BatchwiseMultiplicativeDropout_updateGradInput", &cuda_BatchwiseMultiplicativeDropout_updateGradInput<float>, "");
-m.def("cpu_float_BatchNormalization_updateOutput", &cpu_BatchNormalization_updateOutput<float>, "");
-m.def("cpu_double_BatchNormalization_updateOutput", &cpu_BatchNormalization_updateOutput<double>, "");
-m.def("cuda_float_BatchNormalization_updateOutput", &cuda_BatchNormalization_updateOutput<float>, "");
-m.def("cpu_float_BatchNormalization_backward", &cpu_BatchNormalization_backward<float>, "");
-m.def("cpu_double_BatchNormalization_backward", &cpu_BatchNormalization_backward<double>, "");
-m.def("cuda_float_BatchNormalization_backward", &cuda_BatchNormalization_backward<float>, "");
-m.def("cpu_float_LeakyReLU_updateOutput", &cpu_LeakyReLU_updateOutput<float>, "");
-m.def("cpu_double_LeakyReLU_updateOutput", &cpu_LeakyReLU_updateOutput<double>, "");
-m.def("cuda_float_LeakyReLU_updateOutput", &cuda_LeakyReLU_updateOutput<float>, "");
-m.def("cpu_float_LeakyReLU_updateGradInput", &cpu_LeakyReLU_updateGradInput<float>, "");
-m.def("cpu_double_LeakyReLU_updateGradInput", &cpu_LeakyReLU_updateGradInput<double>, "");
-m.def("cuda_float_LeakyReLU_updateGradInput", &cuda_LeakyReLU_updateGradInput<float>, "");
-m.def("cpu_float_NetworkInNetwork_updateOutput", &cpu_NetworkInNetwork_updateOutput<float>, "");
-m.def("cpu_double_NetworkInNetwork_updateOutput", &cpu_NetworkInNetwork_updateOutput<double>, "");
-m.def("cuda_float_NetworkInNetwork_updateOutput", &cuda_NetworkInNetwork_updateOutput<float>, "");
-m.def("cpu_float_NetworkInNetwork_updateGradInput", &cpu_NetworkInNetwork_updateGradInput<float>, "");
-m.def("cpu_double_NetworkInNetwork_updateGradInput", &cpu_NetworkInNetwork_updateGradInput<double>, "");
-m.def("cuda_float_NetworkInNetwork_updateGradInput", &cuda_NetworkInNetwork_updateGradInput<float>, "");
-m.def("cpu_float_NetworkInNetwork_accGradParameters", &cpu_NetworkInNetwork_accGradParameters<float>, "");
-m.def("cpu_double_NetworkInNetwork_accGradParameters", &cpu_NetworkInNetwork_accGradParameters<double>, "");
-m.def("cuda_float_NetworkInNetwork_accGradParameters", &cuda_NetworkInNetwork_accGradParameters<float>, "");
-m.def("cpu_float_ActivePooling_updateOutput_1", &cpu_ActivePooling_updateOutput<float,1>, "");
-m.def("cpu_double_ActivePooling_updateOutput_1", &cpu_ActivePooling_updateOutput<double,1>, "");
-m.def("cuda_float_ActivePooling_updateOutput_1", &cuda_ActivePooling_updateOutput<float,1>, "");
-m.def("cpu_float_ActivePooling_updateOutput_2", &cpu_ActivePooling_updateOutput<float,2>, "");
-m.def("cpu_double_ActivePooling_updateOutput_2", &cpu_ActivePooling_updateOutput<double,2>, "");
-m.def("cuda_float_ActivePooling_updateOutput_2", &cuda_ActivePooling_updateOutput<float,2>, "");
-m.def("cpu_float_ActivePooling_updateOutput_3", &cpu_ActivePooling_updateOutput<float,3>, "");
-m.def("cpu_double_ActivePooling_updateOutput_3", &cpu_ActivePooling_updateOutput<double,3>, "");
-m.def("cuda_float_ActivePooling_updateOutput_3", &cuda_ActivePooling_updateOutput<float,3>, "");
-m.def("cpu_float_ActivePooling_updateOutput_4", &cpu_ActivePooling_updateOutput<float,4>, "");
-m.def("cpu_double_ActivePooling_updateOutput_4", &cpu_ActivePooling_updateOutput<double,4>, "");
-m.def("cuda_float_ActivePooling_updateOutput_4", &cuda_ActivePooling_updateOutput<float,4>, "");
-m.def("cpu_float_ActivePooling_updateGradInput_1", &cpu_ActivePooling_updateGradInput<float,1>, "");
-m.def("cpu_double_ActivePooling_updateGradInput_1", &cpu_ActivePooling_updateGradInput<double,1>, "");
-m.def("cuda_float_ActivePooling_updateGradInput_1", &cuda_ActivePooling_updateGradInput<float,1>, "");
-m.def("cpu_float_ActivePooling_updateGradInput_2", &cpu_ActivePooling_updateGradInput<float,2>, "");
-m.def("cpu_double_ActivePooling_updateGradInput_2", &cpu_ActivePooling_updateGradInput<double,2>, "");
-m.def("cuda_float_ActivePooling_updateGradInput_2", &cuda_ActivePooling_updateGradInput<float,2>, "");
-m.def("cpu_float_ActivePooling_updateGradInput_3", &cpu_ActivePooling_updateGradInput<float,3>, "");
-m.def("cpu_double_ActivePooling_updateGradInput_3", &cpu_ActivePooling_updateGradInput<double,3>, "");
-m.def("cuda_float_ActivePooling_updateGradInput_3", &cuda_ActivePooling_updateGradInput<float,3>, "");
-m.def("cpu_float_ActivePooling_updateGradInput_4", &cpu_ActivePooling_updateGradInput<float,4>, "");
-m.def("cpu_double_ActivePooling_updateGradInput_4", &cpu_ActivePooling_updateGradInput<double,4>, "");
-m.def("cuda_float_ActivePooling_updateGradInput_4", &cuda_ActivePooling_updateGradInput<float,4>, "");
-m.def("cpu_float_AveragePooling_updateOutput_1", &cpu_AveragePooling_updateOutput<float,1>, "");
-m.def("cpu_double_AveragePooling_updateOutput_1", &cpu_AveragePooling_updateOutput<double,1>, "");
-m.def("cuda_float_AveragePooling_updateOutput_1", &cuda_AveragePooling_updateOutput<float,1>, "");
-m.def("cpu_float_AveragePooling_updateOutput_2", &cpu_AveragePooling_updateOutput<float,2>, "");
-m.def("cpu_double_AveragePooling_updateOutput_2", &cpu_AveragePooling_updateOutput<double,2>, "");
-m.def("cuda_float_AveragePooling_updateOutput_2", &cuda_AveragePooling_updateOutput<float,2>, "");
-m.def("cpu_float_AveragePooling_updateOutput_3", &cpu_AveragePooling_updateOutput<float,3>, "");
-m.def("cpu_double_AveragePooling_updateOutput_3", &cpu_AveragePooling_updateOutput<double,3>, "");
-m.def("cuda_float_AveragePooling_updateOutput_3", &cuda_AveragePooling_updateOutput<float,3>, "");
-m.def("cpu_float_AveragePooling_updateOutput_4", &cpu_AveragePooling_updateOutput<float,4>, "");
-m.def("cpu_double_AveragePooling_updateOutput_4", &cpu_AveragePooling_updateOutput<double,4>, "");
-m.def("cuda_float_AveragePooling_updateOutput_4", &cuda_AveragePooling_updateOutput<float,4>, "");
-m.def("cpu_float_AveragePooling_updateGradInput_1", &cpu_AveragePooling_updateGradInput<float,1>, "");
-m.def("cpu_double_AveragePooling_updateGradInput_1", &cpu_AveragePooling_updateGradInput<double,1>, "");
-m.def("cuda_float_AveragePooling_updateGradInput_1", &cuda_AveragePooling_updateGradInput<float,1>, "");
-m.def("cpu_float_AveragePooling_updateGradInput_2", &cpu_AveragePooling_updateGradInput<float,2>, "");
-m.def("cpu_double_AveragePooling_updateGradInput_2", &cpu_AveragePooling_updateGradInput<double,2>, "");
-m.def("cuda_float_AveragePooling_updateGradInput_2", &cuda_AveragePooling_updateGradInput<float,2>, "");
-m.def("cpu_float_AveragePooling_updateGradInput_3", &cpu_AveragePooling_updateGradInput<float,3>, "");
-m.def("cpu_double_AveragePooling_updateGradInput_3", &cpu_AveragePooling_updateGradInput<double,3>, "");
-m.def("cuda_float_AveragePooling_updateGradInput_3", &cuda_AveragePooling_updateGradInput<float,3>, "");
-m.def("cpu_float_AveragePooling_updateGradInput_4", &cpu_AveragePooling_updateGradInput<float,4>, "");
-m.def("cpu_double_AveragePooling_updateGradInput_4", &cpu_AveragePooling_updateGradInput<double,4>, "");
-m.def("cuda_float_AveragePooling_updateGradInput_4", &cuda_AveragePooling_updateGradInput<float,4>, "");
-m.def("cpu_float_Convolution_updateOutput_1", &cpu_Convolution_updateOutput<float,1>, "");
-m.def("cpu_double_Convolution_updateOutput_1", &cpu_Convolution_updateOutput<double,1>, "");
-m.def("cuda_float_Convolution_updateOutput_1", &cuda_Convolution_updateOutput<float,1>, "");
-m.def("cpu_float_Convolution_updateOutput_2", &cpu_Convolution_updateOutput<float,2>, "");
-m.def("cpu_double_Convolution_updateOutput_2", &cpu_Convolution_updateOutput<double,2>, "");
-m.def("cuda_float_Convolution_updateOutput_2", &cuda_Convolution_updateOutput<float,2>, "");
-m.def("cpu_float_Convolution_updateOutput_3", &cpu_Convolution_updateOutput<float,3>, "");
-m.def("cpu_double_Convolution_updateOutput_3", &cpu_Convolution_updateOutput<double,3>, "");
-m.def("cuda_float_Convolution_updateOutput_3", &cuda_Convolution_updateOutput<float,3>, "");
-m.def("cpu_float_Convolution_updateOutput_4", &cpu_Convolution_updateOutput<float,4>, "");
-m.def("cpu_double_Convolution_updateOutput_4", &cpu_Convolution_updateOutput<double,4>, "");
-m.def("cuda_float_Convolution_updateOutput_4", &cuda_Convolution_updateOutput<float,4>, "");
-m.def("cpu_float_Convolution_backward_1", &cpu_Convolution_backward<float,1>, "");
-m.def("cpu_double_Convolution_backward_1", &cpu_Convolution_backward<double,1>, "");
-m.def("cuda_float_Convolution_backward_1", &cuda_Convolution_backward<float,1>, "");
-m.def("cpu_float_Convolution_backward_2", &cpu_Convolution_backward<float,2>, "");
-m.def("cpu_double_Convolution_backward_2", &cpu_Convolution_backward<double,2>, "");
-m.def("cuda_float_Convolution_backward_2", &cuda_Convolution_backward<float,2>, "");
-m.def("cpu_float_Convolution_backward_3", &cpu_Convolution_backward<float,3>, "");
-m.def("cpu_double_Convolution_backward_3", &cpu_Convolution_backward<double,3>, "");
-m.def("cuda_float_Convolution_backward_3", &cuda_Convolution_backward<float,3>, "");
-m.def("cpu_float_Convolution_backward_4", &cpu_Convolution_backward<float,4>, "");
-m.def("cpu_double_Convolution_backward_4", &cpu_Convolution_backward<double,4>, "");
-m.def("cuda_float_Convolution_backward_4", &cuda_Convolution_backward<float,4>, "");
-m.def("cpu_float_RandomizedStrideConvolution_updateOutput_1", &cpu_RandomizedStrideConvolution_updateOutput<float,1>, "");
-m.def("cpu_double_RandomizedStrideConvolution_updateOutput_1", &cpu_RandomizedStrideConvolution_updateOutput<double,1>, "");
-m.def("cuda_float_RandomizedStrideConvolution_updateOutput_1", &cuda_RandomizedStrideConvolution_updateOutput<float,1>, "");
-m.def("cpu_float_RandomizedStrideConvolution_updateOutput_2", &cpu_RandomizedStrideConvolution_updateOutput<float,2>, "");
-m.def("cpu_double_RandomizedStrideConvolution_updateOutput_2", &cpu_RandomizedStrideConvolution_updateOutput<double,2>, "");
-m.def("cuda_float_RandomizedStrideConvolution_updateOutput_2", &cuda_RandomizedStrideConvolution_updateOutput<float,2>, "");
-m.def("cpu_float_RandomizedStrideConvolution_updateOutput_3", &cpu_RandomizedStrideConvolution_updateOutput<float,3>, "");
-m.def("cpu_double_RandomizedStrideConvolution_updateOutput_3", &cpu_RandomizedStrideConvolution_updateOutput<double,3>, "");
-m.def("cuda_float_RandomizedStrideConvolution_updateOutput_3", &cuda_RandomizedStrideConvolution_updateOutput<float,3>, "");
-m.def("cpu_float_RandomizedStrideConvolution_updateOutput_4", &cpu_RandomizedStrideConvolution_updateOutput<float,4>, "");
-m.def("cpu_double_RandomizedStrideConvolution_updateOutput_4", &cpu_RandomizedStrideConvolution_updateOutput<double,4>, "");
-m.def("cuda_float_RandomizedStrideConvolution_updateOutput_4", &cuda_RandomizedStrideConvolution_updateOutput<float,4>, "");
-m.def("cpu_float_RandomizedStrideConvolution_backward_1", &cpu_RandomizedStrideConvolution_backward<float,1>, "");
-m.def("cpu_double_RandomizedStrideConvolution_backward_1", &cpu_RandomizedStrideConvolution_backward<double,1>, "");
-m.def("cuda_float_RandomizedStrideConvolution_backward_1", &cuda_RandomizedStrideConvolution_backward<float,1>, "");
-m.def("cpu_float_RandomizedStrideConvolution_backward_2", &cpu_RandomizedStrideConvolution_backward<float,2>, "");
-m.def("cpu_double_RandomizedStrideConvolution_backward_2", &cpu_RandomizedStrideConvolution_backward<double,2>, "");
-m.def("cuda_float_RandomizedStrideConvolution_backward_2", &cuda_RandomizedStrideConvolution_backward<float,2>, "");
-m.def("cpu_float_RandomizedStrideConvolution_backward_3", &cpu_RandomizedStrideConvolution_backward<float,3>, "");
-m.def("cpu_double_RandomizedStrideConvolution_backward_3", &cpu_RandomizedStrideConvolution_backward<double,3>, "");
-m.def("cuda_float_RandomizedStrideConvolution_backward_3", &cuda_RandomizedStrideConvolution_backward<float,3>, "");
-m.def("cpu_float_RandomizedStrideConvolution_backward_4", &cpu_RandomizedStrideConvolution_backward<float,4>, "");
-m.def("cpu_double_RandomizedStrideConvolution_backward_4", &cpu_RandomizedStrideConvolution_backward<double,4>, "");
-m.def("cuda_float_RandomizedStrideConvolution_backward_4", &cuda_RandomizedStrideConvolution_backward<float,4>, "");
-m.def("cpu_float_Deconvolution_updateOutput_1", &cpu_Deconvolution_updateOutput<float,1>, "");
-m.def("cpu_double_Deconvolution_updateOutput_1", &cpu_Deconvolution_updateOutput<double,1>, "");
-m.def("cuda_float_Deconvolution_updateOutput_1", &cuda_Deconvolution_updateOutput<float,1>, "");
-m.def("cpu_float_Deconvolution_updateOutput_2", &cpu_Deconvolution_updateOutput<float,2>, "");
-m.def("cpu_double_Deconvolution_updateOutput_2", &cpu_Deconvolution_updateOutput<double,2>, "");
-m.def("cuda_float_Deconvolution_updateOutput_2", &cuda_Deconvolution_updateOutput<float,2>, "");
-m.def("cpu_float_Deconvolution_updateOutput_3", &cpu_Deconvolution_updateOutput<float,3>, "");
-m.def("cpu_double_Deconvolution_updateOutput_3", &cpu_Deconvolution_updateOutput<double,3>, "");
-m.def("cuda_float_Deconvolution_updateOutput_3", &cuda_Deconvolution_updateOutput<float,3>, "");
-m.def("cpu_float_Deconvolution_updateOutput_4", &cpu_Deconvolution_updateOutput<float,4>, "");
-m.def("cpu_double_Deconvolution_updateOutput_4", &cpu_Deconvolution_updateOutput<double,4>, "");
-m.def("cuda_float_Deconvolution_updateOutput_4", &cuda_Deconvolution_updateOutput<float,4>, "");
-m.def("cpu_float_Deconvolution_backward_1", &cpu_Deconvolution_backward<float,1>, "");
-m.def("cpu_double_Deconvolution_backward_1", &cpu_Deconvolution_backward<double,1>, "");
-m.def("cuda_float_Deconvolution_backward_1", &cuda_Deconvolution_backward<float,1>, "");
-m.def("cpu_float_Deconvolution_backward_2", &cpu_Deconvolution_backward<float,2>, "");
-m.def("cpu_double_Deconvolution_backward_2", &cpu_Deconvolution_backward<double,2>, "");
-m.def("cuda_float_Deconvolution_backward_2", &cuda_Deconvolution_backward<float,2>, "");
-m.def("cpu_float_Deconvolution_backward_3", &cpu_Deconvolution_backward<float,3>, "");
-m.def("cpu_double_Deconvolution_backward_3", &cpu_Deconvolution_backward<double,3>, "");
-m.def("cuda_float_Deconvolution_backward_3", &cuda_Deconvolution_backward<float,3>, "");
-m.def("cpu_float_Deconvolution_backward_4", &cpu_Deconvolution_backward<float,4>, "");
-m.def("cpu_double_Deconvolution_backward_4", &cpu_Deconvolution_backward<double,4>, "");
-m.def("cuda_float_Deconvolution_backward_4", &cuda_Deconvolution_backward<float,4>, "");
-m.def("cpu_float_FullConvolution_updateOutput_1", &cpu_FullConvolution_updateOutput<float,1>, "");
-m.def("cpu_double_FullConvolution_updateOutput_1", &cpu_FullConvolution_updateOutput<double,1>, "");
-m.def("cuda_float_FullConvolution_updateOutput_1", &cuda_FullConvolution_updateOutput<float,1>, "");
-m.def("cpu_float_FullConvolution_updateOutput_2", &cpu_FullConvolution_updateOutput<float,2>, "");
-m.def("cpu_double_FullConvolution_updateOutput_2", &cpu_FullConvolution_updateOutput<double,2>, "");
-m.def("cuda_float_FullConvolution_updateOutput_2", &cuda_FullConvolution_updateOutput<float,2>, "");
-m.def("cpu_float_FullConvolution_updateOutput_3", &cpu_FullConvolution_updateOutput<float,3>, "");
-m.def("cpu_double_FullConvolution_updateOutput_3", &cpu_FullConvolution_updateOutput<double,3>, "");
-m.def("cuda_float_FullConvolution_updateOutput_3", &cuda_FullConvolution_updateOutput<float,3>, "");
-m.def("cpu_float_FullConvolution_updateOutput_4", &cpu_FullConvolution_updateOutput<float,4>, "");
-m.def("cpu_double_FullConvolution_updateOutput_4", &cpu_FullConvolution_updateOutput<double,4>, "");
-m.def("cuda_float_FullConvolution_updateOutput_4", &cuda_FullConvolution_updateOutput<float,4>, "");
-m.def("cpu_float_FullConvolution_backward_1", &cpu_FullConvolution_backward<float,1>, "");
-m.def("cpu_double_FullConvolution_backward_1", &cpu_FullConvolution_backward<double,1>, "");
-m.def("cuda_float_FullConvolution_backward_1", &cuda_FullConvolution_backward<float,1>, "");
-m.def("cpu_float_FullConvolution_backward_2", &cpu_FullConvolution_backward<float,2>, "");
-m.def("cpu_double_FullConvolution_backward_2", &cpu_FullConvolution_backward<double,2>, "");
-m.def("cuda_float_FullConvolution_backward_2", &cuda_FullConvolution_backward<float,2>, "");
-m.def("cpu_float_FullConvolution_backward_3", &cpu_FullConvolution_backward<float,3>, "");
-m.def("cpu_double_FullConvolution_backward_3", &cpu_FullConvolution_backward<double,3>, "");
-m.def("cuda_float_FullConvolution_backward_3", &cuda_FullConvolution_backward<float,3>, "");
-m.def("cpu_float_FullConvolution_backward_4", &cpu_FullConvolution_backward<float,4>, "");
-m.def("cpu_double_FullConvolution_backward_4", &cpu_FullConvolution_backward<double,4>, "");
-m.def("cuda_float_FullConvolution_backward_4", &cuda_FullConvolution_backward<float,4>, "");
-m.def("cpu_float_MaxPooling_updateOutput_1", &cpu_MaxPooling_updateOutput<float,1>, "");
-m.def("cpu_double_MaxPooling_updateOutput_1", &cpu_MaxPooling_updateOutput<double,1>, "");
-m.def("cuda_float_MaxPooling_updateOutput_1", &cuda_MaxPooling_updateOutput<float,1>, "");
-m.def("cpu_float_MaxPooling_updateOutput_2", &cpu_MaxPooling_updateOutput<float,2>, "");
-m.def("cpu_double_MaxPooling_updateOutput_2", &cpu_MaxPooling_updateOutput<double,2>, "");
-m.def("cuda_float_MaxPooling_updateOutput_2", &cuda_MaxPooling_updateOutput<float,2>, "");
-m.def("cpu_float_MaxPooling_updateOutput_3", &cpu_MaxPooling_updateOutput<float,3>, "");
-m.def("cpu_double_MaxPooling_updateOutput_3", &cpu_MaxPooling_updateOutput<double,3>, "");
-m.def("cuda_float_MaxPooling_updateOutput_3", &cuda_MaxPooling_updateOutput<float,3>, "");
-m.def("cpu_float_MaxPooling_updateOutput_4", &cpu_MaxPooling_updateOutput<float,4>, "");
-m.def("cpu_double_MaxPooling_updateOutput_4", &cpu_MaxPooling_updateOutput<double,4>, "");
-m.def("cuda_float_MaxPooling_updateOutput_4", &cuda_MaxPooling_updateOutput<float,4>, "");
-m.def("cpu_float_MaxPooling_updateGradInput_1", &cpu_MaxPooling_updateGradInput<float,1>, "");
-m.def("cpu_double_MaxPooling_updateGradInput_1", &cpu_MaxPooling_updateGradInput<double,1>, "");
-m.def("cuda_float_MaxPooling_updateGradInput_1", &cuda_MaxPooling_updateGradInput<float,1>, "");
-m.def("cpu_float_MaxPooling_updateGradInput_2", &cpu_MaxPooling_updateGradInput<float,2>, "");
-m.def("cpu_double_MaxPooling_updateGradInput_2", &cpu_MaxPooling_updateGradInput<double,2>, "");
-m.def("cuda_float_MaxPooling_updateGradInput_2", &cuda_MaxPooling_updateGradInput<float,2>, "");
-m.def("cpu_float_MaxPooling_updateGradInput_3", &cpu_MaxPooling_updateGradInput<float,3>, "");
-m.def("cpu_double_MaxPooling_updateGradInput_3", &cpu_MaxPooling_updateGradInput<double,3>, "");
-m.def("cuda_float_MaxPooling_updateGradInput_3", &cuda_MaxPooling_updateGradInput<float,3>, "");
-m.def("cpu_float_MaxPooling_updateGradInput_4", &cpu_MaxPooling_updateGradInput<float,4>, "");
-m.def("cpu_double_MaxPooling_updateGradInput_4", &cpu_MaxPooling_updateGradInput<double,4>, "");
-m.def("cuda_float_MaxPooling_updateGradInput_4", &cuda_MaxPooling_updateGradInput<float,4>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateOutput_1", &cpu_RandomizedStrideMaxPooling_updateOutput<float,1>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateOutput_1", &cpu_RandomizedStrideMaxPooling_updateOutput<double,1>, "");
-m.def("cuda_float_RandomizedStrideMaxPooling_updateOutput_1", &cuda_RandomizedStrideMaxPooling_updateOutput<float,1>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateOutput_2", &cpu_RandomizedStrideMaxPooling_updateOutput<float,2>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateOutput_2", &cpu_RandomizedStrideMaxPooling_updateOutput<double,2>, "");
-m.def("cuda_float_RandomizedStrideMaxPooling_updateOutput_2", &cuda_RandomizedStrideMaxPooling_updateOutput<float,2>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateOutput_3", &cpu_RandomizedStrideMaxPooling_updateOutput<float,3>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateOutput_3", &cpu_RandomizedStrideMaxPooling_updateOutput<double,3>, "");
-m.def("cuda_float_RandomizedStrideMaxPooling_updateOutput_3", &cuda_RandomizedStrideMaxPooling_updateOutput<float,3>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateOutput_4", &cpu_RandomizedStrideMaxPooling_updateOutput<float,4>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateOutput_4", &cpu_RandomizedStrideMaxPooling_updateOutput<double,4>, "");
-m.def("cuda_float_RandomizedStrideMaxPooling_updateOutput_4", &cuda_RandomizedStrideMaxPooling_updateOutput<float,4>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateGradInput_1", &cpu_RandomizedStrideMaxPooling_updateGradInput<float,1>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateGradInput_1", &cpu_RandomizedStrideMaxPooling_updateGradInput<double,1>, "");
-m.def("cuda_float_RandomizedStrideMaxPooling_updateGradInput_1", &cuda_RandomizedStrideMaxPooling_updateGradInput<float,1>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateGradInput_2", &cpu_RandomizedStrideMaxPooling_updateGradInput<float,2>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateGradInput_2", &cpu_RandomizedStrideMaxPooling_updateGradInput<double,2>, "");
-m.def("cuda_float_RandomizedStrideMaxPooling_updateGradInput_2", &cuda_RandomizedStrideMaxPooling_updateGradInput<float,2>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateGradInput_3", &cpu_RandomizedStrideMaxPooling_updateGradInput<float,3>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateGradInput_3", &cpu_RandomizedStrideMaxPooling_updateGradInput<double,3>, "");
-m.def("cuda_float_RandomizedStrideMaxPooling_updateGradInput_3", &cuda_RandomizedStrideMaxPooling_updateGradInput<float,3>, "");
-m.def("cpu_float_RandomizedStrideMaxPooling_updateGradInput_4", &cpu_RandomizedStrideMaxPooling_updateGradInput<float,4>, "");
-m.def("cpu_double_RandomizedStrideMaxPooling_updateGradInput_4", &cpu_RandomizedStrideMaxPooling_updateGradInput<double,4>, "");
-m.def("cuda_float_RandomizedStrideMaxPooling_updateGradInput_4", &cuda_RandomizedStrideMaxPooling_updateGradInput<float,4>, "");
-m.def("cpu_float_SparseToDense_updateOutput_1", &cpu_SparseToDense_updateOutput<float,1>, "");
-m.def("cpu_double_SparseToDense_updateOutput_1", &cpu_SparseToDense_updateOutput<double,1>, "");
-m.def("cuda_float_SparseToDense_updateOutput_1", &cuda_SparseToDense_updateOutput<float,1>, "");
-m.def("cpu_float_SparseToDense_updateOutput_2", &cpu_SparseToDense_updateOutput<float,2>, "");
-m.def("cpu_double_SparseToDense_updateOutput_2", &cpu_SparseToDense_updateOutput<double,2>, "");
-m.def("cuda_float_SparseToDense_updateOutput_2", &cuda_SparseToDense_updateOutput<float,2>, "");
-m.def("cpu_float_SparseToDense_updateOutput_3", &cpu_SparseToDense_updateOutput<float,3>, "");
-m.def("cpu_double_SparseToDense_updateOutput_3", &cpu_SparseToDense_updateOutput<double,3>, "");
-m.def("cuda_float_SparseToDense_updateOutput_3", &cuda_SparseToDense_updateOutput<float,3>, "");
-m.def("cpu_float_SparseToDense_updateOutput_4", &cpu_SparseToDense_updateOutput<float,4>, "");
-m.def("cpu_double_SparseToDense_updateOutput_4", &cpu_SparseToDense_updateOutput<double,4>, "");
-m.def("cuda_float_SparseToDense_updateOutput_4", &cuda_SparseToDense_updateOutput<float,4>, "");
-m.def("cpu_float_SparseToDense_updateGradInput_1", &cpu_SparseToDense_updateGradInput<float,1>, "");
-m.def("cpu_double_SparseToDense_updateGradInput_1", &cpu_SparseToDense_updateGradInput<double,1>, "");
-m.def("cuda_float_SparseToDense_updateGradInput_1", &cuda_SparseToDense_updateGradInput<float,1>, "");
-m.def("cpu_float_SparseToDense_updateGradInput_2", &cpu_SparseToDense_updateGradInput<float,2>, "");
-m.def("cpu_double_SparseToDense_updateGradInput_2", &cpu_SparseToDense_updateGradInput<double,2>, "");
-m.def("cuda_float_SparseToDense_updateGradInput_2", &cuda_SparseToDense_updateGradInput<float,2>, "");
-m.def("cpu_float_SparseToDense_updateGradInput_3", &cpu_SparseToDense_updateGradInput<float,3>, "");
-m.def("cpu_double_SparseToDense_updateGradInput_3", &cpu_SparseToDense_updateGradInput<double,3>, "");
-m.def("cuda_float_SparseToDense_updateGradInput_3", &cuda_SparseToDense_updateGradInput<float,3>, "");
-m.def("cpu_float_SparseToDense_updateGradInput_4", &cpu_SparseToDense_updateGradInput<float,4>, "");
-m.def("cpu_double_SparseToDense_updateGradInput_4", &cpu_SparseToDense_updateGradInput<double,4>, "");
-m.def("cuda_float_SparseToDense_updateGradInput_4", &cuda_SparseToDense_updateGradInput<float,4>, "");
-m.def("cpu_float_SubmanifoldConvolution_updateOutput_1", &cpu_SubmanifoldConvolution_updateOutput<float,1>, "");
-m.def("cpu_double_SubmanifoldConvolution_updateOutput_1", &cpu_SubmanifoldConvolution_updateOutput<double,1>, "");
-m.def("cuda_float_SubmanifoldConvolution_updateOutput_1", &cuda_SubmanifoldConvolution_updateOutput<float,1>, "");
-m.def("cpu_float_SubmanifoldConvolution_updateOutput_2", &cpu_SubmanifoldConvolution_updateOutput<float,2>, "");
-m.def("cpu_double_SubmanifoldConvolution_updateOutput_2", &cpu_SubmanifoldConvolution_updateOutput<double,2>, "");
-m.def("cuda_float_SubmanifoldConvolution_updateOutput_2", &cuda_SubmanifoldConvolution_updateOutput<float,2>, "");
-m.def("cpu_float_SubmanifoldConvolution_updateOutput_3", &cpu_SubmanifoldConvolution_updateOutput<float,3>, "");
-m.def("cpu_double_SubmanifoldConvolution_updateOutput_3", &cpu_SubmanifoldConvolution_updateOutput<double,3>, "");
-m.def("cuda_float_SubmanifoldConvolution_updateOutput_3", &cuda_SubmanifoldConvolution_updateOutput<float,3>, "");
-m.def("cpu_float_SubmanifoldConvolution_updateOutput_4", &cpu_SubmanifoldConvolution_updateOutput<float,4>, "");
-m.def("cpu_double_SubmanifoldConvolution_updateOutput_4", &cpu_SubmanifoldConvolution_updateOutput<double,4>, "");
-m.def("cuda_float_SubmanifoldConvolution_updateOutput_4", &cuda_SubmanifoldConvolution_updateOutput<float,4>, "");
-m.def("cpu_float_SubmanifoldConvolution_backward_1", &cpu_SubmanifoldConvolution_backward<float,1>, "");
-m.def("cpu_double_SubmanifoldConvolution_backward_1", &cpu_SubmanifoldConvolution_backward<double,1>, "");
-m.def("cuda_float_SubmanifoldConvolution_backward_1", &cuda_SubmanifoldConvolution_backward<float,1>, "");
-m.def("cpu_float_SubmanifoldConvolution_backward_2", &cpu_SubmanifoldConvolution_backward<float,2>, "");
-m.def("cpu_double_SubmanifoldConvolution_backward_2", &cpu_SubmanifoldConvolution_backward<double,2>, "");
-m.def("cuda_float_SubmanifoldConvolution_backward_2", &cuda_SubmanifoldConvolution_backward<float,2>, "");
-m.def("cpu_float_SubmanifoldConvolution_backward_3", &cpu_SubmanifoldConvolution_backward<float,3>, "");
-m.def("cpu_double_SubmanifoldConvolution_backward_3", &cpu_SubmanifoldConvolution_backward<double,3>, "");
-m.def("cuda_float_SubmanifoldConvolution_backward_3", &cuda_SubmanifoldConvolution_backward<float,3>, "");
-m.def("cpu_float_SubmanifoldConvolution_backward_4", &cpu_SubmanifoldConvolution_backward<float,4>, "");
-m.def("cpu_double_SubmanifoldConvolution_backward_4", &cpu_SubmanifoldConvolution_backward<double,4>, "");
-m.def("cuda_float_SubmanifoldConvolution_backward_4", &cuda_SubmanifoldConvolution_backward<float,4>, "");
-m.def("cpu_float_InputLayer_updateOutput_1", &cpu_InputLayer_updateOutput<float,1>, "");
-m.def("cpu_double_InputLayer_updateOutput_1", &cpu_InputLayer_updateOutput<double,1>, "");
-m.def("cuda_float_InputLayer_updateOutput_1", &cuda_InputLayer_updateOutput<float,1>, "");
-m.def("cpu_float_InputLayer_updateOutput_2", &cpu_InputLayer_updateOutput<float,2>, "");
-m.def("cpu_double_InputLayer_updateOutput_2", &cpu_InputLayer_updateOutput<double,2>, "");
-m.def("cuda_float_InputLayer_updateOutput_2", &cuda_InputLayer_updateOutput<float,2>, "");
-m.def("cpu_float_InputLayer_updateOutput_3", &cpu_InputLayer_updateOutput<float,3>, "");
-m.def("cpu_double_InputLayer_updateOutput_3", &cpu_InputLayer_updateOutput<double,3>, "");
-m.def("cuda_float_InputLayer_updateOutput_3", &cuda_InputLayer_updateOutput<float,3>, "");
-m.def("cpu_float_InputLayer_updateOutput_4", &cpu_InputLayer_updateOutput<float,4>, "");
-m.def("cpu_double_InputLayer_updateOutput_4", &cpu_InputLayer_updateOutput<double,4>, "");
-m.def("cuda_float_InputLayer_updateOutput_4", &cuda_InputLayer_updateOutput<float,4>, "");
-m.def("cpu_float_InputLayer_updateGradInput_1", &cpu_InputLayer_updateGradInput<float,1>, "");
-m.def("cpu_double_InputLayer_updateGradInput_1", &cpu_InputLayer_updateGradInput<double,1>, "");
-m.def("cuda_float_InputLayer_updateGradInput_1", &cuda_InputLayer_updateGradInput<float,1>, "");
-m.def("cpu_float_InputLayer_updateGradInput_2", &cpu_InputLayer_updateGradInput<float,2>, "");
-m.def("cpu_double_InputLayer_updateGradInput_2", &cpu_InputLayer_updateGradInput<double,2>, "");
-m.def("cuda_float_InputLayer_updateGradInput_2", &cuda_InputLayer_updateGradInput<float,2>, "");
-m.def("cpu_float_InputLayer_updateGradInput_3", &cpu_InputLayer_updateGradInput<float,3>, "");
-m.def("cpu_double_InputLayer_updateGradInput_3", &cpu_InputLayer_updateGradInput<double,3>, "");
-m.def("cuda_float_InputLayer_updateGradInput_3", &cuda_InputLayer_updateGradInput<float,3>, "");
-m.def("cpu_float_InputLayer_updateGradInput_4", &cpu_InputLayer_updateGradInput<float,4>, "");
-m.def("cpu_double_InputLayer_updateGradInput_4", &cpu_InputLayer_updateGradInput<double,4>, "");
-m.def("cuda_float_InputLayer_updateGradInput_4", &cuda_InputLayer_updateGradInput<float,4>, "");
-m.def("cpu_float_OutputLayer_updateOutput_1", &cpu_OutputLayer_updateOutput<float,1>, "");
-m.def("cpu_double_OutputLayer_updateOutput_1", &cpu_OutputLayer_updateOutput<double,1>, "");
-m.def("cuda_float_OutputLayer_updateOutput_1", &cuda_OutputLayer_updateOutput<float,1>, "");
-m.def("cpu_float_OutputLayer_updateOutput_2", &cpu_OutputLayer_updateOutput<float,2>, "");
-m.def("cpu_double_OutputLayer_updateOutput_2", &cpu_OutputLayer_updateOutput<double,2>, "");
-m.def("cuda_float_OutputLayer_updateOutput_2", &cuda_OutputLayer_updateOutput<float,2>, "");
-m.def("cpu_float_OutputLayer_updateOutput_3", &cpu_OutputLayer_updateOutput<float,3>, "");
-m.def("cpu_double_OutputLayer_updateOutput_3", &cpu_OutputLayer_updateOutput<double,3>, "");
-m.def("cuda_float_OutputLayer_updateOutput_3", &cuda_OutputLayer_updateOutput<float,3>, "");
-m.def("cpu_float_OutputLayer_updateOutput_4", &cpu_OutputLayer_updateOutput<float,4>, "");
-m.def("cpu_double_OutputLayer_updateOutput_4", &cpu_OutputLayer_updateOutput<double,4>, "");
-m.def("cuda_float_OutputLayer_updateOutput_4", &cuda_OutputLayer_updateOutput<float,4>, "");
-m.def("cpu_float_OutputLayer_updateGradInput_1", &cpu_OutputLayer_updateGradInput<float,1>, "");
-m.def("cpu_double_OutputLayer_updateGradInput_1", &cpu_OutputLayer_updateGradInput<double,1>, "");
-m.def("cuda_float_OutputLayer_updateGradInput_1", &cuda_OutputLayer_updateGradInput<float,1>, "");
-m.def("cpu_float_OutputLayer_updateGradInput_2", &cpu_OutputLayer_updateGradInput<float,2>, "");
-m.def("cpu_double_OutputLayer_updateGradInput_2", &cpu_OutputLayer_updateGradInput<double,2>, "");
-m.def("cuda_float_OutputLayer_updateGradInput_2", &cuda_OutputLayer_updateGradInput<float,2>, "");
-m.def("cpu_float_OutputLayer_updateGradInput_3", &cpu_OutputLayer_updateGradInput<float,3>, "");
-m.def("cpu_double_OutputLayer_updateGradInput_3", &cpu_OutputLayer_updateGradInput<double,3>, "");
-m.def("cuda_float_OutputLayer_updateGradInput_3", &cuda_OutputLayer_updateGradInput<float,3>, "");
-m.def("cpu_float_OutputLayer_updateGradInput_4", &cpu_OutputLayer_updateGradInput<float,4>, "");
-m.def("cpu_double_OutputLayer_updateGradInput_4", &cpu_OutputLayer_updateGradInput<double,4>, "");
-m.def("cuda_float_OutputLayer_updateGradInput_4", &cuda_OutputLayer_updateGradInput<float,4>, "");
-m.def("cpu_float_BLInputLayer_updateOutput_1", &cpu_BLInputLayer_updateOutput<float,1>, "");
-m.def("cpu_double_BLInputLayer_updateOutput_1", &cpu_BLInputLayer_updateOutput<double,1>, "");
-m.def("cuda_float_BLInputLayer_updateOutput_1", &cuda_BLInputLayer_updateOutput<float,1>, "");
-m.def("cpu_float_BLInputLayer_updateOutput_2", &cpu_BLInputLayer_updateOutput<float,2>, "");
-m.def("cpu_double_BLInputLayer_updateOutput_2", &cpu_BLInputLayer_updateOutput<double,2>, "");
-m.def("cuda_float_BLInputLayer_updateOutput_2", &cuda_BLInputLayer_updateOutput<float,2>, "");
-m.def("cpu_float_BLInputLayer_updateOutput_3", &cpu_BLInputLayer_updateOutput<float,3>, "");
-m.def("cpu_double_BLInputLayer_updateOutput_3", &cpu_BLInputLayer_updateOutput<double,3>, "");
-m.def("cuda_float_BLInputLayer_updateOutput_3", &cuda_BLInputLayer_updateOutput<float,3>, "");
-m.def("cpu_float_BLInputLayer_updateOutput_4", &cpu_BLInputLayer_updateOutput<float,4>, "");
-m.def("cpu_double_BLInputLayer_updateOutput_4", &cpu_BLInputLayer_updateOutput<double,4>, "");
-m.def("cuda_float_BLInputLayer_updateOutput_4", &cuda_BLInputLayer_updateOutput<float,4>, "");
-m.def("cpu_float_BLInputLayer_updateGradInput_1", &cpu_BLInputLayer_updateGradInput<float,1>, "");
-m.def("cpu_double_BLInputLayer_updateGradInput_1", &cpu_BLInputLayer_updateGradInput<double,1>, "");
-m.def("cuda_float_BLInputLayer_updateGradInput_1", &cuda_BLInputLayer_updateGradInput<float,1>, "");
-m.def("cpu_float_BLInputLayer_updateGradInput_2", &cpu_BLInputLayer_updateGradInput<float,2>, "");
-m.def("cpu_double_BLInputLayer_updateGradInput_2", &cpu_BLInputLayer_updateGradInput<double,2>, "");
-m.def("cuda_float_BLInputLayer_updateGradInput_2", &cuda_BLInputLayer_updateGradInput<float,2>, "");
-m.def("cpu_float_BLInputLayer_updateGradInput_3", &cpu_BLInputLayer_updateGradInput<float,3>, "");
-m.def("cpu_double_BLInputLayer_updateGradInput_3", &cpu_BLInputLayer_updateGradInput<double,3>, "");
-m.def("cuda_float_BLInputLayer_updateGradInput_3", &cuda_BLInputLayer_updateGradInput<float,3>, "");
-m.def("cpu_float_BLInputLayer_updateGradInput_4", &cpu_BLInputLayer_updateGradInput<float,4>, "");
-m.def("cpu_double_BLInputLayer_updateGradInput_4", &cpu_BLInputLayer_updateGradInput<double,4>, "");
-m.def("cuda_float_BLInputLayer_updateGradInput_4", &cuda_BLInputLayer_updateGradInput<float,4>, "");
-m.def("cpu_float_BLOutputLayer_updateOutput_1", &cpu_BLOutputLayer_updateOutput<float,1>, "");
-m.def("cpu_double_BLOutputLayer_updateOutput_1", &cpu_BLOutputLayer_updateOutput<double,1>, "");
-m.def("cuda_float_BLOutputLayer_updateOutput_1", &cuda_BLOutputLayer_updateOutput<float,1>, "");
-m.def("cpu_float_BLOutputLayer_updateOutput_2", &cpu_BLOutputLayer_updateOutput<float,2>, "");
-m.def("cpu_double_BLOutputLayer_updateOutput_2", &cpu_BLOutputLayer_updateOutput<double,2>, "");
-m.def("cuda_float_BLOutputLayer_updateOutput_2", &cuda_BLOutputLayer_updateOutput<float,2>, "");
-m.def("cpu_float_BLOutputLayer_updateOutput_3", &cpu_BLOutputLayer_updateOutput<float,3>, "");
-m.def("cpu_double_BLOutputLayer_updateOutput_3", &cpu_BLOutputLayer_updateOutput<double,3>, "");
-m.def("cuda_float_BLOutputLayer_updateOutput_3", &cuda_BLOutputLayer_updateOutput<float,3>, "");
-m.def("cpu_float_BLOutputLayer_updateOutput_4", &cpu_BLOutputLayer_updateOutput<float,4>, "");
-m.def("cpu_double_BLOutputLayer_updateOutput_4", &cpu_BLOutputLayer_updateOutput<double,4>, "");
-m.def("cuda_float_BLOutputLayer_updateOutput_4", &cuda_BLOutputLayer_updateOutput<float,4>, "");
-m.def("cpu_float_BLOutputLayer_updateGradInput_1", &cpu_BLOutputLayer_updateGradInput<float,1>, "");
-m.def("cpu_double_BLOutputLayer_updateGradInput_1", &cpu_BLOutputLayer_updateGradInput<double,1>, "");
-m.def("cuda_float_BLOutputLayer_updateGradInput_1", &cuda_BLOutputLayer_updateGradInput<float,1>, "");
-m.def("cpu_float_BLOutputLayer_updateGradInput_2", &cpu_BLOutputLayer_updateGradInput<float,2>, "");
-m.def("cpu_double_BLOutputLayer_updateGradInput_2", &cpu_BLOutputLayer_updateGradInput<double,2>, "");
-m.def("cuda_float_BLOutputLayer_updateGradInput_2", &cuda_BLOutputLayer_updateGradInput<float,2>, "");
-m.def("cpu_float_BLOutputLayer_updateGradInput_3", &cpu_BLOutputLayer_updateGradInput<float,3>, "");
-m.def("cpu_double_BLOutputLayer_updateGradInput_3", &cpu_BLOutputLayer_updateGradInput<double,3>, "");
-m.def("cuda_float_BLOutputLayer_updateGradInput_3", &cuda_BLOutputLayer_updateGradInput<float,3>, "");
-m.def("cpu_float_BLOutputLayer_updateGradInput_4", &cpu_BLOutputLayer_updateGradInput<float,4>, "");
-m.def("cpu_double_BLOutputLayer_updateGradInput_4", &cpu_BLOutputLayer_updateGradInput<double,4>, "");
-m.def("cuda_float_BLOutputLayer_updateGradInput_4", &cuda_BLOutputLayer_updateGradInput<float,4>, "");
-m.def("cpu_float_UnPooling_updateOutput_1", &cpu_UnPooling_updateOutput<float,1>, "");
-m.def("cpu_double_UnPooling_updateOutput_1", &cpu_UnPooling_updateOutput<double,1>, "");
-m.def("cuda_float_UnPooling_updateOutput_1", &cuda_UnPooling_updateOutput<float,1>, "");
-m.def("cpu_float_UnPooling_updateOutput_2", &cpu_UnPooling_updateOutput<float,2>, "");
-m.def("cpu_double_UnPooling_updateOutput_2", &cpu_UnPooling_updateOutput<double,2>, "");
-m.def("cuda_float_UnPooling_updateOutput_2", &cuda_UnPooling_updateOutput<float,2>, "");
-m.def("cpu_float_UnPooling_updateOutput_3", &cpu_UnPooling_updateOutput<float,3>, "");
-m.def("cpu_double_UnPooling_updateOutput_3", &cpu_UnPooling_updateOutput<double,3>, "");
-m.def("cuda_float_UnPooling_updateOutput_3", &cuda_UnPooling_updateOutput<float,3>, "");
-m.def("cpu_float_UnPooling_updateOutput_4", &cpu_UnPooling_updateOutput<float,4>, "");
-m.def("cpu_double_UnPooling_updateOutput_4", &cpu_UnPooling_updateOutput<double,4>, "");
-m.def("cuda_float_UnPooling_updateOutput_4", &cuda_UnPooling_updateOutput<float,4>, "");
-m.def("cpu_float_UnPooling_updateGradInput_1", &cpu_UnPooling_updateGradInput<float,1>, "");
-m.def("cpu_double_UnPooling_updateGradInput_1", &cpu_UnPooling_updateGradInput<double,1>, "");
-m.def("cuda_float_UnPooling_updateGradInput_1", &cuda_UnPooling_updateGradInput<float,1>, "");
-m.def("cpu_float_UnPooling_updateGradInput_2", &cpu_UnPooling_updateGradInput<float,2>, "");
-m.def("cpu_double_UnPooling_updateGradInput_2", &cpu_UnPooling_updateGradInput<double,2>, "");
-m.def("cuda_float_UnPooling_updateGradInput_2", &cuda_UnPooling_updateGradInput<float,2>, "");
-m.def("cpu_float_UnPooling_updateGradInput_3", &cpu_UnPooling_updateGradInput<float,3>, "");
-m.def("cpu_double_UnPooling_updateGradInput_3", &cpu_UnPooling_updateGradInput<double,3>, "");
-m.def("cuda_float_UnPooling_updateGradInput_3", &cuda_UnPooling_updateGradInput<float,3>, "");
-m.def("cpu_float_UnPooling_updateGradInput_4", &cpu_UnPooling_updateGradInput<float,4>, "");
-m.def("cpu_double_UnPooling_updateGradInput_4", &cpu_UnPooling_updateGradInput<double,4>, "");
-m.def("cuda_float_UnPooling_updateGradInput_4", &cuda_UnPooling_updateGradInput<float,4>, "");
-
-m.def("n_rulebook_bits", []() {return 8*sizeof(Int);}, "");
-}
--- a/sparseconvnet/SCN/sparseconvnet.h
+++ b/sparseconvnet/SCN/sparseconvnet.h
+#include "Metadata/Metadata.h"
+
+// AffineReluTrivialConvolution: output computation.
+// Declaration only. sparseconvnet_cpu.cpp defines it by passing every
+// argument to cpu_AffineReluTrivialConvolution_updateOutput<float> and
+// returning that call's result as a double.
+double AffineReluTrivialConvolution_updateOutput(
+    at::Tensor input_features, at::Tensor output_features,
+    at::Tensor affineWeight, at::Tensor affineBias, at::Tensor convWeight);
+
+// AffineReluTrivialConvolution: backward computation.
+// Declaration only. sparseconvnet_cpu.cpp defines it by passing every
+// argument to cpu_AffineReluTrivialConvolution_backward<float>.
+// NOTE(review): the d_-prefixed tensors are presumably the gradient buffers
+// paired with the unprefixed tensors -- confirm against the implementation.
+void AffineReluTrivialConvolution_backward(
+    at::Tensor input_features,
+    at::Tensor d_input_features,
+    at::Tensor d_output_features,
+    at::Tensor affineWeight,
+    at::Tensor d_affineWeight,
+    at::Tensor affineBias,
+    at::Tensor d_affineBias,
+    at::Tensor convWeight,
+    at::Tensor d_convWeight,
+    bool additiveGrad);
+
+// BatchNormalization: output computation.
+// Declaration only. sparseconvnet_cpu.cpp defines it by passing every
+// argument, in this order, to cpu_BatchNormalization_updateOutput<float>.
+void BatchNormalization_updateOutput(
+    at::Tensor input_features,
+    at::Tensor output_features,
+    at::Tensor saveMean,
+    at::Tensor saveInvStd,
+    at::Tensor runningMean,
+    at::Tensor runningVar,
+    at::Tensor weight,
+    at::Tensor bias,
+    double eps,
+    double momentum,
+    bool train,
+    double leakiness);
+
+// BatchNormalization: backward computation.
+// Declaration only. sparseconvnet_cpu.cpp defines it by passing every
+// argument, in this order, to cpu_BatchNormalization_backward<float>.
+void BatchNormalization_backward(
+    at::Tensor input_features,
+    at::Tensor d_input_features,
+    at::Tensor output_features,
+    at::Tensor d_output_features,
+    at::Tensor saveMean,
+    at::Tensor saveInvStd,
+    at::Tensor runningMean,
+    at::Tensor runningVar,
+    at::Tensor weight,
+    at::Tensor bias,
+    at::Tensor d_weight,
+    at::Tensor d_bias,
+    double leakiness);
+
+// BatchwiseMultiplicativeDropout: output computation.
+// Declaration only. sparseconvnet_cpu.cpp defines it by passing every
+// argument to cpu_BatchwiseMultiplicativeDropout_updateOutput<float>.
+void BatchwiseMultiplicativeDropout_updateOutput(
+    at::Tensor input_features, at::Tensor output_features, at::Tensor noise,
+    double alpha);
+
+// BatchwiseMultiplicativeDropout: input-gradient computation.
+// Declaration only. sparseconvnet_cpu.cpp defines it by passing every
+// argument to cpu_BatchwiseMultiplicativeDropout_updateGradInput<float>.
+void BatchwiseMultiplicativeDropout_updateGradInput(
+    at::Tensor input_features,
+    at::Tensor d_input_features,
+    at::Tensor d_output_features,
+    at::Tensor noise,
+    double alpha);
+
+// LeakyReLU: output computation.
+// Declaration only. sparseconvnet_cpu.cpp defines it by calling
+// cpu_LeakyReLU_updateOutput<float> with the same three arguments.
+void LeakyReLU_updateOutput(
+    at::Tensor input_features, at::Tensor output_features, double alpha);
+
+// LeakyReLU: input-gradient computation.
+// Declaration only; the definition is not part of this header.
+void LeakyReLU_updateGradInput(
+    at::Tensor input_features, at::Tensor d_input_features,
+    at::Tensor d_output_features, double alpha);
+
+// NetworkInNetwork: output computation.
+// Declaration only; the definition is not part of this header.
+// NOTE(review): the meaning of the returned double is set by the
+// implementation -- confirm what callers expect.
+double NetworkInNetwork_updateOutput(
+    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
+    at::Tensor bias);
+
+// NetworkInNetwork: input-gradient computation (declaration only).
+void NetworkInNetwork_updateGradInput(
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    at::Tensor weight);
+
+// NetworkInNetwork: parameter-gradient accumulation (declaration only).
+// Takes d_weight and d_bias, unlike NetworkInNetwork_updateGradInput.
+void NetworkInNetwork_accGradParameters(
+    at::Tensor input_features, at::Tensor d_output_features,
+    at::Tensor d_weight, at::Tensor d_bias);
+// ActivePooling entry points (declarations only).
+// Both are templated on Dimension, which selects the Metadata<Dimension>
+// type of the m argument; m is taken by non-const reference.
+template <Int Dimension>
+void ActivePooling_updateOutput(
+    at::Tensor inputSize, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, bool average);
+template <Int Dimension>
+void ActivePooling_updateGradInput(
+    at::Tensor inputSize, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features, bool average);
+// AveragePooling entry points (declarations only).
+// Both are templated on Dimension, which selects the Metadata<Dimension>
+// type of the m argument; m is taken by non-const reference.
+// NOTE(review): nFeaturesToDrop is presumably a count of feature columns to
+// exclude -- confirm against the implementation.
+template <Int Dimension>
+void AveragePooling_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, long nFeaturesToDrop);
+template <Int Dimension>
+void AveragePooling_updateGradInput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    long nFeaturesToDrop);
+// Convolution entry points (declarations only).
+// Both are templated on Dimension, which selects the Metadata<Dimension>
+// type of the m argument; m is taken by non-const reference.
+// NOTE(review): the meaning of the double returned by updateOutput is set by
+// the implementation -- confirm what callers expect.
+template <Int Dimension>
+double Convolution_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, at::Tensor weight, at::Tensor bias);
+template <Int Dimension>
+void Convolution_backward(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
+// SubmanifoldConvolution entry points (declarations only).
+// Unlike Convolution_updateOutput / Convolution_backward, these take no
+// outputSize or filterStride tensor. Both are templated on Dimension, which
+// selects the Metadata<Dimension> type of the m argument.
+// NOTE(review): the meaning of the double returned by updateOutput is set by
+// the implementation -- confirm what callers expect.
+template <Int Dimension>
+double SubmanifoldConvolution_updateOutput(
+    at::Tensor inputSize, at::Tensor filterSize, Metadata<Dimension> &m,
+    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
+    at::Tensor bias);
+template <Int Dimension>
+void SubmanifoldConvolution_backward(
+    at::Tensor inputSize,
+    at::Tensor filterSize,
+    Metadata<Dimension> &m,
+    at::Tensor input_features,
+    at::Tensor d_input_features,
+    at::Tensor d_output_features,
+    at::Tensor weight,
+    at::Tensor d_weight,
+    at::Tensor d_bias);
+// FullConvolution entry points (declarations only).
+// These take two Metadata<Dimension> objects, mIn and mOut, both by
+// non-const reference, where the other convolution declarations take one.
+// NOTE(review): the meaning of the double returned by updateOutput is set by
+// the implementation -- confirm what callers expect.
+template <Int Dimension>
+double FullConvolution_updateOutput(
+    at::Tensor inputSize,
+    at::Tensor outputSize,
+    at::Tensor filterSize,
+    at::Tensor filterStride,
+    Metadata<Dimension> &mIn,
+    Metadata<Dimension> &mOut,
+    at::Tensor input_features,
+    at::Tensor output_features,
+    at::Tensor weight,
+    at::Tensor bias);
+template <Int Dimension>
+void FullConvolution_backward(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &mIn,
+    Metadata<Dimension> &mOut, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
+// RandomizedStrideConvolution entry points (declarations only).
+// The parameter lists match Convolution_updateOutput / Convolution_backward.
+// Both are templated on Dimension, which selects the Metadata<Dimension>
+// type of the m argument.
+// NOTE(review): the meaning of the double returned by updateOutput is set by
+// the implementation -- confirm what callers expect.
+template <Int Dimension>
+double RandomizedStrideConvolution_updateOutput(
+    at::Tensor inputSize,
+    at::Tensor outputSize,
+    at::Tensor filterSize,
+    at::Tensor filterStride,
+    Metadata<Dimension> &m,
+    at::Tensor input_features,
+    at::Tensor output_features,
+    at::Tensor weight,
+    at::Tensor bias);
+template <Int Dimension>
+void RandomizedStrideConvolution_backward(
+    at::Tensor inputSize,
+    at::Tensor outputSize,
+    at::Tensor filterSize,
+    at::Tensor filterStride,
+    Metadata<Dimension> &m,
+    at::Tensor input_features,
+    at::Tensor d_input_features,
+    at::Tensor d_output_features,
+    at::Tensor weight,
+    at::Tensor d_weight,
+    at::Tensor d_bias);
+// Deconvolution entry points (declarations only).
+// The parameter lists match Convolution_updateOutput / Convolution_backward.
+// Both are templated on Dimension, which selects the Metadata<Dimension>
+// type of the m argument.
+// NOTE(review): the meaning of the double returned by updateOutput is set by
+// the implementation -- confirm what callers expect.
+template <Int Dimension>
+double Deconvolution_updateOutput(
+    at::Tensor inputSize,
+    at::Tensor outputSize,
+    at::Tensor filterSize,
+    at::Tensor filterStride,
+    Metadata<Dimension> &m,
+    at::Tensor input_features,
+    at::Tensor output_features,
+    at::Tensor weight,
+    at::Tensor bias);
+template <Int Dimension>
+void Deconvolution_backward(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);
+// InputLayer entry points (declarations only).
+// The Metadata<Dimension> object comes first in these parameter lists and is
+// taken by non-const reference. updateOutput additionally receives the
+// coordinates tensor plus two integer arguments, batchSize and mode.
+// NOTE(review): the accepted values of mode are defined by the
+// implementation -- confirm before relying on them.
+template <Int Dimension>
+void InputLayer_updateOutput(
+    Metadata<Dimension> &m, at::Tensor spatialSize, at::Tensor input_coords,
+    at::Tensor input_features, at::Tensor output_features, long batchSize,
+    long mode);
+template <Int Dimension>
+void InputLayer_updateGradInput(
+    Metadata<Dimension> &m, at::Tensor d_input_features,
+    at::Tensor d_output_features);
+// OutputLayer entry points (declarations only).
+// Each takes the Metadata<Dimension> object by non-const reference followed
+// by two feature tensors.
+template <Int Dimension>
+void OutputLayer_updateOutput(
+    Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features);
+template <Int Dimension>
+void OutputLayer_updateGradInput(
+    Metadata<Dimension> &m, at::Tensor d_input_features,
+    at::Tensor d_output_features);
+// BLInputLayer entry points (declarations only).
+// Same parameter lists as the InputLayer declarations, except that
+// updateOutput has no batchSize argument.
+// NOTE(review): the accepted values of mode are defined by the
+// implementation -- confirm before relying on them.
+template <Int Dimension>
+void BLInputLayer_updateOutput(
+    Metadata<Dimension> &m, at::Tensor spatialSize, at::Tensor input_coords,
+    at::Tensor input_features, at::Tensor output_features, long mode);
+template <Int Dimension>
+void BLInputLayer_updateGradInput(
+    Metadata<Dimension> &m, at::Tensor d_input_features,
+    at::Tensor d_output_features);
+// BLOutputLayer entry points (declarations only).
+// Each takes the Metadata<Dimension> object by non-const reference followed
+// by two feature tensors, mirroring the OutputLayer declarations.
+template <Int Dimension>
+void BLOutputLayer_updateOutput(
+    Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features);
+template <Int Dimension>
+void BLOutputLayer_updateGradInput(
+    Metadata<Dimension> &m, at::Tensor d_input_features,
+    at::Tensor d_output_features);
+// MaxPooling entry points (declarations only).
+// updateGradInput receives output_features in addition to the gradient
+// tensors, which the AveragePooling counterpart does not.
+// NOTE(review): nFeaturesToDrop is presumably a count of feature columns to
+// exclude -- confirm against the implementation.
+template <Int Dimension>
+void MaxPooling_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, long nFeaturesToDrop);
+template <Int Dimension>
+void MaxPooling_updateGradInput(
+    at::Tensor inputSize,
+    at::Tensor outputSize,
+    at::Tensor poolSize,
+    at::Tensor poolStride,
+    Metadata<Dimension> &m,
+    at::Tensor input_features,
+    at::Tensor d_input_features,
+    at::Tensor output_features,
+    at::Tensor d_output_features,
+    long nFeaturesToDrop);
+// RandomizedStrideMaxPooling entry points (declarations only).
+// The parameter lists match MaxPooling_updateOutput /
+// MaxPooling_updateGradInput.
+// NOTE(review): nFeaturesToDrop is presumably a count of feature columns to
+// exclude -- confirm against the implementation.
+template <Int Dimension>
+void RandomizedStrideMaxPooling_updateOutput(
+    at::Tensor inputSize,
+    at::Tensor outputSize,
+    at::Tensor poolSize,
+    at::Tensor poolStride,
+    Metadata<Dimension> &m,
+    at::Tensor input_features,
+    at::Tensor output_features,
+    long nFeaturesToDrop);
+template <Int Dimension>
+void RandomizedStrideMaxPooling_updateGradInput(
+    at::Tensor inputSize,
+    at::Tensor outputSize,
+    at::Tensor poolSize,
+    at::Tensor poolStride,
+    Metadata<Dimension> &m,
+    at::Tensor input_features,
+    at::Tensor d_input_features,
+    at::Tensor output_features,
+    at::Tensor d_output_features,
+    long nFeaturesToDrop);
+// SparseToDense entry points (declarations only).
+// Both are templated on Dimension, which selects the Metadata<Dimension>
+// type of the m argument. Only updateOutput takes the integer nPlanes.
+template <Int Dimension>
+void SparseToDense_updateOutput(
+    at::Tensor inputSize, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, long nPlanes);
+template <Int Dimension>
+void SparseToDense_updateGradInput(
+    at::Tensor inputSize, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features);
+// UnPooling entry points (declarations only).
+// The CUDA backend's cuda_UnPooling_updateOutput<T, Dimension> in
+// CUDA/UnPooling.cpp takes the same parameter list as updateOutput here.
+// NOTE(review): in that CUDA function nFeaturesToDrop is subtracted from
+// input_features.size(1) to get the plane count; presumably the same holds
+// for whatever defines these -- confirm.
+template <Int Dimension>
+void UnPooling_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, long nFeaturesToDrop);
+template <Int Dimension>
+void UnPooling_updateGradInput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    long nFeaturesToDrop);
--- a/sparseconvnet/SCN/sparseconvnet_cpu.cpp
+++ b/sparseconvnet/SCN/sparseconvnet_cpu.cpp
+// Copyright 2016-present, Facebook, Inc.
+// All rights reserved.
+//
+// This source code is licensed under the license found in the
+// LICENSE file in the root directory of this source tree.
+
+#define ENABLE_OPENMP YES
+#if defined(ENABLE_OPENMP)
+#include <omp.h>
+#endif
+
+#include <torch/torch.h>
+
+#include "Metadata/Metadata.cpp"
+template class Metadata<1>;
+template class Metadata<2>;
+template class Metadata<3>;
+template class Metadata<4>;
+template class Metadata<5>;
+template class Metadata<6>;
+
+#include "CPU/ActivePooling.cpp"
+#include "CPU/AffineReluTrivialConvolution.cpp"
+#include "CPU/AveragePooling.cpp"
+#include "CPU/BatchNormalization.cpp"
+#include "CPU/BatchwiseMultiplicativeDropout.cpp"
+#include "CPU/Convolution.cpp"
+#include "CPU/Deconvolution.cpp"
+#include "CPU/IOLayers.cpp"
+#include "CPU/LeakyReLU.cpp"
+#include "CPU/MaxPooling.cpp"
+#include "CPU/NetworkInNetwork.cpp"
+#include "CPU/SparseToDense.cpp"
+#include "CPU/UnPooling.cpp"
+
+// CPU definition of AffineReluTrivialConvolution_updateOutput.
+// Hands all five tensors, unchanged and in order, to the float instantiation
+// of cpu_AffineReluTrivialConvolution_updateOutput and returns its result.
+double AffineReluTrivialConvolution_updateOutput(
+    at::Tensor input_features, at::Tensor output_features,
+    at::Tensor affineWeight, at::Tensor affineBias, at::Tensor convWeight) {
+  const double result = cpu_AffineReluTrivialConvolution_updateOutput<float>(
+      input_features, output_features, affineWeight, affineBias, convWeight);
+  return result;
+}
+
+// CPU definition of AffineReluTrivialConvolution_backward.
+// Pure forwarding: every argument goes, unchanged and in order, to the float
+// instantiation of cpu_AffineReluTrivialConvolution_backward.
+void AffineReluTrivialConvolution_backward(
+    at::Tensor input_features,
+    at::Tensor d_input_features,
+    at::Tensor d_output_features,
+    at::Tensor affineWeight,
+    at::Tensor d_affineWeight,
+    at::Tensor affineBias,
+    at::Tensor d_affineBias,
+    at::Tensor convWeight,
+    at::Tensor d_convWeight,
+    bool additiveGrad) {
+  cpu_AffineReluTrivialConvolution_backward<float>(
+      input_features, d_input_features, d_output_features,
+      affineWeight, d_affineWeight,
+      affineBias, d_affineBias,
+      convWeight, d_convWeight,
+      additiveGrad);
+}
+
+// Forward-pass wrapper: forwards all arguments, in order, to the float
+// instantiation of cpu_BatchNormalization_updateOutput.
+void BatchNormalization_updateOutput(at::Tensor input_features,
+                                     at::Tensor output_features,
+                                     at::Tensor saveMean,
+                                     at::Tensor saveInvStd,
+                                     at::Tensor runningMean,
+                                     at::Tensor runningVar,
+                                     at::Tensor weight,
+                                     at::Tensor bias,
+                                     double eps,
+                                     double momentum,
+                                     bool train,
+                                     double leakiness) {
+  cpu_BatchNormalization_updateOutput<float>(
+      input_features, output_features,
+      saveMean, saveInvStd, runningMean, runningVar,
+      weight, bias,
+      eps, momentum, train, leakiness);
+}
+
+// Backward-pass wrapper: forwards all arguments, in order, to the float
+// instantiation of cpu_BatchNormalization_backward.
+void BatchNormalization_backward(at::Tensor input_features,
+                                 at::Tensor d_input_features,
+                                 at::Tensor output_features,
+                                 at::Tensor d_output_features,
+                                 at::Tensor saveMean,
+                                 at::Tensor saveInvStd,
+                                 at::Tensor runningMean,
+                                 at::Tensor runningVar,
+                                 at::Tensor weight,
+                                 at::Tensor bias,
+                                 at::Tensor d_weight,
+                                 at::Tensor d_bias,
+                                 double leakiness) {
+  cpu_BatchNormalization_backward<float>(
+      input_features, d_input_features,
+      output_features, d_output_features,
+      saveMean, saveInvStd, runningMean, runningVar,
+      weight, bias, d_weight, d_bias,
+      leakiness);
+}
+
+// Forward-pass wrapper: forwards all arguments, in order, to the float
+// instantiation of cpu_BatchwiseMultiplicativeDropout_updateOutput.
+void BatchwiseMultiplicativeDropout_updateOutput(
+    at::Tensor input_features, at::Tensor output_features, at::Tensor noise,
+    double alpha) {
+  cpu_BatchwiseMultiplicativeDropout_updateOutput<float>(input_features,
+                                                         output_features,
+                                                         noise, alpha);
+}
+
+// Backward-pass wrapper: forwards all arguments, in order, to the float
+// instantiation of cpu_BatchwiseMultiplicativeDropout_updateGradInput.
+void BatchwiseMultiplicativeDropout_updateGradInput(at::Tensor input_features,
+                                                    at::Tensor d_input_features,
+                                                    at::Tensor d_output_features,
+                                                    at::Tensor noise,
+                                                    double alpha) {
+  cpu_BatchwiseMultiplicativeDropout_updateGradInput<float>(
+      input_features,
+      d_input_features, d_output_features,
+      noise, alpha);
+}
+
+// Forward-pass wrapper: forwards all arguments, in order, to the float
+// instantiation of cpu_LeakyReLU_updateOutput.
+void LeakyReLU_updateOutput(
+    at::Tensor input_features, at::Tensor output_features, double alpha) {
+  cpu_LeakyReLU_updateOutput<float>(
+      input_features, output_features, alpha);
+}
+
+// Backward-pass wrapper: forwards all arguments, in order, to the float
+// instantiation of cpu_LeakyReLU_updateGradInput.
+void LeakyReLU_updateGradInput(
+    at::Tensor input_features, at::Tensor d_input_features,
+    at::Tensor d_output_features, double alpha) {
+  cpu_LeakyReLU_updateGradInput<float>(
+      input_features, d_input_features, d_output_features, alpha);
+}
+
+// Forward-pass wrapper: forwards all arguments, in order, to the float
+// instantiation of cpu_NetworkInNetwork_updateOutput and returns its double
+// result unchanged.
+double NetworkInNetwork_updateOutput(
+    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
+    at::Tensor bias) {
+  const double result = cpu_NetworkInNetwork_updateOutput<float>(
+      input_features, output_features, weight, bias);
+  return result;
+}
+
+// Backward-pass wrapper (input gradient): forwards all arguments, in order, to
+// the float instantiation of cpu_NetworkInNetwork_updateGradInput.
+void NetworkInNetwork_updateGradInput(
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    at::Tensor weight) {
+  cpu_NetworkInNetwork_updateGradInput<float>(
+      d_input_features, d_output_features, weight);
+}
+
+// Backward-pass wrapper (parameter gradients): forwards all arguments, in
+// order, to the float instantiation of cpu_NetworkInNetwork_accGradParameters.
+void NetworkInNetwork_accGradParameters(
+    at::Tensor input_features, at::Tensor d_output_features,
+    at::Tensor d_weight, at::Tensor d_bias) {
+  cpu_NetworkInNetwork_accGradParameters<float>(input_features,
+                                                d_output_features,
+                                                d_weight, d_bias);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_ActivePooling_updateOutput<float, Dimension>.
+template <Int Dimension>
+void ActivePooling_updateOutput(
+    at::Tensor inputSize, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, bool average) {
+  cpu_ActivePooling_updateOutput<float, Dimension>(
+      inputSize, m, input_features, output_features, average);
+}
+
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_ActivePooling_updateGradInput<float, Dimension>.
+// The call is a plain statement (no `return` of a void expression) so that it
+// matches every other void wrapper in this file.
+template <Int Dimension>
+void ActivePooling_updateGradInput(at::Tensor inputSize, Metadata<Dimension> &m,
+                                   at::Tensor input_features,
+                                   at::Tensor d_input_features,
+                                   at::Tensor d_output_features, bool average) {
+  cpu_ActivePooling_updateGradInput<float, Dimension>(
+      inputSize, m, input_features, d_input_features, d_output_features,
+      average);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_AveragePooling_updateOutput<float, Dimension>.
+template <Int Dimension>
+void AveragePooling_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, long nFeaturesToDrop) {
+  cpu_AveragePooling_updateOutput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m,
+      input_features, output_features,
+      nFeaturesToDrop);
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_AveragePooling_updateGradInput<float, Dimension>.
+template <Int Dimension>
+void AveragePooling_updateGradInput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    long nFeaturesToDrop) {
+  cpu_AveragePooling_updateGradInput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m,
+      input_features, d_input_features, d_output_features,
+      nFeaturesToDrop);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_Convolution_updateOutput<float, Dimension> and returns its double result
+// unchanged.
+template <Int Dimension>
+double Convolution_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, at::Tensor weight, at::Tensor bias) {
+  const double result = cpu_Convolution_updateOutput<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, m,
+      input_features, output_features, weight, bias);
+  return result;
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_Convolution_backward<float, Dimension>.
+template <Int Dimension>
+void Convolution_backward(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias) {
+  cpu_Convolution_backward<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, m,
+      input_features, d_input_features, d_output_features,
+      weight, d_weight, d_bias);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_SubmanifoldConvolution_updateOutput<float, Dimension> and returns its
+// double result unchanged.
+template <Int Dimension>
+double SubmanifoldConvolution_updateOutput(
+    at::Tensor inputSize, at::Tensor filterSize, Metadata<Dimension> &m,
+    at::Tensor input_features, at::Tensor output_features, at::Tensor weight,
+    at::Tensor bias) {
+  const double result =
+      cpu_SubmanifoldConvolution_updateOutput<float, Dimension>(
+          inputSize, filterSize, m, input_features, output_features, weight,
+          bias);
+  return result;
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_SubmanifoldConvolution_backward<float, Dimension>.
+template <Int Dimension>
+void SubmanifoldConvolution_backward(at::Tensor inputSize,
+                                     at::Tensor filterSize,
+                                     Metadata<Dimension> &m,
+                                     at::Tensor input_features,
+                                     at::Tensor d_input_features,
+                                     at::Tensor d_output_features,
+                                     at::Tensor weight, at::Tensor d_weight,
+                                     at::Tensor d_bias) {
+  cpu_SubmanifoldConvolution_backward<float, Dimension>(
+      inputSize, filterSize, m,
+      input_features, d_input_features, d_output_features,
+      weight, d_weight, d_bias);
+}
+// Forward-pass wrapper: forwards all arguments (including both the input and
+// the output Metadata), in order, to
+// cpu_FullConvolution_updateOutput<float, Dimension> and returns its double
+// result unchanged.
+template <Int Dimension>
+double FullConvolution_updateOutput(at::Tensor inputSize,
+                                    at::Tensor outputSize,
+                                    at::Tensor filterSize,
+                                    at::Tensor filterStride,
+                                    Metadata<Dimension> &mIn,
+                                    Metadata<Dimension> &mOut,
+                                    at::Tensor input_features,
+                                    at::Tensor output_features,
+                                    at::Tensor weight, at::Tensor bias) {
+  const double result = cpu_FullConvolution_updateOutput<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride,
+      mIn, mOut,
+      input_features, output_features, weight, bias);
+  return result;
+}
+// Backward-pass wrapper: forwards all arguments (including both the input and
+// the output Metadata), in order, to
+// cpu_FullConvolution_backward<float, Dimension>.
+template <Int Dimension>
+void FullConvolution_backward(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &mIn,
+    Metadata<Dimension> &mOut, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias) {
+  cpu_FullConvolution_backward<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride,
+      mIn, mOut,
+      input_features, d_input_features, d_output_features,
+      weight, d_weight, d_bias);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_RandomizedStrideConvolution_updateOutput<float, Dimension> and returns
+// its double result unchanged.
+template <Int Dimension>
+double RandomizedStrideConvolution_updateOutput(at::Tensor inputSize,
+                                                at::Tensor outputSize,
+                                                at::Tensor filterSize,
+                                                at::Tensor filterStride,
+                                                Metadata<Dimension> &m,
+                                                at::Tensor input_features,
+                                                at::Tensor output_features,
+                                                at::Tensor weight,
+                                                at::Tensor bias) {
+  const double result =
+      cpu_RandomizedStrideConvolution_updateOutput<float, Dimension>(
+          inputSize, outputSize, filterSize, filterStride, m, input_features,
+          output_features, weight, bias);
+  return result;
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_RandomizedStrideConvolution_backward<float, Dimension>.
+template <Int Dimension>
+void RandomizedStrideConvolution_backward(at::Tensor inputSize,
+                                          at::Tensor outputSize,
+                                          at::Tensor filterSize,
+                                          at::Tensor filterStride,
+                                          Metadata<Dimension> &m,
+                                          at::Tensor input_features,
+                                          at::Tensor d_input_features,
+                                          at::Tensor d_output_features,
+                                          at::Tensor weight,
+                                          at::Tensor d_weight,
+                                          at::Tensor d_bias) {
+  cpu_RandomizedStrideConvolution_backward<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, m,
+      input_features, d_input_features, d_output_features,
+      weight, d_weight, d_bias);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_Deconvolution_updateOutput<float, Dimension> and returns its double
+// result unchanged.
+template <Int Dimension>
+double Deconvolution_updateOutput(at::Tensor inputSize, at::Tensor outputSize,
+                                  at::Tensor filterSize,
+                                  at::Tensor filterStride,
+                                  Metadata<Dimension> &m,
+                                  at::Tensor input_features,
+                                  at::Tensor output_features,
+                                  at::Tensor weight, at::Tensor bias) {
+  const double result = cpu_Deconvolution_updateOutput<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, m,
+      input_features, output_features, weight, bias);
+  return result;
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_Deconvolution_backward<float, Dimension>.
+template <Int Dimension>
+void Deconvolution_backward(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias) {
+  cpu_Deconvolution_backward<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, m,
+      input_features, d_input_features, d_output_features,
+      weight, d_weight, d_bias);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_InputLayer_updateOutput<float, Dimension>.
+template <Int Dimension>
+void InputLayer_updateOutput(
+    Metadata<Dimension> &m, at::Tensor spatialSize, at::Tensor input_coords,
+    at::Tensor input_features, at::Tensor output_features, long batchSize,
+    long mode) {
+  cpu_InputLayer_updateOutput<float, Dimension>(
+      m, spatialSize, input_coords,
+      input_features, output_features,
+      batchSize, mode);
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_InputLayer_updateGradInput<float, Dimension>.
+template <Int Dimension>
+void InputLayer_updateGradInput(
+    Metadata<Dimension> &m, at::Tensor d_input_features,
+    at::Tensor d_output_features) {
+  cpu_InputLayer_updateGradInput<float, Dimension>(
+      m, d_input_features, d_output_features);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_OutputLayer_updateOutput<float, Dimension>.
+template <Int Dimension>
+void OutputLayer_updateOutput(
+    Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features) {
+  cpu_OutputLayer_updateOutput<float, Dimension>(
+      m, input_features, output_features);
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_OutputLayer_updateGradInput<float, Dimension>.
+template <Int Dimension>
+void OutputLayer_updateGradInput(
+    Metadata<Dimension> &m, at::Tensor d_input_features,
+    at::Tensor d_output_features) {
+  cpu_OutputLayer_updateGradInput<float, Dimension>(
+      m, d_input_features, d_output_features);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_BLInputLayer_updateOutput<float, Dimension>.
+template <Int Dimension>
+void BLInputLayer_updateOutput(
+    Metadata<Dimension> &m, at::Tensor spatialSize, at::Tensor input_coords,
+    at::Tensor input_features, at::Tensor output_features, long mode) {
+  cpu_BLInputLayer_updateOutput<float, Dimension>(m, spatialSize, input_coords,
+                                                  input_features,
+                                                  output_features, mode);
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_BLInputLayer_updateGradInput<float, Dimension>.
+template <Int Dimension>
+void BLInputLayer_updateGradInput(
+    Metadata<Dimension> &m, at::Tensor d_input_features,
+    at::Tensor d_output_features) {
+  cpu_BLInputLayer_updateGradInput<float, Dimension>(
+      m, d_input_features, d_output_features);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_BLOutputLayer_updateOutput<float, Dimension>.
+template <Int Dimension>
+void BLOutputLayer_updateOutput(
+    Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features) {
+  cpu_BLOutputLayer_updateOutput<float, Dimension>(
+      m, input_features, output_features);
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_BLOutputLayer_updateGradInput<float, Dimension>.
+template <Int Dimension>
+void BLOutputLayer_updateGradInput(
+    Metadata<Dimension> &m, at::Tensor d_input_features,
+    at::Tensor d_output_features) {
+  cpu_BLOutputLayer_updateGradInput<float, Dimension>(
+      m, d_input_features, d_output_features);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_MaxPooling_updateOutput<float, Dimension>.
+template <Int Dimension>
+void MaxPooling_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, long nFeaturesToDrop) {
+  cpu_MaxPooling_updateOutput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m,
+      input_features, output_features,
+      nFeaturesToDrop);
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_MaxPooling_updateGradInput<float, Dimension>.
+template <Int Dimension>
+void MaxPooling_updateGradInput(at::Tensor inputSize, at::Tensor outputSize,
+                                at::Tensor poolSize, at::Tensor poolStride,
+                                Metadata<Dimension> &m,
+                                at::Tensor input_features,
+                                at::Tensor d_input_features,
+                                at::Tensor output_features,
+                                at::Tensor d_output_features,
+                                long nFeaturesToDrop) {
+  cpu_MaxPooling_updateGradInput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m,
+      input_features, d_input_features,
+      output_features, d_output_features,
+      nFeaturesToDrop);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_RandomizedStrideMaxPooling_updateOutput<float, Dimension>.
+template <Int Dimension>
+void RandomizedStrideMaxPooling_updateOutput(at::Tensor inputSize,
+                                             at::Tensor outputSize,
+                                             at::Tensor poolSize,
+                                             at::Tensor poolStride,
+                                             Metadata<Dimension> &m,
+                                             at::Tensor input_features,
+                                             at::Tensor output_features,
+                                             long nFeaturesToDrop) {
+  cpu_RandomizedStrideMaxPooling_updateOutput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m,
+      input_features, output_features,
+      nFeaturesToDrop);
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_RandomizedStrideMaxPooling_updateGradInput<float, Dimension>.
+template <Int Dimension>
+void RandomizedStrideMaxPooling_updateGradInput(at::Tensor inputSize,
+                                                at::Tensor outputSize,
+                                                at::Tensor poolSize,
+                                                at::Tensor poolStride,
+                                                Metadata<Dimension> &m,
+                                                at::Tensor input_features,
+                                                at::Tensor d_input_features,
+                                                at::Tensor output_features,
+                                                at::Tensor d_output_features,
+                                                long nFeaturesToDrop) {
+  cpu_RandomizedStrideMaxPooling_updateGradInput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m,
+      input_features, d_input_features,
+      output_features, d_output_features,
+      nFeaturesToDrop);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_SparseToDense_updateOutput<float, Dimension>.
+template <Int Dimension>
+void SparseToDense_updateOutput(
+    at::Tensor inputSize, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, long nPlanes) {
+  cpu_SparseToDense_updateOutput<float, Dimension>(
+      inputSize, m, input_features, output_features, nPlanes);
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_SparseToDense_updateGradInput<float, Dimension>.
+template <Int Dimension>
+void SparseToDense_updateGradInput(
+    at::Tensor inputSize, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features) {
+  cpu_SparseToDense_updateGradInput<float, Dimension>(inputSize, m,
+                                                      input_features,
+                                                      d_input_features,
+                                                      d_output_features);
+}
+// Forward-pass wrapper: forwards all arguments, in order, to
+// cpu_UnPooling_updateOutput<float, Dimension>.
+template <Int Dimension>
+void UnPooling_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, long nFeaturesToDrop) {
+  cpu_UnPooling_updateOutput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m,
+      input_features, output_features,
+      nFeaturesToDrop);
+}
+// Backward-pass wrapper: forwards all arguments, in order, to
+// cpu_UnPooling_updateGradInput<float, Dimension>.
+template <Int Dimension>
+void UnPooling_updateGradInput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    long nFeaturesToDrop) {
+  cpu_UnPooling_updateGradInput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m,
+      input_features, d_input_features, d_output_features,
+      nFeaturesToDrop);
+}
+
+// Explicitly instantiates every Dimension-templated wrapper defined above for
+// one value of DIMENSION. The dimension is a macro argument (instead of a
+// separately #define'd/#undef'd DIMENSION symbol), and the expansion carries no
+// trailing semicolon, so each use below is a single ordinary statement with no
+// stray empty declaration. The macro is #undef'd once it has been used.
+#define SCN_INSTANTIATE_WRAPPERS(DIMENSION)                                    \
+  template void ActivePooling_updateOutput<DIMENSION>(                         \
+      at::Tensor inputSize, Metadata<DIMENSION> & m,                           \
+      at::Tensor input_features, at::Tensor output_features, bool average);    \
+  template void ActivePooling_updateGradInput<DIMENSION>(                      \
+      at::Tensor inputSize, Metadata<DIMENSION> & m,                           \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, bool average);                             \
+  template void AveragePooling_updateOutput<DIMENSION>(                        \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      long nFeaturesToDrop);                                                   \
+  template void AveragePooling_updateGradInput<DIMENSION>(                     \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, long nFeaturesToDrop);                     \
+  template double Convolution_updateOutput<DIMENSION>(                         \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      at::Tensor weight, at::Tensor bias);                                     \
+  template void Convolution_backward<DIMENSION>(                               \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,    \
+      at::Tensor d_bias);                                                      \
+  template double SubmanifoldConvolution_updateOutput<DIMENSION>(              \
+      at::Tensor inputSize, at::Tensor filterSize, Metadata<DIMENSION> & m,    \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      at::Tensor weight, at::Tensor bias);                                     \
+  template void SubmanifoldConvolution_backward<DIMENSION>(                    \
+      at::Tensor inputSize, at::Tensor filterSize, Metadata<DIMENSION> & m,    \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,    \
+      at::Tensor d_bias);                                                      \
+  template double FullConvolution_updateOutput<DIMENSION>(                     \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & mIn,                      \
+      Metadata<DIMENSION> & mOut, at::Tensor input_features,                   \
+      at::Tensor output_features, at::Tensor weight, at::Tensor bias);         \
+  template void FullConvolution_backward<DIMENSION>(                           \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & mIn,                      \
+      Metadata<DIMENSION> & mOut, at::Tensor input_features,                   \
+      at::Tensor d_input_features, at::Tensor d_output_features,               \
+      at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);              \
+  template double RandomizedStrideConvolution_updateOutput<DIMENSION>(         \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      at::Tensor weight, at::Tensor bias);                                     \
+  template void RandomizedStrideConvolution_backward<DIMENSION>(               \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,    \
+      at::Tensor d_bias);                                                      \
+  template double Deconvolution_updateOutput<DIMENSION>(                       \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      at::Tensor weight, at::Tensor bias);                                     \
+  template void Deconvolution_backward<DIMENSION>(                             \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,    \
+      at::Tensor d_bias);                                                      \
+  template void InputLayer_updateOutput<DIMENSION>(                            \
+      Metadata<DIMENSION> & m, at::Tensor spatialSize,                         \
+      at::Tensor input_coords, at::Tensor input_features,                      \
+      at::Tensor output_features, long batchSize, long mode);                  \
+  template void InputLayer_updateGradInput<DIMENSION>(                         \
+      Metadata<DIMENSION> & m, at::Tensor d_input_features,                    \
+      at::Tensor d_output_features);                                           \
+  template void OutputLayer_updateOutput<DIMENSION>(                           \
+      Metadata<DIMENSION> & m, at::Tensor input_features,                      \
+      at::Tensor output_features);                                             \
+  template void OutputLayer_updateGradInput<DIMENSION>(                        \
+      Metadata<DIMENSION> & m, at::Tensor d_input_features,                    \
+      at::Tensor d_output_features);                                           \
+  template void BLInputLayer_updateOutput<DIMENSION>(                          \
+      Metadata<DIMENSION> & m, at::Tensor spatialSize,                         \
+      at::Tensor input_coords, at::Tensor input_features,                      \
+      at::Tensor output_features, long mode);                                  \
+  template void BLInputLayer_updateGradInput<DIMENSION>(                       \
+      Metadata<DIMENSION> & m, at::Tensor d_input_features,                    \
+      at::Tensor d_output_features);                                           \
+  template void BLOutputLayer_updateOutput<DIMENSION>(                         \
+      Metadata<DIMENSION> & m, at::Tensor input_features,                      \
+      at::Tensor output_features);                                             \
+  template void BLOutputLayer_updateGradInput<DIMENSION>(                      \
+      Metadata<DIMENSION> & m, at::Tensor d_input_features,                    \
+      at::Tensor d_output_features);                                           \
+  template void MaxPooling_updateOutput<DIMENSION>(                            \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      long nFeaturesToDrop);                                                   \
+  template void MaxPooling_updateGradInput<DIMENSION>(                         \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor output_features, at::Tensor d_output_features,                \
+      long nFeaturesToDrop);                                                   \
+  template void RandomizedStrideMaxPooling_updateOutput<DIMENSION>(            \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      long nFeaturesToDrop);                                                   \
+  template void RandomizedStrideMaxPooling_updateGradInput<DIMENSION>(         \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor output_features, at::Tensor d_output_features,                \
+      long nFeaturesToDrop);                                                   \
+  template void SparseToDense_updateOutput<DIMENSION>(                         \
+      at::Tensor inputSize, Metadata<DIMENSION> & m,                           \
+      at::Tensor input_features, at::Tensor output_features, long nPlanes);    \
+  template void SparseToDense_updateGradInput<DIMENSION>(                      \
+      at::Tensor inputSize, Metadata<DIMENSION> & m,                           \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features);                                           \
+  template void UnPooling_updateOutput<DIMENSION>(                             \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      long nFeaturesToDrop);                                                   \
+  template void UnPooling_updateGradInput<DIMENSION>(                          \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, long nFeaturesToDrop)
+
+// One set of explicit instantiations per supported dimension (1 through 6).
+SCN_INSTANTIATE_WRAPPERS(1);
+SCN_INSTANTIATE_WRAPPERS(2);
+SCN_INSTANTIATE_WRAPPERS(3);
+SCN_INSTANTIATE_WRAPPERS(4);
+SCN_INSTANTIATE_WRAPPERS(5);
+SCN_INSTANTIATE_WRAPPERS(6);
+
+#undef SCN_INSTANTIATE_WRAPPERS
--- a/sparseconvnet/SCN/sparseconvnet_cuda.cpp
+++ b/sparseconvnet/SCN/sparseconvnet_cuda.cpp
+// Copyright 2016-present, Facebook, Inc.
+// All rights reserved.
+//
+// This source code is licensed under the license found in the
+// LICENSE file in the root directory of this source tree.
+
+#define ENABLE_OPENMP YES
+#if defined(ENABLE_OPENMP)
+#include <omp.h>
+#endif
+
+//#include <ATen/ATen.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <torch/torch.h>
+
+#include "Metadata/Metadata.cpp"
+// Explicit instantiation definitions of the Metadata class template, one per
+// supported dimension (1 through 6).
+template class Metadata<1>;
+template class Metadata<2>;
+template class Metadata<3>;
+template class Metadata<4>;
+template class Metadata<5>;
+template class Metadata<6>;
+
+#include "CPU/ActivePooling.cpp"
+#include "CPU/AffineReluTrivialConvolution.cpp"
+#include "CPU/AveragePooling.cpp"
+#include "CPU/BatchNormalization.cpp"
+#include "CPU/BatchwiseMultiplicativeDropout.cpp"
+#include "CPU/Convolution.cpp"
+#include "CPU/Deconvolution.cpp"
+#include "CPU/IOLayers.cpp"
+#include "CPU/LeakyReLU.cpp"
+#include "CPU/MaxPooling.cpp"
+#include "CPU/NetworkInNetwork.cpp"
+#include "CPU/SparseToDense.cpp"
+#include "CPU/UnPooling.cpp"
+#include "CUDA/ActivePooling.cpp"
+#include "CUDA/AffineReluTrivialConvolution.cpp"
+#include "CUDA/AveragePooling.cpp"
+#include "CUDA/BatchNormalization.cpp"
+#include "CUDA/BatchwiseMultiplicativeDropout.cpp"
+#include "CUDA/Convolution.cpp"
+#include "CUDA/Deconvolution.cpp"
+#include "CUDA/IOLayers.cpp"
+#include "CUDA/LeakyReLU.cpp"
+#include "CUDA/MaxPooling.cpp"
+#include "CUDA/NetworkInNetwork.cpp"
+#include "CUDA/SparseToDense.cpp"
+#include "CUDA/UnPooling.cpp"
+
+// Forward-pass dispatcher. When input_features is not a CUDA tensor the float
+// instantiation of the cpu_ implementation runs; otherwise the float
+// instantiation of the cuda_ implementation runs. Either way the arguments are
+// forwarded unchanged and the callee's double result is returned.
+double AffineReluTrivialConvolution_updateOutput(
+    at::Tensor input_features, at::Tensor output_features,
+    at::Tensor affineWeight, at::Tensor affineBias, at::Tensor convWeight) {
+  // Only input_features is inspected to choose the backend.
+  if (!input_features.type().is_cuda()) {
+    return cpu_AffineReluTrivialConvolution_updateOutput<float>(
+        input_features, output_features, affineWeight, affineBias, convWeight);
+  }
+  return cuda_AffineReluTrivialConvolution_updateOutput<float>(
+      input_features, output_features, affineWeight, affineBias, convWeight);
+}
+
+// Backward entry point for AffineReluTrivialConvolution. The device of
+// d_output_features decides whether the CUDA or the CPU float implementation
+// is invoked.
+void AffineReluTrivialConvolution_backward(
+    at::Tensor input_features, at::Tensor d_input_features,
+    at::Tensor d_output_features, at::Tensor affineWeight,
+    at::Tensor d_affineWeight, at::Tensor affineBias, at::Tensor d_affineBias,
+    at::Tensor convWeight, at::Tensor d_convWeight, bool additiveGrad) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_AffineReluTrivialConvolution_backward<float>(
+        input_features, d_input_features, d_output_features, affineWeight,
+        d_affineWeight, affineBias, d_affineBias, convWeight, d_convWeight,
+        additiveGrad);
+    return;
+  }
+  cuda_AffineReluTrivialConvolution_backward<float>(
+      input_features, d_input_features, d_output_features, affineWeight,
+      d_affineWeight, affineBias, d_affineBias, convWeight, d_convWeight,
+      additiveGrad);
+}
+
+// Forward entry point for BatchNormalization. Forwards every argument
+// unchanged to the CUDA or CPU float implementation, chosen by the device of
+// input_features.
+void BatchNormalization_updateOutput(
+    at::Tensor input_features, at::Tensor output_features, at::Tensor saveMean,
+    at::Tensor saveInvStd, at::Tensor runningMean, at::Tensor runningVar,
+    at::Tensor weight, at::Tensor bias, double eps, double momentum, bool train,
+    double leakiness) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_BatchNormalization_updateOutput<float>(
+        input_features, output_features, saveMean, saveInvStd, runningMean,
+        runningVar, weight, bias, eps, momentum, train, leakiness);
+    return;
+  }
+  cuda_BatchNormalization_updateOutput<float>(
+      input_features, output_features, saveMean, saveInvStd, runningMean,
+      runningVar, weight, bias, eps, momentum, train, leakiness);
+}
+
+// Backward entry point for BatchNormalization. Chooses the CUDA or CPU float
+// implementation from the device of d_output_features and forwards all
+// arguments unchanged.
+void BatchNormalization_backward(
+    at::Tensor input_features, at::Tensor d_input_features,
+    at::Tensor output_features, at::Tensor d_output_features,
+    at::Tensor saveMean, at::Tensor saveInvStd, at::Tensor runningMean,
+    at::Tensor runningVar, at::Tensor weight, at::Tensor bias,
+    at::Tensor d_weight, at::Tensor d_bias, double leakiness) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_BatchNormalization_backward<float>(
+        input_features, d_input_features, output_features, d_output_features,
+        saveMean, saveInvStd, runningMean, runningVar, weight, bias, d_weight,
+        d_bias, leakiness);
+    return;
+  }
+  cuda_BatchNormalization_backward<float>(
+      input_features, d_input_features, output_features, d_output_features,
+      saveMean, saveInvStd, runningMean, runningVar, weight, bias, d_weight,
+      d_bias, leakiness);
+}
+
+// Forward entry point for BatchwiseMultiplicativeDropout. Selects the CUDA or
+// CPU float implementation from the device of input_features.
+void BatchwiseMultiplicativeDropout_updateOutput(at::Tensor input_features,
+                                                 at::Tensor output_features,
+                                                 at::Tensor noise,
+                                                 double alpha) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_BatchwiseMultiplicativeDropout_updateOutput<float>(
+        input_features, output_features, noise, alpha);
+    return;
+  }
+  cuda_BatchwiseMultiplicativeDropout_updateOutput<float>(
+      input_features, output_features, noise, alpha);
+}
+
+// Backward entry point for BatchwiseMultiplicativeDropout. Selects the CUDA
+// or CPU float implementation from the device of d_output_features.
+void BatchwiseMultiplicativeDropout_updateGradInput(
+    at::Tensor input_features, at::Tensor d_input_features,
+    at::Tensor d_output_features, at::Tensor noise, double alpha) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_BatchwiseMultiplicativeDropout_updateGradInput<float>(
+        input_features, d_input_features, d_output_features, noise, alpha);
+    return;
+  }
+  cuda_BatchwiseMultiplicativeDropout_updateGradInput<float>(
+      input_features, d_input_features, d_output_features, noise, alpha);
+}
+
+// Forward entry point for LeakyReLU. Calls the CUDA float implementation when
+// input_features is a CUDA tensor, otherwise the CPU one.
+void LeakyReLU_updateOutput(at::Tensor input_features,
+                            at::Tensor output_features, double alpha) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_LeakyReLU_updateOutput<float>(input_features, output_features, alpha);
+    return;
+  }
+  cuda_LeakyReLU_updateOutput<float>(input_features, output_features, alpha);
+}
+
+// Backward entry point for LeakyReLU. Calls the CUDA float implementation
+// when d_output_features is a CUDA tensor, otherwise the CPU one.
+void LeakyReLU_updateGradInput(at::Tensor input_features,
+                               at::Tensor d_input_features,
+                               at::Tensor d_output_features, double alpha) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_LeakyReLU_updateGradInput<float>(input_features, d_input_features,
+                                         d_output_features, alpha);
+    return;
+  }
+  cuda_LeakyReLU_updateGradInput<float>(input_features, d_input_features,
+                                        d_output_features, alpha);
+}
+
+// Forward entry point for NetworkInNetwork. Returns the result of the CUDA or
+// CPU float implementation, selected by the device of input_features.
+double NetworkInNetwork_updateOutput(at::Tensor input_features,
+                                     at::Tensor output_features,
+                                     at::Tensor weight, at::Tensor bias) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    return cpu_NetworkInNetwork_updateOutput<float>(
+        input_features, output_features, weight, bias);
+  }
+  return cuda_NetworkInNetwork_updateOutput<float>(
+      input_features, output_features, weight, bias);
+}
+
+// Input-gradient entry point for NetworkInNetwork. Selects the CUDA or CPU
+// float implementation from the device of d_output_features.
+void NetworkInNetwork_updateGradInput(at::Tensor d_input_features,
+                                      at::Tensor d_output_features,
+                                      at::Tensor weight) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_NetworkInNetwork_updateGradInput<float>(d_input_features,
+                                                d_output_features, weight);
+    return;
+  }
+  cuda_NetworkInNetwork_updateGradInput<float>(d_input_features,
+                                               d_output_features, weight);
+}
+
+// Parameter-gradient entry point for NetworkInNetwork. Selects the CUDA or
+// CPU float implementation from the device of d_output_features.
+void NetworkInNetwork_accGradParameters(at::Tensor input_features,
+                                        at::Tensor d_output_features,
+                                        at::Tensor d_weight,
+                                        at::Tensor d_bias) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_NetworkInNetwork_accGradParameters<float>(
+        input_features, d_output_features, d_weight, d_bias);
+    return;
+  }
+  cuda_NetworkInNetwork_accGradParameters<float>(
+      input_features, d_output_features, d_weight, d_bias);
+}
+// Forward entry point for ActivePooling, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of input_features.
+template <Int Dimension>
+void ActivePooling_updateOutput(at::Tensor inputSize, Metadata<Dimension> &m,
+                                at::Tensor input_features,
+                                at::Tensor output_features, bool average) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_ActivePooling_updateOutput<float, Dimension>(
+        inputSize, m, input_features, output_features, average);
+    return;
+  }
+  cuda_ActivePooling_updateOutput<float, Dimension>(
+      inputSize, m, input_features, output_features, average);
+}
+
+// Backward entry point for ActivePooling, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of d_output_features.
+template <Int Dimension>
+void ActivePooling_updateGradInput(at::Tensor inputSize, Metadata<Dimension> &m,
+                                   at::Tensor input_features,
+                                   at::Tensor d_input_features,
+                                   at::Tensor d_output_features, bool average) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_ActivePooling_updateGradInput<float, Dimension>(
+        inputSize, m, input_features, d_input_features, d_output_features,
+        average);
+    return;
+  }
+  cuda_ActivePooling_updateGradInput<float, Dimension>(
+      inputSize, m, input_features, d_input_features, d_output_features,
+      average);
+}
+// Forward entry point for AveragePooling, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of input_features.
+template <Int Dimension>
+void AveragePooling_updateOutput(at::Tensor inputSize, at::Tensor outputSize,
+                                 at::Tensor poolSize, at::Tensor poolStride,
+                                 Metadata<Dimension> &m,
+                                 at::Tensor input_features,
+                                 at::Tensor output_features,
+                                 long nFeaturesToDrop) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_AveragePooling_updateOutput<float, Dimension>(
+        inputSize, outputSize, poolSize, poolStride, m, input_features,
+        output_features, nFeaturesToDrop);
+    return;
+  }
+  cuda_AveragePooling_updateOutput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m, input_features,
+      output_features, nFeaturesToDrop);
+}
+// Backward entry point for AveragePooling, templated on Dimension. Selects
+// the CUDA or CPU float implementation from the device of d_output_features.
+template <Int Dimension>
+void AveragePooling_updateGradInput(at::Tensor inputSize, at::Tensor outputSize,
+                                    at::Tensor poolSize, at::Tensor poolStride,
+                                    Metadata<Dimension> &m,
+                                    at::Tensor input_features,
+                                    at::Tensor d_input_features,
+                                    at::Tensor d_output_features,
+                                    long nFeaturesToDrop) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_AveragePooling_updateGradInput<float, Dimension>(
+        inputSize, outputSize, poolSize, poolStride, m, input_features,
+        d_input_features, d_output_features, nFeaturesToDrop);
+    return;
+  }
+  cuda_AveragePooling_updateGradInput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m, input_features,
+      d_input_features, d_output_features, nFeaturesToDrop);
+}
+// Forward entry point for Convolution, templated on Dimension. Returns the
+// result of the CUDA or CPU float implementation, selected by the device of
+// input_features.
+template <Int Dimension>
+double Convolution_updateOutput(at::Tensor inputSize, at::Tensor outputSize,
+                                at::Tensor filterSize, at::Tensor filterStride,
+                                Metadata<Dimension> &m,
+                                at::Tensor input_features,
+                                at::Tensor output_features, at::Tensor weight,
+                                at::Tensor bias) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    return cpu_Convolution_updateOutput<float, Dimension>(
+        inputSize, outputSize, filterSize, filterStride, m, input_features,
+        output_features, weight, bias);
+  }
+  return cuda_Convolution_updateOutput<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, m, input_features,
+      output_features, weight, bias);
+}
+// Backward entry point for Convolution, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of d_output_features.
+template <Int Dimension>
+void Convolution_backward(at::Tensor inputSize, at::Tensor outputSize,
+                          at::Tensor filterSize, at::Tensor filterStride,
+                          Metadata<Dimension> &m, at::Tensor input_features,
+                          at::Tensor d_input_features,
+                          at::Tensor d_output_features, at::Tensor weight,
+                          at::Tensor d_weight, at::Tensor d_bias) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_Convolution_backward<float, Dimension>(
+        inputSize, outputSize, filterSize, filterStride, m, input_features,
+        d_input_features, d_output_features, weight, d_weight, d_bias);
+    return;
+  }
+  cuda_Convolution_backward<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, m, input_features,
+      d_input_features, d_output_features, weight, d_weight, d_bias);
+}
+// Forward entry point for SubmanifoldConvolution, templated on Dimension.
+// Returns the result of the CUDA or CPU float implementation, selected by the
+// device of input_features.
+template <Int Dimension>
+double SubmanifoldConvolution_updateOutput(at::Tensor inputSize,
+                                           at::Tensor filterSize,
+                                           Metadata<Dimension> &m,
+                                           at::Tensor input_features,
+                                           at::Tensor output_features,
+                                           at::Tensor weight, at::Tensor bias) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    return cpu_SubmanifoldConvolution_updateOutput<float, Dimension>(
+        inputSize, filterSize, m, input_features, output_features, weight,
+        bias);
+  }
+  return cuda_SubmanifoldConvolution_updateOutput<float, Dimension>(
+      inputSize, filterSize, m, input_features, output_features, weight,
+      bias);
+}
+// Backward entry point for SubmanifoldConvolution, templated on Dimension.
+// Selects the CUDA or CPU float implementation from the device of
+// d_output_features.
+template <Int Dimension>
+void SubmanifoldConvolution_backward(
+    at::Tensor inputSize, at::Tensor filterSize, Metadata<Dimension> &m,
+    at::Tensor input_features, at::Tensor d_input_features,
+    at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,
+    at::Tensor d_bias) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_SubmanifoldConvolution_backward<float, Dimension>(
+        inputSize, filterSize, m, input_features, d_input_features,
+        d_output_features, weight, d_weight, d_bias);
+    return;
+  }
+  cuda_SubmanifoldConvolution_backward<float, Dimension>(
+      inputSize, filterSize, m, input_features, d_input_features,
+      d_output_features, weight, d_weight, d_bias);
+}
+// Forward entry point for FullConvolution, templated on Dimension. Takes two
+// Metadata objects (mIn, mOut) and returns the result of the CUDA or CPU
+// float implementation, selected by the device of input_features.
+template <Int Dimension>
+double FullConvolution_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &mIn,
+    Metadata<Dimension> &mOut, at::Tensor input_features,
+    at::Tensor output_features, at::Tensor weight, at::Tensor bias) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    return cpu_FullConvolution_updateOutput<float, Dimension>(
+        inputSize, outputSize, filterSize, filterStride, mIn, mOut,
+        input_features, output_features, weight, bias);
+  }
+  return cuda_FullConvolution_updateOutput<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, mIn, mOut,
+      input_features, output_features, weight, bias);
+}
+// Backward entry point for FullConvolution, templated on Dimension. Takes two
+// Metadata objects (mIn, mOut) and selects the CUDA or CPU float
+// implementation from the device of d_output_features.
+template <Int Dimension>
+void FullConvolution_backward(at::Tensor inputSize, at::Tensor outputSize,
+                              at::Tensor filterSize, at::Tensor filterStride,
+                              Metadata<Dimension> &mIn,
+                              Metadata<Dimension> &mOut,
+                              at::Tensor input_features,
+                              at::Tensor d_input_features,
+                              at::Tensor d_output_features, at::Tensor weight,
+                              at::Tensor d_weight, at::Tensor d_bias) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_FullConvolution_backward<float, Dimension>(
+        inputSize, outputSize, filterSize, filterStride, mIn, mOut,
+        input_features, d_input_features, d_output_features, weight, d_weight,
+        d_bias);
+    return;
+  }
+  cuda_FullConvolution_backward<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, mIn, mOut,
+      input_features, d_input_features, d_output_features, weight, d_weight,
+      d_bias);
+}
+// Forward entry point for RandomizedStrideConvolution, templated on
+// Dimension. Returns the result of the CUDA or CPU float implementation,
+// selected by the device of input_features.
+template <Int Dimension>
+double RandomizedStrideConvolution_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, at::Tensor weight, at::Tensor bias) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    return cpu_RandomizedStrideConvolution_updateOutput<float, Dimension>(
+        inputSize, outputSize, filterSize, filterStride, m, input_features,
+        output_features, weight, bias);
+  }
+  return cuda_RandomizedStrideConvolution_updateOutput<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, m, input_features,
+      output_features, weight, bias);
+}
+// Backward entry point for RandomizedStrideConvolution, templated on
+// Dimension. Selects the CUDA or CPU float implementation from the device of
+// d_output_features.
+template <Int Dimension>
+void RandomizedStrideConvolution_backward(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor d_output_features,
+    at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_RandomizedStrideConvolution_backward<float, Dimension>(
+        inputSize, outputSize, filterSize, filterStride, m, input_features,
+        d_input_features, d_output_features, weight, d_weight, d_bias);
+    return;
+  }
+  cuda_RandomizedStrideConvolution_backward<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, m, input_features,
+      d_input_features, d_output_features, weight, d_weight, d_bias);
+}
+// Forward entry point for Deconvolution, templated on Dimension. Returns the
+// result of the CUDA or CPU float implementation, selected by the device of
+// input_features.
+template <Int Dimension>
+double Deconvolution_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,
+    at::Tensor filterStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, at::Tensor weight, at::Tensor bias) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    return cpu_Deconvolution_updateOutput<float, Dimension>(
+        inputSize, outputSize, filterSize, filterStride, m, input_features,
+        output_features, weight, bias);
+  }
+  return cuda_Deconvolution_updateOutput<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, m, input_features,
+      output_features, weight, bias);
+}
+// Backward entry point for Deconvolution, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of d_output_features.
+template <Int Dimension>
+void Deconvolution_backward(at::Tensor inputSize, at::Tensor outputSize,
+                            at::Tensor filterSize, at::Tensor filterStride,
+                            Metadata<Dimension> &m, at::Tensor input_features,
+                            at::Tensor d_input_features,
+                            at::Tensor d_output_features, at::Tensor weight,
+                            at::Tensor d_weight, at::Tensor d_bias) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_Deconvolution_backward<float, Dimension>(
+        inputSize, outputSize, filterSize, filterStride, m, input_features,
+        d_input_features, d_output_features, weight, d_weight, d_bias);
+    return;
+  }
+  cuda_Deconvolution_backward<float, Dimension>(
+      inputSize, outputSize, filterSize, filterStride, m, input_features,
+      d_input_features, d_output_features, weight, d_weight, d_bias);
+}
+// Forward entry point for InputLayer, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of input_features.
+template <Int Dimension>
+void InputLayer_updateOutput(Metadata<Dimension> &m, at::Tensor spatialSize,
+                             at::Tensor input_coords, at::Tensor input_features,
+                             at::Tensor output_features, long batchSize,
+                             long mode) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_InputLayer_updateOutput<float, Dimension>(
+        m, spatialSize, input_coords, input_features, output_features,
+        batchSize, mode);
+    return;
+  }
+  cuda_InputLayer_updateOutput<float, Dimension>(
+      m, spatialSize, input_coords, input_features, output_features,
+      batchSize, mode);
+}
+// Backward entry point for InputLayer, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of d_output_features.
+template <Int Dimension>
+void InputLayer_updateGradInput(Metadata<Dimension> &m,
+                                at::Tensor d_input_features,
+                                at::Tensor d_output_features) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_InputLayer_updateGradInput<float, Dimension>(m, d_input_features,
+                                                     d_output_features);
+    return;
+  }
+  cuda_InputLayer_updateGradInput<float, Dimension>(m, d_input_features,
+                                                    d_output_features);
+}
+// Forward entry point for OutputLayer, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of input_features.
+template <Int Dimension>
+void OutputLayer_updateOutput(Metadata<Dimension> &m, at::Tensor input_features,
+                              at::Tensor output_features) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_OutputLayer_updateOutput<float, Dimension>(m, input_features,
+                                                   output_features);
+    return;
+  }
+  cuda_OutputLayer_updateOutput<float, Dimension>(m, input_features,
+                                                  output_features);
+}
+// Backward entry point for OutputLayer, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of d_output_features.
+template <Int Dimension>
+void OutputLayer_updateGradInput(Metadata<Dimension> &m,
+                                 at::Tensor d_input_features,
+                                 at::Tensor d_output_features) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_OutputLayer_updateGradInput<float, Dimension>(m, d_input_features,
+                                                      d_output_features);
+    return;
+  }
+  cuda_OutputLayer_updateGradInput<float, Dimension>(m, d_input_features,
+                                                     d_output_features);
+}
+// Forward entry point for BLInputLayer, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of input_features.
+template <Int Dimension>
+void BLInputLayer_updateOutput(Metadata<Dimension> &m, at::Tensor spatialSize,
+                               at::Tensor input_coords,
+                               at::Tensor input_features,
+                               at::Tensor output_features, long mode) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_BLInputLayer_updateOutput<float, Dimension>(
+        m, spatialSize, input_coords, input_features, output_features, mode);
+    return;
+  }
+  cuda_BLInputLayer_updateOutput<float, Dimension>(
+      m, spatialSize, input_coords, input_features, output_features, mode);
+}
+// Backward entry point for BLInputLayer, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of d_output_features.
+template <Int Dimension>
+void BLInputLayer_updateGradInput(Metadata<Dimension> &m,
+                                  at::Tensor d_input_features,
+                                  at::Tensor d_output_features) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_BLInputLayer_updateGradInput<float, Dimension>(m, d_input_features,
+                                                       d_output_features);
+    return;
+  }
+  cuda_BLInputLayer_updateGradInput<float, Dimension>(m, d_input_features,
+                                                      d_output_features);
+}
+// Forward entry point for BLOutputLayer, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of input_features.
+template <Int Dimension>
+void BLOutputLayer_updateOutput(Metadata<Dimension> &m,
+                                at::Tensor input_features,
+                                at::Tensor output_features) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_BLOutputLayer_updateOutput<float, Dimension>(m, input_features,
+                                                     output_features);
+    return;
+  }
+  cuda_BLOutputLayer_updateOutput<float, Dimension>(m, input_features,
+                                                    output_features);
+}
+// Backward entry point for BLOutputLayer, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of d_output_features.
+template <Int Dimension>
+void BLOutputLayer_updateGradInput(Metadata<Dimension> &m,
+                                   at::Tensor d_input_features,
+                                   at::Tensor d_output_features) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_BLOutputLayer_updateGradInput<float, Dimension>(m, d_input_features,
+                                                        d_output_features);
+    return;
+  }
+  cuda_BLOutputLayer_updateGradInput<float, Dimension>(m, d_input_features,
+                                                       d_output_features);
+}
+// Forward entry point for MaxPooling, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of input_features.
+template <Int Dimension>
+void MaxPooling_updateOutput(at::Tensor inputSize, at::Tensor outputSize,
+                             at::Tensor poolSize, at::Tensor poolStride,
+                             Metadata<Dimension> &m, at::Tensor input_features,
+                             at::Tensor output_features, long nFeaturesToDrop) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_MaxPooling_updateOutput<float, Dimension>(
+        inputSize, outputSize, poolSize, poolStride, m, input_features,
+        output_features, nFeaturesToDrop);
+    return;
+  }
+  cuda_MaxPooling_updateOutput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m, input_features,
+      output_features, nFeaturesToDrop);
+}
+// Backward entry point for MaxPooling, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of d_output_features.
+template <Int Dimension>
+void MaxPooling_updateGradInput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor output_features,
+    at::Tensor d_output_features, long nFeaturesToDrop) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_MaxPooling_updateGradInput<float, Dimension>(
+        inputSize, outputSize, poolSize, poolStride, m, input_features,
+        d_input_features, output_features, d_output_features, nFeaturesToDrop);
+    return;
+  }
+  cuda_MaxPooling_updateGradInput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m, input_features,
+      d_input_features, output_features, d_output_features, nFeaturesToDrop);
+}
+// Forward entry point for RandomizedStrideMaxPooling, templated on Dimension.
+// Selects the CUDA or CPU float implementation from the device of
+// input_features.
+template <Int Dimension>
+void RandomizedStrideMaxPooling_updateOutput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor output_features, long nFeaturesToDrop) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_RandomizedStrideMaxPooling_updateOutput<float, Dimension>(
+        inputSize, outputSize, poolSize, poolStride, m, input_features,
+        output_features, nFeaturesToDrop);
+    return;
+  }
+  cuda_RandomizedStrideMaxPooling_updateOutput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m, input_features,
+      output_features, nFeaturesToDrop);
+}
+// Backward entry point for RandomizedStrideMaxPooling, templated on
+// Dimension. Selects the CUDA or CPU float implementation from the device of
+// d_output_features.
+template <Int Dimension>
+void RandomizedStrideMaxPooling_updateGradInput(
+    at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,
+    at::Tensor poolStride, Metadata<Dimension> &m, at::Tensor input_features,
+    at::Tensor d_input_features, at::Tensor output_features,
+    at::Tensor d_output_features, long nFeaturesToDrop) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_RandomizedStrideMaxPooling_updateGradInput<float, Dimension>(
+        inputSize, outputSize, poolSize, poolStride, m, input_features,
+        d_input_features, output_features, d_output_features, nFeaturesToDrop);
+    return;
+  }
+  cuda_RandomizedStrideMaxPooling_updateGradInput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m, input_features,
+      d_input_features, output_features, d_output_features, nFeaturesToDrop);
+}
+// Forward entry point for SparseToDense, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of input_features.
+template <Int Dimension>
+void SparseToDense_updateOutput(at::Tensor inputSize, Metadata<Dimension> &m,
+                                at::Tensor input_features,
+                                at::Tensor output_features, long nPlanes) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_SparseToDense_updateOutput<float, Dimension>(
+        inputSize, m, input_features, output_features, nPlanes);
+    return;
+  }
+  cuda_SparseToDense_updateOutput<float, Dimension>(
+      inputSize, m, input_features, output_features, nPlanes);
+}
+// Backward entry point for SparseToDense, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of d_output_features.
+template <Int Dimension>
+void SparseToDense_updateGradInput(at::Tensor inputSize, Metadata<Dimension> &m,
+                                   at::Tensor input_features,
+                                   at::Tensor d_input_features,
+                                   at::Tensor d_output_features) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_SparseToDense_updateGradInput<float, Dimension>(
+        inputSize, m, input_features, d_input_features, d_output_features);
+    return;
+  }
+  cuda_SparseToDense_updateGradInput<float, Dimension>(
+      inputSize, m, input_features, d_input_features, d_output_features);
+}
+// Forward entry point for UnPooling, templated on Dimension. Selects the CUDA
+// or CPU float implementation from the device of input_features.
+template <Int Dimension>
+void UnPooling_updateOutput(at::Tensor inputSize, at::Tensor outputSize,
+                            at::Tensor poolSize, at::Tensor poolStride,
+                            Metadata<Dimension> &m, at::Tensor input_features,
+                            at::Tensor output_features, long nFeaturesToDrop) {
+  const bool onGpu = input_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_UnPooling_updateOutput<float, Dimension>(
+        inputSize, outputSize, poolSize, poolStride, m, input_features,
+        output_features, nFeaturesToDrop);
+    return;
+  }
+  cuda_UnPooling_updateOutput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m, input_features,
+      output_features, nFeaturesToDrop);
+}
+// Backward entry point for UnPooling, templated on Dimension. Selects the
+// CUDA or CPU float implementation from the device of d_output_features.
+template <Int Dimension>
+void UnPooling_updateGradInput(at::Tensor inputSize, at::Tensor outputSize,
+                               at::Tensor poolSize, at::Tensor poolStride,
+                               Metadata<Dimension> &m,
+                               at::Tensor input_features,
+                               at::Tensor d_input_features,
+                               at::Tensor d_output_features,
+                               long nFeaturesToDrop) {
+  const bool onGpu = d_output_features.type().is_cuda();
+  if (!onGpu) {
+    cpu_UnPooling_updateGradInput<float, Dimension>(
+        inputSize, outputSize, poolSize, poolStride, m, input_features,
+        d_input_features, d_output_features, nFeaturesToDrop);
+    return;
+  }
+  cuda_UnPooling_updateGradInput<float, Dimension>(
+      inputSize, outputSize, poolSize, poolStride, m, input_features,
+      d_input_features, d_output_features, nFeaturesToDrop);
+}
+
+#define FOO                                                                    \
+  template void ActivePooling_updateOutput<DIMENSION>(                         \
+      at::Tensor inputSize, Metadata<DIMENSION> & m,                           \
+      at::Tensor input_features, at::Tensor output_features, bool average);    \
+  template void ActivePooling_updateGradInput<DIMENSION>(                      \
+      at::Tensor inputSize, Metadata<DIMENSION> & m,                           \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, bool average);                             \
+  template void AveragePooling_updateOutput<DIMENSION>(                        \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      long nFeaturesToDrop);                                                   \
+  template void AveragePooling_updateGradInput<DIMENSION>(                     \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, long nFeaturesToDrop);                     \
+  template double Convolution_updateOutput<DIMENSION>(                         \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      at::Tensor weight, at::Tensor bias);                                     \
+  template void Convolution_backward<DIMENSION>(                               \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,    \
+      at::Tensor d_bias);                                                      \
+  template double SubmanifoldConvolution_updateOutput<DIMENSION>(              \
+      at::Tensor inputSize, at::Tensor filterSize, Metadata<DIMENSION> & m,    \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      at::Tensor weight, at::Tensor bias);                                     \
+  template void SubmanifoldConvolution_backward<DIMENSION>(                    \
+      at::Tensor inputSize, at::Tensor filterSize, Metadata<DIMENSION> & m,    \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,    \
+      at::Tensor d_bias);                                                      \
+  template double FullConvolution_updateOutput<DIMENSION>(                     \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & mIn,                      \
+      Metadata<DIMENSION> & mOut, at::Tensor input_features,                   \
+      at::Tensor output_features, at::Tensor weight, at::Tensor bias);         \
+  template void FullConvolution_backward<DIMENSION>(                           \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & mIn,                      \
+      Metadata<DIMENSION> & mOut, at::Tensor input_features,                   \
+      at::Tensor d_input_features, at::Tensor d_output_features,               \
+      at::Tensor weight, at::Tensor d_weight, at::Tensor d_bias);              \
+  template double RandomizedStrideConvolution_updateOutput<DIMENSION>(         \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      at::Tensor weight, at::Tensor bias);                                     \
+  template void RandomizedStrideConvolution_backward<DIMENSION>(               \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,    \
+      at::Tensor d_bias);                                                      \
+  template double Deconvolution_updateOutput<DIMENSION>(                       \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      at::Tensor weight, at::Tensor bias);                                     \
+  template void Deconvolution_backward<DIMENSION>(                             \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor filterSize,      \
+      at::Tensor filterStride, Metadata<DIMENSION> & m,                        \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, at::Tensor weight, at::Tensor d_weight,    \
+      at::Tensor d_bias);                                                      \
+  template void InputLayer_updateOutput<DIMENSION>(                            \
+      Metadata<DIMENSION> & m, at::Tensor spatialSize,                         \
+      at::Tensor input_coords, at::Tensor input_features,                      \
+      at::Tensor output_features, long batchSize, long mode);                  \
+  template void InputLayer_updateGradInput<DIMENSION>(                         \
+      Metadata<DIMENSION> & m, at::Tensor d_input_features,                    \
+      at::Tensor d_output_features);                                           \
+  template void OutputLayer_updateOutput<DIMENSION>(                           \
+      Metadata<DIMENSION> & m, at::Tensor input_features,                      \
+      at::Tensor output_features);                                             \
+  template void OutputLayer_updateGradInput<DIMENSION>(                        \
+      Metadata<DIMENSION> & m, at::Tensor d_input_features,                    \
+      at::Tensor d_output_features);                                           \
+  template void BLInputLayer_updateOutput<DIMENSION>(                          \
+      Metadata<DIMENSION> & m, at::Tensor spatialSize,                         \
+      at::Tensor input_coords, at::Tensor input_features,                      \
+      at::Tensor output_features, long mode);                                  \
+  template void BLInputLayer_updateGradInput<DIMENSION>(                       \
+      Metadata<DIMENSION> & m, at::Tensor d_input_features,                    \
+      at::Tensor d_output_features);                                           \
+  template void BLOutputLayer_updateOutput<DIMENSION>(                         \
+      Metadata<DIMENSION> & m, at::Tensor input_features,                      \
+      at::Tensor output_features);                                             \
+  template void BLOutputLayer_updateGradInput<DIMENSION>(                      \
+      Metadata<DIMENSION> & m, at::Tensor d_input_features,                    \
+      at::Tensor d_output_features);                                           \
+  template void MaxPooling_updateOutput<DIMENSION>(                            \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      long nFeaturesToDrop);                                                   \
+  template void MaxPooling_updateGradInput<DIMENSION>(                         \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor output_features, at::Tensor d_output_features,                \
+      long nFeaturesToDrop);                                                   \
+  template void RandomizedStrideMaxPooling_updateOutput<DIMENSION>(            \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      long nFeaturesToDrop);                                                   \
+  template void RandomizedStrideMaxPooling_updateGradInput<DIMENSION>(         \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor output_features, at::Tensor d_output_features,                \
+      long nFeaturesToDrop);                                                   \
+  template void SparseToDense_updateOutput<DIMENSION>(                         \
+      at::Tensor inputSize, Metadata<DIMENSION> & m,                           \
+      at::Tensor input_features, at::Tensor output_features, long nPlanes);    \
+  template void SparseToDense_updateGradInput<DIMENSION>(                      \
+      at::Tensor inputSize, Metadata<DIMENSION> & m,                           \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features);                                           \
+  template void UnPooling_updateOutput<DIMENSION>(                             \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor output_features,                   \
+      long nFeaturesToDrop);                                                   \
+  template void UnPooling_updateGradInput<DIMENSION>(                          \
+      at::Tensor inputSize, at::Tensor outputSize, at::Tensor poolSize,        \
+      at::Tensor poolStride, Metadata<DIMENSION> & m,                          \
+      at::Tensor input_features, at::Tensor d_input_features,                  \
+      at::Tensor d_output_features, long nFeaturesToDrop);
+
+#define DIMENSION 1 // Stamp out FOO's explicit template instantiations once per DIMENSION value (1..6).
+FOO; // FOO's body already ends in ';', so this trailing ';' is just an empty declaration.
+#undef DIMENSION // Undefine so the next #define is not a conflicting redefinition.
+#define DIMENSION 2
+FOO; // instantiations for Metadata<2> and the <2> function templates
+#undef DIMENSION
+#define DIMENSION 3
+FOO; // instantiations for Metadata<3> and the <3> function templates
+#undef DIMENSION
+#define DIMENSION 4
+FOO; // instantiations for Metadata<4> and the <4> function templates
+#undef DIMENSION
+#define DIMENSION 5
+FOO; // instantiations for Metadata<5> and the <5> function templates
+#undef DIMENSION
+#define DIMENSION 6
+FOO; // instantiations for Metadata<6> and the <6> function templates
+#undef DIMENSION // Leave DIMENSION undefined after the last expansion.
--- a/sparseconvnet/__init__.py
+++ b/sparseconvnet/__init__.py
@@ -28,10 +28,11 @@ from .sequential import Sequential
 from .sparseConvNetTensor import SparseConvNetTensor
 from .sparseToDense import SparseToDense
 from .sparsify import Sparsify
+from .spectral_norm import spectral_norm
 from .submanifoldConvolution import SubmanifoldConvolution, ValidConvolution
 from .tables import *
 from .unPooling import UnPooling
-
+from .utils import appendSparseConvTensors

 def concatenate_feature_planes(input):
    output = SparseConvNetTensor()

--- a/sparseconvnet/averagePooling.py
+++ b/sparseconvnet/averagePooling.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.

+import sparseconvnet_SCN
 from torch.autograd import Function
 from torch.nn import Module
 from .utils import *
@@ -81,7 +82,7 @@ class AveragePoolingFunction(Function):
        ctx.nFeaturesToDrop = nFeaturesToDrop
        output_features = input_features.new()

-        dim_typed_fn(dimension, input_features, 'AveragePooling_updateOutput')(
+        sparseconvnet_SCN.AveragePooling_updateOutput(
            input_spatial_size,
            output_spatial_size,
            pool_size,
@@ -108,8 +109,7 @@ class AveragePoolingFunction(Function):
        pool_size,\
        pool_stride = ctx.saved_tensors
        grad_input = grad_output.new()
-        dim_typed_fn(
-            ctx.dimension, input_features, 'AveragePooling_updateGradInput')(
+        sparseconvnet_SCN.AveragePooling_updateGradInput(
            input_spatial_size,
            output_spatial_size,
            pool_size,

--- a/sparseconvnet/batchNormalization.py
+++ b/sparseconvnet/batchNormalization.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.

-
+import sparseconvnet_SCN
 from torch.autograd import Function
 from torch.nn import Module, Parameter
 from .utils import *
@@ -107,7 +107,7 @@ class BatchNormalizationFunction(Function):
        output_features = input_features.new()
        saveMean = input_features.new().resize_(ctx.nPlanes)
        saveInvStd = runningMean.clone().resize_(ctx.nPlanes)
-        typed_fn(input_features, 'BatchNormalization_updateOutput')(
+        sparseconvnet_SCN.BatchNormalization_updateOutput(
            input_features,
            output_features,
            saveMean,
@@ -144,7 +144,7 @@ class BatchNormalizationFunction(Function):
        grad_input = grad_output.new()
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(bias)
-        typed_fn(input_features, 'BatchNormalization_backward')(
+        sparseconvnet_SCN.BatchNormalization_backward(
            input_features,
            grad_input,
            output_features,

--- a/sparseconvnet/convolution.py
+++ b/sparseconvnet/convolution.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.

-import sparseconvnet
+import sparseconvnet, sparseconvnet_SCN
 from torch.autograd import Function
 from torch.nn import Module, Parameter
 from .utils import *
@@ -91,8 +91,7 @@ class ConvolutionFunction(Function):
            filter_size,
            filter_stride)
        sparseconvnet.forward_pass_multiplyAdd_count +=\
-            dim_typed_fn(
-                dimension, input_features, 'Convolution_updateOutput')(
+            sparseconvnet_SCN.Convolution_updateOutput(
                input_spatial_size,
                output_spatial_size,
                filter_size,
@@ -111,8 +110,7 @@ class ConvolutionFunction(Function):
        grad_input = grad_output.new()
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(bias)
-        dim_typed_fn(
-            ctx.dimension, input_features, 'Convolution_backward')(
+        sparseconvnet_SCN.Convolution_backward(
            input_spatial_size,
            output_spatial_size,
            filter_size,