Commit 43d4e998 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

tidy

parent d77687a6
...@@ -12,10 +12,8 @@ torch_dir = os.path.dirname(torch.__file__) ...@@ -12,10 +12,8 @@ torch_dir = os.path.dirname(torch.__file__)
print('Building SCN module') print('Building SCN module')
if torch.cuda.is_available(): if torch.cuda.is_available():
r = os.system( s=('cd sparseconvnet/SCN; nvcc init.cu -c -o init.cu.o -ccbin /usr/bin/cc'
'cd sparseconvnet/SCN; nvcc init.cu -c -o init.cu.o -ccbin /usr/bin/cc' + ' -m64 --std c++11 -Xcompiler \"-fopenmp -fPIC -O3\" '
+ ' -m64 --std c++11 -Xcompiler '
+ ',\"-fopenmp\",\"-fPIC\",\"-O3\" '
+ '-gencode arch=compute_62,code=sm_62 ' + '-gencode arch=compute_62,code=sm_62 '
+ '-gencode arch=compute_61,code=sm_61 ' + '-gencode arch=compute_61,code=sm_61 '
+ '-gencode arch=compute_60,code=sm_60 ' + '-gencode arch=compute_60,code=sm_60 '
...@@ -28,6 +26,7 @@ if torch.cuda.is_available(): ...@@ -28,6 +26,7 @@ if torch.cuda.is_available():
+ '-I' + torch_dir + '/lib/include/TH ' + '-I' + torch_dir + '/lib/include/TH '
+ '-I' + torch_dir + '/lib/include/THC ' + '-I' + torch_dir + '/lib/include/THC '
+ '-I.') + '-I.')
r = os.system(s)
assert r == 0 assert r == 0
ffi = create_extension( ffi = create_extension(
'sparseconvnet.SCN', 'sparseconvnet.SCN',
...@@ -44,7 +43,7 @@ if torch.cuda.is_available(): ...@@ -44,7 +43,7 @@ if torch.cuda.is_available():
with_cuda=True) with_cuda=True)
else: else:
r = os.system( r = os.system(
'cd sparseconvnet/SCN; g++ -std=c++11 -DENABLE_OPENMP -fPIC -c init.cpp -o init.cpp.o -I' + 'cd sparseconvnet/SCN; g++ -fopenmp -std=c++11 -O3 -fPIC -c init.cpp -o init.cpp.o -I' +
torch_dir + torch_dir +
'/lib/include -I' + '/lib/include -I' +
torch_dir + torch_dir +
......
...@@ -16,11 +16,11 @@ extern "C" void scn_DR_(InputLayer_updateOutput)( ...@@ -16,11 +16,11 @@ extern "C" void scn_DR_(InputLayer_updateOutput)(
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
_m.inputLayer(spatialSize, input_coords, batchSize, mode); _m.inputLayer(spatialSize, input_coords, batchSize, mode);
auto nPlanes = input_features->size[1]; auto nPlanes = input_features->size[1];
THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
THTensor_(zero)(output_features);
auto &rules = _m.inputLayerRuleBook; auto &rules = _m.inputLayerRuleBook;
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][3]; auto nRows = rules[0][3];
THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
THTensor_(zero)(output_features);
InputLayer_ForwardPass<real>(THTensor_(data)(input_features), InputLayer_ForwardPass<real>(THTensor_(data)(input_features),
THTensor_(data)(output_features), nRows, THTensor_(data)(output_features), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4); maxActive, nPlanes, &rules[1][0], mode == 4);
...@@ -49,14 +49,20 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)( ...@@ -49,14 +49,20 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)(
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
_m.blLayer(spatialSize, input_coords, mode); _m.blLayer(spatialSize, input_coords, mode);
auto nPlanes = input_features->size[2]; auto nPlanes = input_features->size[2];
THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
THTensor_(zero)(output_features);
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][4]; auto nRows = rules[0][4];
InputLayer_ForwardPass<real>(THTensor_(data)(input_features), if (mode == 0) {
THTensor_(data)(output_features), nRows, THTensor_(resizeAs)(output_features, input_features);
maxActive, nPlanes, &rules[1][0], mode == 4); THTensor_(copy)(output_features, input_features);
THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
} else {
THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
THTensor_(zero)(output_features);
InputLayer_ForwardPass<real>(THTensor_(data)(input_features),
THTensor_(data)(output_features), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4);
}
} }
extern "C" void extern "C" void
scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features, scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features,
...@@ -65,32 +71,44 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features, ...@@ -65,32 +71,44 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features,
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
auto nPlanes = d_output_features->size[1]; auto nPlanes = d_output_features->size[1];
THTensor_(resize3d)(d_input_features, rules[0][2], rules[0][3], nPlanes);
THTensor_(zero)(d_input_features);
auto mode = rules[0][0]; auto mode = rules[0][0];
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][4]; auto nRows = rules[0][4];
InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features), if (mode == 0) {
THTensor_(data)(d_output_features), nRows, THTensor_(resizeAs)(d_input_features, d_output_features);
maxActive, nPlanes, &rules[1][0], mode == 4); THTensor_(copy)(d_input_features, d_output_features);
THTensor_(resize3d)(d_input_features, rules[0][2], rules[0][3], nPlanes);
} else {
THTensor_(resize3d)(d_input_features, rules[0][2], rules[0][3], nPlanes);
THTensor_(zero)(d_input_features);
InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features),
THTensor_(data)(d_output_features), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4);
}
} }
extern "C" void scn_DR_(BLOutputLayer_updateOutput)( extern "C" void scn_DR_(BLOutputLayer_updateOutput)(void **m,
void **m, THTensor *input_features,
THTensor *input_features, THTensor *output_features, THTensor *output_features,
void *rulesBuffer) { void *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
auto nPlanes = input_features->size[1]; auto nPlanes = input_features->size[1];
THTensor_(resize3d)(output_features, rules[0][2], rules[0][3], nPlanes);
THTensor_(zero)(output_features);
auto mode = rules[0][0]; auto mode = rules[0][0];
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][4]; auto nRows = rules[0][4];
InputLayer_BackwardPass<real>(THTensor_(data)(output_features), if (mode == 0) {
THTensor_(data)(input_features), nRows, THTensor_(resizeAs)(output_features, input_features);
maxActive, nPlanes, &rules[1][0], false); THTensor_(copy)(output_features, input_features);
THTensor_(resize3d)(output_features, rules[0][2], rules[0][3], nPlanes);
} else {
THTensor_(resize3d)(output_features, rules[0][2], rules[0][3], nPlanes);
THTensor_(zero)(output_features);
InputLayer_BackwardPass<real>(THTensor_(data)(output_features),
THTensor_(data)(input_features), nRows,
maxActive, nPlanes, &rules[1][0], false);
}
} }
extern "C" void extern "C" void
scn_DR_(BLOutputLayer_updateGradInput)(void **m, THTensor *d_input_features, scn_DR_(BLOutputLayer_updateGradInput)(void **m, THTensor *d_input_features,
...@@ -102,11 +120,16 @@ scn_DR_(BLOutputLayer_updateGradInput)(void **m, THTensor *d_input_features, ...@@ -102,11 +120,16 @@ scn_DR_(BLOutputLayer_updateGradInput)(void **m, THTensor *d_input_features,
auto mode = rules[0][0]; auto mode = rules[0][0];
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][4]; auto nRows = rules[0][4];
THTensor_(resize2d)(d_input_features, nRows, nPlanes); if (mode == 0) {
THTensor_(zero)(d_input_features); THTensor_(resizeAs)(d_input_features, d_output_features);
THTensor_(copy)(d_input_features, d_output_features);
InputLayer_ForwardPass<real>(THTensor_(data)(d_output_features), THTensor_(resize2d)(d_input_features, nRows, nPlanes);
THTensor_(data)(d_input_features), nRows, } else {
maxActive, nPlanes, &rules[1][0], false); THTensor_(resize2d)(d_input_features, nRows, nPlanes);
THTensor_(zero)(d_input_features);
InputLayer_ForwardPass<real>(THTensor_(data)(d_output_features),
THTensor_(data)(d_input_features), nRows,
maxActive, nPlanes, &rules[1][0], false);
}
} }
#endif #endif
...@@ -22,15 +22,15 @@ extern "C" void scn_DR_(InputLayer_updateOutput)( ...@@ -22,15 +22,15 @@ extern "C" void scn_DR_(InputLayer_updateOutput)(
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][3]; uInt nRows = rules[0][3];
THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size()); THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
auto iF = THCTensor_(data)(state, input_features); auto iF = THCTensor_(data)(state, input_features);
auto oF = THCTensor_(data)(state, output_features); auto oF = THCTensor_(data)(state, output_features);
auto rb = (uInt*) THCITensor_data(state, rulesBuffer); auto rb = (uInt*) THCITensor_data(state, rulesBuffer);
cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(), cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
cudaMemcpyHostToDevice); cudaMemcpyHostToDevice);
InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0, InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
THCState_getCurrentStream(state)>>>( THCState_getCurrentStream(state)>>>(
iF, oF, nRows, maxActive, nPlanes, rb, mode == 4); iF, oF, nRows, maxActive, nPlanes, rb, mode == 4);
} }
extern "C" void extern "C" void
scn_DR_(InputLayer_updateGradInput)(void **m, THCTensor *d_input_features, scn_DR_(InputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
...@@ -41,7 +41,7 @@ scn_DR_(InputLayer_updateGradInput)(void **m, THCTensor *d_input_features, ...@@ -41,7 +41,7 @@ scn_DR_(InputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
uInt nPlanes = d_output_features->size[1]; uInt nPlanes = d_output_features->size[1];
THCTensor_(resize2d)(state, d_input_features, rules[0][2], nPlanes); THCTensor_(resize2d)(state, d_input_features, rules[0][2], nPlanes);
THCTensor_(zero)(state, d_input_features); THCTensor_(zero)(state, d_input_features);
uInt mode = rules[0][0]; auto mode = rules[0][0];
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][3]; uInt nRows = rules[0][3];
...@@ -69,6 +69,11 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)( ...@@ -69,6 +69,11 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)(
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][4]; uInt nRows = rules[0][4];
if (mode == 0) {
THCTensor_(resizeAs)(state, output_features, input_features);
THCTensor_(copy)(state, output_features, input_features);
THCTensor_(resize2d)(state, output_features, *_m.inputNActive, nPlanes);
} else {
THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size()); THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
auto iF = THCTensor_(data)(state, input_features); auto iF = THCTensor_(data)(state, input_features);
auto oF = THCTensor_(data)(state, output_features); auto oF = THCTensor_(data)(state, output_features);
...@@ -78,6 +83,7 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)( ...@@ -78,6 +83,7 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)(
InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0, InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
THCState_getCurrentStream(state)>>>( THCState_getCurrentStream(state)>>>(
iF, oF, nRows, maxActive, nPlanes, rb, mode == 4); iF, oF, nRows, maxActive, nPlanes, rb, mode == 4);
}
} }
extern "C" void extern "C" void
scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features, scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
...@@ -86,11 +92,17 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features, ...@@ -86,11 +92,17 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
uInt nPlanes = d_output_features->size[1]; uInt nPlanes = d_output_features->size[1];
THCTensor_(resize3d)(state, d_input_features, rules[0][2], rules[0][3], nPlanes);
THCTensor_(zero)(state, d_input_features);
uInt mode = rules[0][0]; uInt mode = rules[0][0];
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][4]; uInt nRows = rules[0][4];
if (mode == 0) {
THCTensor_(resizeAs)(state, d_input_features, d_output_features);
THCTensor_(copy)(state, d_input_features, d_output_features);
THCTensor_(resize3d)(state, d_input_features, rules[0][2], rules[0][3], nPlanes);
} else {
THCTensor_(resize3d)(state, d_input_features, rules[0][2], rules[0][3], nPlanes);
THCTensor_(zero)(state, d_input_features);
THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size()); THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
auto diF = THCTensor_(data)(state, d_input_features); auto diF = THCTensor_(data)(state, d_input_features);
auto doF = THCTensor_(data)(state, d_output_features); auto doF = THCTensor_(data)(state, d_output_features);
...@@ -100,6 +112,7 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features, ...@@ -100,6 +112,7 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0, InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
THCState_getCurrentStream(state)>>>( THCState_getCurrentStream(state)>>>(
diF, doF, nRows, maxActive, nPlanes, rb, mode == 4); diF, doF, nRows, maxActive, nPlanes, rb, mode == 4);
}
} }
extern "C" void scn_DR_(BLOutputLayer_updateOutput)( extern "C" void scn_DR_(BLOutputLayer_updateOutput)(
...@@ -109,20 +122,26 @@ extern "C" void scn_DR_(BLOutputLayer_updateOutput)( ...@@ -109,20 +122,26 @@ extern "C" void scn_DR_(BLOutputLayer_updateOutput)(
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
uInt nPlanes = input_features->size[1]; uInt nPlanes = input_features->size[1];
THCTensor_(resize3d)(state, output_features, rules[0][2], rules[0][3], nPlanes);
THCTensor_(zero)(state, output_features);
auto mode = rules[0][0]; auto mode = rules[0][0];
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][4]; uInt nRows = rules[0][4];
THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size()); if (mode==0) {
auto iF = THCTensor_(data)(state, input_features); THCTensor_(resizeAs)(state, output_features, input_features);
auto oF = THCTensor_(data)(state, output_features); THCTensor_(copy)(state, output_features, input_features);
auto rb = (uInt*) THCITensor_data(state, rulesBuffer); THCTensor_(resize3d)(state, output_features, rules[0][2], rules[0][3], nPlanes);
cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(), } else {
cudaMemcpyHostToDevice); THCTensor_(resize3d)(state, output_features, rules[0][2], rules[0][3], nPlanes);
InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0, THCTensor_(zero)(state, output_features);
THCState_getCurrentStream(state)>>>( THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
oF, iF, nRows, maxActive, nPlanes, rb, false); auto iF = THCTensor_(data)(state, input_features);
auto oF = THCTensor_(data)(state, output_features);
auto rb = (uInt*) THCITensor_data(state, rulesBuffer);
cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
cudaMemcpyHostToDevice);
InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
THCState_getCurrentStream(state)>>>(
oF, iF, nRows, maxActive, nPlanes, rb, false);
}
} }
extern "C" void extern "C" void
scn_DR_(BLOutputLayer_updateGradInput)(void **m, THCTensor *d_input_features, scn_DR_(BLOutputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
...@@ -134,16 +153,22 @@ scn_DR_(BLOutputLayer_updateGradInput)(void **m, THCTensor *d_input_features, ...@@ -134,16 +153,22 @@ scn_DR_(BLOutputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
uInt mode = rules[0][0]; uInt mode = rules[0][0];
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][4]; uInt nRows = rules[0][4];
THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes); if (mode==0) {
THCTensor_(zero)(state, d_input_features); THCTensor_(resizeAs)(state, d_input_features, d_output_features);
THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size()); THCTensor_(copy)(state, d_input_features, d_output_features);
auto diF = THCTensor_(data)(state, d_input_features); THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes);
auto doF = THCTensor_(data)(state, d_output_features); } else {
auto rb = (uInt*)THCITensor_data(state, rulesBuffer); THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes);
cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(), THCTensor_(zero)(state, d_input_features);
cudaMemcpyHostToDevice); THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0, auto diF = THCTensor_(data)(state, d_input_features);
THCState_getCurrentStream(state)>>>( auto doF = THCTensor_(data)(state, d_output_features);
doF, diF, nRows, maxActive, nPlanes, rb, false); auto rb = (uInt*)THCITensor_data(state, rulesBuffer);
cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
cudaMemcpyHostToDevice);
InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
THCState_getCurrentStream(state)>>>(
doF, diF, nRows, maxActive, nPlanes, rb, false);
}
} }
#endif #endif
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
#ifndef INPUTLAYER_H #ifndef INPUTLAYER_H
#define INPUTLAYER_H #define INPUTLAYER_H
#include "../SparseConvNet.h" #include "../SparseConvNet.h"
#include <omp.h>
// mode 1==overwrite, 2=keep, 3=sum, 4=mean // mode 1==overwrite, 2=keep, 3=sum, 4=mean
template <uInt dimension> template <uInt dimension>
...@@ -100,6 +99,12 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -100,6 +99,12 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
if (mode == 0) { if (mode == 0) {
nActive = batchSize * length; nActive = batchSize * length;
rules.resize(1);
rules[0].push_back(mode);
rules[0].push_back(1);
rules[0].push_back(batchSize);
rules[0].push_back(length);
rules[0].push_back(nActive);
#pragma omp parallel for private(I) #pragma omp parallel for private(I)
for (I = 0; I < batchSize; I++) { for (I = 0; I < batchSize; I++) {
auto &sg = SGs[I]; auto &sg = SGs[I];
...@@ -113,20 +118,6 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -113,20 +118,6 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
sg.mp[p] = l; sg.mp[p] = l;
} }
} }
rules.resize(2);
rules[0].push_back(0);
rules[0].push_back(1);
rules[0].push_back(batchSize);
rules[0].push_back(length);
rules[0].push_back(nActive);
auto &rule = rules[1];
int ll = 0;
for (I = 0; I < batchSize; I++) {
for (int l = 0; l < length; ++l, ++ll) {
rule.push_back(1);
rule.push_back(ll);
}
}
return; return;
} }
...@@ -199,10 +190,10 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -199,10 +190,10 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
} }
} }
if (mode == 3 or mode == 4) { if (mode == 3 or mode == 4) {
std::cout << omp_get_num_threads() << std::endl;
rule.resize((maxActive + 1) * nActive); rule.resize((maxActive + 1) * nActive);
#pragma omp parallel for private(I) #pragma omp parallel for private(I)
for (I = 0; I < batchSize; I++) { for (I = 0; I < batchSize; I++) {
std::cout << omp_get_num_threads() << "\n";
auto &ors = outputRows[I]; auto &ors = outputRows[I];
auto rr = &rule[SGs[I].ctr * (maxActive + 1)]; auto rr = &rule[SGs[I].ctr * (maxActive + 1)];
for (auto &row : ors) { for (auto &row : ors) {
......
...@@ -101,17 +101,17 @@ class AveragePooling(Module): ...@@ -101,17 +101,17 @@ class AveragePooling(Module):
def __repr__(self): def __repr__(self):
s = 'AveragePooling' s = 'AveragePooling'
if self.pool_size.max() == self.pool_size.min() and\ if self.pool_size.max().item() == self.pool_size.min().item() and\
self.pool_stride.max() == self.pool_stride.min(): self.pool_stride.max().item() == self.pool_stride.min().item():
s = s + str(self.pool_size[0].item()) + \ s = s + str(self.pool_size[0].item()) + \
'/' + str(self.pool_stride[0].item()) '/' + str(self.pool_stride[0].item())
else: else:
s = s + '(' + str(self.pool_size[0].item()) s = s + '(' + str(self.pool_size[0].item())
for i in self.pool_size[1:]: for i in self.pool_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')/(' + str(self.pool_stride[0].item()) s = s + ')/(' + str(self.pool_stride[0].item())
for i in self.pool_stride[1:]: for i in self.pool_stride[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')' s = s + ')'
if self.nFeaturesToDrop > 0: if self.nFeaturesToDrop > 0:
......
...@@ -127,17 +127,17 @@ class Convolution(Module): ...@@ -127,17 +127,17 @@ class Convolution(Module):
def __repr__(self): def __repr__(self):
s = 'Convolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C' s = 'Convolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
if self.filter_size.max() == self.filter_size.min() and\ if self.filter_size.max().item() == self.filter_size.min().item() and\
self.filter_stride.max() == self.filter_stride.min(): self.filter_stride.max().item() == self.filter_stride.min().item():
s = s + str(self.filter_size[0].item()) + \ s = s + str(self.filter_size[0].item()) + \
'/' + str(self.filter_stride[0].item()) '/' + str(self.filter_stride[0].item())
else: else:
s = s + '(' + str(self.filter_size[0].item()) s = s + '(' + str(self.filter_size[0].item())
for i in self.filter_size[1:]: for i in self.filter_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')/(' + str(self.filter_stride[0].item()) s = s + ')/(' + str(self.filter_stride[0].item())
for i in self.filter_stride[1:]: for i in self.filter_stride[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')' s = s + ')'
return s return s
......
...@@ -127,17 +127,17 @@ class Deconvolution(Module): ...@@ -127,17 +127,17 @@ class Deconvolution(Module):
def __repr__(self): def __repr__(self):
s = 'Deconvolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C' s = 'Deconvolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
if self.filter_size.max() == self.filter_size.min() and\ if self.filter_size.max().item() == self.filter_size.min().item() and\
self.filter_stride.max() == self.filter_stride.min(): self.filter_stride.max().item() == self.filter_stride.min().item():
s = s + str(self.filter_size[0].item()) + \ s = s + str(self.filter_size[0].item()) + \
'/' + str(self.filter_stride[0].item()) '/' + str(self.filter_stride[0].item())
else: else:
s = s + '(' + str(self.filter_size[0].item()) s = s + '(' + str(self.filter_size[0].item())
for i in self.filter_size[1:]: for i in self.filter_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')/(' + str(self.filter_stride[0].item()) s = s + ')/(' + str(self.filter_stride[0].item())
for i in self.filter_stride[1:]: for i in self.filter_stride[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')' s = s + ')'
return s return s
......
...@@ -72,12 +72,14 @@ class InputBatch(SparseConvNetTensor): ...@@ -72,12 +72,14 @@ class InputBatch(SparseConvNetTensor):
self.spatial_size, self.spatial_size,
threshold) threshold)
def precomputeMetadata(self, stride): def precomputeMetadata(self, size):
if stride == 2: """
Optional.
Allows precomputation of 'rulebooks' in data loading threads.
Use size == 2 if downsizing with size-2 stride-2 operations
Use size == 3 if downsizing with size-3 stride-2 operations
"""
if size == 2:
dim_fn(self.dimension, 'generateRuleBooks2s2')(self.metadata.ffi) dim_fn(self.dimension, 'generateRuleBooks2s2')(self.metadata.ffi)
else: if size == 3 :
dim_fn(self.dimension, 'generateRuleBooks3s2')(self.metadata.ffi) dim_fn(self.dimension, 'generateRuleBooks3s2')(self.metadata.ffi)
def __repr__(self):
return 'InputBatch<<' + repr(self.features) + repr(self.metadata) + \
repr(self.spatial_size) + '>>'
...@@ -10,6 +10,118 @@ from .utils import * ...@@ -10,6 +10,118 @@ from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor from .sparseConvNetTensor import SparseConvNetTensor
from .metadata import Metadata from .metadata import Metadata
class InputLayer(Module):
    """
    Entry point turning raw point data into a SparseConvNetTensor.

    Input is a tuple (coords, features, batch_size [optional]):
    * coords: 2d LongTensor, either N x dimension (single sample) or
      N x (dimension+1) where the final column is the batch index.
    * features: tensor of size N x n_feature_planes.
    * batch_size: optional lower bound on the number of samples in the
      output; normally inferred from the last coords column, but inference
      fails when some batch items contain no points at all.

    Duplicate coordinates are resolved according to mode:
      1 - overwrite with the last item at each location
      2 - keep the first item at each location
      3 - sum feature vectors sharing a location
      4 - average feature vectors sharing a location
    """

    def __init__(self, dimension, spatial_size, mode=3):
        Module.__init__(self)
        self.dimension = dimension
        self.spatial_size = toLongTensor(dimension, spatial_size)
        self.mode = mode

    def forward(self, input):
        coords, features = input[0], input[1]
        # Third element, when supplied, is an explicit batch size.
        batch_size = 0 if len(input) == 2 else input[2]
        out = SparseConvNetTensor(
            metadata=Metadata(self.dimension),
            spatial_size=self.spatial_size)
        out.features = InputLayerFunction.apply(
            self.dimension,
            out.metadata,
            self.spatial_size,
            coords,
            features,
            batch_size,
            self.mode)
        return out
class BLInputLayer(Module):
    """
    Batch/length-layout input layer producing a SparseConvNetTensor.

    Input is a tuple (coords, features):
    * coords: 3d LongTensor of size batch_size x length x dimension.
      Entries must be >= 0; a value of -1 marks an 'empty' slot.
    * features: 3d float Tensor of size batch_size x length x n_feature_planes.

    Duplicate handling is selected by mode:
      0 - assume each coords[i, :] is unique and never 'empty'
      1 - use the last item at each spatial location
      2 - keep the first item at each spatial location
      3 - sum feature vectors sharing a location
      4 - average feature vectors sharing a location
    """

    def __init__(self, dimension, spatial_size, mode=3):
        Module.__init__(self)
        self.dimension = dimension
        self.spatial_size = toLongTensor(dimension, spatial_size)
        self.mode = mode

    def forward(self, input):
        # input unpacks as (coords, input_features)
        coords, features = input[0], input[1]
        out = SparseConvNetTensor(
            metadata=Metadata(self.dimension),
            spatial_size=self.spatial_size)
        out.features = BLInputLayerFunction.apply(
            self.dimension,
            out.metadata,
            self.spatial_size,
            coords,
            features,
            self.mode)
        return out
class BLOutputLayer(Module):
    """
    Counterpart of BLInputLayer for 'autoencoder' style networks.

    Consumes a SparseConvNetTensor and emits a dense float Tensor of size
    batch_size x length x n_feature_planes, where batch_size and length
    were fixed by the matching BLInputLayer. Forward/backward behavior
    follows the mode chosen on that BLInputLayer (stored in the metadata).
    """

    def __init__(self, dimension):
        Module.__init__(self)
        self.dimension = dimension

    def forward(self, input):
        # Delegate straight to the autograd function; metadata carries the
        # rulebook built by the paired BLInputLayer.
        return BLOutputLayerFunction.apply(
            self.dimension,
            input.metadata,
            input.features)
class InputLayerFunction(Function): class InputLayerFunction(Function):
@staticmethod @staticmethod
...@@ -30,7 +142,7 @@ class InputLayerFunction(Function): ...@@ -30,7 +142,7 @@ class InputLayerFunction(Function):
metadata.ffi, metadata.ffi,
spatial_size, spatial_size,
coords, coords,
input_features, input_features.contiguous(),
output_features, output_features,
batch_size, batch_size,
mode, mode,
...@@ -52,29 +164,6 @@ class InputLayerFunction(Function): ...@@ -52,29 +164,6 @@ class InputLayerFunction(Function):
return None, None, None, None, grad_input, None, None return None, None, None, None, grad_input, None, None
class InputLayer(Module):
    """
    Converts a (coords, features, batch_size [optional]) tuple into a
    SparseConvNetTensor with freshly-built metadata.

    Parameters of __init__:
    * dimension: number of spatial dimensions.
    * spatial_size: int or sequence; broadcast via toLongTensor.
    * mode: duplicate-coordinate policy passed to the C backend
      (1=overwrite, 2=keep first, 3=sum, 4=mean).
    """
    def __init__(self, dimension, spatial_size, mode=3):
        Module.__init__(self)
        self.dimension = dimension
        self.spatial_size = toLongTensor(dimension, spatial_size)
        self.mode = mode
    # (coords, input_features, batch_size or None) = input
    def forward(self, input):
        output = SparseConvNetTensor(
            metadata=Metadata(
                self.dimension),
            spatial_size=self.spatial_size)
        output.features = InputLayerFunction.apply(
            self.dimension,
            output.metadata,
            self.spatial_size,
            input[0],
            input[1],
            # BUG FIX: was `len(input == 2)` — that compares the whole tuple
            # to the integer 2 (always False) and then calls len() on a bool,
            # raising TypeError on every 3-element input. The intended check
            # is on the tuple's length.
            0 if len(input) == 2 else input[2],
            self.mode
        )
        return output
class BLInputLayerFunction(Function): class BLInputLayerFunction(Function):
...@@ -94,7 +183,7 @@ class BLInputLayerFunction(Function): ...@@ -94,7 +183,7 @@ class BLInputLayerFunction(Function):
metadata.ffi, metadata.ffi,
spatial_size, spatial_size,
coords, coords,
input_features, input_features.contiguous(),
output_features, output_features,
mode, mode,
torch.cuda.IntTensor() if input_features.is_cuda else nullptr torch.cuda.IntTensor() if input_features.is_cuda else nullptr
...@@ -115,28 +204,7 @@ class BLInputLayerFunction(Function): ...@@ -115,28 +204,7 @@ class BLInputLayerFunction(Function):
return None, None, None, None, grad_input, None return None, None, None, None, grad_input, None
class BLInputLayer(Module):
    """
    Input layer for batch x length coordinate layout.

    Expects input as a (coords, input_features) pair and returns a
    SparseConvNetTensor whose features are produced by
    BLInputLayerFunction using this layer's mode and spatial size.
    """
    def __init__(self, dimension, spatial_size, mode=3):
        Module.__init__(self)
        self.dimension = dimension
        self.spatial_size = toLongTensor(dimension, spatial_size)
        self.mode = mode

    def forward(self, input):
        # input is the pair (coords, input_features)
        coords, features = input[0], input[1]
        result = SparseConvNetTensor(
            metadata=Metadata(self.dimension),
            spatial_size=self.spatial_size)
        result.features = BLInputLayerFunction.apply(
            self.dimension,
            result.metadata,
            self.spatial_size,
            coords,
            features,
            self.mode)
        return result
class BLOutputLayerFunction(Function): class BLOutputLayerFunction(Function):
...@@ -151,7 +219,7 @@ class BLOutputLayerFunction(Function): ...@@ -151,7 +219,7 @@ class BLOutputLayerFunction(Function):
ctx.dimension = dimension ctx.dimension = dimension
dim_typed_fn(dimension, input_features, 'BLOutputLayer_updateOutput')( dim_typed_fn(dimension, input_features, 'BLOutputLayer_updateOutput')(
metadata.ffi, metadata.ffi,
input_features, input_features.contiguous(),
output_features, output_features,
torch.cuda.IntTensor() if input_features.is_cuda else nullptr torch.cuda.IntTensor() if input_features.is_cuda else nullptr
) )
...@@ -169,17 +237,3 @@ class BLOutputLayerFunction(Function): ...@@ -169,17 +237,3 @@ class BLOutputLayerFunction(Function):
grad_output.contiguous().data, grad_output.contiguous().data,
torch.cuda.IntTensor() if grad_output.data.is_cuda else nullptr) torch.cuda.IntTensor() if grad_output.data.is_cuda else nullptr)
return None, None, grad_input return None, None, grad_input
class BLOutputLayer(Module):
    """
    Maps a SparseConvNetTensor back to the dense batch x length layout
    established by the corresponding BLInputLayer, by applying
    BLOutputLayerFunction to the tensor's metadata and features.
    """
    def __init__(self, dimension):
        Module.__init__(self)
        self.dimension = dimension

    def forward(self, input):
        # No state beyond `dimension`; everything else lives in the
        # metadata attached to the incoming tensor.
        return BLOutputLayerFunction.apply(
            self.dimension,
            input.metadata,
            input.features)
...@@ -101,17 +101,17 @@ class MaxPooling(Module): ...@@ -101,17 +101,17 @@ class MaxPooling(Module):
def __repr__(self): def __repr__(self):
s = 'MaxPooling' s = 'MaxPooling'
if self.pool_size.max() == self.pool_size.min() and\ if self.pool_size.max().item() == self.pool_size.min().item() and\
self.pool_stride.max() == self.pool_stride.min(): self.pool_stride.max().item() == self.pool_stride.min().item():
s = s + str(self.pool_size[0].item()) + \ s = s + str(self.pool_size[0].item()) + \
'/' + str(self.pool_stride[0].item()) '/' + str(self.pool_stride[0].item())
else: else:
s = s + '(' + str(self.pool_size[0].item()) s = s + '(' + str(self.pool_size[0].item())
for i in self.pool_size[1:]: for i in self.pool_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')/(' + str(self.pool_stride[0].item()) s = s + ')/(' + str(self.pool_stride[0].item())
for i in self.pool_stride[1:]: for i in self.pool_stride[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')' s = s + ')'
if self.nFeaturesToDrop > 0: if self.nFeaturesToDrop > 0:
......
...@@ -16,7 +16,7 @@ class SparseConvNetTensor(object): ...@@ -16,7 +16,7 @@ class SparseConvNetTensor(object):
self.metadata = metadata self.metadata = metadata
self.spatial_size = spatial_size self.spatial_size = spatial_size
def getSpatialLocations(self, spatial_size=None): def get_spatial_locations(self, spatial_size=None):
"Coordinates and batch index for the active spatial locations" "Coordinates and batch index for the active spatial locations"
if spatial_size is None: if spatial_size is None:
spatial_size = self.spatial_size spatial_size = self.spatial_size
...@@ -51,7 +51,10 @@ class SparseConvNetTensor(object): ...@@ -51,7 +51,10 @@ class SparseConvNetTensor(object):
def __repr__(self): def __repr__(self):
return 'SparseConvNetTensor<<' + \ return 'SparseConvNetTensor<<' + \
repr(self.features) + repr(self.metadata) + repr(self.spatial_size) + '>>' repr(self.features) + \
repr(self.get_spatial_locations() if self.metadata else None) + \
repr(self.spatial_size) + \
'>>'
def to_variable(self, requires_grad=False, volatile=False): def to_variable(self, requires_grad=False, volatile=False):
"Convert self.features to a variable for use with modern PyTorch interface." "Convert self.features to a variable for use with modern PyTorch interface."
......
...@@ -118,7 +118,7 @@ class SubmanifoldConvolution(Module): ...@@ -118,7 +118,7 @@ class SubmanifoldConvolution(Module):
else: else:
s = s + '(' + str(self.filter_size[0].item()) s = s + '(' + str(self.filter_size[0].item())
for i in self.filter_size[1:]: for i in self.filter_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')' s = s + ')'
return s return s
......
...@@ -19,7 +19,7 @@ if not os.path.exists('pickle/'): ...@@ -19,7 +19,7 @@ if not os.path.exists('pickle/'):
import process import process
def train(spatial_size, Scale, precomputeStride): def train(spatial_size, Scale, precomputeSize):
d = pickle.load(open('pickle/train.pickle', 'rb')) d = pickle.load(open('pickle/train.pickle', 'rb'))
print('Replicating training set 10 times (1 epoch = 10 iterations through the training set = 10x6588 training samples)') print('Replicating training set 10 times (1 epoch = 10 iterations through the training set = 10x6588 training samples)')
for i in range(9): for i in range(9):
...@@ -82,7 +82,7 @@ def train(spatial_size, Scale, precomputeStride): ...@@ -82,7 +82,7 @@ def train(spatial_size, Scale, precomputeStride):
# p[1]=math.floor(y1*j+y2*(1-j)) # p[1]=math.floor(y1*j+y2*(1-j))
# inp.setLocation(p,v,False) # inp.setLocation(p,v,False)
############################################################### ###############################################################
inp.precomputeMetadata(precomputeStride) inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1} return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1}
bd = torchnet.dataset.BatchDataset(d, 108, perm=perm, merge=merge) bd = torchnet.dataset.BatchDataset(d, 108, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd) tdi = scn.threadDatasetIterator(bd)
...@@ -93,7 +93,7 @@ def train(spatial_size, Scale, precomputeStride): ...@@ -93,7 +93,7 @@ def train(spatial_size, Scale, precomputeStride):
return iter return iter
def val(spatial_size, Scale, precomputeStride): def val(spatial_size, Scale, precomputeSize):
d = pickle.load(open('pickle/test.pickle', 'rb')) d = pickle.load(open('pickle/test.pickle', 'rb'))
d = torchnet.dataset.ListDataset(d) d = torchnet.dataset.ListDataset(d)
randperm = torch.randperm(len(d)) randperm = torch.randperm(len(d))
...@@ -117,7 +117,7 @@ def val(spatial_size, Scale, precomputeStride): ...@@ -117,7 +117,7 @@ def val(spatial_size, Scale, precomputeStride):
inp.metadata.ffi, inp.metadata.ffi,
inp.features, inp.features,
stroke) stroke)
inp.precomputeMetadata(precomputeStride) inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1} return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1}
bd = torchnet.dataset.BatchDataset(d, 183, perm=perm, merge=merge) bd = torchnet.dataset.BatchDataset(d, 183, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd) tdi = scn.threadDatasetIterator(bd)
......
...@@ -25,7 +25,7 @@ if not os.path.exists('pickle/'): ...@@ -25,7 +25,7 @@ if not os.path.exists('pickle/'):
os.system('python readPotFiles2.py') os.system('python readPotFiles2.py')
def train(spatial_size, Scale, precomputeStride): def train(spatial_size, Scale, precomputeSize):
d = pickle.load(open('pickle/train.pickle', 'rb')) d = pickle.load(open('pickle/train.pickle', 'rb'))
d = torchnet.dataset.ListDataset(d) d = torchnet.dataset.ListDataset(d)
randperm = torch.randperm(len(d)) randperm = torch.randperm(len(d))
...@@ -68,7 +68,7 @@ def train(spatial_size, Scale, precomputeStride): ...@@ -68,7 +68,7 @@ def train(spatial_size, Scale, precomputeStride):
# p[1]=math.floor(y1*j+y2*(1-j)) # p[1]=math.floor(y1*j+y2*(1-j))
# inp.setLocation(p,v,False) # inp.setLocation(p,v,False)
############################################################### ###############################################################
inp.precomputeMetadata(precomputeStride) inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target'])} return {'input': inp, 'target': torch.LongTensor(tbl['target'])}
bd = torchnet.dataset.BatchDataset(d, 100, perm=perm, merge=merge) bd = torchnet.dataset.BatchDataset(d, 100, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd) tdi = scn.threadDatasetIterator(bd)
...@@ -79,7 +79,7 @@ def train(spatial_size, Scale, precomputeStride): ...@@ -79,7 +79,7 @@ def train(spatial_size, Scale, precomputeStride):
return iter return iter
def val(spatial_size, Scale, precomputeStride): def val(spatial_size, Scale, precomputeSize):
d = pickle.load(open('pickle/test.pickle', 'rb')) d = pickle.load(open('pickle/test.pickle', 'rb'))
d = torchnet.dataset.ListDataset(d) d = torchnet.dataset.ListDataset(d)
randperm = torch.randperm(len(d)) randperm = torch.randperm(len(d))
...@@ -103,7 +103,7 @@ def val(spatial_size, Scale, precomputeStride): ...@@ -103,7 +103,7 @@ def val(spatial_size, Scale, precomputeStride):
inp.metadata.ffi, inp.metadata.ffi,
inp.features, inp.features,
stroke) stroke)
inp.precomputeMetadata(precomputeStride) inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target'])} return {'input': inp, 'target': torch.LongTensor(tbl['target'])}
bd = torchnet.dataset.BatchDataset(d, 100, perm=perm, merge=merge) bd = torchnet.dataset.BatchDataset(d, 100, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd) tdi = scn.threadDatasetIterator(bd)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment