Commit 2c4ed608 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

Goodbye THNN. Hello ATen!

parent 6d4475db
......@@ -5,66 +5,58 @@
// LICENSE file in the root directory of this source tree.
#include <array>
#include <tuple>
// Using 32 bit integers for coordinates and memory calculations.
// They could be replaced with 64 bit integers.
// Advantages of 64 bit:
// - support for nFeatures * nActiveSites > 2^32 per hidden layer per batch
// Disadvantages:
// - larger, and therefore slower, data copies from CPU -> GPU
// - more device memory needed to store sparseconvnet 'rulebooks'
// - not really needed until GPUs have >> 32GB RAM
// 32-bit coordinate type; see the trade-off discussion in the comment above.
using Int = int32_t;
// Unsigned companion type; the value uInt_MAX is a sentinel for 'non-existent'.
using uInt = uint32_t;
const uInt uInt_MAX = 0xFFFFFFFFU; // 2^32-1
const uInt Int_MAX = 0x7FFFFFFFU;  // 2^31-1
// Point<dimension> is a point in the d-dimensional integer lattice
// (i.e. square-grid/cubic-grid, ...)
template <Int dimension> using Point = std::array<Int, dimension>;
template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) {
template <Int dimension>
Point<dimension> LongTensorToPoint(/*long*/ at::Tensor &t) {
Point<dimension> p;
long *td = THLongTensor_data(t);
for (int i = 0; i < dimension; i++)
long *td = t.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
return p;
}
template <uInt dimension>
Point<2 * dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) {
template <Int dimension>
Point<2 * dimension> TwoLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1) {
Point<2 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
return p;
}
template <uInt dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1,
THLongTensor *t2) {
template <Int dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1,
/*long*/ at::Tensor &t2) {
Point<3 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
td = THLongTensor_data(t2);
for (int i = 0; i < dimension; i++)
td = t2.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + 2 * dimension] = td[i];
return p;
}
// FNV Hash function for Point<dimension>
template <uInt dimension> struct IntArrayHash {
template <Int dimension> struct IntArrayHash {
std::size_t operator()(Point<dimension> const &p) const {
uInt hash = 16777619;
Int hash = 16777619;
for (auto x : p) {
hash *= 2166136261;
hash ^= x;
......@@ -73,5 +65,4 @@ template <uInt dimension> struct IntArrayHash {
}
};
// ATen scalar type matching `Int` (int32_t) for integer coordinate tensors.
#define at_kINT at::kInt
......@@ -5,66 +5,58 @@
// LICENSE file in the root directory of this source tree.
#include <array>
#include <tuple>
// Using 32 bit integers for coordinates and memory calculations.
// They could be replaced with 64 bit integers.
// Advantages of 64 bit:
// - support for nFeatures * nActiveSites > 2^32 per hidden layer per batch
// Disadvantages:
// - larger, and therefore slower, data copies from CPU -> GPU
// - more device memory needed to store sparseconvnet 'rulebooks'
// - not really needed until GPUs have >> 32GB RAM
// Using 64 bit integers for coordinates and memory calculations.
// 64-bit coordinate type (this is the 64-bit variant of the header).
using Int = int64_t;
// Unsigned companion type; the value uInt_MAX is a sentinel for 'non-existent'.
using uInt = uint64_t;
// ULL suffix is required: 2^64-1 does not fit in any signed integer type, so
// the unsuffixed decimal literal is ill-formed.
const uInt uInt_MAX = 18446744073709551615ULL; // 2^64-1
const uInt Int_MAX = 9223372036854775807ULL;   // 2^63-1
// Point<dimension> is a point in the d-dimensional integer lattice
// (i.e. square-grid/cubic-grid, ...)
template <Int dimension> using Point = std::array<Int, dimension>;
template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) {
template <Int dimension>
Point<dimension> LongTensorToPoint(/*long*/ at::Tensor &t) {
Point<dimension> p;
long *td = THLongTensor_data(t);
for (int i = 0; i < dimension; i++)
long *td = t.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
return p;
}
template <uInt dimension>
Point<2 * dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) {
template <Int dimension>
Point<2 * dimension> TwoLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1) {
Point<2 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
return p;
}
template <uInt dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1,
THLongTensor *t2) {
template <Int dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1,
/*long*/ at::Tensor &t2) {
Point<3 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
td = THLongTensor_data(t2);
for (int i = 0; i < dimension; i++)
td = t2.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + 2 * dimension] = td[i];
return p;
}
// FNV Hash function for Point<dimension>
template <uInt dimension> struct IntArrayHash {
template <Int dimension> struct IntArrayHash {
std::size_t operator()(Point<dimension> const &p) const {
uInt hash = 14695981039346656037;
Int hash = -3750763034362895579; // 14695981039346656037;
for (auto x : p) {
hash *= 1099511628211;
hash ^= x;
......@@ -73,5 +65,4 @@ template <uInt dimension> struct IntArrayHash {
}
};
// ATen scalar type matching `Int` (int64_t) for integer coordinate tensors.
#define at_kINT at::kLong
......@@ -6,7 +6,6 @@
#ifndef ACTIVEPOOLING_H
#define ACTIVEPOOLING_H
#include "../SparseConvNet.h"
// Return the maximum number of active sites in the batch
// rules has size 1.
......@@ -14,14 +13,14 @@
// First column is number of active sites for that sample (<= maxActive)
// Remaining maxActive columns give the active sites, zero padded.
template <uInt dimension>
template <Int dimension>
void activePoolingRules(SparseGrids<dimension> &SGs, RuleBook &rules) {
rules.clear();
rules.resize(2);
auto &r = rules[0];
uInt maxActive = 0;
Int maxActive = 0;
for (auto &sg : SGs)
maxActive = std::max(maxActive, (uInt)sg.mp.size());
maxActive = std::max(maxActive, (Int)sg.mp.size());
for (auto &sg : SGs) {
r.push_back(sg.mp.size());
for (auto &iter : sg.mp)
......
......@@ -8,7 +8,7 @@
#define CONVOLUTIONRULES_H
#include "RectangularRegions.h"
template <uInt dimension>
template <Int dimension>
void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
SparseGrid<dimension> &outputGrid,
RuleBook &rules, long *size,
......@@ -17,10 +17,11 @@ void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
rules.resize(volume<dimension>(size));
for (auto const &inIter : inputGrid.mp) {
for (auto j : OutputRegionCalculator<dimension>(inIter.first, size, stride,
outputSpatialSize)) {
auto outRegion = OutputRegionCalculator<dimension>(
inIter.first, size, stride, outputSpatialSize);
for (auto j : outRegion) {
auto inRegion = InputRegionCalculator<dimension>(j, size, stride);
uInt rulesOffset = inRegion.offset(inIter.first);
Int rulesOffset = inRegion.offset(inIter.first);
auto outIter = outputGrid.mp.find(j);
if (outIter == outputGrid.mp.end()) {
outIter =
......@@ -32,8 +33,8 @@ void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
}
}
template <uInt dimension>
uInt Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
template <Int dimension>
Int Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize,
long *filterStride,
......@@ -41,10 +42,10 @@ uInt Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
long *output_spatialSize) {
rules.clear();
output_SGs.clear();
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
output_SGs.resize(batchSize);
uInt output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) {
Int output_nActive = 0;
for (Int i = 0; i < batchSize; i++) {
auto &iSG = input_SGs[i];
auto &oSG = output_SGs[i];
oSG.ctr = output_nActive;
......@@ -57,43 +58,43 @@ uInt Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
return output_nActive;
}
template <uInt dimension>
uInt Convolution_InputSgsToRulesAndOutputSgs_OMP(
template <Int dimension>
Int Convolution_InputSgsToRulesAndOutputSgs_OMP(
SparseGrids<dimension> &input_SGs, SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize, long *filterStride,
long *input_spatialSize, long *output_spatialSize) {
rules.clear();
rules.resize(volume<dimension>(filterSize));
output_SGs.clear();
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
output_SGs.resize(batchSize);
std::vector<RuleBook> rbs(batchSize);
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < batchSize; i++)
Convolution_InputSgToRulesAndOutputSg<dimension>(
input_SGs[i], output_SGs[i], rbs[i], filterSize, filterStride,
input_spatialSize, output_spatialSize);
}
uInt output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) {
Int output_nActive = 0;
for (Int i = 0; i < batchSize; i++) {
// Parallel assignment:
// output_nActive <- output_nActive+output_SGs[i].ctr
// output_SGs[i].ctr <- output_nActive
uInt tmp = output_nActive;
Int tmp = output_nActive;
output_nActive += output_SGs[i].ctr;
output_SGs[i].ctr = tmp;
}
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < rules.size(); i++) {
for (i = 0; i < (Int)rules.size(); i++) {
auto &R = rules[i];
for (uInt j = 0; j < batchSize; j++) {
for (Int j = 0; j < batchSize; j++) {
auto &r = rbs[j][i];
auto offset = output_SGs[j].ctr;
for (uInt k = 0; k < r.size();) {
for (Int k = 0; k < (Int)r.size();) {
R.push_back(r[k++]);
R.push_back(r[k++] + offset);
}
......@@ -105,19 +106,19 @@ uInt Convolution_InputSgsToRulesAndOutputSgs_OMP(
// for each active site, list of (inputFeatureNumber,batchIdx, spatialOffset)
// triples
template <uInt dimension>
template <Int dimension>
void SparseToDense_InputSgsToRulesAndOutputSgs(
SparseGrids<dimension> &input_SGs, RuleBook &rules, long *spatialSize) {
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
rules.clear();
rules.resize(batchSize);
Point<dimension> lb, ub;