Commit aebbb122 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

Merge branch 'master' of github.com:facebookresearch/SparseConvNet

parents 8c706edd e55e3bea
#!/bin/bash
# Clean rebuild of the sparseconvnet extension and smoke-test it with the
# hello-world example.
#
# Fail fast: abort immediately if the clean, build, or example step fails,
# instead of silently continuing (and exiting 0) after a broken build.
set -euo pipefail

# Remove all previous build artifacts so the CUDA extension is rebuilt
# from scratch rather than partially reused.
rm -rf build/ dist/ sparseconvnet.egg-info sparseconvnet_SCN*.so

# Build and install in development (editable) mode.
python setup.py develop

# Smoke test: run the minimal example to confirm the extension loads and runs.
python examples/hello-world.py
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
template <typename T> template <typename T>
__global__ void ActivePooling_fp(T *input_features, T *output_features, __global__ void ActivePooling_fp(T *input_features, T *output_features,
Int maxActive, Int nPlanes, Int *rules, Int maxActive, Int nPlanes, Int *rules,
bool average) { bool average) {
T *out = &output_features[blockIdx.x * nPlanes]; T *out = &output_features[blockIdx.x * nPlanes];
Int *r = &rules[blockIdx.x * (maxActive + 1)]; Int *r = &rules[blockIdx.x * (maxActive + 1)];
Int nActive = *r++; Int nActive = *r++;
...@@ -20,10 +20,10 @@ __global__ void ActivePooling_fp(T *input_features, T *output_features, ...@@ -20,10 +20,10 @@ __global__ void ActivePooling_fp(T *input_features, T *output_features,
} }
template <typename T> template <typename T>
void ActivePooling_ForwardPass(T *input_features, T *output_features, void ActivePooling_ForwardPass(T *input_features, T *output_features,
Int batchSize, Int maxActive, Int nPlanes, Int batchSize, Int maxActive, Int nPlanes,
Int *rules, bool average) { Int *rules, bool average) {
auto rulesBuffer = at::CUDA(at_kINT).tensor({1 << 22}); auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT));
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1)); Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1));
assert(rowBatchSize > 0); assert(rowBatchSize > 0);
...@@ -32,17 +32,17 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features, ...@@ -32,17 +32,17 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features,
for (Int o = 0; o < batchSize; o += rowBatchSize) { for (Int o = 0; o < batchSize; o += rowBatchSize) {
Int batchSize_ = std::min(rowBatchSize, (Int(batchSize - o))); Int batchSize_ = std::min(rowBatchSize, (Int(batchSize - o)));
cudaMemcpy(rb, rules + o * (maxActive + 1), cudaMemcpy(rb, rules + o * (maxActive + 1),
sizeof(Int) * (maxActive + 1) * batchSize_, sizeof(Int) * (maxActive + 1) * batchSize_,
cudaMemcpyHostToDevice); cudaMemcpyHostToDevice);
ActivePooling_fp<T><<<batchSize_, kernelBlockDim>>>( ActivePooling_fp<T><<<batchSize_, kernelBlockDim>>>(
input_features, output_features + 0 * nPlanes, maxActive, nPlanes, input_features, output_features + 0 * nPlanes, maxActive, nPlanes,
rules, average); rules, average);
} }
} }
template <typename T> template <typename T>
__global__ void ActivePooling_bp(T *d_input_features, T *d_output_features, __global__ void ActivePooling_bp(T *d_input_features, T *d_output_features,
Int maxActive, Int nPlanes, Int *rules, Int maxActive, Int nPlanes, Int *rules,
bool average) { bool average) {
T *out = &d_output_features[blockIdx.x * nPlanes]; T *out = &d_output_features[blockIdx.x * nPlanes];
Int *r = &rules[blockIdx.x * (maxActive + 1)]; Int *r = &rules[blockIdx.x * (maxActive + 1)];
Int nActive = *r++; Int nActive = *r++;
...@@ -56,9 +56,9 @@ __global__ void ActivePooling_bp(T *d_input_features, T *d_output_features, ...@@ -56,9 +56,9 @@ __global__ void ActivePooling_bp(T *d_input_features, T *d_output_features,
template <typename T> template <typename T>
void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features, void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int batchSize, Int maxActive, Int nPlanes, Int batchSize, Int maxActive, Int nPlanes,
Int *rules, bool average) { Int *rules, bool average) {
auto rulesBuffer = at::CUDA(at_kINT).tensor({1 << 22}); auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT));
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1)); Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1));
assert(rowBatchSize > 0); assert(rowBatchSize > 0);
...@@ -67,10 +67,10 @@ void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features, ...@@ -67,10 +67,10 @@ void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
for (Int o = 0; o < batchSize; o += rowBatchSize) { for (Int o = 0; o < batchSize; o += rowBatchSize) {
Int batchSize_ = std::min(rowBatchSize, (Int(batchSize - o))); Int batchSize_ = std::min(rowBatchSize, (Int(batchSize - o)));
cudaMemcpy(rb, rules + o * (maxActive + 1), cudaMemcpy(rb, rules + o * (maxActive + 1),
sizeof(Int) * (maxActive + 1) * batchSize_, sizeof(Int) * (maxActive + 1) * batchSize_,
cudaMemcpyHostToDevice); cudaMemcpyHostToDevice);
ActivePooling_bp<T><<<batchSize_, kernelBlockDim>>>( ActivePooling_bp<T><<<batchSize_, kernelBlockDim>>>(
d_input_features, d_output_features + o * nPlanes, maxActive, nPlanes, d_input_features, d_output_features + o * nPlanes, maxActive, nPlanes,
rules, average); rules, average);
} }
} }
...@@ -33,7 +33,7 @@ void cuda_InputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -33,7 +33,7 @@ void cuda_InputLayer_updateOutput(Metadata<Dimension> &m,
} else { } else {
output_features.resize_({*m.inputNActive, nPlanes}); output_features.resize_({*m.inputNActive, nPlanes});
output_features.zero_(); output_features.zero_();
auto rulesBuffer = at::CUDA(at_kINT).tensor({(int)rules[1].size()}); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto iF = input_features.data<T>(); auto iF = input_features.data<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
...@@ -58,7 +58,7 @@ void cuda_InputLayer_updateGradInput( ...@@ -58,7 +58,7 @@ void cuda_InputLayer_updateGradInput(
} else { } else {
d_input_features.resize_({rules[0][2], nPlanes}); d_input_features.resize_({rules[0][2], nPlanes});
d_input_features.zero_(); d_input_features.zero_();
auto rulesBuffer = at::CUDA(at_kINT).tensor({(int)rules[1].size()}); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
...@@ -83,7 +83,7 @@ void cuda_OutputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -83,7 +83,7 @@ void cuda_OutputLayer_updateOutput(Metadata<Dimension> &m,
} else { } else {
output_features.resize_({rules[0][2], nPlanes}); output_features.resize_({rules[0][2], nPlanes});
output_features.zero_(); output_features.zero_();
auto rulesBuffer = at::CUDA(at_kINT).tensor({(int)rules[1].size()}); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto iF = input_features.data<T>(); auto iF = input_features.data<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
...@@ -107,7 +107,7 @@ void cuda_OutputLayer_updateGradInput( ...@@ -107,7 +107,7 @@ void cuda_OutputLayer_updateGradInput(
} else { } else {
d_input_features.resize_({nRows, nPlanes}); d_input_features.resize_({nRows, nPlanes});
d_input_features.zero_(); d_input_features.zero_();
auto rulesBuffer = at::CUDA(at_kINT).tensor({(int)rules[1].size()}); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
...@@ -137,7 +137,7 @@ void cuda_BLInputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -137,7 +137,7 @@ void cuda_BLInputLayer_updateOutput(Metadata<Dimension> &m,
output_features.copy_(input_features); output_features.copy_(input_features);
output_features.resize_({*m.inputNActive, nPlanes}); output_features.resize_({*m.inputNActive, nPlanes});
} else { } else {
auto rulesBuffer = at::CUDA(at_kINT).tensor({(int)rules[1].size()}); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto iF = input_features.data<T>(); auto iF = input_features.data<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
...@@ -164,7 +164,7 @@ void cuda_BLInputLayer_updateGradInput( ...@@ -164,7 +164,7 @@ void cuda_BLInputLayer_updateGradInput(
} else { } else {
d_input_features.resize_({rules[0][2], rules[0][3], nPlanes}); d_input_features.resize_({rules[0][2], rules[0][3], nPlanes});
d_input_features.zero_(); d_input_features.zero_();
auto rulesBuffer = at::CUDA(at_kINT).tensor({(int)rules[1].size()}); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
...@@ -191,7 +191,7 @@ void cuda_BLOutputLayer_updateOutput( ...@@ -191,7 +191,7 @@ void cuda_BLOutputLayer_updateOutput(
} else { } else {
output_features.resize_({rules[0][2], rules[0][3], nPlanes}); output_features.resize_({rules[0][2], rules[0][3], nPlanes});
output_features.zero_(); output_features.zero_();
auto rulesBuffer = at::CUDA(at_kINT).tensor({(int)rules[1].size()}); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto iF = input_features.data<T>(); auto iF = input_features.data<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
...@@ -216,7 +216,7 @@ void cuda_BLOutputLayer_updateGradInput( ...@@ -216,7 +216,7 @@ void cuda_BLOutputLayer_updateGradInput(
} else { } else {
d_input_features.resize_({nRows, nPlanes}); d_input_features.resize_({nRows, nPlanes});
d_input_features.zero_(); d_input_features.zero_();
auto rulesBuffer = at::CUDA(at_kINT).tensor({(int)rules[1].size()}); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
Int rbMaxSize = 0; \ Int rbMaxSize = 0; \
for (auto &r : _rules) \ for (auto &r : _rules) \
rbMaxSize = std::max(rbMaxSize, (Int)r.size()); \ rbMaxSize = std::max(rbMaxSize, (Int)r.size()); \
at::Tensor rulesBuffer = at::CUDA(at_kINT).tensor({rbMaxSize}); \ at::Tensor rulesBuffer = at::empty({rbMaxSize}, at::CUDA(at_kINT)); \
Int *rbB = rulesBuffer.data<Int>(); \ Int *rbB = rulesBuffer.data<Int>(); \
for (int k = 0; k < _rules.size(); ++k) { \ for (int k = 0; k < _rules.size(); ++k) { \
auto &r = _rules[k]; \ auto &r = _rules[k]; \
......
...@@ -588,13 +588,13 @@ Metadata<dimension>::compareSparseHelper(Metadata<dimension> &mR, ...@@ -588,13 +588,13 @@ Metadata<dimension>::compareSparseHelper(Metadata<dimension> &mR,
} }
} }
} }
at::Tensor cL_ = torch::CPU(at::kLong).tensor({(long)cL.size()}); at::Tensor cL_ = at::empty({(long)cL.size()}, at::CPU(at::kLong));
std::memcpy(cL_.data<long>(), &cL[0], cL.size() * sizeof(long)); std::memcpy(cL_.data<long>(), &cL[0], cL.size() * sizeof(long));
at::Tensor cR_ = torch::CPU(at::kLong).tensor({(long)cR.size()}); at::Tensor cR_ = at::empty({(long)cR.size()}, at::CPU(at::kLong));
std::memcpy(cR_.data<long>(), &cR[0], cR.size() * sizeof(long)); std::memcpy(cR_.data<long>(), &cR[0], cR.size() * sizeof(long));
at::Tensor L_ = torch::CPU(at::kLong).tensor({(long)L.size()}); at::Tensor L_ = at::empty({(long)L.size()}, at::CPU(at::kLong));
std::memcpy(L_.data<long>(), &L[0], L.size() * sizeof(long)); std::memcpy(L_.data<long>(), &L[0], L.size() * sizeof(long));
at::Tensor R_ = torch::CPU(at::kLong).tensor({(long)R.size()}); at::Tensor R_ = at::empty({(long)R.size()}, at::CPU(at::kLong));
std::memcpy(R_.data<long>(), &R[0], R.size() * sizeof(long)); std::memcpy(R_.data<long>(), &R[0], R.size() * sizeof(long));
return {cL_, cR_, L_, R_}; return {cL_, cR_, L_, R_};
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.