Commit 2ad7baf8 authored by Benjamin Thomas Graham

detach; batch_size; data_ptr

parent 16e4df34
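
Context for the diff below: `at::Tensor::data<T>()` was deprecated in PyTorch's C++ API in favour of `data_ptr<T>()`, which returns the same typed pointer to the tensor's storage; every hunk below applies that one-line substitution. (The "detach" and "batch_size" parts of the commit message refer to lines outside the hunks shown here.) A minimal sketch of the pattern, with illustrative names that are not taken from this commit:

    #include <ATen/ATen.h>

    // Scale a tensor through a raw typed pointer -- the accessor being
    // migrated. Assumes a contiguous float CPU tensor.
    void scale_in_place(at::Tensor &t, float k) {
      // Deprecated accessor:  float *p = t.data<float>();
      float *p = t.data_ptr<float>(); // current replacement, same semantics
      for (int64_t i = 0, n = t.numel(); i < n; ++i)
        p[i] *= k;
    }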
@@ -55,8 +55,8 @@ void cpu_ActivePooling_updateOutput(
   output_features.resize_({batchSize, nPlanes});
   output_features.zero_();
-  ActivePooling_ForwardPass<T>(input_features.data<T>(),
-                               output_features.data<T>(), batchSize, maxActive,
+  ActivePooling_ForwardPass<T>(input_features.data_ptr<T>(),
+                               output_features.data_ptr<T>(), batchSize, maxActive,
                                nPlanes, _rules, average);
 }
@@ -74,7 +74,7 @@ void cpu_ActivePooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  ActivePooling_BackwardPass<T>(d_input_features.data<T>(),
-                                d_output_features.data<T>(), batchSize,
+  ActivePooling_BackwardPass<T>(d_input_features.data_ptr<T>(),
+                                d_output_features.data_ptr<T>(), batchSize,
                                 maxActive, nPlanes, _rules, average);
 }
@@ -74,9 +74,9 @@ double cpu_AffineReluTrivialConvolution_updateOutput(
     /*float*/ at::Tensor &affineBias, /*float*/ at::Tensor &convWeight) {
   output_features.resize_({input_features.size(0), convWeight.size(1)});
   AffineReluTrivialConvolution_ForwardPass(
-      input_features.data<T>(), convWeight.size(0), input_features.stride(0),
-      output_features.data<T>(), convWeight.size(1), output_features.stride(0),
-      affineWeight.data<T>(), affineBias.data<T>(), convWeight.data<T>(),
+      input_features.data_ptr<T>(), convWeight.size(0), input_features.stride(0),
+      output_features.data_ptr<T>(), convWeight.size(1), output_features.stride(0),
+      affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), convWeight.data_ptr<T>(),
       input_features.size(0));
   return input_features.size(0) * input_features.size(1) *
          output_features.size(1);
@@ -94,10 +94,10 @@ void cpu_AffineReluTrivialConvolution_backward(
   d_input_features.resize_as_(input_features);
   AffineReluTrivialConvolution_BackwardPass(
-      input_features.data<T>(), d_input_features.data<T>(), convWeight.size(0),
-      input_features.stride(0), d_output_features.data<T>(), convWeight.size(1),
-      d_output_features.stride(0), affineWeight.data<T>(),
-      d_affineWeight.data<T>(), affineBias.data<T>(), d_affineBias.data<T>(),
-      convWeight.data<T>(), d_convWeight.data<T>(), input_features.size(0),
+      input_features.data_ptr<T>(), d_input_features.data_ptr<T>(), convWeight.size(0),
+      input_features.stride(0), d_output_features.data_ptr<T>(), convWeight.size(1),
+      d_output_features.stride(0), affineWeight.data_ptr<T>(),
+      d_affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), d_affineBias.data_ptr<T>(),
+      convWeight.data_ptr<T>(), d_convWeight.data_ptr<T>(), input_features.size(0),
       additiveGrad);
 }
@@ -49,8 +49,8 @@ void cpu_AveragePooling_updateOutput(
   output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
   output_features.zero_();
-  auto iF = input_features.data<T>() + nFeaturesToDrop;
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto oF = output_features.data_ptr<T>();
   for (const auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -74,8 +74,8 @@ void cpu_AveragePooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  auto diF = d_input_features.data<T>() + nFeaturesToDrop;
-  auto doF = d_output_features.data<T>();
+  auto diF = d_input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto doF = d_output_features.data_ptr<T>();
   for (const auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -90,9 +90,9 @@ void cpu_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context,
                                          at::Tensor &Context) {
   Int nHot = rules.size(0) / 2;
   Int nPlanes = context.size(1);
-  auto iF = context.data<T>();
-  auto oF = Context.data<T>();
-  auto r = rules.data<Int>();
+  auto iF = context.data_ptr<T>();
+  auto oF = Context.data_ptr<T>();
+  auto r = rules.data_ptr<Int>();
   Int outSite;
 #pragma omp parallel for private(outSite)
   for (outSite = 0; outSite < nHot; outSite++) {
@@ -107,9 +107,9 @@ void cpu_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
                                             at::Tensor &dContext) {
   Int nHot = rules.size(0) / 2;
   Int nPlanes = dcontext.size(1);
-  auto iF = dcontext.data<T>();
-  auto oF = dContext.data<T>();
-  auto r = rules.data<Int>();
+  auto iF = dcontext.data_ptr<T>();
+  auto oF = dContext.data_ptr<T>();
+  auto r = rules.data_ptr<Int>();
   Int outSite;
 #pragma omp parallel for private(outSite)
   for (outSite = 0; outSite < nHot; outSite++) {
...
@@ -121,9 +121,9 @@ void cpu_BatchNormalization_updateOutput(
   auto input_stride = input_features.stride(0);
   auto output_stride = output_features.stride(0);
   BatchNormalization_ForwardPass<T>(
-      input_features.data<T>(), output_features.data<T>(), nPlanes,
-      input_stride, output_stride, nActive, saveMean.data<T>(),
-      saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
+      input_features.data_ptr<T>(), output_features.data_ptr<T>(), nPlanes,
+      input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
+      saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
       OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), eps,
       momentum, train, leakiness);
 }
@@ -147,10 +147,10 @@ void cpu_BatchNormalization_backward(
   auto input_stride = input_features.stride(0);
   auto output_stride = output_features.stride(0);
   BatchNormalization_BackwardPass<T>(
-      input_features.data<T>(), d_input_features.data<T>(),
-      output_features.data<T>(), d_output_features.data<T>(), nPlanes,
-      input_stride, output_stride, nActive, saveMean.data<T>(),
-      saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
+      input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
+      output_features.data_ptr<T>(), d_output_features.data_ptr<T>(), nPlanes,
+      input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
+      saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
       OptionalTensorData<T>(weight), OptionalTensorData<T>(bias),
       OptionalTensorData<T>(d_weight), OptionalTensorData<T>(d_bias),
       leakiness);
...
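
The BatchNormalization hunks above also pass `OptionalTensorData<T>(weight)` and similar arguments; judging from its use next to `data_ptr<T>()`, it is presumably a small helper that yields a typed pointer for optionally-present tensors (the affine weight/bias). A plausible, hypothetical shape for such a helper -- not the repository's actual definition:

    // Hypothetical helper in the spirit of OptionalTensorData<T>:
    // a typed pointer when the tensor has elements, otherwise nullptr.
    template <typename T> T *optional_tensor_data(at::Tensor &t) {
      return t.numel() ? t.data_ptr<T>() : nullptr;
    }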
@@ -11,9 +11,9 @@ void cpu_BatchwiseMultiplicativeDropout_updateOutput(
   output_features.resize_as_(input_features);
   auto nActive = input_features.size(0);
   auto nPlanes = input_features.size(1);
-  auto iF = input_features.data<T>();
-  auto oF = output_features.data<T>();
-  auto nz = noise.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto oF = output_features.data_ptr<T>();
+  auto nz = noise.data_ptr<T>();
   for (Int row = 0; row < nActive; row++)
     for (Int plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes;
          plane++, o++, i++)
@@ -28,10 +28,10 @@ void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
   d_input_features.resize_as_(d_output_features);
   auto nActive = input_features.size(0);
   auto nPlanes = input_features.size(1);
-  auto iF = input_features.data<T>();
-  auto diF = d_input_features.data<T>();
-  auto doF = d_output_features.data<T>();
-  auto nz = noise.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto diF = d_input_features.data_ptr<T>();
+  auto doF = d_output_features.data_ptr<T>();
+  auto nz = noise.data_ptr<T>();
   for (Int row = 0; row < nActive; row++)
     for (Int plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes;
          plane++, o++, i++)
...
@@ -10,8 +10,8 @@ at::Tensor rule_index_select(at::Tensor &src, Int nRules, const Int *rules,
                              Int groups) {
   auto planes = src.size(1) / groups;
   auto target = at::empty({groups, nRules, planes}, src.options());
-  auto s_ptr = src.data<T>();
-  auto t_ptr = target.data<T>();
+  auto s_ptr = src.data_ptr<T>();
+  auto t_ptr = target.data_ptr<T>();
 #pragma omp parallel for
   for (Int i = 0; i < nRules; ++i) {
     for (Int g = 0; g < groups; ++g) {
@@ -29,8 +29,8 @@ template <typename T>
 void rule_index_add_(at::Tensor &target, at::Tensor &src, Int nRules,
                      const Int *rules, Int groups) {
   auto planes = target.size(1) / groups;
-  auto s_ptr = src.data<T>();
-  auto t_ptr = target.data<T>();
+  auto s_ptr = src.data_ptr<T>();
+  auto t_ptr = target.data_ptr<T>();
 #pragma omp parallel for
   for (Int i = 0; i < nRules; ++i) {
     for (Int g = 0; g < groups; ++g) {
...
@@ -65,8 +65,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
   } else {
     output_features.resize_({*m.inputNActive, nPlanes});
     output_features.zero_();
-    InputLayer_ForwardPass<T>(input_features.data<T>(),
-                              output_features.data<T>(), nRows, maxActive,
+    InputLayer_ForwardPass<T>(input_features.data_ptr<T>(),
+                              output_features.data_ptr<T>(), nRows, maxActive,
                               nPlanes, &rules[1][0], mode == 4);
   }
 }
@@ -86,8 +86,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
   } else {
     d_input_features.resize_({rules[0][2], nPlanes});
     d_input_features.zero_();
-    InputLayer_BackwardPass<T>(d_input_features.data<T>(),
-                               d_output_features.data<T>(), nRows, maxActive,
+    InputLayer_BackwardPass<T>(d_input_features.data_ptr<T>(),
+                               d_output_features.data_ptr<T>(), nRows, maxActive,
                                nPlanes, &rules[1][0], mode == 4);
   }
 }
@@ -108,8 +108,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
   } else {
     output_features.resize_({rules[0][2], nPlanes});
     output_features.zero_();
-    InputLayer_BackwardPass<T>(output_features.data<T>(),
-                               input_features.data<T>(), nRows, maxActive,
+    InputLayer_BackwardPass<T>(output_features.data_ptr<T>(),
+                               input_features.data_ptr<T>(), nRows, maxActive,
                                nPlanes, &rules[1][0], false);
   }
 }
@@ -129,8 +129,8 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
   } else {
     d_input_features.resize_({nRows, nPlanes});
     d_input_features.zero_();
-    InputLayer_ForwardPass<T>(d_output_features.data<T>(),
-                              d_input_features.data<T>(), nRows, maxActive,
+    InputLayer_ForwardPass<T>(d_output_features.data_ptr<T>(),
+                              d_input_features.data_ptr<T>(), nRows, maxActive,
                               nPlanes, &rules[1][0], false);
   }
 }
@@ -155,8 +155,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
   } else {
     output_features.resize_({*m.inputNActive, nPlanes});
     output_features.zero_();
-    InputLayer_ForwardPass<T>(input_features.data<T>(),
-                              output_features.data<T>(), nRows, maxActive,
+    InputLayer_ForwardPass<T>(input_features.data_ptr<T>(),
+                              output_features.data_ptr<T>(), nRows, maxActive,
                               nPlanes, &rules[1][0], mode == 4);
   }
 }
@@ -178,8 +178,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
   } else {
     d_input_features.resize_({rules[0][2], rules[0][3], nPlanes});
     d_input_features.zero_();
-    InputLayer_BackwardPass<T>(d_input_features.data<T>(),
-                               d_output_features.data<T>(), nRows, maxActive,
+    InputLayer_BackwardPass<T>(d_input_features.data_ptr<T>(),
+                               d_output_features.data_ptr<T>(), nRows, maxActive,
                                nPlanes, &rules[1][0], mode == 4);
   }
 }
@@ -201,8 +201,8 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
   } else {
     output_features.resize_({rules[0][2], rules[0][3], nPlanes});
     output_features.zero_();
-    InputLayer_BackwardPass<T>(output_features.data<T>(),
-                               input_features.data<T>(), nRows, maxActive,
+    InputLayer_BackwardPass<T>(output_features.data_ptr<T>(),
+                               input_features.data_ptr<T>(), nRows, maxActive,
                                nPlanes, &rules[1][0], false);
   }
 }
@@ -224,8 +224,8 @@ void cpu_BLOutputLayer_updateGradInput(
   } else {
     d_input_features.resize_({nRows, nPlanes});
     d_input_features.zero_();
-    InputLayer_ForwardPass<T>(d_output_features.data<T>(),
-                              d_input_features.data<T>(), nRows, maxActive,
+    InputLayer_ForwardPass<T>(d_output_features.data_ptr<T>(),
+                              d_input_features.data_ptr<T>(), nRows, maxActive,
                               nPlanes, &rules[1][0], false);
   }
 }
@@ -9,8 +9,8 @@ void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor &input_features,
                                 /*float*/ at::Tensor &output_features,
                                 T alpha) {
   output_features.resize_as_(input_features);
-  auto iF = input_features.data<T>();
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto oF = output_features.data_ptr<T>();
   auto n = input_features.numel();
   for (Int i = 0; i < n; i++) {
@@ -25,9 +25,9 @@ void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor &input_features,
                                    /*float*/ at::Tensor &d_output_features,
                                    T alpha) {
   d_input_features.resize_as_(d_output_features);
-  auto iF = input_features.data<T>();
-  auto diF = d_input_features.data<T>();
-  auto doF = d_output_features.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto diF = d_input_features.data_ptr<T>();
+  auto doF = d_output_features.data_ptr<T>();
   auto n = d_input_features.numel();
   for (Int i = 0; i < n; i++) {
...
@@ -49,8 +49,8 @@ void cpu_MaxPooling_updateOutput(
   output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
   output_features.zero_();
-  auto iF = input_features.data<T>() + nFeaturesToDrop;
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto oF = output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -74,10 +74,10 @@ void cpu_MaxPooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  auto iF = input_features.data<T>();
-  auto oF = output_features.data<T>();
-  auto diF = d_input_features.data<T>();
-  auto doF = d_output_features.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto oF = output_features.data_ptr<T>();
+  auto diF = d_input_features.data_ptr<T>();
+  auto doF = d_output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -101,8 +101,8 @@ void cpu_RandomizedStrideMaxPooling_updateOutput(
   output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
   output_features.zero_();
-  auto iF = input_features.data<T>() + nFeaturesToDrop;
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto oF = output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -126,10 +126,10 @@ void cpu_RandomizedStrideMaxPooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  auto iF = input_features.data<T>();
-  auto oF = output_features.data<T>();
-  auto diF = d_input_features.data<T>();
-  auto doF = d_output_features.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto oF = output_features.data_ptr<T>();
+  auto diF = d_input_features.data_ptr<T>();
+  auto doF = d_output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
...
@@ -42,7 +42,7 @@ void cpu_SparseToDense_updateOutput(
   std::array<long, Dimension + 2> sz;
   sz[0] = m.grids.begin()->second.size(); // batch size
   sz[1] = nPlanes;
-  long *in_sz = inputSize.data<long>();
+  long *in_sz = inputSize.data_ptr<long>();
   for (Int i = 0; i < Dimension; ++i)
     sz[i + 2] = in_sz[i];
   output_features.resize_(sz);
@@ -51,9 +51,9 @@ void cpu_SparseToDense_updateOutput(
   if (input_features.ndimension() == 2) {
     const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true);
     Int _nPlanes = input_features.size(1);
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    long spatialVolume = inputSize.prod().data<long>()[0];
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    long spatialVolume = inputSize.prod().data_ptr<long>()[0];
     for (auto &r : _rules) {
       Int nHot = r.size() / 2;
       SparseToDense_ForwardPass<T>(iF, oF, _nPlanes, spatialVolume, &r[0],
@@ -73,10 +73,10 @@ void cpu_SparseToDense_updateGradInput(
   d_input_features.zero_();
   if (input_features.ndimension() == 2) {
     const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true);
-    long spatialVolume = inputSize.prod().data<long>()[0];
+    long spatialVolume = inputSize.prod().data_ptr<long>()[0];
     Int _nPlanes = d_input_features.size(1);
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
     for (auto &r : _rules) {
       Int nHot = r.size() / 2;
       SparseToDense_BackwardPass<T>(diF, doF, _nPlanes, spatialVolume, &r[0],
...
@@ -46,8 +46,8 @@ void cpu_UnPooling_updateOutput(
   output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
   output_features.zero_();
-  auto iF = input_features.data<T>() + nFeaturesToDrop;
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto oF = output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -67,8 +67,8 @@ void cpu_UnPooling_updateGradInput(
   const auto &_rules =
       m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
-  auto diF = d_input_features.data<T>() + nFeaturesToDrop;
-  auto doF = d_output_features.data<T>();
+  auto diF = d_input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto doF = d_output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
...
@@ -27,8 +27,8 @@ void cuda_ActivePooling_updateOutput(
   output_features.resize_({batchSize, nPlanes});
   output_features.zero_();
-  auto iF = input_features.data<T>();
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto oF = output_features.data_ptr<T>();
   ActivePooling_ForwardPass<T>(iF, oF, batchSize, maxActive, nPlanes,
                                &_rules[0][0], average);
 }
@@ -46,8 +46,8 @@ void cuda_ActivePooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  auto diF = d_input_features.data<T>();
-  auto doF = d_output_features.data<T>();
+  auto diF = d_input_features.data_ptr<T>();
+  auto doF = d_output_features.data_ptr<T>();
   ActivePooling_BackwardPass<T>(diF, doF, batchSize, maxActive, nPlanes,
                                 &_rules[0][0], average);
...
@@ -24,7 +24,7 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features,
                                const Int *rules, bool average) {
   auto rulesBuffer = at::empty({1 << 22}, at::CUDA(at_kINT));
-  Int *rb = rulesBuffer.data<Int>();
+  Int *rb = rulesBuffer.data_ptr<Int>();
   Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1));
   assert(rowBatchSize > 0);
   Int kernelBlockDim = std::min(nPlanes, (Int)32);
@@ -59,7 +59,7 @@ void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
                                 Int batchSize, Int maxActive, Int nPlanes,
                                 const Int *rules, bool average) {
   auto rulesBuffer = at::empty({1 << 22}, at::CUDA(at_kINT));
-  Int *rb = rulesBuffer.data<Int>();
+  Int *rb = rulesBuffer.data_ptr<Int>();
   Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1));
   assert(rowBatchSize > 0);
   Int kernelBlockDim = std::min(nPlanes, (Int)32);
...
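
A note on the CUDA paths above: `rulesBuffer` is allocated with CUDA options, so `rulesBuffer.data_ptr<Int>()` returns a device pointer -- valid to hand to kernels, not to dereference on the host. A minimal sketch of that staging pattern in plain ATen (the names and the kernel are illustrative; `at_kINT` is a repository alias not reproduced here):

    #include <ATen/ATen.h>
    #include <vector>

    // Copy host-side rule data to the GPU and take a raw device pointer
    // for a kernel launch. Illustrative only.
    void stage_rules(const std::vector<int> &rules) {
      auto cpu = at::from_blob(const_cast<int *>(rules.data()),
                               {(int64_t)rules.size()}, at::kInt);
      auto gpu = cpu.to(at::kCUDA);  // host -> device copy
      int *rb = gpu.data_ptr<int>(); // device pointer; kernel use only
      // some_kernel<<<grid, block>>>(rb, (int)rules.size()); // hypothetical
    }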
@@ -32,8 +32,8 @@ double cuda_AffineReluTrivialConvolution_updateOutput(
   output_features.resize_({input_features.size(0), convWeight.size(1)});
   dAffineReluTrivialConvolution_forward<T>(
-      input_features.data<T>(), output_features.data<T>(),
-      affineWeight.data<T>(), affineBias.data<T>(), convWeight.data<T>(),
+      input_features.data_ptr<T>(), output_features.data_ptr<T>(),
+      affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), convWeight.data_ptr<T>(),
       convWeight.size(0), input_features.stride(0), convWeight.size(1),
       output_features.size(1), input_features.size(0));
   return input_features.size(0) * input_features.size(1) *
@@ -54,10 +54,10 @@ void cuda_AffineReluTrivialConvolution_backward(
   d_input_features.resize_as_(input_features);
   dAffineReluTrivialConvolution_backward_dW<T>(
-      input_features.data<T>(), d_input_features.data<T>(),
-      d_output_features.data<T>(), affineWeight.data<T>(),
-      d_affineWeight.data<T>(), affineBias.data<T>(), d_affineBias.data<T>(),
-      convWeight.data<T>(), d_convWeight.data<T>(), convWeight.size(0),
+      input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
+      d_output_features.data_ptr<T>(), affineWeight.data_ptr<T>(),
+      d_affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), d_affineBias.data_ptr<T>(),
+      convWeight.data_ptr<T>(), d_convWeight.data_ptr<T>(), convWeight.size(0),
       input_features.stride(0), convWeight.size(1), d_output_features.stride(0),
       input_features.size(0), additiveGrad);
 }
@@ -31,8 +31,8 @@ void cuda_AveragePooling_updateOutput(
   output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
   output_features.zero_();
-  auto iF = input_features.data<T>() + nFeaturesToDrop;
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto oF = output_features.data_ptr<T>();
   cuda_AveragePooling_ForwardPass<T>(iF, oF, nPlanes, input_features.size(1),
                                      output_features.size(1), _rules,
                                      _rules.size());
@@ -53,8 +53,8 @@ void cuda_AveragePooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  auto diF = d_input_features.data<T>() + nFeaturesToDrop;
-  auto doF = d_output_features.data<T>();
+  auto diF = d_input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto doF = d_output_features.data_ptr<T>();
   cuda_AveragePooling_BackwardPass<T>(diF, doF, nPlanes, input_features.size(1),
                                       d_output_features.size(1), _rules,
                                       _rules.size());
@@ -75,8 +75,8 @@ void cuda_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context
   Int nPlanes = context.size(1);
   Int nHot = rules.size(0) / 2;
-  cuda_CopyFeaturesHelper_ForwardPass<T>(context.data<T>(), Context.data<T>(),
-                                         rules.data<Int>(), nPlanes, nHot);
+  cuda_CopyFeaturesHelper_ForwardPass<T>(context.data_ptr<T>(), Context.data_ptr<T>(),
+                                         rules.data_ptr<Int>(), nPlanes, nHot);
 }
 template <typename T>
@@ -87,5 +87,5 @@ void cuda_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
   Int nPlanes = dcontext.size(1);
   Int nHot = rules.size(0) / 2;
   cuda_CopyFeaturesHelper_BackwardPass<T>(
-      dcontext.data<T>(), dContext.data<T>(), rules.data<Int>(), nPlanes, nHot);
+      dcontext.data_ptr<T>(), dContext.data_ptr<T>(), rules.data_ptr<Int>(), nPlanes, nHot);
 }
@@ -33,9 +33,9 @@ void cuda_BatchNormalization_updateOutput(
   auto nPlanes = input_features.size(1);
   auto input_stride = input_features.stride(0);
   auto output_stride = output_features.stride(0);
-  bn_f(input_features.data<T>(), output_features.data<T>(), nPlanes,
-       input_stride, output_stride, nActive, saveMean.data<T>(),
-       saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
+  bn_f(input_features.data_ptr<T>(), output_features.data_ptr<T>(), nPlanes,
+       input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
+       saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
       OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), eps,
       momentum, train, leakiness);
 }
@@ -60,10 +60,10 @@ void cuda_BatchNormalization_backward(
   auto nPlanes = input_features.size(1);
   auto input_stride = input_features.stride(0);
   auto output_stride = output_features.stride(0);
-  bn_b(input_features.data<T>(), d_input_features.data<T>(),
-       output_features.data<T>(), d_output_features.data<T>(), nPlanes,
-       input_stride, output_stride, nActive, saveMean.data<T>(),
-       saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
+  bn_b(input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
+       output_features.data_ptr<T>(), d_output_features.data_ptr<T>(), nPlanes,
+       input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
+       saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
       OptionalTensorData<T>(weight), OptionalTensorData<T>(bias),
       OptionalTensorData<T>(d_weight), OptionalTensorData<T>(d_bias),
       leakiness);
...
@@ -19,7 +19,7 @@ void cuda_BatchwiseMultiplicativeDropout_updateOutput(
   output_features.resize_as_(input_features);
   auto nActive = input_features.size(0);
   auto nPlanes = input_features.size(1);
-  bmd_f(input_features.data<T>(), output_features.data<T>(), noise.data<T>(),
+  bmd_f(input_features.data_ptr<T>(), output_features.data_ptr<T>(), noise.data_ptr<T>(),
         nActive, nPlanes, alpha);
 }
@@ -32,6 +32,6 @@ void cuda_BatchwiseMultiplicativeDropout_updateGradInput(
   d_input_features.resize_as_(d_output_features);
   auto nActive = input_features.size(0);
   auto nPlanes = input_features.size(1);
-  bmd_b(input_features.data<T>(), d_input_features.data<T>(),
-        d_output_features.data<T>(), noise.data<T>(), nActive, nPlanes, alpha);
+  bmd_b(input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
+        d_output_features.data_ptr<T>(), noise.data_ptr<T>(), nActive, nPlanes, alpha);
 }
@@ -38,12 +38,12 @@ double cuda_Convolution_updateOutput(
   output_features.resize_({nActiveOut, op * nGroups});
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
     else
       output_features.zero_();
@@ -76,17 +76,17 @@ void cuda_Convolution_backward(
   d_input_features.zero_();
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
                                  op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
       Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
@@ -108,12 +108,12 @@ double cuda_SubmanifoldConvolution_updateOutput(
   output_features.resize_({nActive, op * nGroups});
   if (nActive) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActive);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActive);
     else
       output_features.zero_();
@@ -143,17 +143,17 @@ void cuda_SubmanifoldConvolution_backward(
   d_input_features.zero_();
   if (nActive) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
                                  op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
      Convolution_bp_bias(doF, db, op, nActive);
     }
   }
@@ -174,12 +174,12 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
   output_features.resize_({nActive, op * nGroups});
   if (nActive) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActive);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActive);
     else
       output_features.zero_();
@@ -208,17 +208,17 @@ void cuda_PermutohedralSubmanifoldConvolution_backward(
   d_input_features.zero_();
   if (nActive) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
                                  op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
       Convolution_bp_bias(doF, db, op, nActive);
     }
   }
@@ -243,12 +243,12 @@ double cuda_FullConvolution_updateOutput(
   output_features.resize_({nActiveOut, op * nGroups});
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
     else
       output_features.zero_();
@@ -282,17 +282,17 @@ void cuda_FullConvolution_backward(
   d_input_features.zero_();
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
                                  op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
       Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
@@ -315,12 +315,12 @@ double cuda_RandomizedStrideConvolution_updateOutput(
   output_features.resize_({nActiveOut, op * nGroups});
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
     else
       output_features.zero_();
@@ -353,17 +353,17 @@ void cuda_RandomizedStrideConvolution_backward(
   d_input_features.zero_();
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
                                  op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
       Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
...
@@ -35,12 +35,12 @@ double cuda_Deconvolution_updateOutput(
   output_features.resize_({nActiveOut, op * nGroups});
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
     else
       output_features.zero_();
@@ -73,16 +73,16 @@ void cuda_Deconvolution_backward(
   d_input_features.zero_();
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dDeconvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip,
                                    ip * nGroups, op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
       Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
...
@@ -34,9 +34,9 @@ void cuda_InputLayer_updateOutput(Metadata<Dimension> &m,
     output_features.resize_({*m.inputNActive, nPlanes});
     output_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_fp<T>(iF, oF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                      mode == 4);
   }
@@ -59,9 +59,9 @@ void cuda_InputLayer_updateGradInput(
     d_input_features.resize_({rules[0][2], nPlanes});
     d_input_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_bp(diF, doF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                   mode == 4);
   }
@@ -84,9 +84,9 @@ void cuda_OutputLayer_updateOutput(Metadata<Dimension> &m,
     output_features.resize_({rules[0][2], nPlanes});
     output_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_bp(oF, iF, nRows, maxActive, nPlanes, &rules[1][0], rb, false);
   }
 }
@@ -108,9 +108,9 @@ void cuda_OutputLayer_updateGradInput(
     d_input_features.resize_({nRows, nPlanes});
     d_input_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_fp<T>(doF, diF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                      false);
   }
@@ -138,9 +138,9 @@ void cuda_BLInputLayer_updateOutput(Metadata<Dimension> &m,
     output_features.resize_({*m.inputNActive, nPlanes});
   } else {
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_fp<T>(iF, oF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                      mode == 4);
   }
@@ -165,9 +165,9 @@ void cuda_BLInputLayer_updateGradInput(
     d_input_features.resize_({rules[0][2], rules[0][3], nPlanes});
     d_input_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_bp(diF, doF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                   mode == 4);
   }
@@ -192,9 +192,9 @@ void cuda_BLOutputLayer_updateOutput(
     output_features.resize_({rules[0][2], rules[0][3], nPlanes});
     output_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_bp(oF, iF, nRows, maxActive, nPlanes, &rules[1][0], rb, false);
   }
 }
@@ -217,9 +217,9 @@ void cuda_BLOutputLayer_updateGradInput(
     d_input_features.resize_({nRows, nPlanes});
     d_input_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_fp<T>(doF, diF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                      false);
   }
...