Fix CPU IOLayers

19aca522 · Benjamin Thomas Graham · c9e5e6cd · 19aca522 · 19aca522
Commit 19aca522 authored Sep 19, 2018 by Benjamin Thomas Graham
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 9 additions and 9 deletions

sparseconvnet/SCN/CPU/IOLayers.cpp +4 -4

sparseconvnet/SCN/CUDA/RuleBookIterator.h +5 -5

No files found.
--- a/sparseconvnet/SCN/CPU/IOLayers.cpp
+++ b/sparseconvnet/SCN/CPU/IOLayers.cpp
@@ -15,10 +15,10 @@ void InputLayer_ForwardPass(T *input_features, T *output_features, Int nRows,
  Int row;
 #pragma omp parallel for private(row)
  for (row = 0; row < nRows; row++) {
-    auto nActive = rules[0];
+    auto r = rules + row * (1 + maxActive);
+    auto nActive = r[0];
    T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
    auto out_f = output_features + row * nPlanes;
-    auto r = rules + row * (1 + maxActive);
    for (Int i = 1; i <= nActive; ++i) {
      auto in_f = input_features + r[i] * nPlanes;
      for (Int plane = 0; plane < nPlanes; plane++) {
@@ -34,10 +34,10 @@ void InputLayer_BackwardPass(T *d_input_features, T *d_output_features,
  Int row;
 #pragma omp parallel for private(row)
  for (row = 0; row < nRows; row++) {
-    auto nActive = rules[0];
+    auto r = rules + row * (1 + maxActive);
+    auto nActive = r[0];
    T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
    auto d_out_f = d_output_features + row * nPlanes;
-    auto r = rules + row * (1 + maxActive);
    for (Int i = 1; i <= nActive; ++i) {
      auto d_in_f = d_input_features + r[i] * nPlanes;
      for (Int plane = 0; plane < nPlanes; plane++)

--- a/sparseconvnet/SCN/CUDA/RuleBookIterator.h
+++ b/sparseconvnet/SCN/CUDA/RuleBookIterator.h
@@ -14,16 +14,16 @@
 #define RULEBOOKITERATOR(X, Y)                                                 \
  {                                                                            \
-    Int rbMaxSize = 0;                                                        \
+    Int rbMaxSize = 0;                                                         \
    for (auto &r : _rules)                                                     \
-      rbMaxSize = std::max(rbMaxSize, (Int)r.size());                         \
+      rbMaxSize = std::max(rbMaxSize, (Int)r.size());                          \
    at::Tensor rulesBuffer = at::CUDA(at_kINT).tensor({rbMaxSize});            \
-    Int *rbB = rulesBuffer.data<Int>();                                       \
+    Int *rbB = rulesBuffer.data<Int>();                                        \
    for (int k = 0; k < _rules.size(); ++k) {                                  \
      auto &r = _rules[k];                                                     \
-      Int nHotB = r.size() / 2;                                               \
+      Int nHotB = r.size() / 2;                                                \
      if (nHotB) {                                                             \
-        cudaMemcpy(rbB, &r[0], sizeof(Int) * 2 * nHotB,                       \
+        cudaMemcpy(rbB, &r[0], sizeof(Int) * 2 * nHotB,                        \
                   cudaMemcpyHostToDevice);                                    \
        X                                                                      \
      }                                                                        \