Commit ecc6e480 authored by Benjamin Thomas Graham

fixes

parent e488fe04
@@ -112,7 +112,7 @@ def iou(stats):
         # loop over CAD models
         for k in range(len(pred)):
             p = pred[k]
-            iou_per_part[k, j] = (inter(p, gt[k], j+1) + eps) / (union(p, gt[k], j+1) + eps)
+            iou_per_part[k, j] = (inter(p, gt[k], j) + eps) / (union(p, gt[k], j) + eps)
         # average over CAD models and parts
         iou_all[i] = np.mean(iou_per_part)
     # weighted average over categories
...
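Note on the hunk above: the `+1` offset is dropped from the part-label lookup, consistent with per-category part labels now being 0-indexed in `pred` and `gt`. A minimal NumPy sketch of the smoothed per-part IoU loop (the `inter`/`union` helpers and the toy data here are illustrative assumptions, not the repo's code):

```python
import numpy as np

eps = 1e-6

def inter(p, g, part):
    # points where prediction and ground truth agree on this part label
    return np.sum((p == part) & (g == part))

def union(p, g, part):
    # points carrying this part label in either prediction or ground truth
    return np.sum((p == part) | (g == part))

pred = [np.array([0, 1, 1, 2]), np.array([0, 0, 2, 2])]  # per-model predictions
gt = [np.array([0, 1, 2, 2]), np.array([0, 0, 2, 1])]    # per-model ground truth
n_parts = 3
iou_per_part = np.zeros((len(pred), n_parts))
for j in range(n_parts):          # 0-indexed part labels, hence no j+1
    for k in range(len(pred)):
        p = pred[k]
        iou_per_part[k, j] = (inter(p, gt[k], j) + eps) / (union(p, gt[k], j) + eps)
print(np.mean(iou_per_part))      # average over CAD models and parts
```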
import numpy as np
import torch
import glob, math, os
import scipy.io
import h5py
import pickle
classes = [
'wall', 'floor', 'cabinet', 'bed',
'chair', 'sofa', 'table', 'door',
'window', 'bookshelf', 'picture', 'counter',
'blinds', 'desk', 'shelves', 'curtain',
'dresser', 'pillow', 'mirror', 'floor mat',
'clothes', 'ceiling', 'books', 'refridgerator',
'television', 'paper', 'towel', 'shower curtain',
'box', 'whiteboard', 'person', 'night stand',
'toilet', 'sink', 'lamp', 'bathtub',
'bag', 'otherstructure', 'otherfurniture', 'otherprop']
corresponding_classes_in_Silberman_labeling = [40, 40, 3, 22, 5, 40, 12, 38, 40, 40, 2, 39, 40, 40, 26, 40, 24,
40, 7, 40, 1, 40, 40, 34, 38, 29, 40, 8, 40, 40, 40, 40, 38, 40,
40, 14, 40, 38, 40, 40, 40, 15, 39, 40, 30, 40, 40, 39, 40, 39, 38,
40, 38, 40, 37, 40, 38, 38, 9, 40, 40, 38, 40, 11, 38, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 13, 40, 40, 6, 40, 23,
40, 39, 10, 16, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40,
40, 38, 40, 39, 40, 40, 40, 40, 39, 38, 40, 40, 40, 40, 40, 40, 18,
40, 40, 19, 28, 33, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 27, 36,
40, 40, 40, 40, 21, 40, 20, 35, 40, 40, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 4, 32, 40, 40, 39, 40, 39, 40, 40, 40, 40, 40, 17, 40,
40, 25, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39,
40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 38, 40, 40, 39, 40, 39,
40, 38, 39, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 38,
40, 40, 38, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
38, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 39, 40, 40, 40, 38, 40, 40, 39, 40, 40, 38, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 31, 40, 40, 40, 40, 40, 40, 40, 38, 40,
40, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 39, 40,
40, 39, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 38, 39, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
38, 39, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 38, 40, 39, 40, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
39, 39, 40, 40, 39, 39, 40, 40, 40, 40, 38, 40, 40, 38, 39, 39, 40,
39, 40, 39, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40,
38, 40, 39, 40, 40, 40, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40, 39,
39, 40, 40, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 39,
40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 39, 40, 40, 39, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 40, 40, 40,
40, 40, 40, 40, 39, 38, 39, 40, 38, 39, 40, 39, 40, 39, 40, 40, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 38, 40, 40, 39, 40, 40,
40, 39, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 40, 40, 38,
40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 40, 38, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 38, 38, 40, 40, 40, 38,
40, 40, 40, 38, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 38, 40, 38, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 39, 40, 39, 40, 40, 40, 40, 38, 38, 40, 40, 40, 38,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40,
39, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 39, 39, 40,
40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 38, 40, 39, 40, 40, 40, 40,
38, 40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40,
40, 40, 40, 40, 40, 40, 40, 39, 40, 40]
print(len(classes), len(corresponding_classes_in_Silberman_labeling))
split = scipy.io.loadmat('splits.mat')['testNdxs'] - 1  # convert to 0-indexed
testIdxs = [x for x in range(1449) if x in split]
trainIdxs = [x for x in range(1449) if x not in split]
print(len(trainIdxs), len(testIdxs))
f = h5py.File('nyu_depth_v2_labeled.mat', 'r')
for i, idx in enumerate(trainIdxs):
    tc = f.get('images')[idx]
    td = f.get('depths')[idx] * 100  # metres -> centimetres
    td -= td.mean()
    print(td.std())
    gt = np.array(f.get('labels')[idx], dtype='int16') - 1  # -1 marks unlabelled
    coords = []
    col = []
    cl = []
    for x in range(40, 600):         # crop away the image border
        for y in range(45, 470):
            if gt[x, y] >= 0:
                cl.append(corresponding_classes_in_Silberman_labeling[gt[x, y]] - 1)
                coords.append([x - 320, y - 240, td[x, y]])
                col.append([255, tc[0, x, y], tc[1, x, y], tc[2, x, y]])
    coords = np.array(coords, dtype='int16')
    col = np.array(col, dtype='uint8')
    cl = np.array(cl, dtype='int8')
    print(coords.shape, col.shape, cl.shape)
    pickle.dump([coords, col, cl], open('train' + str(i) + '.pickle', 'wb'),
                protocol=pickle.HIGHEST_PROTOCOL)
f = h5py.File('nyu_depth_v2_labeled.mat', 'r')
for i, idx in enumerate(testIdxs):
    tc = f.get('images')[idx]
    td = f.get('depths')[idx] * 100  # metres -> centimetres
    td -= td.mean()
    print(td.std())
    gt = np.array(f.get('labels')[idx], dtype='int16') - 1  # -1 marks unlabelled
    coords = []
    col = []
    cl = []
    for x in range(40, 600):         # crop away the image border
        for y in range(45, 470):
            if gt[x, y] >= 0:
                cl.append(corresponding_classes_in_Silberman_labeling[gt[x, y]] - 1)
                coords.append([x - 320, y - 240, td[x, y]])
                col.append([255, tc[0, x, y], tc[1, x, y], tc[2, x, y]])
    coords = np.array(coords, dtype='int16')
    col = np.array(col, dtype='uint8')
    cl = np.array(cl, dtype='int8')
    print(coords.shape, col.shape, cl.shape)
    pickle.dump([coords, col, cl], open('test' + str(i) + '.pickle', 'wb'),
                protocol=pickle.HIGHEST_PROTOCOL)
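To sanity-check the files this script writes, a short sketch that reloads one shard (assuming `train0.pickle` exists in the working directory):

```python
import pickle

import numpy as np

with open('train0.pickle', 'rb') as fh:
    coords, col, cl = pickle.load(fh)

print(coords.shape, col.shape, cl.shape)  # (N, 3), (N, 4), (N,)
assert coords.dtype == np.int16 and col.dtype == np.uint8 and cl.dtype == np.int8
assert coords.shape[0] == col.shape[0] == cl.shape[0]
assert 0 <= cl.min() and cl.max() <= 39   # 40-class labels after remapping
```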
@@ -32,11 +32,11 @@ double cuda_Convolution_updateOutput(
   auto _rules =
       m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
   Int nActiveOut = m.getNActive(outputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  output_features.resize_({nActiveOut, op});
   if (nActiveOut) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    output_features.resize_({nActiveOut, op});
     auto iF = input_features.data<T>();
     auto oF = output_features.data<T>();
     auto w = weight.data<T>();
@@ -67,12 +67,12 @@ void cuda_Convolution_backward(
       m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
   Int nActiveIn = m.getNActive(inputSize);
   Int nActiveOut = m.getNActive(outputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  d_input_features.resize_({nActiveIn, ip});
+  d_input_features.zero_();
   if (nActiveOut) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    d_input_features.resize_({nActiveIn, ip});
-    d_input_features.zero_();
     auto iF = input_features.data<T>();
     auto diF = d_input_features.data<T>();
     auto doF = d_output_features.data<T>();
@@ -98,11 +98,11 @@ double cuda_SubmanifoldConvolution_updateOutput(
   auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
   Int nActive = m.getNActive(inputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  output_features.resize_({nActive, op});
   if (nActive) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    output_features.resize_({nActive, op});
     auto iF = input_features.data<T>();
     auto oF = output_features.data<T>();
     auto w = weight.data<T>();
@@ -130,12 +130,12 @@ void cuda_SubmanifoldConvolution_backward(
   auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
   Int nActive = m.getNActive(inputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  d_input_features.resize_({nActive, ip});
+  d_input_features.zero_();
   if (nActive) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    d_input_features.resize_({nActive, ip});
-    d_input_features.zero_();
     auto iF = input_features.data<T>();
     auto diF = d_input_features.data<T>();
     auto doF = d_output_features.data<T>();
@@ -160,11 +160,11 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
   auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
   Int nActive = m.getNActive(inputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  output_features.resize_({nActive, op});
   if (nActive) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    output_features.resize_({nActive, op});
     auto iF = input_features.data<T>();
     auto oF = output_features.data<T>();
     auto w = weight.data<T>();
@@ -191,12 +191,12 @@ void cuda_PermutohedralSubmanifoldConvolution_backward(
   auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
   Int nActive = m.getNActive(inputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  d_input_features.resize_({nActive, ip});
+  d_input_features.zero_();
   if (nActive) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    d_input_features.resize_({nActive, ip});
-    d_input_features.zero_();
     auto iF = input_features.data<T>();
     auto diF = d_input_features.data<T>();
     auto doF = d_output_features.data<T>();
@@ -225,10 +225,11 @@ double cuda_FullConvolution_updateOutput(
   auto _rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
                                                filterSize, filterStride, mOut);
   Int nActiveOut = mOut.getNActive(outputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  output_features.resize_({nActiveOut, op});
   if (nActiveOut) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    output_features.resize_({nActiveOut, op});
     auto iF = input_features.data<T>();
     auto oF = output_features.data<T>();
     auto w = weight.data<T>();
@@ -260,12 +261,12 @@ void cuda_FullConvolution_backward(
                                                filterSize, filterStride, mOut);
   Int nActiveIn = mIn.getNActive(inputSize);
   Int nActiveOut = mOut.getNActive(outputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  d_input_features.resize_({nActiveIn, ip});
+  d_input_features.zero_();
   if (nActiveOut) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    d_input_features.resize_({nActiveIn, ip});
-    d_input_features.zero_();
     auto iF = input_features.data<T>();
     auto diF = d_input_features.data<T>();
     auto doF = d_output_features.data<T>();
@@ -292,11 +293,11 @@ double cuda_RandomizedStrideConvolution_updateOutput(
   auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
                                               filterStride, true);
   Int nActiveOut = m.getNActive(outputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  output_features.resize_({nActiveOut, op});
   if (nActiveOut) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    output_features.resize_({nActiveOut, op});
     auto iF = input_features.data<T>();
     auto oF = output_features.data<T>();
     auto w = weight.data<T>();
@@ -327,12 +328,12 @@ void cuda_RandomizedStrideConvolution_backward(
                                               filterStride, true);
   Int nActiveIn = m.getNActive(inputSize);
   Int nActiveOut = m.getNActive(outputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  d_input_features.resize_({nActiveIn, ip});
+  d_input_features.zero_();
   if (nActiveOut) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    d_input_features.resize_({nActiveIn, ip});
-    d_input_features.zero_();
     auto iF = input_features.data<T>();
     auto diF = d_input_features.data<T>();
     auto doF = d_output_features.data<T>();
...
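All of the hunks in this file make the same change: the output (or gradient) tensor is resized before the `if (nActive...)` guard rather than inside it, so a batch with zero active sites still yields a tensor with a well-defined `(0, op)` shape instead of whatever shape the buffer previously had. The deconvolution kernels below receive the identical treatment. A PyTorch sketch of the failure mode being avoided (illustrative shapes, not the library's API):

```python
import torch

op = 16                  # output feature planes
nActiveOut = 0           # this batch produced no active output sites
out = torch.empty(7, 3)  # buffer still carries a stale shape

# Old behaviour: resize_ was skipped when nActiveOut == 0, so `out`
# kept shape (7, 3). New behaviour: resize unconditionally.
out.resize_(nActiveOut, op)
print(out.shape)  # torch.Size([0, 16])

# Downstream ops now see a consistent channel count even for empty batches.
print(torch.cat([out, torch.zeros(5, op)]).shape)  # torch.Size([5, 16])
```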
@@ -28,11 +28,11 @@ double cuda_Deconvolution_updateOutput(
   auto _rules =
       m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
   Int nActiveOut = m.getNActive(outputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  output_features.resize_({nActiveOut, op});
   if (nActiveOut) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    output_features.resize_({nActiveOut, op});
     auto iF = input_features.data<T>();
     auto oF = output_features.data<T>();
     auto w = weight.data<T>();
@@ -63,12 +63,12 @@ void cuda_Deconvolution_backward(
       m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
   Int nActiveIn = m.getNActive(inputSize);
   Int nActiveOut = m.getNActive(outputSize);
+  Int ip = weight.size(1);
+  Int op = weight.size(2);
+  d_input_features.resize_({nActiveIn, ip});
+  d_input_features.zero_();
   if (nActiveOut) {
-    Int ip = weight.size(1);
-    Int op = weight.size(2);
-    d_input_features.resize_({nActiveIn, ip});
-    d_input_features.zero_();
     auto iF = input_features.data<T>();
     auto diF = d_input_features.data<T>();
     auto doF = d_output_features.data<T>();
...
@@ -7,7 +7,6 @@
 #ifndef INPUTLAYER_H
 #define INPUTLAYER_H
-
 // Rulebook Format
 // rules[0][0] == mode
 // rules[0][1] == maxActive per spatial location (==1 for modes 0,1,2)
@@ -169,75 +168,119 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
     return;
   }
-  // Compile list of how input rows correspond to output rows
-  std::vector<std::vector<std::vector<Int>>> outputRows(batchSize);
-  std::vector<Int> nActives(batchSize);
-#pragma omp parallel for private(I)
-  for (I = 0; I < batchSize; I++) {
-    auto &sg = SGs[I];
-    auto &ors = outputRows[I];
-    auto &nAct = nActives[I];
-    auto c = coords + I * length * dimension;
-    Int i = I * length;
-    Point<dimension> p;
-    for (Int l = 0; l < length; ++l, ++i) {
-      for (Int j = 0; j < dimension; ++j)
-        p[j] = *c++;
-      if (p[0] >= 0) {
-        auto iter = sg.mp.find(p);
-        if (iter == sg.mp.end()) {
-          sg.mp[p] = nAct++;
-          ors.resize(nAct);
-        }
-        ors[sg.mp[p]].push_back(i);
-      }
-    }
-  }
-  for (I = 0; I < batchSize; I++) {
-    SGs[I].ctr = nActive;
-    nActive += nActives[I];
-  }
-  Int maxActive = 1;
-  if (mode >= 3)
-    for (auto &ors : outputRows)
-      for (auto &row : ors)
-        maxActive = std::max(maxActive, (Int)row.size());
-  rules.resize(2);
-  rules[0].push_back(mode);
-  rules[0].push_back(maxActive);
-  rules[0].push_back(batchSize);
-  rules[0].push_back(length);
-  rules[0].push_back(nActive);
-  auto &rule = rules[1];
-  if (mode == 1) {
-    rule.resize(2 * nActive);
-#pragma omp parallel for private(I)
-    for (I = 0; I < batchSize; I++) {
-      auto &ors = outputRows[I];
-      auto rr = &rule[SGs[I].ctr * 2];
-      for (auto &row : ors) {
-        rr[0] = row.size();
-        rr[1] = row.back();
-        rr += 2;
-      }
-    }
-  }
-  if (mode == 2) {
-    rule.resize(2 * nActive);
-#pragma omp parallel for private(I)
-    for (I = 0; I < batchSize; I++) {
-      auto &ors = outputRows[I];
-      auto rr = &rule[SGs[I].ctr * 2];
-      for (auto &row : ors) {
-        rr[0] = row.size();
-        rr[1] = row.front();
-        rr += 2;
-      }
-    }
-  }
-  if (mode == 3 or mode == 4) {
+  if (mode <= 2) {
+    // Compile list of how input rows correspond to output rows
+    std::vector<std::vector<Int>> outputRows(batchSize);
+    std::vector<Int> nActives(batchSize);
+#pragma omp parallel for private(I)
+    for (I = 0; I < batchSize; I++) {
+      auto &sg = SGs[I];
+      auto &ors = outputRows[I];
+      auto &nAct = nActives[I];
+      auto c = coords + I * length * dimension;
+      Int i = I * length;
+      Point<dimension> p;
+      if (mode == 1) {
+        for (Int l = 0; l < length; ++l, ++i) {
+          for (Int j = 0; j < dimension; ++j)
+            p[j] = *c++;
+          if (p[0] >= 0) {
+            auto iter = sg.mp.find(p);
+            if (iter == sg.mp.end()) {
+              sg.mp[p] = nAct++;
+              ors.push_back(i);
+            } else {
+              ors[sg.mp[p]] = i;
+            }
+          }
+        }
+      }
+      if (mode == 2) {
+        for (Int l = 0; l < length; ++l, ++i) {
+          for (Int j = 0; j < dimension; ++j)
+            p[j] = *c++;
+          if (p[0] >= 0) {
+            auto iter = sg.mp.find(p);
+            if (iter == sg.mp.end()) {
+              sg.mp[p] = nAct++;
+              ors.push_back(i);
+            }
+          }
+        }
+      }
+    }
+    for (I = 0; I < batchSize; I++) {
+      SGs[I].ctr = nActive;
+      nActive += nActives[I];
+    }
+    Int maxActive = 1;
+    rules.resize(2);
+    rules[0].push_back(mode);
+    rules[0].push_back(maxActive);
+    rules[0].push_back(batchSize);
+    rules[0].push_back(length);
+    rules[0].push_back(nActive);
+    auto &rule = rules[1];
+    if (mode == 1) {
+      rule.resize(2 * nActive);
+#pragma omp parallel for private(I)
+      for (I = 0; I < batchSize; I++) {
+        auto &ors = outputRows[I];
+        auto rr = &rule[SGs[I].ctr * 2];
+        for (auto &row : ors) {
+          rr[0] = 1;
+          rr[1] = row;
+          rr += 2;
+        }
+      }
+    }
+    return;
+  }
+  if (mode == 3 or mode == 4) {
+    // Compile list of how input rows correspond to output rows
+    std::vector<std::vector<std::vector<Int>>> outputRows(batchSize);
+    std::vector<Int> nActives(batchSize);
+#pragma omp parallel for private(I)
+    for (I = 0; I < batchSize; I++) {
+      auto &sg = SGs[I];
+      auto &ors = outputRows[I];
+      auto &nAct = nActives[I];
+      auto c = coords + I * length * dimension;
+      Int i = I * length;
+      Point<dimension> p;
+      for (Int l = 0; l < length; ++l, ++i) {
+        for (Int j = 0; j < dimension; ++j)
+          p[j] = *c++;
+        if (p[0] >= 0) {
+          auto iter = sg.mp.find(p);
+          if (iter == sg.mp.end()) {
+            sg.mp[p] = nAct++;
+            ors.resize(nAct);
+          }
+          ors[sg.mp[p]].push_back(i);
+        }
+      }
+    }
+    for (I = 0; I < batchSize; I++) {
+      SGs[I].ctr = nActive;
+      nActive += nActives[I];
+    }
+    Int maxActive = 1;
+    if (mode >= 3)
+      for (auto &ors : outputRows)
+        for (auto &row : ors)
+          maxActive = std::max(maxActive, (Int)row.size());
+    rules.resize(2);
+    rules[0].push_back(mode);
+    rules[0].push_back(maxActive);
+    rules[0].push_back(batchSize);
+    rules[0].push_back(length);
+    rules[0].push_back(nActive);
+    auto &rule = rules[1];
     rule.resize((maxActive + 1) * nActive);
 #pragma omp parallel for private(I)
     for (I = 0; I < batchSize; I++) {
...
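The restructuring above separates rulebook construction by mode: for modes 1 and 2 each active spatial location now keeps exactly one input row (the last or the first duplicate seen, respectively), so every rule entry is `(1, row)` and the per-site vectors-of-rows disappear; modes 3 and 4 keep the gather-everything behaviour and a true `maxActive`. A small Python sketch of the three de-duplication policies (hypothetical data, not the repo's data structures):

```python
from collections import OrderedDict

rows_by_site = OrderedDict()                  # location -> input rows hitting it
for row, site in enumerate(['a', 'b', 'a', 'c', 'b', 'a']):
    rows_by_site.setdefault(site, []).append(row)

mode1 = [(1, rows[-1]) for rows in rows_by_site.values()]       # keep last row
mode2 = [(1, rows[0]) for rows in rows_by_site.values()]        # keep first row
mode34 = [(len(rows), rows) for rows in rows_by_site.values()]  # pool all rows
print(mode1)   # [(1, 5), (1, 4), (1, 3)]
print(mode2)   # [(1, 0), (1, 1), (1, 3)]
print(mode34)  # [(3, [0, 2, 5]), (2, [1, 4]), (1, [3])]
```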
@@ -255,29 +255,28 @@ void Metadata<dimension>::appendMetadata(Metadata<dimension> &mAdd,
 template <Int dimension>
 std::vector<at::Tensor>
-Metadata<dimension>::sparsifyCompare(Metadata<dimension> &mReference,
+Metadata<dimension>::sparsifyCompare(Metadata<dimension> &mGT,
                                      /*long*/ at::Tensor spatialSize) {
   auto p = LongTensorToPoint<dimension>(spatialSize);
   at::Tensor gt = torch::zeros({nActive[p]}, at::kByte);
-  at::Tensor ref_map = torch::empty({mReference.nActive[p]}, at::kLong);
-  auto gtPtr = (signed char *)gt.data_ptr(); //<signed char>();
-  // auto gtPtr = gt.data<signed char>();
-  auto &sgsReference = mReference.grids[p];
+  at::Tensor ref_map = torch::/*empty*/ zeros({mGT.nActive[p]}, at::kLong);
+  long *ref_map_ptr = ref_map.data<long>();
+  unsigned char *gt_ptr = gt.data<unsigned char>();
+  auto &sgsGT = mGT.grids[p];
   auto &sgsFull = grids[p];
   Int batchSize = sgsFull.size();
   Int sample;
 #pragma omp parallel for private(sample)
   for (sample = 0; sample < (Int)batchSize; ++sample) {
-    auto &sgReference = sgsReference[sample];
+    auto &sgGT = sgsGT[sample];
     auto &sgFull = sgsFull[sample];
-    for (auto const &iter : sgFull.mp) {
-      bool gt_ = sgReference.mp.find(iter.first) != sgReference.mp.end();
-      if (gt_) {
-        ref_map[sgReference.mp[iter.first] + sgReference.ctr] =
-            iter.second + sgFull.ctr;
-        gtPtr[iter.second + sgFull.ctr] = +1;
-      }
+    for (auto const &iter : sgGT.mp) {
+      auto f = sgFull.mp.find(iter.first);
+      if (f == sgFull.mp.end())
+        std::cout << __FILE__ << ":" << __LINE__ << std::endl;
+      ref_map_ptr[iter.second + sgGT.ctr] = f->second + sgFull.ctr;
+      gt_ptr[f->second + sgFull.ctr] = +1;
     }
   }
   return {gt, ref_map};
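The rewritten `sparsifyCompare` iterates over the ground-truth grid instead of the full grid, writes through raw data pointers rather than element-wise tensor indexing, and prints a `__FILE__:__LINE__` diagnostic if a ground-truth site is missing from the full grid. A dict-based sketch of the two outputs it fills (stand-in data, not the real sparse-grid types):

```python
full = {'a': 0, 'b': 1, 'c': 2}      # site -> row index in the full grid
ground_truth = {'a': 0, 'c': 1}      # site -> row index in the sparsified grid

gt = [0] * len(full)                 # 1 where a full-grid row survives
ref_map = [0] * len(ground_truth)    # GT row -> corresponding full-grid row
for site, gt_row in ground_truth.items():
    full_row = full.get(site)
    if full_row is None:
        print('site missing from full grid:', site)  # C++ logs __FILE__:__LINE__
        continue
    ref_map[gt_row] = full_row
    gt[full_row] = 1
print(gt, ref_map)                   # [1, 0, 1] [0, 2]
```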
@@ -561,6 +560,19 @@ RuleBook &Metadata<dimension>::getRandomizedStrideRuleBook(
   return rb;
 }
 
+at::Tensor vvl2t(std::vector<std::vector<long>> v) {
+  long s = 0;
+  for (auto &x : v)
+    s += x.size();
+  at::Tensor t = torch::empty({s}, at::CPU(at::kLong));
+  long *p = t.data<long>();
+  for (auto &x : v) {
+    std::memcpy(p, &x[0], x.size() * sizeof(long));
+    p += x.size();
+  }
+  return t;
+}
+
 template <Int dimension>
 std::vector<at::Tensor>
 Metadata<dimension>::compareSparseHelper(Metadata<dimension> &mR,
@@ -568,33 +580,44 @@ Metadata<dimension>::compareSparseHelper(Metadata<dimension> &mR,
   auto p = LongTensorToPoint<dimension>(spatialSize);
   auto &sgsL = grids[p];
   auto &sgsR = mR.grids[p];
-  std::vector<long> cL, cR, L, R;
-  for (Int sample = 0; sample < (Int)sgsL.size(); ++sample) {
+  Int bs = sgsL.size(), sample;
+  std::vector<std::vector<long>> cL(bs), cR(bs), L(bs), R(bs);
+#pragma omp parallel for private(sample)
+  for (sample = 0; sample < bs; ++sample) {
     auto &sgL = sgsL[sample];
     auto &sgR = sgsR[sample];
+    auto &cLs = cL[sample];
+    auto &cRs = cR[sample];
+    auto &Ls = L[sample];
+    auto &Rs = R[sample];
     for (auto const &iter : sgL.mp) {
       if (sgR.mp.find(iter.first) == sgR.mp.end()) {
-        L.push_back(sgL.mp[iter.first] + sgL.ctr);
+        Ls.push_back(sgL.mp[iter.first] + sgL.ctr);
       } else {
-        cL.push_back(sgL.mp[iter.first] + sgL.ctr);
-        cR.push_back(sgR.mp[iter.first] + sgR.ctr);
+        cLs.push_back(sgL.mp[iter.first] + sgL.ctr);
+        cRs.push_back(sgR.mp[iter.first] + sgR.ctr);
       }
     }
     for (auto const &iter : sgR.mp) {
       if (sgL.mp.find(iter.first) == sgL.mp.end()) {
-        R.push_back(sgR.mp[iter.first] + sgR.ctr);
+        Rs.push_back(sgR.mp[iter.first] + sgR.ctr);
       }
     }
   }
-  at::Tensor cL_ = torch::empty({(long)cL.size()}, at::CPU(at::kLong));
-  std::memcpy(cL_.data<long>(), &cL[0], cL.size() * sizeof(long));
-  at::Tensor cR_ = torch::empty({(long)cR.size()}, at::CPU(at::kLong));
-  std::memcpy(cR_.data<long>(), &cR[0], cR.size() * sizeof(long));
-  at::Tensor L_ = torch::empty({(long)L.size()}, at::CPU(at::kLong));
-  std::memcpy(L_.data<long>(), &L[0], L.size() * sizeof(long));
-  at::Tensor R_ = torch::empty({(long)R.size()}, at::CPU(at::kLong));
-  std::memcpy(R_.data<long>(), &R[0], R.size() * sizeof(long));
-  return {cL_, cR_, L_, R_};
+  return {vvl2t(cL), vvl2t(cR), vvl2t(L), vvl2t(R)};
+}
+
+at::Tensor vvl2t_(std::vector<std::vector<Int>> v) {
+  long s = 0;
+  for (auto &x : v)
+    s += x.size();
+  at::Tensor t = torch::empty({s}, at::CPU(at_kINT));
+  Int *p = t.data<Int>();
+  for (auto &x : v) {
+    std::memcpy(p, &x[0], x.size() * sizeof(Int));
+    p += x.size();
+  }
+  return t;
 }
 
 template <Int dimension> Int volume(long *point) {
...
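The new `vvl2t` helper concatenates per-sample `std::vector<long>` buckets into a single flat 1-D tensor, which is what lets `compareSparseHelper` fill the buckets in an OpenMP loop and merge once at the end. A Python analogue of the flattening step (an analogy, not the repo's interface):

```python
import torch

def vvl2t(buckets):
    # flatten a list of per-sample integer lists into one 1-D long tensor
    if not buckets:
        return torch.empty(0, dtype=torch.long)
    return torch.cat([torch.tensor(b, dtype=torch.long) for b in buckets])

print(vvl2t([[3, 1], [], [4, 1, 5]]))  # tensor([3, 1, 4, 1, 5])
```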
-# Copyright 2g016-present, Facebook, Inc.
+# Copyright 2016-present, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
...
@@ -36,10 +36,9 @@ class SparseConvNetTensor(object):
         self.features = self.features.cpu()
         return self
 
-    def set_(self):
-        self.features.set_(self.features.storage_type()())
-        self.metadata.set_()
-        self.spatialSize = None
+    @property
+    def requires_grad(self):
+        return self.features.requires_grad
 
     def __repr__(self):
         sl = self.get_spatial_locations() if self.metadata else None
@@ -50,11 +49,3 @@ class SparseConvNetTensor(object):
         ',batch_locations.shape=' + repr(sl.shape if self.metadata else None) + \
         ',spatial size=' + repr(self.spatial_size) + \
         '>>'
-
-    def to_variable(self, requires_grad=False, volatile=False):
-        "Convert self.features to a variable for use with modern PyTorch interface."
-        self.features = Variable(
-            self.features,
-            requires_grad=requires_grad,
-            volatile=volatile)
-        return self
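Dropping `to_variable` and exposing `requires_grad` as a read-only property tracks PyTorch 0.4, where `Variable` was merged into `Tensor`: the features no longer need wrapping, and autograd state can be read straight off the tensor. A minimal sketch of the delegation pattern (a simplified container, not the full class):

```python
import torch

class FeatureHolder:
    """Toy stand-in for a tensor-wrapping container."""

    def __init__(self, features):
        self.features = features

    @property
    def requires_grad(self):
        # delegate autograd state to the wrapped tensor
        return self.features.requires_grad

holder = FeatureHolder(torch.zeros(4, 2, requires_grad=True))
print(holder.requires_grad)  # True
```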