"vscode:/vscode.git/clone" did not exist on "2d39ded64cbf3025b6ced809fd2a3e50bf1fb72d"
Commit c5070f09 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

nyu depth v2 example

parent 0fff2951
3D Semantic Segmentation with Submanifold Sparse Convolutional Networks, CVPR 2018
SSCN-FCN A (k=1) network
Download the labeled dataset nyu_depth_v2_labeled.mat (2.8 GB) from http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html
and the train/test split file splits.mat (2.6 kB) from http://cs.nyu.edu/~silberman/projects/indoor_scene_seg_sup.html; place both in data/, then run data/prepare_data.py.
import numpy as np
import torch
import glob, math, os
import scipy.io
import h5py
import pickle
# Names of the 40 target semantic classes, in label order
# (zero-based class id = position in this list).
# Note: 'refridgerator' is the dataset's own spelling -- do not "fix" it.
classes = [
    'wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
    'window', 'bookshelf', 'picture', 'counter', 'blinds', 'desk',
    'shelves', 'curtain', 'dresser', 'pillow', 'mirror', 'floor mat',
    'clothes', 'ceiling', 'books', 'refridgerator', 'television', 'paper',
    'towel', 'shower curtain', 'box', 'whiteboard', 'person', 'night stand',
    'toilet', 'sink', 'lamp', 'bathtub', 'bag', 'otherstructure',
    'otherfurniture', 'otherprop']
# Lookup table mapping each raw Silberman NYU label id to one of the 40
# classes above: entry [raw_label - 1] is the 1-based 40-class id
# (see the usage below: corresponding_classes_in_Silberman_labeling[gt-1]-1).
# Value 40 ('otherprop') acts as the catch-all for rare categories.
corresponding_classes_in_Silberman_labeling = [40, 40, 3, 22, 5, 40, 12, 38, 40, 40, 2, 39, 40, 40, 26, 40, 24,
40, 7, 40, 1, 40, 40, 34, 38, 29, 40, 8, 40, 40, 40, 40, 38, 40,
40, 14, 40, 38, 40, 40, 40, 15, 39, 40, 30, 40, 40, 39, 40, 39, 38,
40, 38, 40, 37, 40, 38, 38, 9, 40, 40, 38, 40, 11, 38, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 13, 40, 40, 6, 40, 23,
40, 39, 10, 16, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40,
40, 38, 40, 39, 40, 40, 40, 40, 39, 38, 40, 40, 40, 40, 40, 40, 18,
40, 40, 19, 28, 33, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 27, 36,
40, 40, 40, 40, 21, 40, 20, 35, 40, 40, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 4, 32, 40, 40, 39, 40, 39, 40, 40, 40, 40, 40, 17, 40,
40, 25, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39,
40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 38, 40, 40, 39, 40, 39,
40, 38, 39, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 38,
40, 40, 38, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
38, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 39, 40, 40, 40, 38, 40, 40, 39, 40, 40, 38, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 31, 40, 40, 40, 40, 40, 40, 40, 38, 40,
40, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 39, 40,
40, 39, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 38, 39, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
38, 39, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 38, 40, 39, 40, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
39, 39, 40, 40, 39, 39, 40, 40, 40, 40, 38, 40, 40, 38, 39, 39, 40,
39, 40, 39, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40,
38, 40, 39, 40, 40, 40, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40, 39,
39, 40, 40, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 39,
40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 39, 40, 40, 39, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 40, 40, 40,
40, 40, 40, 40, 39, 38, 39, 40, 38, 39, 40, 39, 40, 39, 40, 40, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 38, 40, 40, 39, 40, 40,
40, 39, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 40, 40, 38,
40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 40, 38, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 38, 38, 40, 40, 40, 38,
40, 40, 40, 38, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 38, 40, 38, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 39, 40, 39, 40, 40, 40, 40, 38, 38, 40, 40, 40, 38,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40,
39, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 39, 39, 40,
40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 38, 40, 39, 40, 40, 40, 40,
38, 40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40,
40, 40, 40, 40, 40, 40, 40, 39, 40, 40]
print(len(classes),len(corresponding_classes_in_Silberman_labeling))
# splits.mat stores 1-based MATLAB indices of the official test images;
# subtract 1 to get 0-based indices.
split = scipy.io.loadmat('splits.mat')['testNdxs'] - 1  # 0-index
# Build a set once for O(1) membership tests; the original `x in split`
# scanned the whole numpy array for every one of the 1449 candidates.
test_index_set = {int(v) for v in np.asarray(split).ravel()}
testIdxs = [x for x in range(1449) if x in test_index_set]
trainIdxs = [x for x in range(1449) if x not in test_index_set]
print(len(trainIdxs), len(testIdxs))
# Convert each training image to a point cloud:
#   coords: (x-320, y-240, mean-centred depth in cm), int16
#   col:    (255, r, g, b) per point, uint8 -- constant 255 first channel
#   cl:     0-based 40-class label, or -100 for unlabelled pixels
# The crop range [40,600) x [45,470) drops the white border of the images.
# Fix: the inner pixel loop previously reused `x`, shadowing the outer
# dataset index; the outer index is now `idx`.
f = h5py.File('nyu_depth_v2_labeled.mat', 'r')
for i, idx in enumerate(trainIdxs):
    print(i, idx)
    tc = f.get('images')[idx]
    td = f.get('depths')[idx] * 100  # metres -> centimetres (presumably; confirm units)
    td -= td.mean()
    gt = np.array(f.get('labels')[idx], dtype='int16')
    coords = []
    col = []
    cl = []
    for x in range(40, 600):
        for y in range(45, 470):
            cl.append(corresponding_classes_in_Silberman_labeling[gt[x, y] - 1] - 1
                      if gt[x, y] >= 1 else -100)
            coords.append([x - 320, y - 240, td[x, y]])
            col.append([255, tc[0, x, y], tc[1, x, y], tc[2, x, y]])
    coords = np.array(coords, dtype='int16')
    col = np.array(col, dtype='uint8')
    cl = np.array(cl, dtype='int8')
    torch.save([coords, col, cl], 'train' + str(i) + '.pth')
# Same conversion for the test split; see the training loop above.
# Fix: renamed the outer dataset index to `idx` so the inner pixel loop's
# `x` no longer shadows it.
f = h5py.File('nyu_depth_v2_labeled.mat', 'r')
for i, idx in enumerate(testIdxs):
    print(i, idx)
    tc = f.get('images')[idx]
    td = f.get('depths')[idx] * 100  # metres -> centimetres (presumably; confirm units)
    td -= td.mean()
    gt = np.array(f.get('labels')[idx], dtype='int16')
    coords = []
    col = []
    cl = []
    for x in range(40, 600):
        for y in range(45, 470):
            cl.append(corresponding_classes_in_Silberman_labeling[gt[x, y] - 1] - 1
                      if gt[x, y] >= 1 else -100)
            coords.append([x - 320, y - 240, td[x, y]])
            col.append([255, tc[0, x, y], tc[1, x, y], tc[2, x, y]])
    coords = np.array(coords, dtype='int16')
    col = np.array(col, dtype='uint8')
    cl = np.array(cl, dtype='int8')
    torch.save([coords, col, cl], 'test' + str(i) + '.pth')
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch, torch.nn.functional as F, torch.utils.data
import sparseconvnet as scn
import time, os, sys, glob, math
import numpy as np
# ---- hyper-parameters and dataset loading ----
downscale=2           # coordinate downscaling factor applied in the merge fns
trainBatchSize=2
testBatchSize=2
testReps=1 # Assume testBatchSize is a multiple of testReps
spatialSize=torch.LongTensor([65536,65536,65536])  # voxel grid extent per axis
nClasses=40
# Preprocessed tensors written by data/prepare_data.py:
# [coords int16, colours uint8 (first channel constant 255), labels int8].
train_data=[torch.load('data/train%d.pth'%i) for i in range(795)]
test_data=[torch.load('data/test%d.pth'%i) for i in range(654)]
for x in train_data+test_data:
    x[0]=torch.from_numpy(x[0]).float()
    x[1]=torch.from_numpy(x[1]).float()/127.5-1  # scale colours from [0,255] to [-1,1]
    x[2]=torch.from_numpy(x[2]).long()
print(len(train_data),len(test_data))
if testReps>1:
    # Repeat each test batch's samples testReps times in sequence so that
    # the stochastic test-time passes over one sample land in the same
    # batch and can be averaged (see the evaluation loop below).
    test_data=[test_data[x] for i in range(0,654,testBatchSize//testReps) for _ in range(testReps) for x in range(i,min(i+testBatchSize//testReps,654))]
def train_merge(tbl):
    """Collate a list of (coords, rgb, labels) samples into one batch.

    Training-time augmentation per sample: a small random rotation in the
    x/z plane, division by `downscale`, random affine jitter, a random
    mirror of the first axis, and a random global translation centred on
    32768 (the middle of the 65536^3 grid).
    """
    torch.set_num_threads(1)
    batch_locations, batch_features, batch_targets = [], [], []
    for coords, irgb, targets in tbl:
        transform = torch.eye(3)
        theta = torch.rand(1).item() * 0.2 - 0.1  # rotation angle in [-0.1, 0.1] rad
        sin_t, cos_t = math.sin(theta), math.cos(theta)
        transform[0, 0] = cos_t
        transform[0, 2] = sin_t
        transform[2, 0] = -sin_t
        transform[2, 2] = cos_t
        transform /= downscale
        transform += torch.FloatTensor(3, 3).uniform_(-0.05, 0.05)
        if torch.rand(1).item() < 0.5:
            transform[:, 0] *= -1  # random mirror along the first axis
        coords = torch.matmul(coords, transform)
        coords += torch.rand(3) * 24000 - 12000 + 32768
        batch_locations.append(coords.long())
        batch_features.append(irgb)
        batch_targets.append(targets)
    return (scn.batch_location_tensors(batch_locations),
            torch.cat(batch_features, 0),
            torch.cat(batch_targets, 0))
def test_merge(tbl):
    """Collate a list of (coords, rgb, labels) samples into one batch.

    Test-time transform: division by `downscale`, a random mirror of the
    first axis, and a random global translation centred on 32768.
    NOTE: this is still stochastic; with testReps > 1 the evaluation loop
    averages predictions over repeated passes.
    """
    torch.set_num_threads(1)
    batch_locations, batch_features, batch_targets = [], [], []
    for coords, irgb, targets in tbl:
        transform = torch.eye(3) / downscale
        if torch.rand(1).item() < 0.5:
            transform[:, 0] *= -1  # random mirror along the first axis
        coords = torch.matmul(coords, transform)
        coords += torch.rand(3) * 24000 - 12000 + 32768
        batch_locations.append(coords.long())
        batch_features.append(irgb)
        batch_targets.append(targets)
    return (scn.batch_location_tensors(batch_locations),
            torch.cat(batch_features, 0),
            torch.cat(batch_targets, 0))
# DataLoaders use the merge functions above as collate_fn; 16 worker
# processes each. Training drops the last incomplete batch, testing keeps it.
trainIterator=torch.utils.data.DataLoader(train_data,collate_fn=train_merge,shuffle=True,num_workers=16,drop_last=True,batch_size=trainBatchSize)
testIterator=torch.utils.data.DataLoader(test_data,collate_fn=test_merge,shuffle=False,num_workers=16,drop_last=False,batch_size=testBatchSize)
def ShrinkScatterC22l(dimension,nPlanes,nClasses,reps,depth=4):
    # Recursively builds a U-Net-style sparse FCN: at every depth a 1x1
    # classifier branch contributes class scores that are summed (AddTable)
    # with the upsampled result of the next-coarser level, so the network
    # output already accumulates per-depth softmax inputs.
    def l(x):
        # Channel growth rule: +nPlanes per level of descent.
        return x+nPlanes
    def foo(nPlanes):
        # `reps` repetitions of BatchNorm+ReLU followed by a size-3
        # submanifold convolution (keeps the active-site pattern fixed).
        m=scn.Sequential()
        for _ in range(reps):
            m.add(scn.BatchNormReLU(nPlanes))
            m.add(scn.SubmanifoldConvolution(dimension, nPlanes, nPlanes, 3, False))
        return m
    def bar(nPlanes,bias):
        m=scn.Sequential()
        m.add(scn.BatchNormReLU(nPlanes))
        m.add(scn.NetworkInNetwork(nPlanes,nClasses,bias)) # accumulate softmax input; only the deepest level (depth==1) carries biases
        return m
    def baz(depth,nPlanes):
        # depth==1: plain conv block + biased classifier.
        # depth>1:  conv block, then (classifier) + (downsample-by-2 ->
        #           recurse -> unpool-by-2), summed.
        if depth==1:
            return scn.Sequential().add(foo(nPlanes)).add(bar(nPlanes,True))
        else:
            return scn.Sequential().add(foo(nPlanes)).add(scn.ConcatTable().add(bar(nPlanes,False)).add(
                scn.Sequential()\
                .add(scn.BatchNormReLU(nPlanes))\
                .add(scn.Convolution(dimension, nPlanes, l(nPlanes), 2, 2, False))\
                .add(baz(depth-1,l(nPlanes)))\
                .add(scn.UnPooling(dimension, 2, 2))
                )).add(scn.AddTable())
    return baz(depth,nPlanes)
class Model(torch.nn.Module):
    """Sparse semantic-segmentation network.

    Voxelises the input point cloud (InputLayer), lifts the 4 input
    channels (constant-255 + RGB) to 16 with a size-3 convolution, runs a
    depth-9 ShrinkScatterC22l tower producing 40 class scores per point,
    and maps back to a dense per-point tensor (OutputLayer).
    """
    def __init__(self):
        super().__init__()
        self.sparseModel = scn.Sequential(
            scn.InputLayer(dimension=3, spatial_size=65536, mode=4),
            scn.ValidConvolution(3, 4, 16, 3, False),
            ShrinkScatterC22l(3, 16, 40, 1, 9),
            scn.OutputLayer(dimension=3),
        )

    def forward(self, x):
        return self.sparseModel(x)
# ---- model, optimiser and (optional) checkpoint resumption ----
model = Model()
p = {
    'n_epochs': 200,
    'initial_lr': 1e-1,
    'lr_decay': 0.02,       # exponential lr decay rate per epoch
    'weight_decay': 1e-4,
    'momentum': 0.9,
    'check_point': True,    # save/restore epoch.pth + model<N>.pth
}
device = 'cuda:0'
model.to(device)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=p['initial_lr'],
    momentum=p['momentum'],
    weight_decay=p['weight_decay'],
    nesterov=True)
if p['check_point'] and os.path.isfile('epoch.pth'):
    # Resume: epoch.pth holds the last completed epoch number.
    p['epoch'] = torch.load('epoch.pth') + 1
    print('Restarting at epoch ' + str(p['epoch']))
    model.load_state_dict(torch.load('model%d.pth' % (p['epoch'] - 1)))
else:
    p['epoch'] = 1
print(p)
print('#parameters', sum(x.nelement() for x in model.parameters()))
# ---- main loop: train one epoch, checkpoint, periodically evaluate ----
for epoch in range(p['epoch'], p['n_epochs'] + 1):
    model.train()
    stats = {'n': 0, 'c': 0, 'loss': 0}  # point count, correct count, summed loss
    # Exponential lr schedule: lr = initial_lr * exp((1 - epoch) * lr_decay).
    for param_group in optimizer.param_groups:
        param_group['lr'] = p['initial_lr'] * \
            math.exp((1 - epoch) * p['lr_decay'])
    # Global profiling counters maintained inside sparseconvnet.
    scn.forward_pass_multiplyAdd_count=0
    scn.forward_pass_hidden_states=0
    start = time.time()
    for xyz,rgb,targets in trainIterator:
        optimizer.zero_grad()
        predictions=model((xyz,rgb.to(device)))
        targets=targets.to(device)
        # Unlabelled points carry target -100, which F.cross_entropy
        # ignores by default (ignore_index=-100).
        loss = F.cross_entropy(predictions,targets)
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            # Accuracy statistics over labelled points only.
            predictions=predictions[targets>=0]
            targets=targets[targets>=0]
            stats['n']+=predictions.size(0)
            stats['c']+=(predictions.max(1)[1]==targets).long().sum().item()
            stats['loss']+=loss*predictions.size(0)
            if epoch<=1:
                print('train',loss.item(),stats['c']/stats['n'],stats['loss']/stats['n'])
    print('train epoch',epoch,stats['c']/stats['n'],
        'MegaMulAdd=',scn.forward_pass_multiplyAdd_count/795/1e6, 'MegaHidden',scn.forward_pass_hidden_states/795/1e6,'time=',time.time() - start,'s')
    if p['check_point']:
        torch.save(epoch, 'epoch.pth')
        torch.save(model.state_dict(),'model%d.pth'%epoch)
    # Evaluate at power-of-two epochs and at the final epoch.
    if scn.is_power2(epoch) or epoch==200:
        with torch.no_grad():
            model.eval()
            stats = {'n': 0, 'c': 0, 'loss': 0}
            scn.forward_pass_multiplyAdd_count=0
            scn.forward_pass_hidden_states=0
            start = time.time()
            for xyz,rgb,targets in testIterator:
                predictions=model((xyz,rgb.to(device)))
                targets=targets.to(device)
                # With testReps>1 each batch holds testReps stochastic
                # passes over the same samples (see the data setup);
                # keep one copy of the targets and average the scores.
                targets=targets[:targets.numel()//testReps]
                predictions=predictions.view(testReps,-1,nClasses).mean(0)
                loss = F.cross_entropy(predictions,targets)
                predictions=predictions[targets>=0]
                targets=targets[targets>=0]
                stats['n']+=predictions.size(0)
                stats['c']+=(predictions.max(1)[1]==targets).long().sum().item()
                stats['loss']+=loss*predictions.size(0)
                if epoch<=1:
                    print('test',loss.item(),stats['c']/stats['n'],stats['loss']/stats['n'])
            print('test epoch',epoch,stats['c']/stats['n'],
                'MegaMulAdd=',scn.forward_pass_multiplyAdd_count/795/1e6, 'MegaHidden',scn.forward_pass_hidden_states/795/1e6,'time=',time.time() - start,'s')
            # NOTE(review): the test print divides the op counters by 795
            # (the training-set size), not 654 test scenes -- confirm intent.
......@@ -259,7 +259,7 @@ Metadata<dimension>::sparsifyCompare(Metadata<dimension> &mGT,
/*long*/ at::Tensor spatialSize) {
auto p = LongTensorToPoint<dimension>(spatialSize);
at::Tensor gt = torch::zeros({nActive[p]}, at::kByte);
at::Tensor ref_map = torch::/*empty*/ zeros({mGT.nActive[p]}, at::kLong);
at::Tensor ref_map = torch::empty({mGT.nActive[p]}, at::kLong);
long *ref_map_ptr = ref_map.data<long>();
unsigned char *gt_ptr = gt.data<unsigned char>();
auto &sgsGT = mGT.grids[p];
......@@ -273,12 +273,12 @@ Metadata<dimension>::sparsifyCompare(Metadata<dimension> &mGT,
auto &sgFull = sgsFull[sample];
for (auto const &iter : sgGT.mp) {
auto f = sgFull.mp.find(iter.first);
if (f == sgFull.mp.end())
std::cout << __FILE__ << ":" << __LINE__ << std::endl;
if (f != sgFull.mp.end()) {
ref_map_ptr[iter.second + sgGT.ctr] = f->second + sgFull.ctr;
gt_ptr[f->second + sgFull.ctr] = +1;
}
}
}
return {gt, ref_map};
}
......
......@@ -42,6 +42,11 @@ class InputLayer(Module):
self.dimension = dimension
self.spatial_size = toLongTensor(dimension, spatial_size)
self.mode = mode
self.device = None
def to(self, device):
self.device=device
return self
def forward(self, input):
output = SparseConvNetTensor(
......@@ -52,8 +57,8 @@ class InputLayer(Module):
self.dimension,
output.metadata,
self.spatial_size,
input[0].type(torch.LongTensor),
input[1],
input[0].cpu().long(),
input[1].to(self.device) if self.device else input[1],
0 if len(input) == 2 else input[2],
self.mode
)
......@@ -109,7 +114,11 @@ class BLInputLayer(Module):
self.dimension = dimension
self.spatial_size = toLongTensor(dimension, spatial_size)
self.mode = mode
# (coords,input_features) = input
self.device = None
def to(self, device):
self.device=device
return self
def forward(self, input):
output = SparseConvNetTensor(
......@@ -120,8 +129,8 @@ class BLInputLayer(Module):
self.dimension,
output.metadata,
self.spatial_size,
input[0].type(torch.LongTensor),
input[1],
input[0].cpu().long(),
input[1].to(self.device) if self.device else input[1],
self.mode
)
return output
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment