Commit c5070f09 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

nyu depth v2 example

parent 0fff2951
3D Semantic Segmentation with Submanifold Sparse Convolutional Networks, CVPR 2018
SSCN-FCN A (k=1) network
Download the labeled dataset nyu_depth_v2_labeled.mat (2.8 GB) from http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html
and the train/test split file splits.mat (2.6 kB) from http://cs.nyu.edu/~silberman/projects/indoor_scene_seg_sup.html, place both in data/, then run data/prepare_data.py.
import numpy as np
import torch
import glob, math, os
import scipy.io
import h5py
import pickle
# The 40 target classes of the NYU Depth v2 40-class benchmark.
# NOTE: 'refridgerator' is the dataset's own (misspelled) label name — do not "fix" it.
classes = [
'wall', 'floor', 'cabinet', 'bed',
'chair', 'sofa', 'table', 'door',
'window', 'bookshelf', 'picture', 'counter',
'blinds', 'desk', 'shelves', 'curtain',
'dresser', 'pillow', 'mirror', 'floor mat',
'clothes', 'ceiling', 'books', 'refridgerator',
'television', 'paper', 'towel', 'shower curtain',
'box', 'whiteboard', 'person', 'night stand',
'toilet', 'sink', 'lamp', 'bathtub',
'bag', 'otherstructure', 'otherfurniture', 'otherprop']
# Maps each of the 894 raw Silberman labels (1-based in the .mat file) to one of
# the 40 benchmark classes above (1-based; converted to 0-based at use sites).
corresponding_classes_in_Silberman_labeling = [40, 40, 3, 22, 5, 40, 12, 38, 40, 40, 2, 39, 40, 40, 26, 40, 24,
40, 7, 40, 1, 40, 40, 34, 38, 29, 40, 8, 40, 40, 40, 40, 38, 40,
40, 14, 40, 38, 40, 40, 40, 15, 39, 40, 30, 40, 40, 39, 40, 39, 38,
40, 38, 40, 37, 40, 38, 38, 9, 40, 40, 38, 40, 11, 38, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 13, 40, 40, 6, 40, 23,
40, 39, 10, 16, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40,
40, 38, 40, 39, 40, 40, 40, 40, 39, 38, 40, 40, 40, 40, 40, 40, 18,
40, 40, 19, 28, 33, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 27, 36,
40, 40, 40, 40, 21, 40, 20, 35, 40, 40, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 4, 32, 40, 40, 39, 40, 39, 40, 40, 40, 40, 40, 17, 40,
40, 25, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39,
40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 38, 40, 40, 39, 40, 39,
40, 38, 39, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 38,
40, 40, 38, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
38, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 39, 40, 40, 40, 38, 40, 40, 39, 40, 40, 38, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 31, 40, 40, 40, 40, 40, 40, 40, 38, 40,
40, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 39, 40,
40, 39, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 38, 39, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
38, 39, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 38, 40, 39, 40, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
39, 39, 40, 40, 39, 39, 40, 40, 40, 40, 38, 40, 40, 38, 39, 39, 40,
39, 40, 39, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40,
38, 40, 39, 40, 40, 40, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40, 39,
39, 40, 40, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 39,
40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 39, 40, 40, 39, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 40, 40, 40,
40, 40, 40, 40, 39, 38, 39, 40, 38, 39, 40, 39, 40, 39, 40, 40, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 38, 40, 40, 39, 40, 40,
40, 39, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 40, 40, 38,
40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 40, 38, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 38, 38, 40, 40, 40, 38,
40, 40, 40, 38, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 38, 40, 38, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 39, 40, 39, 40, 40, 40, 40, 38, 38, 40, 40, 40, 38,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40,
39, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 39, 39, 40,
40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 38, 40, 39, 40, 40, 40, 40,
38, 40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40,
40, 40, 40, 40, 40, 40, 40, 39, 40, 40]
# Sanity check: 40 class names and the 894-entry raw-label -> 40-class mapping.
print(len(classes), len(corresponding_classes_in_Silberman_labeling))

# splits.mat stores 1-based MATLAB indices of the test images; shift to 0-based.
split = scipy.io.loadmat('splits.mat')['testNdxs'] - 1  # 0-index
# Membership tests against the raw numpy array are O(n) per query; build a set
# once so the two comprehensions below run in O(1) per index.
_test_set = set(split.flatten().tolist())
testIdxs = [x for x in range(1449) if x in _test_set]
trainIdxs = [x for x in range(1449) if x not in _test_set]
print(len(trainIdxs), len(testIdxs))
# Export one point cloud per training image: coordinates, colours, class labels.
# Fix: the inner pixel loop previously reused `x`, shadowing the image index
# from enumerate(trainIdxs); the outer variable is renamed to `img` for safety.
f = h5py.File('nyu_depth_v2_labeled.mat', 'r')
for i, img in enumerate(trainIdxs):
    print(i, img)
    tc = f.get('images')[img]        # RGB image for this sample
    td = f.get('depths')[img] * 100  # depth, scaled to centimetres
    td -= td.mean()                  # centre depth around zero
    gt = np.array(f.get('labels')[img], dtype='int16')
    coords = []
    col = []
    cl = []
    # Crop the image border (rows 40..599, cols 45..469).
    for x in range(40, 600):
        for y in range(45, 470):
            # Raw labels are 1-based; 0 means unlabeled -> ignore index -100.
            # Map raw label to a 0-based 40-class id.
            cl.append(corresponding_classes_in_Silberman_labeling[gt[x, y] - 1] - 1
                      if gt[x, y] >= 1 else -100)
            # Centre spatial coordinates; keep raw (mean-subtracted) depth as z.
            coords.append([x - 320, y - 240, td[x, y]])
            # Leading 255 acts as a constant "bias" feature channel.
            col.append([255, tc[0, x, y], tc[1, x, y], tc[2, x, y]])
    coords = np.array(coords, dtype='int16')
    col = np.array(col, dtype='uint8')
    cl = np.array(cl, dtype='int8')
    torch.save([coords, col, cl], 'train' + str(i) + '.pth')
# Export one point cloud per test image (same pipeline as the training loop).
# Fix: the inner pixel loop previously reused `x`, shadowing the image index
# from enumerate(testIdxs); the outer variable is renamed to `img` for safety.
f = h5py.File('nyu_depth_v2_labeled.mat', 'r')
for i, img in enumerate(testIdxs):
    print(i, img)
    tc = f.get('images')[img]        # RGB image for this sample
    td = f.get('depths')[img] * 100  # depth, scaled to centimetres
    td -= td.mean()                  # centre depth around zero
    gt = np.array(f.get('labels')[img], dtype='int16')
    coords = []
    col = []
    cl = []
    # Crop the image border (rows 40..599, cols 45..469).
    for x in range(40, 600):
        for y in range(45, 470):
            # Raw labels are 1-based; 0 means unlabeled -> ignore index -100.
            cl.append(corresponding_classes_in_Silberman_labeling[gt[x, y] - 1] - 1
                      if gt[x, y] >= 1 else -100)
            coords.append([x - 320, y - 240, td[x, y]])
            # Leading 255 acts as a constant "bias" feature channel.
            col.append([255, tc[0, x, y], tc[1, x, y], tc[2, x, y]])
    coords = np.array(coords, dtype='int16')
    col = np.array(col, dtype='uint8')
    cl = np.array(cl, dtype='int8')
    torch.save([coords, col, cl], 'test' + str(i) + '.pth')
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch, torch.nn.functional as F, torch.utils.data
import sparseconvnet as scn
import time, os, sys, glob, math
import numpy as np
# ---- Configuration ---------------------------------------------------------
downscale = 2
trainBatchSize = 2
testBatchSize = 2
testReps = 1  # Assume testBatchSize is a multiple of testReps
spatialSize = torch.LongTensor([65536, 65536, 65536])
nClasses = 40

# ---- Load the point clouds written by data/prepare_data.py -----------------
train_data = [torch.load('data/train%d.pth' % i) for i in range(795)]
test_data = [torch.load('data/test%d.pth' % i) for i in range(654)]
for sample in train_data + test_data:
    sample[0] = torch.from_numpy(sample[0]).float()            # coordinates
    sample[1] = torch.from_numpy(sample[1]).float() / 127.5 - 1  # colours -> [-1, 1]
    sample[2] = torch.from_numpy(sample[2]).long()             # class labels
print(len(train_data), len(test_data))

if testReps > 1:
    # Repeat each test sample testReps times, consecutively within a batch,
    # so predictions over random augmentations can be averaged later.
    test_data = [test_data[x]
                 for i in range(0, 654, testBatchSize // testReps)
                 for _ in range(testReps)
                 for x in range(i, min(i + testBatchSize // testReps, 654))]
def train_merge(tbl):
    """Collate fn for training: randomly rotate/perturb/flip each sample's
    coordinates, then merge the samples into one sparse batch."""
    torch.set_num_threads(1)
    batch_locs = []
    batch_feats = []
    batch_tgts = []
    for coords, irgb, targets in tbl:
        rot = torch.eye(3)
        # Small random rotation (about +/-0.1 rad) in the x-z plane.
        theta = torch.rand(1).item() * 0.2 - 0.1
        c, s = math.cos(theta), math.sin(theta)
        rot[0, 0] = c
        rot[0, 2] = s
        rot[2, 0] = -s
        rot[2, 2] = c
        rot /= downscale
        # Additive noise on every matrix entry: random shear/scale jitter.
        rot += torch.FloatTensor(3, 3).uniform_(-0.05, 0.05)
        if torch.rand(1).item() < 0.5:
            rot[:, 0] *= -1  # random left/right flip
        coords = torch.matmul(coords, rot)
        # Random translation into the 65536^3 grid, centred near 32768.
        coords += torch.rand(3) * 24000 - 12000 + 32768
        batch_locs.append(coords.long())
        batch_feats.append(irgb)
        batch_tgts.append(targets)
    return (scn.batch_location_tensors(batch_locs),
            torch.cat(batch_feats, 0),
            torch.cat(batch_tgts, 0))
def test_merge(tbl):
    """Collate fn for testing: downscale, random flip and random translation
    only (no rotation/noise), then merge the samples into one sparse batch."""
    torch.set_num_threads(1)
    batch_locs = []
    batch_feats = []
    batch_tgts = []
    for coords, irgb, targets in tbl:
        rot = torch.eye(3)
        rot /= downscale
        if torch.rand(1).item() < 0.5:
            rot[:, 0] *= -1  # random left/right flip
        coords = torch.matmul(coords, rot)
        # Random translation into the 65536^3 grid, centred near 32768.
        coords += torch.rand(3) * 24000 - 12000 + 32768
        batch_locs.append(coords.long())
        batch_feats.append(irgb)
        batch_tgts.append(targets)
    return (scn.batch_location_tensors(batch_locs),
            torch.cat(batch_feats, 0),
            torch.cat(batch_tgts, 0))
# DataLoaders over the pre-loaded sample lists; the collate_fns above turn a
# list of (coords, colours, labels) triples into one sparse batch.
trainIterator=torch.utils.data.DataLoader(train_data,collate_fn=train_merge,shuffle=True,num_workers=16,drop_last=True,batch_size=trainBatchSize)
testIterator=torch.utils.data.DataLoader(test_data,collate_fn=test_merge,shuffle=False,num_workers=16,drop_last=False,batch_size=testBatchSize)
def ShrinkScatterC22l(dimension, nPlanes, nClasses, reps, depth=4):
    """Build a recursive sparse FCN: each level runs `reps` submanifold convs,
    emits nClasses logits, downsamples by 2, recurses, and adds the upsampled
    deeper logits back (scatter) via ConcatTable + AddTable."""
    def wider(n):
        # Channel count grows by nPlanes at every downsampling level.
        return n + nPlanes

    def conv_stack(n):
        stack = scn.Sequential()
        for _ in range(reps):
            stack.add(scn.BatchNormReLU(n))
            stack.add(scn.SubmanifoldConvolution(dimension, n, n, 3, False))
        return stack

    def classifier(n, bias):
        # Accumulate softmax input; only one set of biases (deepest level only).
        head = scn.Sequential()
        head.add(scn.BatchNormReLU(n))
        head.add(scn.NetworkInNetwork(n, nClasses, bias))
        return head

    def build(level, n):
        if level == 1:
            return scn.Sequential().add(conv_stack(n)).add(classifier(n, True))
        deeper = (scn.Sequential()
                  .add(scn.BatchNormReLU(n))
                  .add(scn.Convolution(dimension, n, wider(n), 2, 2, False))
                  .add(build(level - 1, wider(n)))
                  .add(scn.UnPooling(dimension, 2, 2)))
        return (scn.Sequential()
                .add(conv_stack(n))
                .add(scn.ConcatTable().add(classifier(n, False)).add(deeper))
                .add(scn.AddTable()))

    return build(depth, nPlanes)
class Model(torch.nn.Module):
    """SSCN-FCN A: sparse input layer, one valid 3^3 convolution from the four
    input channels (bias + RGB) to 16 planes, a depth-9 recursive FCN producing
    40-class logits, and a dense output layer."""

    def __init__(self):
        super(Model, self).__init__()
        self.sparseModel = scn.Sequential(
            scn.InputLayer(dimension=3, spatial_size=65536, mode=4),
            scn.ValidConvolution(3, 4, 16, 3, False),
            ShrinkScatterC22l(3, 16, 40, 1, 9),
            scn.OutputLayer(dimension=3),
        )

    def forward(self, x):
        return self.sparseModel(x)
model = Model()

# Training hyper-parameters.
p = {}
p['n_epochs'] = 200
p['initial_lr'] = 1e-1
p['lr_decay'] = 0.02       # exponential decay rate per epoch
p['weight_decay'] = 1e-4
p['momentum'] = 0.9
p['check_point'] = True
device = 'cuda:0'
model.to(device)
optimizer = torch.optim.SGD(model.parameters(),
                            lr=p['initial_lr'],
                            momentum=p['momentum'],
                            weight_decay=p['weight_decay'],
                            nesterov=True)
if p['check_point'] and os.path.isfile('epoch.pth'):
    # Resume from the most recent checkpoint.
    p['epoch'] = torch.load('epoch.pth') + 1
    print('Restarting at epoch ' + str(p['epoch']))
    # map_location so a checkpoint saved on a different device still loads.
    model.load_state_dict(torch.load('model%d.pth' % (p['epoch'] - 1),
                                     map_location=device))
else:
    p['epoch'] = 1
print(p)
print('#parameters', sum(x.nelement() for x in model.parameters()))
# Main train/eval loop. Fixes over the original:
#  * stats['loss'] accumulates loss.item() (a float) rather than a tensor;
#  * test-epoch MegaMulAdd/MegaHidden are normalized by the 654 test images,
#    not the 795 training images (copy-paste error);
#  * the final-epoch test trigger uses p['n_epochs'] instead of hard-coded 200.
for epoch in range(p['epoch'], p['n_epochs'] + 1):
    model.train()
    stats = {'n': 0, 'c': 0, 'loss': 0}
    # Exponentially decaying learning-rate schedule.
    for param_group in optimizer.param_groups:
        param_group['lr'] = p['initial_lr'] * \
            math.exp((1 - epoch) * p['lr_decay'])
    scn.forward_pass_multiplyAdd_count = 0
    scn.forward_pass_hidden_states = 0
    start = time.time()
    for xyz, rgb, targets in trainIterator:
        optimizer.zero_grad()
        predictions = model((xyz, rgb.to(device)))
        targets = targets.to(device)
        # cross_entropy's default ignore_index (-100) skips unlabeled points.
        loss = F.cross_entropy(predictions, targets)
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            # Score only labelled points.
            predictions = predictions[targets >= 0]
            targets = targets[targets >= 0]
            stats['n'] += predictions.size(0)
            stats['c'] += (predictions.max(1)[1] == targets).long().sum().item()
            stats['loss'] += loss.item() * predictions.size(0)
        if epoch <= 1:
            print('train', loss.item(), stats['c'] / stats['n'], stats['loss'] / stats['n'])
    print('train epoch', epoch, stats['c'] / stats['n'],
          'MegaMulAdd=', scn.forward_pass_multiplyAdd_count / 795 / 1e6,
          'MegaHidden', scn.forward_pass_hidden_states / 795 / 1e6,
          'time=', time.time() - start, 's')
    if p['check_point']:
        torch.save(epoch, 'epoch.pth')
        torch.save(model.state_dict(), 'model%d.pth' % epoch)

    # Evaluate at power-of-two epochs and at the final epoch.
    if scn.is_power2(epoch) or epoch == p['n_epochs']:
        with torch.no_grad():
            model.eval()
            stats = {'n': 0, 'c': 0, 'loss': 0}
            scn.forward_pass_multiplyAdd_count = 0
            scn.forward_pass_hidden_states = 0
            start = time.time()
            for xyz, rgb, targets in testIterator:
                predictions = model((xyz, rgb.to(device)))
                targets = targets.to(device)
                # With testReps > 1 each sample appears testReps times in a
                # batch; keep one copy of the targets and average predictions.
                targets = targets[:targets.numel() // testReps]
                predictions = predictions.view(testReps, -1, nClasses).mean(0)
                loss = F.cross_entropy(predictions, targets)
                predictions = predictions[targets >= 0]
                targets = targets[targets >= 0]
                stats['n'] += predictions.size(0)
                stats['c'] += (predictions.max(1)[1] == targets).long().sum().item()
                stats['loss'] += loss.item() * predictions.size(0)
                if epoch <= 1:
                    print('test', loss.item(), stats['c'] / stats['n'], stats['loss'] / stats['n'])
            print('test epoch', epoch, stats['c'] / stats['n'],
                  'MegaMulAdd=', scn.forward_pass_multiplyAdd_count / 654 / 1e6,
                  'MegaHidden', scn.forward_pass_hidden_states / 654 / 1e6,
                  'time=', time.time() - start, 's')
...@@ -259,7 +259,7 @@ Metadata<dimension>::sparsifyCompare(Metadata<dimension> &mGT, ...@@ -259,7 +259,7 @@ Metadata<dimension>::sparsifyCompare(Metadata<dimension> &mGT,
/*long*/ at::Tensor spatialSize) { /*long*/ at::Tensor spatialSize) {
auto p = LongTensorToPoint<dimension>(spatialSize); auto p = LongTensorToPoint<dimension>(spatialSize);
at::Tensor gt = torch::zeros({nActive[p]}, at::kByte); at::Tensor gt = torch::zeros({nActive[p]}, at::kByte);
at::Tensor ref_map = torch::/*empty*/ zeros({mGT.nActive[p]}, at::kLong); at::Tensor ref_map = torch::empty({mGT.nActive[p]}, at::kLong);
long *ref_map_ptr = ref_map.data<long>(); long *ref_map_ptr = ref_map.data<long>();
unsigned char *gt_ptr = gt.data<unsigned char>(); unsigned char *gt_ptr = gt.data<unsigned char>();
auto &sgsGT = mGT.grids[p]; auto &sgsGT = mGT.grids[p];
...@@ -273,10 +273,10 @@ Metadata<dimension>::sparsifyCompare(Metadata<dimension> &mGT, ...@@ -273,10 +273,10 @@ Metadata<dimension>::sparsifyCompare(Metadata<dimension> &mGT,
auto &sgFull = sgsFull[sample]; auto &sgFull = sgsFull[sample];
for (auto const &iter : sgGT.mp) { for (auto const &iter : sgGT.mp) {
auto f = sgFull.mp.find(iter.first); auto f = sgFull.mp.find(iter.first);
if (f == sgFull.mp.end()) if (f != sgFull.mp.end()) {
std::cout << __FILE__ << ":" << __LINE__ << std::endl; ref_map_ptr[iter.second + sgGT.ctr] = f->second + sgFull.ctr;
ref_map_ptr[iter.second + sgGT.ctr] = f->second + sgFull.ctr; gt_ptr[f->second + sgFull.ctr] = +1;
gt_ptr[f->second + sgFull.ctr] = +1; }
} }
} }
return {gt, ref_map}; return {gt, ref_map};
......
...@@ -42,6 +42,11 @@ class InputLayer(Module): ...@@ -42,6 +42,11 @@ class InputLayer(Module):
self.dimension = dimension self.dimension = dimension
self.spatial_size = toLongTensor(dimension, spatial_size) self.spatial_size = toLongTensor(dimension, spatial_size)
self.mode = mode self.mode = mode
self.device = None
def to(self, device):
self.device=device
return self
def forward(self, input): def forward(self, input):
output = SparseConvNetTensor( output = SparseConvNetTensor(
...@@ -52,8 +57,8 @@ class InputLayer(Module): ...@@ -52,8 +57,8 @@ class InputLayer(Module):
self.dimension, self.dimension,
output.metadata, output.metadata,
self.spatial_size, self.spatial_size,
input[0].type(torch.LongTensor), input[0].cpu().long(),
input[1], input[1].to(self.device) if self.device else input[1],
0 if len(input) == 2 else input[2], 0 if len(input) == 2 else input[2],
self.mode self.mode
) )
...@@ -109,7 +114,11 @@ class BLInputLayer(Module): ...@@ -109,7 +114,11 @@ class BLInputLayer(Module):
self.dimension = dimension self.dimension = dimension
self.spatial_size = toLongTensor(dimension, spatial_size) self.spatial_size = toLongTensor(dimension, spatial_size)
self.mode = mode self.mode = mode
# (coords,input_features) = input self.device = None
def to(self, device):
self.device=device
return self
def forward(self, input): def forward(self, input):
output = SparseConvNetTensor( output = SparseConvNetTensor(
...@@ -120,8 +129,8 @@ class BLInputLayer(Module): ...@@ -120,8 +129,8 @@ class BLInputLayer(Module):
self.dimension, self.dimension,
output.metadata, output.metadata,
self.spatial_size, self.spatial_size,
input[0].type(torch.LongTensor), input[0].cpu().long(),
input[1], input[1].to(self.device) if self.device else input[1],
self.mode self.mode
) )
return output return output
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment