"vscode:/vscode.git/clone" did not exist on "2d39ded64cbf3025b6ced809fd2a3e50bf1fb72d"
Commit c5070f09 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

nyu depth v2 example

parent 0fff2951
3D Semantic Segmentation with Submanifold Sparse Convolutional Networks, CVPR 2018
SSCN-FCN A (k=1) network
Download the labeled dataset nyu_depth_v2_labeled.mat (2.8 GB) from http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html
and the train/test split file splits.mat (2.6 kB) from http://cs.nyu.edu/~silberman/projects/indoor_scene_seg_sup.html; place both in data/, then run data/prepare_data.py.
import numpy as np
import torch
import glob, math, os
import scipy.io
import h5py
import pickle
# Names of the 40 target semantic classes, in label order
# (zero-based class id = position in this list).
# Note: 'refridgerator' is the dataset's own spelling -- do not "fix" it.
classes = [
    'wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
    'window', 'bookshelf', 'picture', 'counter', 'blinds', 'desk',
    'shelves', 'curtain', 'dresser', 'pillow', 'mirror', 'floor mat',
    'clothes', 'ceiling', 'books', 'refridgerator', 'television', 'paper',
    'towel', 'shower curtain', 'box', 'whiteboard', 'person', 'night stand',
    'toilet', 'sink', 'lamp', 'bathtub', 'bag', 'otherstructure',
    'otherfurniture', 'otherprop']
# Lookup table mapping each raw Silberman NYU label id to one of the 40
# classes above: entry [raw_label - 1] is the 1-based 40-class id
# (see the usage below: corresponding_classes_in_Silberman_labeling[gt-1]-1).
# Value 40 ('otherprop') acts as the catch-all for rare categories.
corresponding_classes_in_Silberman_labeling = [40, 40, 3, 22, 5, 40, 12, 38, 40, 40, 2, 39, 40, 40, 26, 40, 24,
40, 7, 40, 1, 40, 40, 34, 38, 29, 40, 8, 40, 40, 40, 40, 38, 40,
40, 14, 40, 38, 40, 40, 40, 15, 39, 40, 30, 40, 40, 39, 40, 39, 38,
40, 38, 40, 37, 40, 38, 38, 9, 40, 40, 38, 40, 11, 38, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 13, 40, 40, 6, 40, 23,
40, 39, 10, 16, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40,
40, 38, 40, 39, 40, 40, 40, 40, 39, 38, 40, 40, 40, 40, 40, 40, 18,
40, 40, 19, 28, 33, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 27, 36,
40, 40, 40, 40, 21, 40, 20, 35, 40, 40, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 4, 32, 40, 40, 39, 40, 39, 40, 40, 40, 40, 40, 17, 40,
40, 25, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39,
40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 38, 40, 40, 39, 40, 39,
40, 38, 39, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 38,
40, 40, 38, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
38, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 39, 40, 40, 40, 38, 40, 40, 39, 40, 40, 38, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 31, 40, 40, 40, 40, 40, 40, 40, 38, 40,
40, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 39, 40,
40, 39, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 38, 39, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
38, 39, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 38, 40, 39, 40, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
39, 39, 40, 40, 39, 39, 40, 40, 40, 40, 38, 40, 40, 38, 39, 39, 40,
39, 40, 39, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40,
38, 40, 39, 40, 40, 40, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40, 39,
39, 40, 40, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 39,
40, 40, 40, 40, 39, 40, 40, 40, 40, 40, 39, 40, 40, 39, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 40, 40, 40,
40, 40, 40, 40, 39, 38, 39, 40, 38, 39, 40, 39, 40, 39, 40, 40, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 38, 40, 40, 39, 40, 40,
40, 39, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 38, 40, 40, 38,
40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 40, 40, 38, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38, 38, 38, 40, 40, 40, 38,
40, 40, 40, 38, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 38, 40, 38, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 39, 40, 39, 40, 40, 40, 40, 38, 38, 40, 40, 40, 38,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40,
39, 40, 40, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 39, 39, 40,
40, 40, 40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 40, 40,
40, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 38,
40, 40, 40, 40, 40, 40, 40, 39, 40, 40, 38, 40, 39, 40, 40, 40, 40,
38, 40, 40, 40, 40, 40, 38, 40, 40, 40, 40, 40, 40, 40, 39, 40, 40,
40, 40, 40, 40, 40, 40, 40, 39, 40, 40]
print(len(classes),len(corresponding_classes_in_Silberman_labeling))
# splits.mat stores 1-based MATLAB indices of the official test images;
# subtract 1 to get 0-based indices.
split = scipy.io.loadmat('splits.mat')['testNdxs'] - 1  # 0-index
# Build a set once for O(1) membership tests; the original `x in split`
# scanned the whole numpy array for every one of the 1449 candidates.
test_index_set = {int(v) for v in np.asarray(split).ravel()}
testIdxs = [x for x in range(1449) if x in test_index_set]
trainIdxs = [x for x in range(1449) if x not in test_index_set]
print(len(trainIdxs), len(testIdxs))
# Convert each training image to a point cloud:
#   coords: (x-320, y-240, mean-centred depth in cm), int16
#   col:    (255, r, g, b) per point, uint8 -- constant 255 first channel
#   cl:     0-based 40-class label, or -100 for unlabelled pixels
# The crop range [40,600) x [45,470) drops the white border of the images.
# Fix: the inner pixel loop previously reused `x`, shadowing the outer
# dataset index; the outer index is now `idx`.
f = h5py.File('nyu_depth_v2_labeled.mat', 'r')
for i, idx in enumerate(trainIdxs):
    print(i, idx)
    tc = f.get('images')[idx]
    td = f.get('depths')[idx] * 100  # metres -> centimetres (presumably; confirm units)
    td -= td.mean()
    gt = np.array(f.get('labels')[idx], dtype='int16')
    coords = []
    col = []
    cl = []
    for x in range(40, 600):
        for y in range(45, 470):
            cl.append(corresponding_classes_in_Silberman_labeling[gt[x, y] - 1] - 1
                      if gt[x, y] >= 1 else -100)
            coords.append([x - 320, y - 240, td[x, y]])
            col.append([255, tc[0, x, y], tc[1, x, y], tc[2, x, y]])
    coords = np.array(coords, dtype='int16')
    col = np.array(col, dtype='uint8')
    cl = np.array(cl, dtype='int8')
    torch.save([coords, col, cl], 'train' + str(i) + '.pth')
# Same conversion for the test split; see the training loop above.
# Fix: renamed the outer dataset index to `idx` so the inner pixel loop's
# `x` no longer shadows it.
f = h5py.File('nyu_depth_v2_labeled.mat', 'r')
for i, idx in enumerate(testIdxs):
    print(i, idx)
    tc = f.get('images')[idx]
    td = f.get('depths')[idx] * 100  # metres -> centimetres (presumably; confirm units)
    td -= td.mean()
    gt = np.array(f.get('labels')[idx], dtype='int16')
    coords = []
    col = []
    cl = []
    for x in range(40, 600):
        for y in range(45, 470):
            cl.append(corresponding_classes_in_Silberman_labeling[gt[x, y] - 1] - 1
                      if gt[x, y] >= 1 else -100)
            coords.append([x - 320, y - 240, td[x, y]])
            col.append([255, tc[0, x, y], tc[1, x, y], tc[2, x, y]])
    coords = np.array(coords, dtype='int16')
    col = np.array(col, dtype='uint8')
    cl = np.array(cl, dtype='int8')
    torch.save([coords, col, cl], 'test' + str(i) + '.pth')
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch, torch.nn.functional as F, torch.utils.data
import sparseconvnet as scn
import time, os, sys, glob, math
import numpy as np
# ---- hyper-parameters and dataset loading ----
downscale=2           # coordinate downscaling factor applied in the merge fns
trainBatchSize=2
testBatchSize=2
testReps=1 # Assume testBatchSize is a multiple of testReps
spatialSize=torch.LongTensor([65536,65536,65536])  # voxel grid extent per axis
nClasses=40
# Preprocessed tensors written by data/prepare_data.py:
# [coords int16, colours uint8 (first channel constant 255), labels int8].
train_data=[torch.load('data/train%d.pth'%i) for i in range(795)]
test_data=[torch.load('data/test%d.pth'%i) for i in range(654)]
for x in train_data+test_data:
    x[0]=torch.from_numpy(x[0]).float()
    x[1]=torch.from_numpy(x[1]).float()/127.5-1  # scale colours from [0,255] to [-1,1]
    x[2]=torch.from_numpy(x[2]).long()
print(len(train_data),len(test_data))
if testReps>1:
    # Repeat each test batch's samples testReps times in sequence so that
    # the stochastic test-time passes over one sample land in the same
    # batch and can be averaged (see the evaluation loop below).
    test_data=[test_data[x] for i in range(0,654,testBatchSize//testReps) for _ in range(testReps) for x in range(i,min(i+testBatchSize//testReps,654))]
def train_merge(tbl):
    """Collate a list of (coords, rgb, labels) samples into one batch.

    Training-time augmentation per sample: a small random rotation in the
    x/z plane, division by `downscale`, random affine jitter, a random
    mirror of the first axis, and a random global translation centred on
    32768 (the middle of the 65536^3 grid).
    """
    torch.set_num_threads(1)
    batch_locations, batch_features, batch_targets = [], [], []
    for coords, irgb, targets in tbl:
        transform = torch.eye(3)
        theta = torch.rand(1).item() * 0.2 - 0.1  # rotation angle in [-0.1, 0.1] rad
        sin_t, cos_t = math.sin(theta), math.cos(theta)
        transform[0, 0] = cos_t
        transform[0, 2] = sin_t
        transform[2, 0] = -sin_t
        transform[2, 2] = cos_t
        transform /= downscale
        transform += torch.FloatTensor(3, 3).uniform_(-0.05, 0.05)
        if torch.rand(1).item() < 0.5:
            transform[:, 0] *= -1  # random mirror along the first axis
        coords = torch.matmul(coords, transform)
        coords += torch.rand(3) * 24000 - 12000 + 32768
        batch_locations.append(coords.long())
        batch_features.append(irgb)
        batch_targets.append(targets)
    return (scn.batch_location_tensors(batch_locations),
            torch.cat(batch_features, 0),
            torch.cat(batch_targets, 0))
def test_merge(tbl):
    """Collate a list of (coords, rgb, labels) samples into one batch.

    Test-time transform: division by `downscale`, a random mirror of the
    first axis, and a random global translation centred on 32768.
    NOTE: this is still stochastic; with testReps > 1 the evaluation loop
    averages predictions over repeated passes.
    """
    torch.set_num_threads(1)
    batch_locations, batch_features, batch_targets = [], [], []
    for coords, irgb, targets in tbl:
        transform = torch.eye(3) / downscale
        if torch.rand(1).item() < 0.5:
            transform[:, 0] *= -1  # random mirror along the first axis
        coords = torch.matmul(coords, transform)
        coords += torch.rand(3) * 24000 - 12000 + 32768
        batch_locations.append(coords.long())
        batch_features.append(irgb)
        batch_targets.append(targets)
    return (scn.batch_location_tensors(batch_locations),
            torch.cat(batch_features, 0),
            torch.cat(batch_targets, 0))
# DataLoaders use the merge functions above as collate_fn; 16 worker
# processes each. Training drops the last incomplete batch, testing keeps it.
trainIterator=torch.utils.data.DataLoader(train_data,collate_fn=train_merge,shuffle=True,num_workers=16,drop_last=True,batch_size=trainBatchSize)
testIterator=torch.utils.data.DataLoader(test_data,collate_fn=test_merge,shuffle=False,num_workers=16,drop_last=False,batch_size=testBatchSize)
def ShrinkScatterC22l(dimension,nPlanes,nClasses,reps,depth=4):
    # Recursively builds a U-Net-style sparse FCN: at every depth a 1x1
    # classifier branch contributes class scores that are summed (AddTable)
    # with the upsampled result of the next-coarser level, so the network
    # output already accumulates per-depth softmax inputs.
    def l(x):
        # Channel growth rule: +nPlanes per level of descent.
        return x+nPlanes
    def foo(nPlanes):
        # `reps` repetitions of BatchNorm+ReLU followed by a size-3
        # submanifold convolution (keeps the active-site pattern fixed).
        m=scn.Sequential()
        for _ in range(reps):
            m.add(scn.BatchNormReLU(nPlanes))
            m.add(scn.SubmanifoldConvolution(dimension, nPlanes, nPlanes, 3, False))
        return m
    def bar(nPlanes,bias):
        m=scn.Sequential()
        m.add(scn.BatchNormReLU(nPlanes))
        m.add(scn.NetworkInNetwork(nPlanes,nClasses,bias)) # accumulate softmax input; only the deepest level (depth==1) carries biases
        return m
    def baz(depth,nPlanes):
        # depth==1: plain conv block + biased classifier.
        # depth>1:  conv block, then (classifier) + (downsample-by-2 ->
        #           recurse -> unpool-by-2), summed.
        if depth==1:
            return scn.Sequential().add(foo(nPlanes)).add(bar(nPlanes,True))
        else:
            return scn.Sequential().add(foo(nPlanes)).add(scn.ConcatTable().add(bar(nPlanes,False)).add(
                scn.Sequential()\
                .add(scn.BatchNormReLU(nPlanes))\
                .add(scn.Convolution(dimension, nPlanes, l(nPlanes), 2, 2, False))\
                .add(baz(depth-1,l(nPlanes)))\
                .add(scn.UnPooling(dimension, 2, 2))
                )).add(scn.AddTable())
    return baz(depth,nPlanes)
class Model(torch.nn.Module):
    """Sparse semantic-segmentation network.

    Voxelises the input point cloud (InputLayer), lifts the 4 input
    channels (constant-255 + RGB) to 16 with a size-3 convolution, runs a
    depth-9 ShrinkScatterC22l tower producing 40 class scores per point,
    and maps back to a dense per-point tensor (OutputLayer).
    """
    def __init__(self):
        super().__init__()
        self.sparseModel = scn.Sequential(
            scn.InputLayer(dimension=3, spatial_size=65536, mode=4),
            scn.ValidConvolution(3, 4, 16, 3, False),
            ShrinkScatterC22l(3, 16, 40, 1, 9),
            scn.OutputLayer(dimension=3),
        )

    def forward(self, x):
        return self.sparseModel(x)
# ---- model, optimiser and (optional) checkpoint resumption ----
model = Model()
p = {
    'n_epochs': 200,
    'initial_lr': 1e-1,
    'lr_decay': 0.02,       # exponential lr decay rate per epoch
    'weight_decay': 1e-4,
    'momentum': 0.9,
    'check_point': True,    # save/restore epoch.pth + model<N>.pth
}
device = 'cuda:0'
model.to(device)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=p['initial_lr'],
    momentum=p['momentum'],
    weight_decay=p['weight_decay'],
    nesterov=True)
if p['check_point'] and os.path.isfile('epoch.pth'):
    # Resume: epoch.pth holds the last completed epoch number.
    p['epoch'] = torch.load('epoch.pth') + 1
    print('Restarting at epoch ' + str(p['epoch']))
    model.load_state_dict(torch.load('model%d.pth' % (p['epoch'] - 1)))
else:
    p['epoch'] = 1
print(p)
print('#parameters', sum(x.nelement() for x in model.parameters()))
# ---- main loop: train one epoch, checkpoint, periodically evaluate ----
for epoch in range(p['epoch'], p['n_epochs'] + 1):
    model.train()
    stats = {'n': 0, 'c': 0, 'loss': 0}  # point count, correct count, summed loss
    # Exponential lr schedule: lr = initial_lr * exp((1 - epoch) * lr_decay).
    for param_group in optimizer.param_groups:
        param_group['lr'] = p['initial_lr'] * \
            math.exp((1 - epoch) * p['lr_decay'])
    # Global profiling counters maintained inside sparseconvnet.
    scn.forward_pass_multiplyAdd_count=0
    scn.forward_pass_hidden_states=0
    start = time.time()
    for xyz,rgb,targets in trainIterator:
        optimizer.zero_grad()
        predictions=model((xyz,rgb.to(device)))
        targets=targets.to(device)
        # Unlabelled points carry target -100, which F.cross_entropy
        # ignores by default (ignore_index=-100).
        loss = F.cross_entropy(predictions,targets)
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            # Accuracy statistics over labelled points only.
            predictions=predictions[targets>=0]
            targets=targets[targets>=0]
            stats['n']+=predictions.size(0)
            stats['c']+=(predictions.max(1)[1]==targets).long().sum().item()
            stats['loss']+=loss*predictions.size(0)
            if epoch<=1:
                print('train',loss.item(),stats['c']/stats['n'],stats['loss']/stats['n'])
    print('train epoch',epoch,stats['c']/stats['n'],
        'MegaMulAdd=',scn.forward_pass_multiplyAdd_count/795/1e6, 'MegaHidden',scn.forward_pass_hidden_states/795/1e6,'time=',time.time() - start,'s')
    if p['check_point']:
        torch.save(epoch, 'epoch.pth')
        torch.save(model.state_dict(),'model%d.pth'%epoch)
    # Evaluate at power-of-two epochs and at the final epoch.
    if scn.is_power2(epoch) or epoch==200:
        with torch.no_grad():
            model.eval()
            stats = {'n': 0, 'c': 0, 'loss': 0}
            scn.forward_pass_multiplyAdd_count=0
            scn.forward_pass_hidden_states=0
            start = time.time()
            for xyz,rgb,targets in testIterator:
                predictions=model((xyz,rgb.to(device)))
                targets=targets.to(device)
                # With testReps>1 each batch holds testReps stochastic
                # passes over the same samples (see the data setup);
                # keep one copy of the targets and average the scores.
                targets=targets[:targets.numel()//testReps]
                predictions=predictions.view(testReps,-1,nClasses).mean(0)
                loss = F.cross_entropy(predictions,targets)
                predictions=predictions[targets>=0]
                targets=targets[targets>=0]
                stats['n']+=predictions.size(0)
                stats['c']+=(predictions.max(1)[1]==targets).long().sum().item()
                stats['loss']+=loss*predictions.size(0)
                if epoch<=1:
                    print('test',loss.item(),stats['c']/stats['n'],stats['loss']/stats['n'])
            print('test epoch',epoch,stats['c']/stats['n'],
                'MegaMulAdd=',scn.forward_pass_multiplyAdd_count/795/1e6, 'MegaHidden',scn.forward_pass_hidden_states/795/1e6,'time=',time.time() - start,'s')
            # NOTE(review): the test print divides the op counters by 795
            # (the training-set size), not 654 test scenes -- confirm intent.
......@@ -259,7 +259,7 @@ Metadata<dimension>::sparsifyCompare(Metadata<dimension> &mGT,
/*long*/ at::Tensor spatialSize) {
auto p = LongTensorToPoint<dimension>(spatialSize);
at::Tensor gt = torch::zeros({nActive[p]}, at::kByte);
at::Tensor ref_map = torch::/*empty*/ zeros({mGT.nActive[p]}, at::kLong);
at::Tensor ref_map = torch::empty({mGT.nActive[p]}, at::kLong);
long *ref_map_ptr = ref_map.data<long>();
unsigned char *gt_ptr = gt.data<unsigned char>();
auto &sgsGT = mGT.grids[p];
......@@ -273,12 +273,12 @@ Metadata<dimension>::sparsifyCompare(Metadata<dimension> &mGT,
auto &sgFull = sgsFull[sample];
for (auto const &iter : sgGT.mp) {
auto f = sgFull.mp.find(iter.first);
if (f == sgFull.mp.end())
std::cout << __FILE__ << ":" << __LINE__ << std::endl;
if (f != sgFull.mp.end()) {
ref_map_ptr[iter.second + sgGT.ctr] = f->second + sgFull.ctr;
gt_ptr[f->second + sgFull.ctr] = +1;
}
}
}
return {gt, ref_map};
}
......
......@@ -42,6 +42,11 @@ class InputLayer(Module):
self.dimension = dimension
self.spatial_size = toLongTensor(dimension, spatial_size)
self.mode = mode
self.device = None
def to(self, device):
self.device=device
return self
def forward(self, input):
output = SparseConvNetTensor(
......@@ -52,8 +57,8 @@ class InputLayer(Module):
self.dimension,
output.metadata,
self.spatial_size,
input[0].type(torch.LongTensor),
input[1],
input[0].cpu().long(),
input[1].to(self.device) if self.device else input[1],
0 if len(input) == 2 else input[2],
self.mode
)
......@@ -109,7 +114,11 @@ class BLInputLayer(Module):
self.dimension = dimension
self.spatial_size = toLongTensor(dimension, spatial_size)
self.mode = mode
# (coords,input_features) = input
self.device = None
def to(self, device):
self.device=device
return self
def forward(self, input):
output = SparseConvNetTensor(
......@@ -120,8 +129,8 @@ class BLInputLayer(Module):
self.dimension,
output.metadata,
self.spatial_size,
input[0].type(torch.LongTensor),
input[1],
input[0].cpu().long(),
input[1].to(self.device) if self.device else input[1],
self.mode
)
return output
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment