Commit 2c4ed608 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

Goodbye THNN. Hello ATen!

parent 6d4475db
...@@ -128,19 +128,19 @@ python VGGplus.py ...@@ -128,19 +128,19 @@ python VGGplus.py
## Setup ## Setup
Tested with Ubuntu 16.04, Python 3 in [Miniconda](https://conda.io/miniconda.html) and PyTorch v0.4 (with merged Tensors/Variables). Tested with Ubuntu 16.04, Python 3.6 in [Miniconda](https://conda.io/miniconda.html) and PyTorch v0.4 (with merged Tensors/Variables).
``` ```
conda install pytorch -c pytorch conda install pytorch -c pytorch
conda install google-sparsehash -c bioconda # OR apt-get install libsparsehash-dev conda install google-sparsehash -c bioconda # OR apt-get install libsparsehash-dev
conda install -c anaconda pillow
git clone git@github.com:facebookresearch/SparseConvNet.git git clone git@github.com:facebookresearch/SparseConvNet.git
cd SparseConvNet/ cd SparseConvNet/PyTorch/
python setup.py install bash build.sh
``` ```
To run the examples you may also need to install unrar and TorchNet: To run the examples you may also need to install unrar:
``` ```
apt-get install unrar apt-get install unrar
pip install git+https://github.com/pytorch/tnt.git@master
``` ```
...@@ -154,7 +154,7 @@ pip install git+https://github.com/pytorch/tnt.git@master ...@@ -154,7 +154,7 @@ pip install git+https://github.com/pytorch/tnt.git@master
6. [Kaggle Diabetic Retinopathy Detection, 2015](https://www.kaggle.com/c/diabetic-retinopathy-detection/) First place in the Kaggle Diabetic Retinopathy Detection competition. 6. [Kaggle Diabetic Retinopathy Detection, 2015](https://www.kaggle.com/c/diabetic-retinopathy-detection/) First place in the Kaggle Diabetic Retinopathy Detection competition.
7. [Submanifold Sparse Convolutional Networks, 2017](https://arxiv.org/abs/1706.01307) Introduces deep 'submanifold' SparseConvNets. 7. [Submanifold Sparse Convolutional Networks, 2017](https://arxiv.org/abs/1706.01307) Introduces deep 'submanifold' SparseConvNets.
8. [Workshop on Learning to See from 3D Data, 2017](https://shapenet.cs.stanford.edu/iccv17workshop/) First place in the [semantic segmentation](https://shapenet.cs.stanford.edu/iccv17/) competition. [Report](https://arxiv.org/pdf/1710.06104) 8. [Workshop on Learning to See from 3D Data, 2017](https://shapenet.cs.stanford.edu/iccv17workshop/) First place in the [semantic segmentation](https://shapenet.cs.stanford.edu/iccv17/) competition. [Report](https://arxiv.org/pdf/1710.06104)
9. [3D Semantic Segmentation with Submanifold Sparse Convolutional Networks, 2017](https://arxiv.org/abs/1711.10275) Semantic segmentation for the ShapeNet Core55 and NYU-DepthV2 datasets 9. [3D Semantic Segmentation with Submanifold Sparse Convolutional Networks, 2017](https://arxiv.org/abs/1711.10275) Semantic segmentation for the ShapeNet Core55 and NYU-DepthV2 datasets, CVPR 2018
### Citations ### Citations
......
#!/bin/bash
rm -rf build/ sparseconvnet.egg-info sparseconvnet_SCN*.so
python setup.py install
...@@ -48,10 +48,10 @@ p['lr_decay'] = 4e-2 ...@@ -48,10 +48,10 @@ p['lr_decay'] = 4e-2
p['weight_decay'] = 1e-4 p['weight_decay'] = 1e-4
p['momentum'] = 0.9 p['momentum'] = 0.9
p['check_point'] = True p['check_point'] = True
p['use_gpu'] = torch.cuda.is_available() p['use_cuda'] = torch.cuda.is_available()
dtype = 'torch.cuda.FloatTensor' if p['use_gpu'] else 'torch.FloatTensor' dtype = 'torch.cuda.FloatTensor' if p['use_cuda'] else 'torch.FloatTensor'
dtypei = 'torch.cuda.LongTensor' if p['use_gpu'] else 'torch.LongTensor' dtypei = 'torch.cuda.LongTensor' if p['use_cuda'] else 'torch.LongTensor'
if p['use_gpu']: if p['use_cuda']:
model.cuda() model.cuda()
criterion.cuda() criterion.cuda()
optimizer = optim.SGD(model.parameters(), optimizer = optim.SGD(model.parameters(),
......
...@@ -48,10 +48,10 @@ p['lr_decay'] = 4e-2 ...@@ -48,10 +48,10 @@ p['lr_decay'] = 4e-2
p['weight_decay'] = 1e-4 p['weight_decay'] = 1e-4
p['momentum'] = 0.9 p['momentum'] = 0.9
p['check_point'] = True p['check_point'] = True
p['use_gpu'] = torch.cuda.is_available() p['use_cuda'] = torch.cuda.is_available()
dtype = 'torch.cuda.FloatTensor' if p['use_gpu'] else 'torch.FloatTensor' dtype = 'torch.cuda.FloatTensor' if p['use_cuda'] else 'torch.FloatTensor'
dtypei = 'torch.cuda.LongTensor' if p['use_gpu'] else 'torch.LongTensor' dtypei = 'torch.cuda.LongTensor' if p['use_cuda'] else 'torch.LongTensor'
if p['use_gpu']: if p['use_cuda']:
model.cuda() model.cuda()
criterion.cuda() criterion.cuda()
optimizer = optim.SGD(model.parameters(), optimizer = optim.SGD(model.parameters(),
......
...@@ -10,40 +10,41 @@ import sparseconvnet as scn ...@@ -10,40 +10,41 @@ import sparseconvnet as scn
from data import get_iterators from data import get_iterators
# two-dimensional SparseConvNet # two-dimensional SparseConvNet
class Model(nn.Module): class Model(nn.Module):
def __init__(self): def __init__(self):
nn.Module.__init__(self) nn.Module.__init__(self)
self.sparseModel = scn.Sequential( self.sparseModel = scn.Sequential(
).add(scn.SubmanifoldConvolution(2, 3, 8, 3, False) scn.SubmanifoldConvolution(2, 3, 8, 3, False),
).add(scn.MaxPooling(2, 3, 2) scn.MaxPooling(2, 3, 2),
).add(scn.SparseResNet(2, 8, [ scn.SparseResNet(2, 8, [
['b', 8, 2, 1], ['b', 8, 2, 1],
['b', 16, 2, 2], ['b', 16, 2, 2],
['b', 24, 2, 2], ['b', 24, 2, 2],
['b', 32, 2, 2]]) ['b', 32, 2, 2]]),
).add(scn.Convolution(2, 32, 64, 5, 1, False) scn.Convolution(2, 32, 64, 5, 1, False),
).add(scn.BatchNormReLU(64) scn.BatchNormReLU(64),
).add(scn.SparseToDense(2, 64)) scn.SparseToDense(2, 64))
self.spatial_size= self.sparseModel.input_spatial_size(torch.LongTensor([1, 1]))
self.inputLayer = scn.InputLayer(2,self.spatial_size,2)
self.linear = nn.Linear(64, 183) self.linear = nn.Linear(64, 183)
def forward(self, x): def forward(self, x):
x = self.inputLayer(x)
x = self.sparseModel(x) x = self.sparseModel(x)
x = x.view(-1, 64) x = x.view(-1, 64)
x = self.linear(x) x = self.linear(x)
return x return x
model = Model() model = Model()
spatial_size = model.sparseModel.input_spatial_size(torch.LongTensor([1, 1])) scale=63
print('Input spatial size:', spatial_size) dataset = get_iterators(model.spatial_size, scale)
dataset = get_iterators(spatial_size, 63, 3) print('Input spatial size:', model.spatial_size, 'Data scale:', scale)
scn.ClassificationTrainValidate( scn.ClassificationTrainValidate(
model, dataset, model, dataset,
{'n_epochs': 100, {'n_epochs': 100,
'initial_lr': 0.1, 'initial_lr': 0.1,
'lr_decay': 0.05, 'lr_decay': 0.05,
'weight_decay': 1e-4, 'weight_decay': 1e-4,
'use_gpu': torch.cuda.is_available(), 'use_cuda': torch.cuda.is_available(),
'check_point': True, }) 'check_point': False, })
...@@ -13,32 +13,36 @@ from data import get_iterators ...@@ -13,32 +13,36 @@ from data import get_iterators
class Model(nn.Module): class Model(nn.Module):
def __init__(self): def __init__(self):
nn.Module.__init__(self) nn.Module.__init__(self)
self.sparseModel = scn.SparseVggNet(2, 3, [ self.sparseModel = scn.Sequential(
scn.SparseVggNet(2, 3, [
['C', 8, ], ['C', 8], 'MP', ['C', 8, ], ['C', 8], 'MP',
['C', 16], ['C', 16], 'MP', ['C', 16], ['C', 16], 'MP',
['C', 16 + 8], ['C', 16 + 8], 'MP', ['C', 16, 8], ['C', 16, 8], 'MP',
['C', 24 + 8], ['C', 24 + 8], 'MP'] ['C', 24, 8], ['C', 24, 8], 'MP']),
).add(scn.Convolution(2, 32, 64, 5, 1, False) scn.Convolution(2, 32, 64, 5, 1, False),
).add(scn.BatchNormReLU(64) scn.BatchNormReLU(64),
).add(scn.SparseToDense(2, 64)) scn.SparseToDense(2, 64))
self.spatial_size= self.sparseModel.input_spatial_size(torch.LongTensor([1, 1]))
self.inputLayer = scn.InputLayer(2,self.spatial_size,2)
self.linear = nn.Linear(64, 183) self.linear = nn.Linear(64, 183)
def forward(self, x): def forward(self, x):
x = self.inputLayer(x)
x = self.sparseModel(x) x = self.sparseModel(x)
x = x.view(-1, 64) x = x.view(-1, 64)
x = self.linear(x) x = self.linear(x)
return x return x
model = Model() model = Model()
spatial_size = model.sparseModel.input_spatial_size(torch.LongTensor([1, 1])) scale=63
print('Input spatial size:', spatial_size) dataset = get_iterators(model.spatial_size, scale)
dataset = get_iterators(spatial_size, 63, 3) print('Input spatial size:', model.spatial_size, 'Data scale:', scale)
scn.ClassificationTrainValidate( scn.ClassificationTrainValidate(
model, dataset, model, dataset,
{'n_epochs': 100, {'n_epochs': 100,
'initial_lr': 0.1, 'initial_lr': 0.1,
'lr_decay': 0.05, 'lr_decay': 0.05,
'weight_decay': 1e-4, 'weight_decay': 1e-4,
'use_gpu': torch.cuda.is_available(), 'use_cuda': torch.cuda.is_available(),
'check_point': True, }) 'check_point': False, })
...@@ -19,114 +19,90 @@ if not os.path.exists('pickle/'): ...@@ -19,114 +19,90 @@ if not os.path.exists('pickle/'):
import process import process
def train(spatial_size, Scale, precomputeSize): def interp(sample,x,y):
d = pickle.load(open('pickle/train.pickle', 'rb')) return torch.from_numpy(np.hstack([np.interp(sample.numpy(),x.numpy(),y[:,i].numpy())[:,None] for i in range(y.shape[1])])).float()
print('Replicating training set 10 times (1 epoch = 10 iterations through the training set = 10x6588 training samples)') class Data(torch.utils.data.Dataset):
for i in range(9): def __init__(self,file,scale=63,repeats=1):
for j in range(6588): torch.utils.data.Dataset.__init__(self)
d.append(d[j]) self.data = pickle.load(open(file, 'rb'))
for i, x in enumerate(d): for j in range(len(self.data)):
x['idx'] = i strokes=[]
d = torchnet.dataset.ListDataset(d) features=[]
randperm = torch.randperm(len(d)) for k,stroke in enumerate(self.data[j]['input']):
if len(stroke)>1:
def perm(idx, size): stroke=stroke.float()/255-0.5
return randperm[idx] stroke*=scale-1e-3
delta=stroke[1:]-stroke[:-1]
mag=(delta**2).sum(1)**0.5
l=mag.cumsum(0)
zl=torch.cat([torch.zeros(1),l])
strokes.append(interp(torch.arange(0,zl[-1]),zl,stroke))
delta/=mag[:,None]
delta=torch.Tensor(delta[[i//2 for i in range(2*len(l))]])
zl_=zl[[i//2 for i in range(1,2*len(l)+1)]]
features.append(interp(torch.arange(0,zl[-1]),zl_,delta))
self.data[j]['coords'] = torch.cat(strokes,0)
self.data[j]['features'] = torch.cat(features,0)
self.data[j]['target']-=1
if repeats>1:
print('Replicating dataset: 1 epoch = %d iterations of the dataset; %d x %d = %d training samples'%(repeats, repeats, len(self.data), repeats * len(self.data)))
for j in range(len(self.data)):
for i in range(repeats-1):
self.data.append(self.data[j])
for i, x in enumerate(self.data):
x['idx'] = i
def __getitem__(self,n):
return self.data[n]
def __len__(self):
return len(self.data)
def TrainMergeFn(spatial_size=95, jitter=8):
center = spatial_size/2
def merge(tbl): def merge(tbl):
inp = scn.InputBatch(2, spatial_size) v=torch.Tensor([[1,0,0]])
center = spatial_size.float().view(1, 2) / 2 targets=[x['target'] for x in tbl]
p = torch.LongTensor(2) locations=[]
v = torch.FloatTensor([1, 0, 0]) features=[]
np_random = np.random.RandomState(tbl['idx']) for idx,char in enumerate(tbl):
for char in tbl['input']:
inp.add_sample()
m = torch.eye(2) m = torch.eye(2)
r = np_random.randint(1, 3) r = torch.randint(0,3,[1]).int().item()
alpha = random.uniform(-0.2, 0.2) alpha = torch.rand(1).item()*0.4-0.2
if alpha == 1: if r == 1:
m[0][1] = alpha m[0][1] = alpha
elif alpha == 2: elif r == 2:
m[1][0] = alpha m[1][0] = alpha
else: else:
m = torch.mm(m, torch.FloatTensor( m = torch.mm(m, torch.FloatTensor(
[[math.cos(alpha), math.sin(alpha)], [[math.cos(alpha), math.sin(alpha)],
[-math.sin(alpha), math.cos(alpha)]])) [-math.sin(alpha), math.cos(alpha)]]))
c = center + torch.FloatTensor(1, 2).uniform_(-8, 8) coords=char['coords']
for stroke in char: coords = torch.mm(coords, m) + torch.FloatTensor(1, 2).uniform_(center-jitter, center+jitter)
stroke = stroke.float() / 255 - 0.5 coords = torch.cat([coords.long(),torch.LongTensor([idx]).expand([coords.size(0),1])],1)
stroke = c.expand_as(stroke) + \ locations.append(coords)
torch.mm(stroke, m * (Scale - 0.01)) f=char['features']
############################################################### f=torch.mm(f, m)
# To avoid GIL problems use a helper function: f /= (f**2).sum(1,keepdim=True)**0.5
scn.dim_fn( f = torch.cat([f,torch.ones([f.size(0),1])],1)
2, features.append(f)
'drawCurve')( return {'input': scn.InputLayerInput(torch.cat(locations,0), torch.cat(features,0)), 'target': torch.LongTensor(targets)}
inp.metadata.ffi, return merge
inp.features, def TestMergeFn(spatial_size=95):
stroke) center = spatial_size/2
###############################################################
# Above is equivalent to :
# x1,x2,y1,y2,l=0,stroke[0][0],0,stroke[0][1],0
# for i in range(1,stroke.size(0)):
# x1=x2
# y1=y2
# x2=stroke[i][0]
# y2=stroke[i][1]
# l=1e-10+((x2-x1)**2+(y2-y1)**2)**0.5
# v[1]=(x2-x1)/l
# v[2]=(y2-y1)/l
# l=max(x2-x1,y2-y1,x1-x2,y1-y2,0.9)
# for j in np.arange(0,1,1/l):
# p[0]=math.floor(x1*j+x2*(1-j))
# p[1]=math.floor(y1*j+y2*(1-j))
# inp.set_location(p,v,False)
###############################################################
inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1}
bd = torchnet.dataset.BatchDataset(d, 108, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd)
def iter():
randperm.copy_(torch.randperm(len(d)))
return tdi()
return iter
def val(spatial_size, Scale, precomputeSize):
d = pickle.load(open('pickle/test.pickle', 'rb'))
d = torchnet.dataset.ListDataset(d)
randperm = torch.randperm(len(d))
def perm(idx, size):
return randperm[idx]
def merge(tbl): def merge(tbl):
inp = scn.InputBatch(2, spatial_size) v=torch.Tensor([[1,0,0]])
center = spatial_size.float().view(1, 2) / 2 targets=[x['target'] for x in tbl]
p = torch.LongTensor(2) locations=[]
v = torch.FloatTensor([1, 0, 0]) features=[]
for char in tbl['input']: for idx,char in enumerate(tbl):
inp.add_sample() coords=char['coords']+center
for stroke in char: coords = torch.cat([coords.long(),torch.LongTensor([idx]).expand([coords.size(0),1])],1)
stroke = stroke.float() * (Scale - 0.01) / 255 - 0.5 * (Scale - 0.01) locations.append(coords)
stroke += center.expand_as(stroke) f=char['features']
scn.dim_fn( f = torch.cat([f,torch.ones([f.size(0),1])],1)
2, features.append(f)
'drawCurve')( return {'input': scn.InputLayerInput(torch.cat(locations,0), torch.cat(features,0)), 'target': torch.LongTensor(targets)}
inp.metadata.ffi, return merge
inp.features,
stroke)
inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1}
bd = torchnet.dataset.BatchDataset(d, 183, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd)
def iter():
randperm.copy_(torch.randperm(len(d)))
return tdi()
return iter
def get_iterators(*args): def get_iterators(*args):
return {'train': train(*args), 'val': val(*args)} return {'train': torch.utils.data.DataLoader(Data('pickle/train.pickle',repeats=10), collate_fn=TrainMergeFn(), batch_size=108, shuffle=True, num_workers=10),
'val': torch.utils.data.DataLoader(Data('pickle/test.pickle',repeats=1), collate_fn=TestMergeFn(), batch_size=183, shuffle=True, num_workers=10)}
...@@ -10,40 +10,41 @@ import sparseconvnet as scn ...@@ -10,40 +10,41 @@ import sparseconvnet as scn
from data import get_iterators from data import get_iterators
# two-dimensional SparseConvNet # two-dimensional SparseConvNet
class Model(nn.Module): class Model(nn.Module):
def __init__(self): def __init__(self):
nn.Module.__init__(self) nn.Module.__init__(self)
self.sparseModel = scn.Sequential( self.sparseModel = scn.Sequential(
).add(scn.SubmanifoldConvolution(2, 3, 16, 3, False) scn.SubmanifoldConvolution(2, 3, 16, 3, False),
).add(scn.MaxPooling(2, 3, 2) scn.MaxPooling(2, 3, 2),
).add(scn.SparseResNet(2, 16, [ scn.SparseResNet(2, 16, [
['b', 16, 2, 1], ['b', 16, 2, 1],
['b', 32, 2, 2], ['b', 32, 2, 2],
['b', 48, 2, 2], ['b', 48, 2, 2],
['b', 96, 2, 2]]) ['b', 96, 2, 2]]),
).add(scn.Convolution(2, 96, 128, 5, 1, False) scn.Convolution(2, 96, 128, 3, 1, False),
).add(scn.BatchNormReLU(128) scn.BatchNormReLU(128),
).add(scn.SparseToDense(2, 128)) scn.SparseToDense(2, 128))
self.spatial_size= self.sparseModel.input_spatial_size(torch.LongTensor([1, 1]))
self.inputLayer = scn.InputLayer(2,self.spatial_size,2)
self.linear = nn.Linear(128, 3755) self.linear = nn.Linear(128, 3755)
def forward(self, x): def forward(self, x):
x = self.inputLayer(x)
x = self.sparseModel(x) x = self.sparseModel(x)
x = x.view(-1, 128) x = x.view(-1, 128)
x = self.linear(x) x = self.linear(x)
return x return x
model = Model() model = Model()
spatial_size = model.sparseModel.input_spatial_size(torch.LongTensor([1, 1])) scale=63
print('Input spatial size:', spatial_size) dataset = get_iterators(model.spatial_size, scale)
dataset = get_iterators(spatial_size, 63, 3) print('Input spatial size:', model.spatial_size, 'Data scale:', scale)
scn.ClassificationTrainValidate( scn.ClassificationTrainValidate(
model, dataset, model, dataset,
{'n_epochs': 100, {'n_epochs': 100,
'initial_lr': 0.1, 'initial_lr': 0.1,
'lr_decay': 0.05, 'lr_decay': 0.05,
'weight_decay': 1e-4, 'weight_decay': 1e-4,
'use_gpu': torch.cuda.is_available(), 'use_cuda': torch.cuda.is_available(),
'check_point': True, }) 'check_point': False, })
...@@ -22,23 +22,27 @@ class Model(nn.Module): ...@@ -22,23 +22,27 @@ class Model(nn.Module):
).add(scn.Convolution(2, 96, 128, 3, 2, False) ).add(scn.Convolution(2, 96, 128, 3, 2, False)
).add(scn.BatchNormReLU(128) ).add(scn.BatchNormReLU(128)
).add(scn.SparseToDense(2, 128)) ).add(scn.SparseToDense(2, 128))
self.spatial_size= self.sparseModel.input_spatial_size(torch.LongTensor([1, 1]))
self.inputLayer = scn.InputLayer(2,self.spatial_size,2)
self.linear = nn.Linear(128, 3755) self.linear = nn.Linear(128, 3755)
def forward(self, x): def forward(self, x):
x = self.inputLayer(x)
x = self.sparseModel(x) x = self.sparseModel(x)
x = x.view(-1, 128) x = x.view(-1, 128)
x = self.linear(x) x = self.linear(x)
return x return x
model = Model() model = Model()
spatial_size = model.sparseModel.input_spatial_size(torch.LongTensor([1, 1])) scale=63
print('Input spatial size:', spatial_size) dataset = get_iterators(model.spatial_size, scale)
dataset = get_iterators(spatial_size, 63, 3) print('Input spatial size:', model.spatial_size, 'Data scale:', scale)
scn.ClassificationTrainValidate( scn.ClassificationTrainValidate(
model, dataset, model, dataset,
{'n_epochs': 100, {'n_epochs': 100,
'initial_lr': 0.1, 'initial_lr': 0.1,
'lr_decay': 0.05, 'lr_decay': 0.05,
'weight_decay': 1e-4, 'weight_decay': 1e-4,
'use_gpu': torch.cuda.is_available(), 'use_cuda': torch.cuda.is_available(),
'check_point': True, }) 'check_point': False, })
...@@ -22,23 +22,27 @@ class Model(nn.Module): ...@@ -22,23 +22,27 @@ class Model(nn.Module):
).add(scn.Convolution(2, 112, 128, 3, 2, False) ).add(scn.Convolution(2, 112, 128, 3, 2, False)
).add(scn.BatchNormReLU(128) ).add(scn.BatchNormReLU(128)
).add(scn.SparseToDense(2, 128)) ).add(scn.SparseToDense(2, 128))
self.spatial_size= self.sparseModel.input_spatial_size(torch.LongTensor([1, 1]))
self.inputLayer = scn.InputLayer(2,self.spatial_size,2)
self.linear = nn.Linear(128, 3755) self.linear = nn.Linear(128, 3755)
def forward(self, x): def forward(self, x):
x = self.inputLayer(x)
x = self.sparseModel(x) x = self.sparseModel(x)
x = x.view(-1, 128) x = x.view(-1, 128)
x = self.linear(x) x = self.linear(x)
return x return x
model = Model() model = Model()
spatial_size = model.sparseModel.input_spatial_size(torch.LongTensor([1, 1])) scale=63
print('Input spatial size:', spatial_size) dataset = get_iterators(model.spatial_size, scale)
dataset = get_iterators(spatial_size, 63, 3) print('Input spatial size:', model.spatial_size, 'Data scale:', scale)
scn.ClassificationTrainValidate( scn.ClassificationTrainValidate(
model, dataset, model, dataset,
{'n_epochs': 100, {'n_epochs': 100,
'initial_lr': 0.1, 'initial_lr': 0.1,
'lr_decay': 0.05, 'lr_decay': 0.05,
'weight_decay': 1e-4, 'weight_decay': 1e-4,
'use_gpu': torch.cuda.is_available(), 'use_cuda': torch.cuda.is_available(),
'check_point': True, }) 'check_point': False, })
...@@ -10,7 +10,7 @@ import sparseconvnet as scn ...@@ -10,7 +10,7 @@ import sparseconvnet as scn
import pickle import pickle
import math import math
import random import random
import numpy import numpy as np
import os import os
if not os.path.exists('pickle/'): if not os.path.exists('pickle/'):
...@@ -24,95 +24,57 @@ if not os.path.exists('pickle/'): ...@@ -24,95 +24,57 @@ if not os.path.exists('pickle/'):
os.system('unzip OLHWDB1.1tst_pot.zip -d POT/') os.system('unzip OLHWDB1.1tst_pot.zip -d POT/')
os.system('python readPotFiles.py') os.system('python readPotFiles.py')
def interp(sample,x,y):
return torch.from_numpy(np.hstack([np.interp(sample.numpy(),x.numpy(),y[:,i].numpy())[:,None] for i in range(y.shape[1])])).float()
class Data(torch.utils.data.Dataset):
def __init__(self,file,scale=63):
print('Loading', file, 'and balancing points for scale', scale)
torch.utils.data.Dataset.__init__(self)
self.data = pickle.load(open(file, 'rb'))
for j in range(len(self.data)):
strokes=[]
features=[]
for k,stroke in enumerate(self.data[j]['input']):
if len(stroke)>1:
stroke=stroke.float()/255-0.5
stroke*=scale-1e-3
delta=stroke[1:]-stroke[:-1]
mag=(delta**2).sum(1)**0.5
l=mag.cumsum(0)
zl=torch.cat([torch.zeros(1),l])
strokes.append(interp(torch.arange(0,zl[-1]),zl,stroke))
delta/=mag[:,None]
delta=torch.Tensor(delta[[i//2 for i in range(2*len(l))]])
zl_=zl[[i//2 for i in range(1,2*len(l)+1)]]
features.append(interp(torch.arange(0,zl[-1]),zl_,delta))
self.data[j]['coords'] = torch.cat(strokes,0)
self.data[j]['features'] = torch.cat(features,0)
for i, x in enumerate(self.data):
x['idx'] = i
print('Loaded', len(self.data), 'points')
def __getitem__(self,n):
return self.data[n]
def __len__(self):
return len(self.data)
def train(spatial_size, Scale, precomputeSize): def MergeFn(spatial_size=63):
d = pickle.load(open('pickle/train.pickle', 'rb')) center = spatial_size/2
d = torchnet.dataset.ListDataset(d)
randperm = torch.randperm(len(d))
def perm(idx, size):
return randperm[idx]
def merge(tbl): def merge(tbl):
inp = scn.InputBatch(2, spatial_size) v=torch.Tensor([[1,0,0]])
center = spatial_size.float().view(1, 2) / 2 targets=[x['target'] for x in tbl]
p = torch.LongTensor(2) locations=[]
v = torch.FloatTensor([1, 0, 0]) features=[]
for char in tbl['input']: for idx,char in enumerate(tbl):
inp.add_sample() coords=char['coords']+center
for stroke in char: coords = torch.cat([coords.long(),torch.LongTensor([idx]).expand([coords.size(0),1])],1)
stroke = stroke.float() * (Scale - 0.01) / 255 - 0.5 * (Scale - 0.01) locations.append(coords)
stroke += center.expand_as(stroke) f=char['features']
############################################################### f = torch.cat([f,torch.ones([f.size(0),1])],1)
# To avoid GIL problems use a helper function: features.append(f)
scn.dim_fn( return {'input': scn.InputLayerInput(torch.cat(locations,0), torch.cat(features,0)), 'target': torch.LongTensor(targets)}
2, return merge
'drawCurve')(
inp.metadata.ffi,
inp.features,
stroke)
###############################################################
# Above is equivalent to :
# x1,x2,y1,y2,l=0,stroke[0][0],0,stroke[0][1],0
# for i in range(1,stroke.size(0)):
# x1=x2
# y1=y2
# x2=stroke[i][0]
# y2=stroke[i][1]
# l=1e-10+((x2-x1)**2+(y2-y1)**2)**0.5
# v[1]=(x2-x1)/l
# v[2]=(y2-y1)/l
# l=max(x2-x1,y2-y1,x1-x2,y1-y2,0.9)
# for j in numpy.arange(0,1,1/l):
# p[0]=math.floor(x1*j+x2*(1-j))
# p[1]=math.floor(y1*j+y2*(1-j))
# inp.set_location(p,v,False)
###############################################################
inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target'])}
bd = torchnet.dataset.BatchDataset(d, 100, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd)
def iter():
randperm.copy_(torch.randperm(len(d)))
return tdi()
return iter
def val(spatial_size, Scale, precomputeSize):
d = pickle.load(open('pickle/test.pickle', 'rb'))
d = torchnet.dataset.ListDataset(d)
randperm = torch.randperm(len(d))
def perm(idx, size):
return randperm[idx]
def merge(tbl):
inp = scn.InputBatch(2, spatial_size)
center = spatial_size.float().view(1, 2) / 2
p = torch.LongTensor(2)
v = torch.FloatTensor([1, 0, 0])
for char in tbl['input']:
inp.add_sample()
for stroke in char:
stroke = stroke.float() * (Scale - 0.01) / 255 - 0.5 * (Scale - 0.01)
stroke += center.expand_as(stroke)
scn.dim_fn(
2,
'drawCurve')(
inp.metadata.ffi,
inp.features,
stroke)
inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target'])}
bd = torchnet.dataset.BatchDataset(d, 100, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd)
def iter():
randperm.copy_(torch.randperm(len(d)))
return tdi()
return iter
def get_iterators(*args): def get_iterators(*args):
return {'train': train(*args), 'val': val(*args)} return {'train': torch.utils.data.DataLoader(Data('pickle/train.pickle'), collate_fn=MergeFn(), batch_size=100, shuffle=True, num_workers=10),
'val': torch.utils.data.DataLoader(Data('pickle/test.pickle'), collate_fn=MergeFn(), batch_size=100, shuffle=True, num_workers=10)}
...@@ -8,7 +8,7 @@ import torch ...@@ -8,7 +8,7 @@ import torch
import sparseconvnet as scn import sparseconvnet as scn
# Use the GPU if there is one, otherwise CPU # Use the GPU if there is one, otherwise CPU
use_gpu = torch.cuda.is_available() use_cuda = torch.cuda.is_available()
model = scn.Sequential().add( model = scn.Sequential().add(
scn.SparseVggNet(2, 1, scn.SparseVggNet(2, 1,
...@@ -22,7 +22,7 @@ model = scn.Sequential().add( ...@@ -22,7 +22,7 @@ model = scn.Sequential().add(
).add( ).add(
scn.SparseToDense(2, 32) scn.SparseToDense(2, 32)
) )
if use_gpu: if use_cuda:
model.cuda() model.cuda()
# output will be 10x10 # output will be 10x10
...@@ -67,7 +67,7 @@ input.set_locations(locations, features, 0) ...@@ -67,7 +67,7 @@ input.set_locations(locations, features, 0)
input.precomputeMetadata(3) input.precomputeMetadata(3)
model.train() model.train()
if use_gpu: if use_cuda:
input.cuda() input.cuda()
output = model.forward(input) output = model.forward(input)
......
...@@ -4,77 +4,34 @@ ...@@ -4,77 +4,34 @@
# This source code is licensed under the license found in the # This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree. # LICENSE file in the root directory of this source tree.
import os import torch, os
import torch from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension
from torch.utils.ffi import create_extension from setuptools import setup, find_packages
this_dir = os.path.dirname(os.path.realpath(__file__)) this_dir = os.path.dirname(os.path.realpath(__file__))
torch_dir = os.path.dirname(torch.__file__) torch_dir = os.path.dirname(torch.__file__)
conda_include_dir = '/'.join(torch_dir.split('/')[:-4]) + '/include'
print('Building SCN module') extra = {'cxx': ['-std=c++11', '-fopenmp'], 'nvcc': ['-std=c++11', '-Xcompiler', '-fopenmp']}
if torch.cuda.is_available():
s=('cd sparseconvnet/SCN; nvcc init.cu -c -o init.cu.o -ccbin /usr/bin/cc'
+ ' -m64 --std c++11 -Xcompiler \"-fopenmp -fPIC -O3\" '
+ '-gencode arch=compute_62,code=sm_62 '
+ '-gencode arch=compute_61,code=sm_61 '
+ '-gencode arch=compute_60,code=sm_60 '
+ '-gencode arch=compute_52,code=sm_52 '
+ '-gencode arch=compute_50,code=sm_50 '
+ '-gencode arch=compute_30,code=sm_30 '
+ '-DNVCC '
+ '-I/usr/local/cuda/include '
+ '-I' + '/'.join(torch_dir.split('/')[:-4]) + '/include '
+ '-I' + torch_dir + '/lib/include '
+ '-I' + torch_dir + '/lib/include/TH '
+ '-I' + torch_dir + '/lib/include/THC '
+ '-I.')
r = os.system(s)
assert r == 0
ffi = create_extension(
'sparseconvnet.SCN',
headers=[
'sparseconvnet/SCN/header_cpu.h',
'sparseconvnet/SCN/header_gpu.h'],
sources=[],
include_dirs=[os.path.expandvars('$CUDA_HOME') + '/include'],
extra_objects=[
this_dir +
'/sparseconvnet/SCN/init.cu.o'],
relative_to=__file__,
extra_compile_args=["-std=c99"],
with_cuda=True)
else:
r = os.system(
'cd sparseconvnet/SCN; g++ -fopenmp -std=c++11 -O3 -fPIC -c init.cpp -o init.cpp.o '
+ '-I' + '/'.join(torch_dir.split('/')[:-4]) + '/include '
+ '-I' + torch_dir + '/lib/include '
+ '-I' + torch_dir + '/lib/include/TH '
+ '-I.')
assert r == 0
ffi = create_extension(
'sparseconvnet.SCN',
headers=['sparseconvnet/SCN/header_cpu.h'],
sources=[],
extra_objects=[
this_dir +
'/sparseconvnet/SCN/init.cpp.o'],
relative_to=__file__,
extra_compile_args=["-std=c99"],
with_cuda=False)
ffi.build()
from setuptools import setup, find_packages
setup( setup(
name='sparseconvnet', name='sparseconvnet',
version='0.1.1', version='0.2',
description='Submanifold (Spatially) Sparse Convolutional Networks https://arxiv.org/abs/1706.01307', description='Submanifold (Spatially) Sparse Convolutional Networks https://arxiv.org/abs/1706.01307',
author='Facebook AI Research', author='Facebook AI Research',
author_email='benjamingraham@fb.com', author_email='benjamingraham@fb.com',
url='https://github.com/facebookresearch/SparseConvNet', url='https://github.com/facebookresearch/SparseConvNet',
package_data={ packages=['sparseconvnet','sparseconvnet.SCN'],
'sparseconvnet': ['SCN/_SCN.so'], ext_modules=[
}, CUDAExtension('sparseconvnet_SCN',
packages=find_packages(), ['sparseconvnet/SCN/pybind_cuda.cpp', 'sparseconvnet/SCN/instantiate_cpu.cpp', 'sparseconvnet/SCN/instantiate_cuda.cu'],
# Since the package includes a shared object, this is not zip-safe. include_dirs=[conda_include_dir, this_dir+'/sparseconvnet/SCN/'],
extra_compile_args=extra)
if torch.cuda.is_available() else
CppExtension('sparseconvnet_SCN',
['sparseconvnet/SCN/pybind_cpu.cpp', 'sparseconvnet/SCN/instantiate_cpu.cpp'],
include_dirs=[conda_include_dir, this_dir+'/sparseconvnet/SCN/'],
extra_compile_args=extra['cxx'])],
cmdclass={'build_ext': BuildExtension},
zip_safe=False, zip_safe=False,
) )
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#include "ActivePooling.h"
template <typename T, Int Dimension>
void cpu_ActivePooling_updateOutput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features, bool average) {
Int nPlanes = input_features.size(1);
auto _rules = m.getActivePoolingRuleBook(inputSize);
Int batchSize = _rules[1][0];
Int maxActive = _rules[1][1];
output_features.resize_({batchSize, nPlanes});
output_features.zero_();
ActivePooling_ForwardPass<T>(input_features.data<T>(),
output_features.data<T>(), batchSize, maxActive,
nPlanes, _rules, average);
}
template <typename T, Int Dimension>
void cpu_ActivePooling_updateGradInput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, bool average) {
Int nPlanes = input_features.size(1);
auto _rules = m.getActivePoolingRuleBook(inputSize);
Int batchSize = _rules[1][0];
Int maxActive = _rules[1][1];
d_input_features.resize_as_(input_features);
d_input_features.zero_();
ActivePooling_BackwardPass<T>(d_input_features.data<T>(),
d_output_features.data<T>(), batchSize,
maxActive, nPlanes, _rules, average);
}
...@@ -11,32 +11,32 @@ ...@@ -11,32 +11,32 @@
template <typename T> template <typename T>
void ActivePooling_ForwardPass(T *input_features, T *output_features, void ActivePooling_ForwardPass(T *input_features, T *output_features,
uInt batchSize, uInt maxActive, uInt nPlanes, Int batchSize, Int maxActive, Int nPlanes,
RuleBook &rules, bool average) { RuleBook &rules, bool average) {
for (uInt outSite = 0; outSite < batchSize; outSite++) { for (Int outSite = 0; outSite < batchSize; outSite++) {
T *out = &output_features[outSite * nPlanes]; T *out = &output_features[outSite * nPlanes];
uInt *r = &rules[0][outSite * (maxActive + 1)]; Int *r = &rules[0][outSite * (maxActive + 1)];
uInt nActive = *r++; Int nActive = *r++;
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f; T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
while (nActive-- > 0) { while (nActive-- > 0) {
T *inp = &input_features[(*r++) * nPlanes]; T *inp = &input_features[(*r++) * nPlanes];
for (uInt plane = 0; plane < nPlanes; plane++) for (Int plane = 0; plane < nPlanes; plane++)
out[plane] += inp[plane] * multiplier; out[plane] += inp[plane] * multiplier;
} }
} }
} }
template <typename T> template <typename T>
void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features, void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
uInt batchSize, uInt maxActive, uInt nPlanes, Int batchSize, Int maxActive, Int nPlanes,
RuleBook &rules, bool average) { RuleBook &rules, bool average) {
for (uInt outSite = 0; outSite < batchSize; outSite++) { for (Int outSite = 0; outSite < batchSize; outSite++) {
T *out = &d_output_features[outSite * nPlanes]; T *out = &d_output_features[outSite * nPlanes];
uInt *r = &rules[0][outSite * (maxActive + 1)]; Int *r = &rules[0][outSite * (maxActive + 1)];
uInt nActive = *r++; Int nActive = *r++;
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f; T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
while (nActive-- > 0) { while (nActive-- > 0) {
T *inp = &d_input_features[(*r++) * nPlanes]; T *inp = &d_input_features[(*r++) * nPlanes];
for (uInt plane = 0; plane < nPlanes; plane++) for (Int plane = 0; plane < nPlanes; plane++)
inp[plane] = out[plane] * multiplier; inp[plane] = out[plane] * multiplier;
} }
} }
......
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#include "AffineReluTrivialConvolution.h"
// CPU forward pass for the fused affine + ReLU + 1x1 ("trivial") convolution.
// Resizes `output_features` to nActive x convWeight.size(1) and returns
// nActive * nIn * nOut — presumably a multiply count for FLOP accounting;
// confirm against callers.
template <typename T>
double cpu_AffineReluTrivialConvolution_updateOutput(
    /*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features,
    /*float*/ at::Tensor affineWeight,
    /*float*/ at::Tensor affineBias, /*float*/ at::Tensor convWeight) {
  Int nIn = convWeight.size(0);  // input planes
  Int nOut = convWeight.size(1); // output planes
  Int nActive = input_features.size(0);
  output_features.resize_({nActive, nOut});
  AffineReluTrivialConvolution_ForwardPass(
      input_features.data<T>(), nIn, input_features.stride(0),
      output_features.data<T>(), nOut, output_features.stride(0),
      affineWeight.data<T>(), affineBias.data<T>(), convWeight.data<T>(),
      nActive);
  return input_features.size(0) * input_features.size(1) *
         output_features.size(1);
}
// CPU backward pass for the fused affine + ReLU + 1x1 convolution.
// Resizes `d_input_features` to match the input; the kernel fills the input
// gradient and accumulates the affine/convolution weight gradients.
// `additiveGrad` is forwarded to the kernel — presumably it selects
// accumulate-vs-overwrite for the input gradient; confirm in the header.
template <typename T>
void cpu_AffineReluTrivialConvolution_backward(
    /*float*/ at::Tensor input_features, /*float*/ at::Tensor d_input_features,
    /*float*/ at::Tensor d_output_features, /*float*/ at::Tensor affineWeight,
    /*float*/ at::Tensor d_affineWeight, /*float*/ at::Tensor affineBias,
    /*float*/ at::Tensor d_affineBias,
    /*float*/ at::Tensor convWeight, /*float*/ at::Tensor d_convWeight,
    bool additiveGrad) {
  Int nIn = convWeight.size(0);  // input planes
  Int nOut = convWeight.size(1); // output planes
  d_input_features.resize_as_(input_features);
  AffineReluTrivialConvolution_BackwardPass(
      input_features.data<T>(), d_input_features.data<T>(), nIn,
      input_features.stride(0), d_output_features.data<T>(), nOut,
      d_output_features.stride(0), affineWeight.data<T>(),
      d_affineWeight.data<T>(), affineBias.data<T>(), d_affineBias.data<T>(),
      convWeight.data<T>(), d_convWeight.data<T>(), input_features.size(0),
      additiveGrad);
}
...@@ -6,20 +6,19 @@ ...@@ -6,20 +6,19 @@
#ifndef CPU_AffineReluTrivialConvolution_H #ifndef CPU_AffineReluTrivialConvolution_H
#define CPU_AffineReluTrivialConvolution_H #define CPU_AffineReluTrivialConvolution_H
#include "../SparseConvNet.h"
#include <cstring> #include <cstring>
// buffer must have size >= nHot * (nIn+nOut)
template <typename T> template <typename T>
void AffineReluTrivialConvolution_ForwardPass( void AffineReluTrivialConvolution_ForwardPass(
T *input_features, uInt input_nPlanes, uInt input_stride, T *input_features, Int input_nPlanes, Int input_stride, T *output_features,
T *output_features, uInt output_nPlanes, uInt output_stride, Int output_nPlanes, Int output_stride, T *affineWeight, T *affineBias,
T *affineWeight, T *affineBias, T *convWeight, uInt nActive) { T *convWeight, Int nActive) {
for (uInt row = 0; row < nActive; row++) { for (Int row = 0; row < nActive; row++) {
for (uInt column = 0; column < output_nPlanes; column++) { for (Int column = 0; column < output_nPlanes; column++) {
T sum = 0; T sum = 0;
for (uInt j = 0; j < input_nPlanes; j++) { for (Int j = 0; j < input_nPlanes; j++) {
T i = input_features[row * input_stride + j] * affineWeight[j] + T i = input_features[row * input_stride + j] * affineWeight[j] +
affineBias[j]; affineBias[j];
i = (i > 0) ? i : 0; i = (i > 0) ? i : 0;
...@@ -32,16 +31,15 @@ void AffineReluTrivialConvolution_ForwardPass( ...@@ -32,16 +31,15 @@ void AffineReluTrivialConvolution_ForwardPass(
template <typename T> template <typename T>
void AffineReluTrivialConvolution_BackwardPass( void AffineReluTrivialConvolution_BackwardPass(
T *input_features, T *d_input_features, uInt input_nPlanes, T *input_features, T *d_input_features, Int input_nPlanes, Int input_stride,
uInt input_stride, T *d_output_features, uInt output_nPlanes, T *d_output_features, Int output_nPlanes, Int output_stride,
uInt output_stride, T *affineWeight, T *dAffineWeight, T *affineBias, T *affineWeight, T *dAffineWeight, T *affineBias, T *dAffineBias,
T *dAffineBias, T *convWeight, T *dConvWeight, uInt nActive, T *convWeight, T *dConvWeight, Int nActive, bool additiveGrad) {
bool additiveGrad) {
for (uInt row = 0; row < input_nPlanes; row++) { for (Int row = 0; row < input_nPlanes; row++) {
for (uInt column = 0; column < output_nPlanes; column++) { for (Int column = 0; column < output_nPlanes; column++) {
T sum = 0; T sum = 0;
for (uInt j = 0; j < nActive; j++) { for (Int j = 0; j < nActive; j++) {
T i = input_features[j * input_stride + row] * affineWeight[row] + T i = input_features[j * input_stride + row] * affineWeight[row] +
affineBias[row]; affineBias[row];
i = (i > 0) ? i : 0; i = (i > 0) ? i : 0;
...@@ -50,10 +48,10 @@ void AffineReluTrivialConvolution_BackwardPass( ...@@ -50,10 +48,10 @@ void AffineReluTrivialConvolution_BackwardPass(
dConvWeight[row * output_nPlanes + column] += sum; dConvWeight[row * output_nPlanes + column] += sum;
} }
} }
for (uInt row = 0; row < nActive; row++) { for (Int row = 0; row < nActive; row++) {
for (uInt column = 0; column < input_nPlanes; column++) { for (Int column = 0; column < input_nPlanes; column++) {
T sum = 0; T sum = 0;
for (uInt j = 0; j < output_nPlanes; j++) { for (Int j = 0; j < output_nPlanes; j++) {
sum += d_output_features[row * output_stride + j] * sum += d_output_features[row * output_stride + j] *
convWeight[column * output_nPlanes + j]; convWeight[column * output_nPlanes + j];
} }
......
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#include "AveragePooling.h"
// CPU forward pass for AveragePooling. Builds (or fetches) the pooling
// rulebook, resizes `output_features` to nActive x nPlanes, and runs the
// forward kernel once per rulebook entry. The first `nFeaturesToDrop`
// feature planes are skipped by offsetting the input pointer.
template <typename T, Int Dimension>
void cpu_AveragePooling_updateOutput(
    /*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
    /*long*/ at::Tensor poolSize,
    /*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
    /*float*/ at::Tensor input_features,
    /*float*/ at::Tensor output_features, long nFeaturesToDrop) {
  auto ruleBook =
      m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  Int nPlanes = input_features.size(1) - nFeaturesToDrop;
  Int nActive = m.getNActive(outputSize);
  output_features.resize_({nActive, nPlanes});
  output_features.zero_();
  T *inPtr = input_features.data<T>() + nFeaturesToDrop;
  T *outPtr = output_features.data<T>();
  for (auto &rule : ruleBook) {
    // Each rule stores (input, output) index pairs, hence size() / 2.
    Int nHot = rule.size() / 2;
    AveragePooling_ForwardPass<T>(inPtr, outPtr, nPlanes,
                                  input_features.stride(0),
                                  output_features.stride(0), &rule[0], nHot,
                                  ruleBook.size());
  }
}
// CPU backward pass for AveragePooling. Resizes and zeroes
// `d_input_features`, then runs the backward kernel once per rulebook
// entry, distributing output gradients back through the pooling rules.
// The first `nFeaturesToDrop` planes are skipped via pointer offset.
template <typename T, Int Dimension>
void cpu_AveragePooling_updateGradInput(
    /*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
    /*long*/ at::Tensor poolSize,
    /*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
    /*float*/ at::Tensor input_features,
    /*float*/ at::Tensor d_input_features,
    /*float*/ at::Tensor d_output_features, long nFeaturesToDrop) {
  auto ruleBook =
      m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  Int nPlanes = input_features.size(1) - nFeaturesToDrop;
  d_input_features.resize_as_(input_features);
  d_input_features.zero_();
  T *dInPtr = d_input_features.data<T>() + nFeaturesToDrop;
  T *dOutPtr = d_output_features.data<T>();
  for (auto &rule : ruleBook) {
    // Each rule stores (input, output) index pairs, hence size() / 2.
    Int nHot = rule.size() / 2;
    AveragePooling_BackwardPass<T>(dInPtr, dOutPtr, nPlanes,
                                   input_features.stride(0),
                                   d_output_features.stride(0), &rule[0], nHot,
                                   ruleBook.size());
  }
}
...@@ -6,29 +6,29 @@ ...@@ -6,29 +6,29 @@
#ifndef CPU_AVERAGEPOOLING_H #ifndef CPU_AVERAGEPOOLING_H
#define CPU_AVERAGEPOOLING_H #define CPU_AVERAGEPOOLING_H
#include "../SparseConvNet.h"
template <typename T> template <typename T>
void AveragePooling_ForwardPass(T *input_features, T *output_features, void AveragePooling_ForwardPass(T *input_features, T *output_features,
uInt nPlanes, uInt input_stride, Int nPlanes, Int input_stride,
uInt output_stride, uInt *rules, uInt nHot, Int output_stride, Int *rules, Int nHot,
uInt filterVolume) { Int filterVolume) {
for (uInt outSite = 0; outSite < nHot; outSite++) { for (Int outSite = 0; outSite < nHot; outSite++) {
uInt i = rules[2 * outSite] * input_stride; Int i = rules[2 * outSite] * input_stride;
uInt o = rules[2 * outSite + 1] * output_stride; Int o = rules[2 * outSite + 1] * output_stride;
for (uInt plane = 0; plane < nPlanes; plane++) for (Int plane = 0; plane < nPlanes; plane++)
output_features[o + plane] += input_features[i + plane] / filterVolume; output_features[o + plane] += input_features[i + plane] / filterVolume;
} }
} }
template <typename T> template <typename T>
void AveragePooling_BackwardPass(T *d_input_features, T *d_output_features, void AveragePooling_BackwardPass(T *d_input_features, T *d_output_features,
uInt nPlanes, uInt input_stride, Int nPlanes, Int input_stride,
uInt output_stride, uInt *rules, uInt nHot, Int output_stride, Int *rules, Int nHot,
uInt filterVolume) { Int filterVolume) {
for (uInt outSite = 0; outSite < nHot; outSite++) { for (Int outSite = 0; outSite < nHot; outSite++) {
uInt i = rules[2 * outSite] * input_stride; Int i = rules[2 * outSite] * input_stride;
uInt o = rules[2 * outSite + 1] * output_stride; Int o = rules[2 * outSite + 1] * output_stride;
for (uInt plane = 0; plane < nPlanes; plane++) for (Int plane = 0; plane < nPlanes; plane++)
d_input_features[i + plane] += d_input_features[i + plane] +=
d_output_features[o + plane] / filterVolume; d_output_features[o + plane] / filterVolume;
} }
......
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#include "BatchNormalization.h"
// CPU batch-normalization forward. Resizes `output_features` to match the
// input, then runs the kernel — but only for 2-d (nActive x nPlanes)
// inputs; other ranks are silently left as zero-computation no-ops.
// `leakiness` is forwarded to the kernel — presumably a fused leaky-ReLU
// slope; confirm in BatchNormalization.h.
template <typename T>
void cpu_BatchNormalization_updateOutput(
    /*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features,
    /*float*/ at::Tensor saveMean,
    /*float*/ at::Tensor saveInvStd, /*float*/ at::Tensor runningMean,
    /*float*/ at::Tensor runningVar,
    /*float*/ at::Tensor weight, /*float*/ at::Tensor bias, T eps, T momentum,
    bool train, T leakiness) {
  output_features.resize_as_(input_features);
  if (input_features.ndimension() != 2)
    return; // non-2d inputs: resize only, no computation
  BatchNormalization_ForwardPass<T>(
      input_features.data<T>(), output_features.data<T>(),
      input_features.size(1), input_features.stride(0),
      output_features.stride(0), input_features.size(0), saveMean.data<T>(),
      saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
      OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), eps,
      momentum, train, leakiness);
}
// In-tensor variant of the batch-normalization forward pass: identical to
// cpu_BatchNormalization_updateOutput except that `output_features` is NOT
// resized — the caller is expected to provide a correctly-sized tensor.
// Only 2-d (nActive x nPlanes) inputs are processed.
template <typename T>
void cpu_BatchNormalizationInTensor_updateOutput(
    /*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features,
    /*float*/ at::Tensor saveMean,
    /*float*/ at::Tensor saveInvStd, /*float*/ at::Tensor runningMean,
    /*float*/ at::Tensor runningVar,
    /*float*/ at::Tensor weight, /*float*/ at::Tensor bias, T eps, T momentum,
    bool train, T leakiness) {
  if (input_features.ndimension() != 2)
    return; // non-2d inputs: nothing to do
  BatchNormalization_ForwardPass<T>(
      input_features.data<T>(), output_features.data<T>(),
      input_features.size(1), input_features.stride(0),
      output_features.stride(0), input_features.size(0), saveMean.data<T>(),
      saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
      OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), eps,
      momentum, train, leakiness);
}
// CPU batch-normalization backward. Resizes `d_input_features` to match the
// input, then runs the backward kernel for 2-d (nActive x nPlanes) inputs;
// other ranks only get the resize. Weight/bias gradients are passed through
// OptionalTensorData, so they may be absent (affine-free batch norm).
template <typename T>
void cpu_BatchNormalization_backward(
    /*float*/ at::Tensor input_features, /*float*/ at::Tensor d_input_features,
    /*float*/ at::Tensor output_features,
    /*float*/ at::Tensor d_output_features, /*float*/ at::Tensor saveMean,
    /*float*/ at::Tensor saveInvStd, /*float*/ at::Tensor runningMean,
    /*float*/ at::Tensor runningVar,
    /*float*/ at::Tensor weight, /*float*/ at::Tensor bias,
    /*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias, T leakiness) {
  d_input_features.resize_as_(input_features);
  if (input_features.ndimension() != 2)
    return; // non-2d inputs: resize only, no computation
  BatchNormalization_BackwardPass<T>(
      input_features.data<T>(), d_input_features.data<T>(),
      output_features.data<T>(), d_output_features.data<T>(),
      input_features.size(1), input_features.stride(0),
      output_features.stride(0), input_features.size(0), saveMean.data<T>(),
      saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
      OptionalTensorData<T>(weight), OptionalTensorData<T>(bias),
      OptionalTensorData<T>(d_weight), OptionalTensorData<T>(d_bias),
      leakiness);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment