Commit d77687a6 authored by Benjamin Graham's avatar Benjamin Graham Committed by Benjamin Thomas Graham
Browse files

Rename ValidConvolutions to SubmanifoldConvolutions, update for PyTorch 0.4 Tensor/Variable merge

parent 297e04c0
...@@ -15,7 +15,7 @@ if torch.cuda.is_available(): ...@@ -15,7 +15,7 @@ if torch.cuda.is_available():
r = os.system( r = os.system(
'cd sparseconvnet/SCN; nvcc init.cu -c -o init.cu.o -ccbin /usr/bin/cc' 'cd sparseconvnet/SCN; nvcc init.cu -c -o init.cu.o -ccbin /usr/bin/cc'
+ ' -m64 --std c++11 -Xcompiler ' + ' -m64 --std c++11 -Xcompiler '
+ ',\"-fopenmp\",\"-fPIC\",\"-O3\",\"-DNDEBUG\" ' + ',\"-fopenmp\",\"-fPIC\",\"-O3\" '
+ '-gencode arch=compute_62,code=sm_62 ' + '-gencode arch=compute_62,code=sm_62 '
+ '-gencode arch=compute_61,code=sm_61 ' + '-gencode arch=compute_61,code=sm_61 '
+ '-gencode arch=compute_60,code=sm_60 ' + '-gencode arch=compute_60,code=sm_60 '
...@@ -40,10 +40,11 @@ if torch.cuda.is_available(): ...@@ -40,10 +40,11 @@ if torch.cuda.is_available():
this_dir + this_dir +
'/sparseconvnet/SCN/init.cu.o'], '/sparseconvnet/SCN/init.cu.o'],
relative_to=__file__, relative_to=__file__,
extra_compile_args=["-std=c99"],
with_cuda=True) with_cuda=True)
else: else:
r = os.system( r = os.system(
'cd sparseconvnet/SCN; g++ -std=c++11 -fPIC -c init.cpp -o init.cpp.o -I' + 'cd sparseconvnet/SCN; g++ -std=c++11 -DENABLE_OPENMP -fPIC -c init.cpp -o init.cpp.o -I' +
torch_dir + torch_dir +
'/lib/include -I' + '/lib/include -I' +
torch_dir + torch_dir +
...@@ -57,6 +58,7 @@ else: ...@@ -57,6 +58,7 @@ else:
this_dir + this_dir +
'/sparseconvnet/SCN/init.cpp.o'], '/sparseconvnet/SCN/init.cpp.o'],
relative_to=__file__, relative_to=__file__,
extra_compile_args=["-std=c99"],
with_cuda=False) with_cuda=False)
ffi.build() ffi.build()
......
...@@ -68,13 +68,13 @@ extern "C" void scn_DR_(Convolution_backward)( ...@@ -68,13 +68,13 @@ extern "C" void scn_DR_(Convolution_backward)(
} }
} }
extern "C" double scn_DR_(ValidConvolution_updateOutput)( extern "C" double scn_DR_(SubmanifoldConvolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *filterSize, void **m, THLongTensor *inputSize, THLongTensor *filterSize, void **m,
THTensor *input_features, THTensor *output_features, THTensor *weight, THTensor *input_features, THTensor *output_features, THTensor *weight,
THTensor *bias, long filterVolume, void *rulesBuffer) { THTensor *bias, long filterVolume, void *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = _m.getValidRuleBook(inputSize, filterSize, true); auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
uInt nActive = _m.getNActive(inputSize); uInt nActive = _m.getNActive(inputSize);
THTensor_(resize2d)(output_features, nActive, weight->size[1]); THTensor_(resize2d)(output_features, nActive, weight->size[1]);
if (not bias) if (not bias)
...@@ -97,14 +97,14 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)( ...@@ -97,14 +97,14 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)(
return flops; return flops;
} }
extern "C" void scn_DR_(ValidConvolution_backward)( extern "C" void scn_DR_(SubmanifoldConvolution_backward)(
THLongTensor *inputSize, THLongTensor *filterSize, void **m, THLongTensor *inputSize, THLongTensor *filterSize, void **m,
THTensor *input_features, THTensor *d_input_features, THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *weight, THTensor *d_weight, THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
THTensor *d_bias, long filterVolume, void *rulesBuffer) { THTensor *d_bias, long filterVolume, void *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = _m.getValidRuleBook(inputSize, filterSize, true); auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
uInt nActive = _m.getNActive(inputSize); uInt nActive = _m.getNActive(inputSize);
THTensor_(resizeAs)(d_input_features, input_features); THTensor_(resizeAs)(d_input_features, input_features);
THTensor_(zero)(d_input_features); THTensor_(zero)(d_input_features);
......
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/InputLayer.cpp"
#else
#include "InputLayer.h"
// CPU input layer forward: build the input-layer rulebook from the raw
// coordinate rows, then pool input rows into one output row per active site.
extern "C" void scn_DR_(InputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THTensor *input_features, THTensor *output_features, long batchSize,
    long mode, void *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  _m.inputLayer(spatialSize, input_coords, batchSize, mode);
  auto const planes = input_features->size[1];
  // One output row per active site; zero before accumulation.
  THTensor_(resize2d)(output_features, *_m.inputNActive, planes);
  THTensor_(zero)(output_features);
  auto &rb = _m.inputLayerRuleBook;
  // rb[0] header: [mode, maxActive, nInputRows, nOutputRows]; mode 4 == mean.
  InputLayer_ForwardPass<real>(THTensor_(data)(input_features),
                               THTensor_(data)(output_features),
                               /*nRows=*/rb[0][3], /*maxActive=*/rb[0][1],
                               planes, &rb[1][0], /*average=*/mode == 4);
}
// CPU input layer backward: scatter gradients from pooled output rows back
// onto the original input rows, using the rulebook built in updateOutput.
extern "C" void scn_DR_(InputLayer_updateGradInput)(void **m,
                                                    THTensor *d_input_features,
                                                    THTensor *d_output_features,
                                                    void *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.inputLayerRuleBook;
  auto const planes = d_output_features->size[1];
  // rb[0] header: [mode, maxActive, nInputRows, nOutputRows].
  THTensor_(resize2d)(d_input_features, rb[0][2], planes);
  THTensor_(zero)(d_input_features);
  InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features),
                                THTensor_(data)(d_output_features),
                                /*nRows=*/rb[0][3], /*maxActive=*/rb[0][1],
                                planes, &rb[1][0],
                                /*average=*/rb[0][0] == 4);
}
// CPU batch/length input layer forward: coordinates arrive as a
// batchSize x length x dimension array; build the bl rulebook and pool.
extern "C" void scn_DR_(BLInputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THTensor *input_features, THTensor *output_features, long mode,
    void *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  _m.blLayer(spatialSize, input_coords, mode);
  auto const planes = input_features->size[2];
  THTensor_(resize2d)(output_features, *_m.inputNActive, planes);
  THTensor_(zero)(output_features);
  auto &rb = _m.blLayerRuleBook;
  // rb[0] header: [mode, maxActive, batchSize, length, nActive].
  InputLayer_ForwardPass<real>(THTensor_(data)(input_features),
                               THTensor_(data)(output_features),
                               /*nRows=*/rb[0][4], /*maxActive=*/rb[0][1],
                               planes, &rb[1][0], /*average=*/mode == 4);
}
// CPU batch/length input layer backward: gradient w.r.t. the
// (batchSize x length x nPlanes) input tensor.
extern "C" void
scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features,
                                      THTensor *d_output_features,
                                      void *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.blLayerRuleBook;
  auto const planes = d_output_features->size[1];
  // rb[0] header: [mode, maxActive, batchSize, length, nActive].
  THTensor_(resize3d)(d_input_features, rb[0][2], rb[0][3], planes);
  THTensor_(zero)(d_input_features);
  InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features),
                                THTensor_(data)(d_output_features),
                                /*nRows=*/rb[0][4], /*maxActive=*/rb[0][1],
                                planes, &rb[1][0],
                                /*average=*/rb[0][0] == 4);
}
// CPU batch/length output layer forward: un-pool active-site features back
// into the dense (batchSize x length x nPlanes) layout. Reuses the backward
// scatter kernel because the mapping is the inverse of the input layer.
// Fix: dropped the unused local `mode` (it was read from rules[0][0] but
// never used; averaging is deliberately disabled here with `false`).
extern "C" void scn_DR_(BLOutputLayer_updateOutput)(
    void **m,
    THTensor *input_features, THTensor *output_features,
    void *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.blLayerRuleBook;
  auto nPlanes = input_features->size[1];
  // rules[0] header: [mode, maxActive, batchSize, length, nActive].
  THTensor_(resize3d)(output_features, rules[0][2], rules[0][3], nPlanes);
  THTensor_(zero)(output_features);
  auto maxActive = rules[0][1];
  auto nRows = rules[0][4];
  InputLayer_BackwardPass<real>(THTensor_(data)(output_features),
                                THTensor_(data)(input_features), nRows,
                                maxActive, nPlanes, &rules[1][0], false);
}
// CPU batch/length output layer backward: gather gradients from the dense
// layout back into one row per active site, via the forward gather kernel.
// Fix: dropped the unused local `mode` (read from rules[0][0], never used;
// averaging is deliberately disabled here with `false`).
extern "C" void
scn_DR_(BLOutputLayer_updateGradInput)(void **m, THTensor *d_input_features,
                                       THTensor *d_output_features,
                                       void *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.blLayerRuleBook;
  auto nPlanes = d_output_features->size[2];
  auto maxActive = rules[0][1];
  auto nRows = rules[0][4];
  THTensor_(resize2d)(d_input_features, nRows, nPlanes);
  THTensor_(zero)(d_input_features);
  InputLayer_ForwardPass<real>(THTensor_(data)(d_output_features),
                               THTensor_(data)(d_input_features), nRows,
                               maxActive, nPlanes, &rules[1][0], false);
}
#endif
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_INPUTLAYER_H
#define CPU_INPUTLAYER_H
#include "../SparseConvNet.h"
#include <cstring>
// Assume output and d_input_features have been zero-ed
template <typename T>
void InputLayer_ForwardPass(T *input_features, T *output_features, uInt nRows,
uInt maxActive, uInt nPlanes, uInt *rules,
bool average) {
for (uInt row = 0; row < nRows; row++) {
auto nActive = rules[0];
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
for (uInt i = 1; i <= nActive; ++i) {
auto in_f = input_features + nPlanes * rules[i];
for (uInt plane = 0; plane < nPlanes; plane++) {
output_features[plane] += multiplier * in_f[plane];
}
}
output_features += nPlanes;
rules += 1 + maxActive;
}
}
template <typename T>
void InputLayer_BackwardPass(T *d_input_features, T *d_output_features,
uInt nRows, uInt maxActive, uInt nPlanes,
uInt *rules, bool average) {
for (uInt row = 0; row < nRows; row++) {
auto nActive = rules[0];
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
for (uInt i = 1; i <= nActive; ++i) {
auto d_in_f = d_input_features + nPlanes * rules[i];
for (uInt plane = 0; plane < nPlanes; plane++)
d_in_f[plane] += multiplier * d_output_features[plane];
}
d_output_features += nPlanes;
rules += 1 + maxActive;
}
}
#endif /* CPU_INPUTLAYER_H */
...@@ -87,12 +87,12 @@ extern "C" void scn_DR_(Convolution_backward)( ...@@ -87,12 +87,12 @@ extern "C" void scn_DR_(Convolution_backward)(
} }
} }
extern "C" double scn_DR_(ValidConvolution_updateOutput)( extern "C" double scn_DR_(SubmanifoldConvolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *filterSize, void **m, THLongTensor *inputSize, THLongTensor *filterSize, void **m,
THCTensor *input_features, THCTensor *output_features, THCTensor *weight, THCTensor *input_features, THCTensor *output_features, THCTensor *weight,
THCTensor *bias, long filterVolume, THCITensor *rulesBuffer) { THCTensor *bias, long filterVolume, THCITensor *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = _m.getValidRuleBook(inputSize, filterSize, true); auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
uInt nActive = _m.getNActive(inputSize); uInt nActive = _m.getNActive(inputSize);
THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]); THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]);
if (not bias) if (not bias)
...@@ -125,13 +125,13 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)( ...@@ -125,13 +125,13 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)(
return flops; return flops;
} }
extern "C" void scn_DR_(ValidConvolution_backward)( extern "C" void scn_DR_(SubmanifoldConvolution_backward)(
THLongTensor *inputSize, THLongTensor *filterSize, void **m, THLongTensor *inputSize, THLongTensor *filterSize, void **m,
THCTensor *input_features, THCTensor *d_input_features, THCTensor *input_features, THCTensor *d_input_features,
THCTensor *d_output_features, THCTensor *weight, THCTensor *d_weight, THCTensor *d_output_features, THCTensor *weight, THCTensor *d_weight,
THCTensor *d_bias, long filterVolume, THCITensor *rulesBuffer) { THCTensor *d_bias, long filterVolume, THCITensor *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = _m.getValidRuleBook(inputSize, filterSize, true); auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
uInt nActive = _m.getNActive(inputSize); uInt nActive = _m.getNActive(inputSize);
THCTensor_(resizeAs)(state, d_input_features, input_features); THCTensor_(resizeAs)(state, d_input_features, input_features);
THCTensor_(zero)(state, d_input_features); THCTensor_(zero)(state, d_input_features);
......
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/InputLayer.cu"
#else
#include "InputLayer.h"
// GPU input layer forward: build the rulebook on the host, stage it in the
// device-side `rulesBuffer`, then launch the pooling kernel on the current
// THC stream (one block per output row, capped grid/block dimensions).
extern "C" void scn_DR_(InputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THCTensor *input_features, THCTensor *output_features, long batchSize,
    long mode, THCITensor *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  _m.inputLayer(spatialSize, input_coords, batchSize, mode);
  uInt planes = input_features->size[1];
  THCTensor_(resize2d)(state, output_features, *_m.inputNActive, planes);
  THCTensor_(zero)(state, output_features);
  auto &hostRules = _m.inputLayerRuleBook;
  uInt mA = hostRules[0][1];
  uInt rows = hostRules[0][3];
  // Copy the flat host rule array into the reusable device buffer.
  size_t ruleBytes = sizeof(uInt) * hostRules[1].size();
  THCITensor_resize1d(state, rulesBuffer, ruleBytes);
  uInt *deviceRules = (uInt *)THCITensor_data(state, rulesBuffer);
  cudaMemcpy(deviceRules, &hostRules[1][0], ruleBytes, cudaMemcpyHostToDevice);
  InputLayer_fp<real><<<std::min(rows, 32768U), std::min(planes, 32U), 0,
                        THCState_getCurrentStream(state)>>>(
      THCTensor_(data)(state, input_features),
      THCTensor_(data)(state, output_features), rows, mA, planes, deviceRules,
      mode == 4);
}
// GPU input layer backward: re-upload the rulebook and scatter gradients
// back to the raw input rows on the current THC stream.
extern "C" void
scn_DR_(InputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
                                    THCTensor *d_output_features,
                                    THCITensor *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &hostRules = _m.inputLayerRuleBook;
  uInt planes = d_output_features->size[1];
  // hostRules[0] header: [mode, maxActive, nInputRows, nOutputRows].
  THCTensor_(resize2d)(state, d_input_features, hostRules[0][2], planes);
  THCTensor_(zero)(state, d_input_features);
  uInt poolMode = hostRules[0][0];
  uInt mA = hostRules[0][1];
  uInt rows = hostRules[0][3];
  size_t ruleBytes = sizeof(uInt) * hostRules[1].size();
  THCITensor_resize1d(state, rulesBuffer, ruleBytes);
  uInt *deviceRules = (uInt *)THCITensor_data(state, rulesBuffer);
  cudaMemcpy(deviceRules, &hostRules[1][0], ruleBytes, cudaMemcpyHostToDevice);
  InputLayer_bp<real><<<std::min(rows, 32768U), std::min(planes, 32U), 0,
                        THCState_getCurrentStream(state)>>>(
      THCTensor_(data)(state, d_input_features),
      THCTensor_(data)(state, d_output_features), rows, mA, planes,
      deviceRules, poolMode == 4);
}
// GPU batch/length input layer forward: coordinates arrive as a
// batchSize x length x dimension array; build the bl rulebook, stage it on
// the device, and pool.
extern "C" void scn_DR_(BLInputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THCTensor *input_features, THCTensor *output_features, long mode,
    THCITensor *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  _m.blLayer(spatialSize, input_coords, mode);
  uInt planes = input_features->size[2];
  THCTensor_(resize2d)(state, output_features, *_m.inputNActive, planes);
  THCTensor_(zero)(state, output_features);
  auto &hostRules = _m.blLayerRuleBook;
  // hostRules[0] header: [mode, maxActive, batchSize, length, nActive].
  uInt mA = hostRules[0][1];
  uInt rows = hostRules[0][4];
  size_t ruleBytes = sizeof(uInt) * hostRules[1].size();
  THCITensor_resize1d(state, rulesBuffer, ruleBytes);
  uInt *deviceRules = (uInt *)THCITensor_data(state, rulesBuffer);
  cudaMemcpy(deviceRules, &hostRules[1][0], ruleBytes, cudaMemcpyHostToDevice);
  InputLayer_fp<real><<<std::min(rows, 32768U), std::min(planes, 32U), 0,
                        THCState_getCurrentStream(state)>>>(
      THCTensor_(data)(state, input_features),
      THCTensor_(data)(state, output_features), rows, mA, planes, deviceRules,
      mode == 4);
}
// GPU batch/length input layer backward: gradient w.r.t. the
// (batchSize x length x nPlanes) input tensor.
extern "C" void
scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
                                      THCTensor *d_output_features,
                                      THCITensor *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &hostRules = _m.blLayerRuleBook;
  uInt planes = d_output_features->size[1];
  // hostRules[0] header: [mode, maxActive, batchSize, length, nActive].
  THCTensor_(resize3d)(state, d_input_features, hostRules[0][2],
                       hostRules[0][3], planes);
  THCTensor_(zero)(state, d_input_features);
  uInt poolMode = hostRules[0][0];
  uInt mA = hostRules[0][1];
  uInt rows = hostRules[0][4];
  size_t ruleBytes = sizeof(uInt) * hostRules[1].size();
  THCITensor_resize1d(state, rulesBuffer, ruleBytes);
  uInt *deviceRules = (uInt *)THCITensor_data(state, rulesBuffer);
  cudaMemcpy(deviceRules, &hostRules[1][0], ruleBytes, cudaMemcpyHostToDevice);
  InputLayer_bp<real><<<std::min(rows, 32768U), std::min(planes, 32U), 0,
                        THCState_getCurrentStream(state)>>>(
      THCTensor_(data)(state, d_input_features),
      THCTensor_(data)(state, d_output_features), rows, mA, planes,
      deviceRules, poolMode == 4);
}
// GPU batch/length output layer forward: un-pool active-site features back
// into the dense (batchSize x length x nPlanes) layout via the backward
// scatter kernel (the mapping is the inverse of the input layer).
// Fix: dropped the unused local `mode` (read from rules[0][0], never used;
// averaging is deliberately disabled here with `false`).
extern "C" void scn_DR_(BLOutputLayer_updateOutput)(
    void **m,
    THCTensor *input_features, THCTensor *output_features,
    THCITensor *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.blLayerRuleBook;
  uInt nPlanes = input_features->size[1];
  // rules[0] header: [mode, maxActive, batchSize, length, nActive].
  THCTensor_(resize3d)(state, output_features, rules[0][2], rules[0][3], nPlanes);
  THCTensor_(zero)(state, output_features);
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][4];
  THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
  auto iF = THCTensor_(data)(state, input_features);
  auto oF = THCTensor_(data)(state, output_features);
  auto rb = (uInt*) THCITensor_data(state, rulesBuffer);
  cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
             cudaMemcpyHostToDevice);
  InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                        THCState_getCurrentStream(state)>>>(
      oF, iF, nRows, maxActive, nPlanes, rb, false);
}
// GPU batch/length output layer backward: gather gradients from the dense
// layout back into one row per active site, via the forward gather kernel.
// Fix: dropped the unused local `mode` (read from rules[0][0], never used;
// averaging is deliberately disabled here with `false`).
extern "C" void
scn_DR_(BLOutputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
                                       THCTensor *d_output_features,
                                       THCITensor *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.blLayerRuleBook;
  uInt nPlanes = d_output_features->size[2];
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][4];
  THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes);
  THCTensor_(zero)(state, d_input_features);
  THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
  auto diF = THCTensor_(data)(state, d_input_features);
  auto doF = THCTensor_(data)(state, d_output_features);
  auto rb = (uInt*)THCITensor_data(state, rulesBuffer);
  cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
             cudaMemcpyHostToDevice);
  InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                        THCState_getCurrentStream(state)>>>(
      doF, diF, nRows, maxActive, nPlanes, rb, false);
}
#endif
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef GPU_INPUTLAYER_H
#define GPU_INPUTLAYER_H
// Forward pooling kernel. Grid-strides over output rows (one block per row)
// and block-strides over planes (one thread per plane). Output row `row`
// owns the rule slice rules[row * (1 + maxActive) ...]: slot 0 is the count
// of contributing input rows, slots 1..count their indices. With `average`
// set, contributions are scaled by 1/count. Output must be pre-zeroed.
template <typename T>
__global__ void InputLayer_fp(T *input_features, T *output_features,
                              uInt nRows, uInt maxActive, uInt nPlanes,
                              uInt *rules, bool average) {
  for (int row = blockIdx.x; row < nRows; row += gridDim.x) {
    uInt *rule = rules + row * (1 + maxActive);
    uInt count = rule[0];
    T scale = (average and count > 0) ? 1.0f / count : 1.0f;
    T *dst = output_features + row * nPlanes;
    for (int k = 1; k <= count; k++) {
      T *src = input_features + rule[k] * nPlanes;
      for (uInt p = threadIdx.x; p < nPlanes; p += blockDim.x)
        dst[p] += scale * src[p];
    }
  }
}
// Backward scatter kernel — the transpose of InputLayer_fp. atomicAdd is
// required because distinct output rows may scatter into the same input row
// concurrently. d_input_features must be pre-zeroed.
template <typename T>
__global__ void InputLayer_bp(T *d_input_features, T *d_output_features,
                              uInt nRows, uInt maxActive, uInt nPlanes,
                              uInt *rules, bool average) {
  for (int row = blockIdx.x; row < nRows; row += gridDim.x) {
    uInt *rule = rules + row * (1 + maxActive);
    uInt count = rule[0];
    T scale = (average and count > 0) ? 1.0f / count : 1.0f;
    T *grad = d_output_features + row * nPlanes;
    for (int k = 1; k <= count; k++) {
      T *dst = d_input_features + rule[k] * nPlanes;
      for (uInt p = threadIdx.x; p < nPlanes; p += blockDim.x)
        atomicAdd(&dst[p], scale * grad[p]);
    }
  }
}
#endif /* GPU_INPUTLAYER_H */
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef INPUTLAYER_H
#define INPUTLAYER_H
#include "../SparseConvNet.h"
#include <omp.h>
// mode 1==overwrite, 2=keep, 3=sum, 4=mean
// Build the input-layer rulebook: deduplicate raw coordinate rows into
// unique active sites, recording which input rows feed each output row.
// mode 1==overwrite (last duplicate wins), 2==keep (first duplicate wins),
// 3==sum, 4==mean.
// `coords` is an nInputRows x nInputColumns long array; when
// nInputColumns == dimension + 1 the extra trailing column is the sample
// index within the batch.
// Fixes vs. the previous version:
//  - mode 1 used .front() and mode 2 used .back(), the opposite of the
//    documented semantics above and of blRules in this same header;
//  - single hash lookup per row instead of find() followed by operator[];
//  - resize guard `idx + 1 >= SGs.size()` triggered a no-op resize when
//    idx was already a valid index; now grows only when needed.
template <uInt dimension>
void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
                     uInt nInputRows, uInt nInputColumns, uInt batchSize,
                     uInt mode, uInt &nActive) {
  assert(nActive == 0);
  assert(rules.size() == 0);
  assert(SGs.size() == 0);
  SGs.resize(batchSize); // Set a minimum batch size if necessary
  Point<dimension> p;
  // outputRows[o] lists the input rows that map to output row o.
  std::vector<std::vector<uInt>> outputRows;
  if (nInputColumns == dimension) {
    // No sample column: every row belongs to a single sparse grid.
    SGs.resize(1);
    auto &sg = SGs[0];
    for (uInt i = 0; i < nInputRows; ++i) {
      for (uInt j = 0; j < dimension; j++)
        p[j] = coords[j];
      coords += dimension;
      auto iter = sg.mp.find(p);
      uInt out;
      if (iter == sg.mp.end()) {
        out = nActive++;
        sg.mp[p] = out;
        outputRows.resize(nActive);
      } else {
        out = iter->second;
      }
      outputRows[out].push_back(i);
    }
  } else { // nInputColumns == dimension + 1
    for (uInt i = 0; i < nInputRows; ++i) {
      for (uInt j = 0; j < dimension; j++)
        p[j] = coords[j];
      uInt idx = coords[dimension];
      coords += dimension + 1;
      if (idx >= SGs.size())
        SGs.resize(idx + 1);
      auto &sg = SGs[idx];
      auto iter = sg.mp.find(p);
      uInt out;
      if (iter == sg.mp.end()) {
        out = nActive++;
        sg.mp[p] = out;
        outputRows.resize(nActive);
      } else {
        out = iter->second;
      }
      outputRows[out].push_back(i);
    }
  }
  // Header: [mode, maxActive, nInputRows, nOutputRows].
  rules.resize(2);
  rules[0].push_back(mode);
  rules[0].push_back(1); // replaced with maxActive if mode==3 or 4
  rules[0].push_back(nInputRows);
  rules[0].push_back(outputRows.size());
  auto &rule = rules[1];
  if (mode == 1) {
    // Overwrite: the most recent duplicate supplies the feature.
    for (uInt i = 0; i < nActive; ++i) {
      rule.push_back(1);
      rule.push_back(outputRows[i].back());
    }
  }
  if (mode == 2) {
    // Keep: the first duplicate supplies the feature.
    for (uInt i = 0; i < nActive; ++i) {
      rule.push_back(1);
      rule.push_back(outputRows[i].front());
    }
  }
  if (mode == 3 or mode == 4) {
    uInt maxActive = 0;
    for (auto &row : outputRows)
      maxActive = std::max(maxActive, (uInt)row.size());
    rules[0][1] = maxActive;
    for (auto &row : outputRows) {
      rule.push_back(row.size());
      for (auto &r : row)
        rule.push_back(r);
      // Pad this row's slice up to a fixed stride of maxActive + 1.
      rule.resize((rule.size() + maxActive) / (maxActive + 1) *
                  (maxActive + 1));
    }
  }
}
// bl is a batchSize x length x dimension long array of coordinates
// mode 0==guaranteed unique and all present; 1==overwrite, 2=keep, 3=sum,
// 4=mean
// Build the batch/length rulebook. `coords` is a batchSize x length x
// dimension long array of coordinates.
// mode 0==rows guaranteed unique and all present (fast path);
// 1==overwrite, 2=keep, 3=sum, 4=mean.
// Rows whose first coordinate is negative are skipped — presumably padding
// entries in a ragged batch (TODO confirm against callers).
// Header written to rules[0]: [mode, maxActive, batchSize, length, nActive].
// Fix: removed a leftover debug print of omp_get_num_threads() that ran on
// every mode-3/4 call.
template <uInt dimension>
void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
             uInt batchSize, uInt length, uInt mode, uInt &nActive) {
  assert(nActive == 0);
  assert(rules.size() == 0);
  assert(SGs.size() == 0);
  SGs.resize(batchSize);
  uInt I; // declared here so it can appear in the OpenMP private() clauses
  if (mode == 0) {
    // Fast path: every row is a distinct active site, identity mapping.
    nActive = batchSize * length;
#pragma omp parallel for private(I)
    for (I = 0; I < batchSize; I++) {
      auto &sg = SGs[I];
      sg.ctr = I * length;
      auto c = coords + I * length * dimension;
      Point<dimension> p;
      for (int l = 0; l < length; ++l) {
        for (int j = 0; j < dimension; ++j)
          p[j] = c[j];
        c += dimension;
        sg.mp[p] = l;
      }
    }
    rules.resize(2);
    rules[0].push_back(0);
    rules[0].push_back(1);
    rules[0].push_back(batchSize);
    rules[0].push_back(length);
    rules[0].push_back(nActive);
    auto &rule = rules[1];
    int ll = 0;
    for (I = 0; I < batchSize; I++) {
      for (int l = 0; l < length; ++l, ++ll) {
        rule.push_back(1);
        rule.push_back(ll);
      }
    }
    return;
  }
  // Compile list of how input rows correspond to output rows, one sample at
  // a time (samples are independent, hence the parallel loop).
  std::vector<std::vector<std::vector<uInt>>> outputRows(batchSize);
  std::vector<uInt> nActives(batchSize);
#pragma omp parallel for private(I)
  for (I = 0; I < batchSize; I++) {
    auto &sg = SGs[I];
    auto &ors = outputRows[I];
    auto &nAct = nActives[I];
    auto c = coords + I * length * dimension;
    uInt i = I * length;
    Point<dimension> p;
    for (int l = 0; l < length; ++l, ++i) {
      for (int j = 0; j < dimension; ++j)
        p[j] = *c++;
      if (p[0] >= 0) { // negative first coordinate: row is skipped
        auto iter = sg.mp.find(p);
        if (iter == sg.mp.end()) {
          sg.mp[p] = nAct++;
          ors.resize(nAct);
        }
        ors[sg.mp[p]].push_back(i);
      }
    }
  }
  // Assign each sample a contiguous range of output rows.
  for (I = 0; I < batchSize; I++) {
    SGs[I].ctr = nActive;
    nActive += nActives[I];
  }
  uInt maxActive = 1;
  if (mode >= 3)
    for (auto &ors : outputRows)
      for (auto &row : ors)
        maxActive = std::max(maxActive, (uInt)row.size());
  rules.resize(2);
  rules[0].push_back(mode);
  rules[0].push_back(maxActive);
  rules[0].push_back(batchSize);
  rules[0].push_back(length);
  rules[0].push_back(nActive);
  auto &rule = rules[1];
  if (mode == 1) {
    // Overwrite: the most recent duplicate supplies the feature.
    rule.resize(2 * nActive);
#pragma omp parallel for private(I)
    for (I = 0; I < batchSize; I++) {
      auto &ors = outputRows[I];
      auto rr = &rule[SGs[I].ctr * 2];
      for (auto &row : ors) {
        rr[0] = row.size();
        rr[1] = row.back();
        rr += 2;
      }
    }
  }
  if (mode == 2) {
    // Keep: the first duplicate supplies the feature.
    rule.resize(2 * nActive);
#pragma omp parallel for private(I)
    for (I = 0; I < batchSize; I++) {
      auto &ors = outputRows[I];
      auto rr = &rule[SGs[I].ctr * 2];
      for (auto &row : ors) {
        rr[0] = row.size();
        rr[1] = row.front();
        rr += 2;
      }
    }
  }
  if (mode == 3 or mode == 4) {
    // Sum/mean: each output row's slice lists all contributing input rows,
    // padded to a fixed stride of maxActive + 1.
    rule.resize((maxActive + 1) * nActive);
#pragma omp parallel for private(I)
    for (I = 0; I < batchSize; I++) {
      auto &ors = outputRows[I];
      auto rr = &rule[SGs[I].ctr * (maxActive + 1)];
      for (auto &row : ors) {
        rr[0] = row.size();
        for (int i = 0; i < row.size(); ++i)
          rr[i + 1] = row[i];
        rr += 1 + maxActive;
      }
    }
  }
}
#endif /* INPUTLAYER_H */
...@@ -221,7 +221,7 @@ extern "C" void scn_D_(addSampleFromThresholdedTensor)( ...@@ -221,7 +221,7 @@ extern "C" void scn_D_(addSampleFromThresholdedTensor)(
THFloatTensor_resize2d(features_, nActive, nPlanes); THFloatTensor_resize2d(features_, nActive, nPlanes);
} }
// 3x3 valid convolutions, 3x3/2x2 pooling or strided convolutions // 3x3 submanifold convolutions, 3x3/2x2 pooling or strided convolutions
extern "C" void scn_D_(generateRuleBooks3s2)(void **m) { extern "C" void scn_D_(generateRuleBooks3s2)(void **m) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
long sz[Dimension], str[Dimension], inS[Dimension], outS[Dimension]; long sz[Dimension], str[Dimension], inS[Dimension], outS[Dimension];
...@@ -237,7 +237,7 @@ extern "C" void scn_D_(generateRuleBooks3s2)(void **m) { ...@@ -237,7 +237,7 @@ extern "C" void scn_D_(generateRuleBooks3s2)(void **m) {
auto &SGs = _m.grids[p1]; auto &SGs = _m.grids[p1];
auto &rb = _m.validRuleBooks[p2]; auto &rb = _m.validRuleBooks[p2];
if (rb.empty()) if (rb.empty())
ValidConvolution_SgsToRules(SGs, rb, sz); SubmanifoldConvolution_SgsToRules(SGs, rb, sz);
for (int i = 0; i < Dimension; ++i) for (int i = 0; i < Dimension; ++i)
if (p1[i] < 3 or p1[i] % 2 != 1) if (p1[i] < 3 or p1[i] % 2 != 1)
return; return;
...@@ -253,7 +253,7 @@ extern "C" void scn_D_(generateRuleBooks3s2)(void **m) { ...@@ -253,7 +253,7 @@ extern "C" void scn_D_(generateRuleBooks3s2)(void **m) {
} }
} }
// 3x3 valid convolutions, 2x2 pooling or strided convolutions // 3x3 submanifold convolutions, 2x2 pooling or strided convolutions
extern "C" void scn_D_(generateRuleBooks2s2)(void **m) { extern "C" void scn_D_(generateRuleBooks2s2)(void **m) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
long s2[Dimension], s3[Dimension], inS[Dimension], outS[Dimension]; long s2[Dimension], s3[Dimension], inS[Dimension], outS[Dimension];
...@@ -268,7 +268,7 @@ extern "C" void scn_D_(generateRuleBooks2s2)(void **m) { ...@@ -268,7 +268,7 @@ extern "C" void scn_D_(generateRuleBooks2s2)(void **m) {
while (true) { while (true) {
auto &SGs = _m.grids[p1]; auto &SGs = _m.grids[p1];
auto &rb = _m.validRuleBooks[p2]; auto &rb = _m.validRuleBooks[p2];
ValidConvolution_SgsToRules(SGs, rb, s3); SubmanifoldConvolution_SgsToRules(SGs, rb, s3);
for (int i = 0; i < Dimension; ++i) for (int i = 0; i < Dimension; ++i)
if (p1[i] < 2 or p1[i] % 2 != 0) if (p1[i] < 2 or p1[i] % 2 != 0)
return; return;
......
...@@ -10,20 +10,26 @@ ...@@ -10,20 +10,26 @@
#include "../SparseConvNet.h" #include "../SparseConvNet.h"
#include "ActivePoolingRules.h" #include "ActivePoolingRules.h"
#include "ConvolutionRules.h" #include "ConvolutionRules.h"
#include "ValidConvolutionRules.h" #include "InputLayerRules.h"
#include "SubmanifoldConvolutionRules.h"
#include <tuple> #include <tuple>
#include <unordered_map> #include <unordered_map>
template <uInt dimension> class Metadata { template <uInt dimension> class Metadata {
public: public:
//Count of active sites for each scale
std::unordered_map<Point<dimension>, uInt, IntArrayHash<dimension>> nActive; std::unordered_map<Point<dimension>, uInt, IntArrayHash<dimension>> nActive;
//Hash tables for each scale locating the active points
std::unordered_map<Point<dimension>, SparseGrids<dimension>, std::unordered_map<Point<dimension>, SparseGrids<dimension>,
IntArrayHash<dimension>> grids; IntArrayHash<dimension>> grids;
std::unordered_map<Point<dimension>, RuleBook, IntArrayHash<dimension>> std::unordered_map<Point<dimension>, RuleBook, IntArrayHash<dimension>>
activePoolingRuleBooks; activePoolingRuleBooks;
RuleBook inputLayerRuleBook;
RuleBook blLayerRuleBook;
std::unordered_map<Point<2 * dimension>, RuleBook, std::unordered_map<Point<2 * dimension>, RuleBook,
IntArrayHash<2 * dimension>> validRuleBooks; IntArrayHash<2 * dimension>> validRuleBooks;
...@@ -49,6 +55,8 @@ public: ...@@ -49,6 +55,8 @@ public:
inputSGs = nullptr; inputSGs = nullptr;
inputSG = nullptr; inputSG = nullptr;
inputNActive = nullptr; inputNActive = nullptr;
inputLayerRuleBook.clear();
blLayerRuleBook.clear();
} }
void setInputSpatialSize(THLongTensor *spatialSize) { void setInputSpatialSize(THLongTensor *spatialSize) {
...@@ -56,23 +64,43 @@ public: ...@@ -56,23 +64,43 @@ public:
inputSGs = &grids[inputSpatialSize]; inputSGs = &grids[inputSpatialSize];
inputNActive = &nActive[inputSpatialSize]; inputNActive = &nActive[inputSpatialSize];
} }
void inputLayer(THLongTensor *spatialSize, THLongTensor *coords,
uInt batchSize, uInt mode) {
assert(spatialSize->nDimension == 1);
assert(spatialSize->size[0] == dimension);
assert(coords->nDimension == 2);
assert(coords->size[1] >= dimension and coords->size[1] <= dimension + 1);
setInputSpatialSize(spatialSize);
inputLayerRules<dimension>(*inputSGs, inputLayerRuleBook,
THLongTensor_data(coords), coords->size[0],
coords->size[1], batchSize, mode, *inputNActive);
}
void blLayer(THLongTensor *spatialSize, THLongTensor *coords, uInt mode) {
assert(spatialSize->nDimension == 1);
assert(spatialSize->size[0] == dimension);
assert(coords->nDimension == 3);
assert(coords->size[2] == dimension);
setInputSpatialSize(spatialSize);
blRules<dimension>(*inputSGs, blLayerRuleBook, THLongTensor_data(coords),
coords->size[0], coords->size[1], mode, *inputNActive);
}
SparseGrids<dimension> &getSparseGrid(THLongTensor *spatialSize) { SparseGrids<dimension> &getSparseGrid(THLongTensor *spatialSize) {
return grids[LongTensorToPoint<dimension>(spatialSize)]; return grids[LongTensorToPoint<dimension>(spatialSize)];
}; };
uInt getNActive(THLongTensor *spatialSize) { uInt getNActive(THLongTensor *spatialSize) {
return nActive[LongTensorToPoint<dimension>(spatialSize)]; return nActive[LongTensorToPoint<dimension>(spatialSize)];
}; };
RuleBook &getValidRuleBook(THLongTensor *spatialSize, THLongTensor *size, RuleBook &getSubmanifoldRuleBook(THLongTensor *spatialSize, THLongTensor *size,
bool openMP) { bool openMP) {
auto p = TwoLongTensorsToPoint<dimension>(spatialSize, size); auto p = TwoLongTensorsToPoint<dimension>(spatialSize, size);
auto &rb = validRuleBooks[p]; auto &rb = validRuleBooks[p];
if (rb.empty()) { if (rb.empty()) {
auto &SGs = grids[LongTensorToPoint<dimension>(spatialSize)]; auto &SGs = grids[LongTensorToPoint<dimension>(spatialSize)];
#if defined(ENABLE_OPENMP) #if defined(ENABLE_OPENMP)
openMP ? ValidConvolution_SgsToRules_OMP(SGs, rb, THLongTensor_data(size)) openMP ? SubmanifoldConvolution_SgsToRules_OMP(SGs, rb, THLongTensor_data(size))
: :
#endif #endif
ValidConvolution_SgsToRules(SGs, rb, THLongTensor_data(size)); SubmanifoldConvolution_SgsToRules(SGs, rb, THLongTensor_data(size));
} }
return rb; return rb;
} }
......
...@@ -24,7 +24,7 @@ InputRegionCalculator_Valid(const Point<dimension> &output, long *size) { ...@@ -24,7 +24,7 @@ InputRegionCalculator_Valid(const Point<dimension> &output, long *size) {
// rules is used to carry out the "lowering" whilst carrying out the convolution // rules is used to carry out the "lowering" whilst carrying out the convolution
template <uInt dimension> template <uInt dimension>
double ValidConvolution_SgToRules(SparseGrid<dimension> &grid, RuleBook &rules, double SubmanifoldConvolution_SgToRules(SparseGrid<dimension> &grid, RuleBook &rules,
long *size) { long *size) {
uInt sd = volume<dimension>(size); uInt sd = volume<dimension>(size);
double countActiveInputs = 0; double countActiveInputs = 0;
...@@ -46,7 +46,7 @@ double ValidConvolution_SgToRules(SparseGrid<dimension> &grid, RuleBook &rules, ...@@ -46,7 +46,7 @@ double ValidConvolution_SgToRules(SparseGrid<dimension> &grid, RuleBook &rules,
} }
template <uInt dimension> template <uInt dimension>
uInt ValidConvolution_SgsToRules(SparseGrids<dimension> &SGs, RuleBook &rules, uInt SubmanifoldConvolution_SgsToRules(SparseGrids<dimension> &SGs, RuleBook &rules,
long *size) { long *size) {
uInt sd = volume<dimension>(size); uInt sd = volume<dimension>(size);
uInt countActiveInputs = 0; uInt countActiveInputs = 0;
...@@ -54,11 +54,11 @@ uInt ValidConvolution_SgsToRules(SparseGrids<dimension> &SGs, RuleBook &rules, ...@@ -54,11 +54,11 @@ uInt ValidConvolution_SgsToRules(SparseGrids<dimension> &SGs, RuleBook &rules,
rules.resize(sd); rules.resize(sd);
for (uInt i = 0; i < SGs.size(); i++) for (uInt i = 0; i < SGs.size(); i++)
countActiveInputs += countActiveInputs +=
ValidConvolution_SgToRules<dimension>(SGs[i], rules, size); SubmanifoldConvolution_SgToRules<dimension>(SGs[i], rules, size);
return countActiveInputs; return countActiveInputs;
} }
template <uInt dimension> template <uInt dimension>
uInt ValidConvolution_SgsToRules_OMP(SparseGrids<dimension> &SGs, uInt SubmanifoldConvolution_SgsToRules_OMP(SparseGrids<dimension> &SGs,
RuleBook &rules, long *size) { RuleBook &rules, long *size) {
std::vector<RuleBook> rbs(SGs.size()); std::vector<RuleBook> rbs(SGs.size());
std::vector<double> countActiveInputs(SGs.size()); std::vector<double> countActiveInputs(SGs.size());
...@@ -71,7 +71,7 @@ uInt ValidConvolution_SgsToRules_OMP(SparseGrids<dimension> &SGs, ...@@ -71,7 +71,7 @@ uInt ValidConvolution_SgsToRules_OMP(SparseGrids<dimension> &SGs,
for (i = 0; i < SGs.size(); i++) { for (i = 0; i < SGs.size(); i++) {
rbs[i].resize(sd); rbs[i].resize(sd);
countActiveInputs[i] = countActiveInputs[i] =
ValidConvolution_SgToRules<dimension>(SGs[i], rbs[i], size); SubmanifoldConvolution_SgToRules<dimension>(SGs[i], rbs[i], size);
} }
} }
{ {
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <string> #include <string>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#define ENABLE_OPENMP YES
#if defined(ENABLE_OPENMP) #if defined(ENABLE_OPENMP)
#include <omp.h> #include <omp.h>
#endif #endif
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -36,6 +36,9 @@ ...@@ -36,6 +36,9 @@
#include "generic/CPU/Deconvolution.cpp" #include "generic/CPU/Deconvolution.cpp"
#include "generic/CPU/THGenerateDimFloatTypes.h" #include "generic/CPU/THGenerateDimFloatTypes.h"
#include "generic/CPU/InputLayer.cpp"
#include "generic/CPU/THGenerateDimFloatTypes.h"
#include "generic/CPU/LeakyReLU.cpp" #include "generic/CPU/LeakyReLU.cpp"
#include "generic/CPU/THGenerateFloatTypes.h" #include "generic/CPU/THGenerateFloatTypes.h"
...@@ -50,7 +53,7 @@ ...@@ -50,7 +53,7 @@
extern "C" long scn_readPtr(void **ptr) { return (long)(ptr[0]); } extern "C" long scn_readPtr(void **ptr) { return (long)(ptr[0]); }
extern "C" void scn_writePtr(long p, void **ptr) { ptr[0] = (void *)p; } extern "C" void scn_writePtr(long p, void **ptr) { ptr[0] = (void *)p; }
extern "C" double scn_ruleBookBits() { return 8 * sizeof(uInt); } extern "C" double scn_ruleBookBits(void) { return 8 * sizeof(uInt); }
#undef scn_D_ #undef scn_D_
#undef scn_DR_ #undef scn_DR_
......
...@@ -37,6 +37,9 @@ extern THCState *state; ...@@ -37,6 +37,9 @@ extern THCState *state;
#include "generic/GPU/Deconvolution.cu" #include "generic/GPU/Deconvolution.cu"
#include "generic/GPU/THGenerateDimCudaFloatTypes.h" #include "generic/GPU/THGenerateDimCudaFloatTypes.h"
#include "generic/GPU/InputLayer.cu"
#include "generic/GPU/THGenerateDimCudaFloatTypes.h"
#include "generic/GPU/LeakyReLU.cu" #include "generic/GPU/LeakyReLU.cu"
#include "generic/GPU/THGenerateCudaFloatTypes.h" #include "generic/GPU/THGenerateCudaFloatTypes.h"
......
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# Generator script: writes the C FFI declaration files
# (header_cpu.c/.h and header_gpu.c/.h) listing every scn_* entry point.
# Width of the integer type used for GPU rule buffers; selects
# THCudaIntTensor (32) vs THCudaLongTensor (64) in typed_fn below.
n_bits = 32
# Each target is a [stub-source file, header file] pair; the handles stay
# open for the whole script and are written to by fn() below.
f_cpu = [open('header_cpu.c', 'w'), open('header_cpu.h', 'w')]
f_gpu = [open('header_gpu.c', 'w'), open('header_gpu.h', 'w')]
# Emit the license banner at the top of all four generated files.
for f in f_cpu + f_gpu:
    f.write("""// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
""")
def fn(st, f=f_cpu):
    """Emit one declaration *st*: an empty-bodied stub to the .c file and a
    prototype (terminated with ';') to the .h file of the target pair *f*."""
    stub_file, header_file = f
    stub_file.write(st + '{}')
    header_file.write(st + ';')
def dim_fn(st, f=f_cpu):
    """Instantiate *st* once for each supported dimension (1..10), replacing
    the DIMENSION placeholder, and emit each instantiation via fn()."""
    for dim in range(1, 11):
        fn(st.replace('DIMENSION', str(dim)), f)
def typed_fn(st):
    """Emit *st* once per (arch, scalar) build variant.

    Variants, in emission order: CPU float, CPU double, GPU float.  The
    placeholder substitutions are applied in the same order as the original
    hand-unrolled version: ARCH, THITensor, REAL, THTensor.
    """
    variants = [
        ('cpu', 'void', 'float', 'THFloatTensor', f_cpu),
        ('cpu', 'void', 'double', 'THDoubleTensor', f_cpu),
        # GPU integer-tensor type tracks the configured rulebook bit width.
        ('gpu', 'THCudaIntTensor' if n_bits == 32 else 'THCudaLongTensor',
         'float', 'THCudaTensor', f_gpu),
    ]
    for arch, itensor, real, tensor, target in variants:
        decl = st.replace('ARCH', arch)
        decl = decl.replace('THITensor', itensor)
        decl = decl.replace('REAL', real)
        decl = decl.replace('THTensor', tensor)
        fn(decl, target)
def dim_typed_fn(st):
    """Emit *st* for every dimension (1..10) and every typed variant.

    'REAL_' is collapsed to 'REAL' first so that typed_fn()'s REAL
    substitution also covers the trailing-underscore spelling.
    """
    base = st.replace('REAL_', 'REAL')
    for dim in range(1, 11):
        typed_fn(base.replace('DIMENSION', str(dim)))
fn("""
long scn_readPtr(void **ptr)""")
fn("""
void scn_writePtr(long p, void **ptr)""")
fn("""
double scn_ruleBookBits(void)""")
fn("""
void scn_2_drawCurve(void **m, THFloatTensor *features, THFloatTensor *stroke)""")
dim_fn("""
double scn_DIMENSION_addSampleFromThresholdedTensor(
void **m, THFloatTensor *features_, THFloatTensor *tensor_,
THLongTensor *offset_, THLongTensor *spatialSize_, float threshold)""")
dim_fn("""
void scn_DIMENSION_batchAddSample(void **m)""")
dim_fn("""
void scn_DIMENSION_createMetadataForDenseToSparse(
void **m, THLongTensor *spatialSize_, THLongTensor *pad, THLongTensor *nz,
long batchSize)""")
dim_fn("""
void scn_DIMENSION_freeMetadata(void **metadata)""")
dim_fn("""
void scn_DIMENSION_generateRuleBooks3s2(void **m)""")
dim_fn("""
void scn_DIMENSION_generateRuleBooks2s2(void **m)""")
dim_fn("""
void scn_DIMENSION_setInputSpatialSize(void **m, THLongTensor *spatialSize)""")
dim_fn("""
void scn_DIMENSION_setInputSpatialLocation(void **m, THFloatTensor *features,
THLongTensor *location, THFloatTensor *vec, _Bool overwrite)""")
dim_fn("""
void scn_DIMENSION_setInputSpatialLocations(void **m, THFloatTensor *features,
THLongTensor *locations, THFloatTensor *vecs, _Bool overwrite)""")
dim_fn("""
void scn_DIMENSION_getSpatialLocations(void **m, THLongTensor *spatialSize,
THLongTensor *locations)""")
typed_fn("""
void scn_ARCH_REAL_AffineReluTrivialConvolution_updateOutput(
THTensor *input_features, THTensor *output_features,
THTensor *affineWeight, THTensor *affineBias, THTensor *convWeight)""")
typed_fn("""
void scn_ARCH_REAL_AffineReluTrivialConvolution_backward(
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *affineWeight,
THTensor *d_affineWeight, THTensor *affineBias, THTensor *d_affineBias,
THTensor *convWeight, THTensor *d_convWeight, _Bool additiveGrad)""")
typed_fn("""
void scn_ARCH_REAL_BatchwiseMultiplicativeDropout_updateOutput(
THTensor *input_features, THTensor *output_features,
THTensor *noise, long nPlanes, long input_stride, long output_stride,
float alpha)""")
typed_fn("""
void scn_ARCH_REAL_BatchwiseMultiplicativeDropout_updateGradInput(
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *noise, long nPlanes,
long input_stride, long output_stride, float alpha)""")
typed_fn("""
void scn_ARCH_REAL_BatchNormalization_updateOutput(
THTensor *input_features, THTensor *output_features,
THTensor *saveMean, THTensor *saveInvStd, THTensor *runningMean,
THTensor *runningVar, THTensor *weight, THTensor *bias, REAL eps,
REAL momentum, _Bool train, REAL leakiness)""")
typed_fn("""
void scn_ARCH_REAL_BatchNormalization_backward(
THTensor *input_features, THTensor *d_input_features,
THTensor *output_features, THTensor *d_output_features, THTensor *saveMean,
THTensor *saveInvStd, THTensor *runningMean, THTensor *runningVar,
THTensor *weight, THTensor *bias, THTensor *d_weight, THTensor *d_bias,
REAL leakiness)""")
typed_fn("""
void scn_ARCH_REAL_BatchNormalizationInTensor_updateOutput(
THTensor *input_features, THTensor *output_features,
THTensor *saveMean, THTensor *saveInvStd, THTensor *runningMean,
THTensor *runningVar, THTensor *weight, THTensor *bias, REAL eps,
REAL momentum, _Bool train, REAL leakiness)""")
typed_fn("""
void scn_ARCH_REAL_LeakyReLU_updateOutput(
THTensor *input_features, THTensor *output_features,
float alpha)""")
typed_fn("""
void scn_ARCH_REAL_LeakyReLU_updateGradInput(
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, float alpha)""")
typed_fn("""
double scn_ARCH_REAL_NetworkInNetwork_updateOutput(
THTensor *input_features, THTensor *output_features,
THTensor *weight, THTensor *bias)""")
typed_fn("""
void scn_ARCH_REAL_NetworkInNetwork_updateGradInput(
THTensor *d_input_features, THTensor *d_output_features,
THTensor *weight)""")
typed_fn("""
void scn_ARCH_REAL_NetworkInNetwork_accGradParameters(
THTensor *input_features, THTensor *d_output_features,
THTensor *d_weight, THTensor *d_bias)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONActivePooling_updateOutput(
THLongTensor *inputSize, void **m, THFloatTensor *input_features,
THFloatTensor *output_features, void *rulesBuffer, _Bool average);""")
dim_typed_fn("""void scn_ARCH_REAL_DIMENSIONActivePooling_updateGradInput(
THLongTensor *inputSize, void **m,
THFloatTensor *d_input_features, THFloatTensor *d_output_features,
void *rulesBuffer, _Bool average);""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONAveragePooling_updateOutput(
THLongTensor *inputSize, THLongTensor *outputSize,
THLongTensor *poolSize, THLongTensor *poolStride, void **m,
THTensor *input_features, THTensor *output_features, long nFeaturesToDrop,
THITensor *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONAveragePooling_updateGradInput(
THLongTensor * inputSize, THLongTensor * outputSize,
THLongTensor * poolSize, THLongTensor * poolStride, void **m,
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, long nFeaturesToDrop,
THITensor *rulesBuffer)""")
dim_typed_fn("""
double scn_ARCH_REAL_DIMENSIONConvolution_updateOutput(
THLongTensor *inputSize, THLongTensor *outputSize,
THLongTensor *filterSize, THLongTensor *filterStride, void **m,
THTensor *input_features, THTensor *output_features, THTensor *weight,
THTensor *bias, long filterVolume, THITensor *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONConvolution_backward(
THLongTensor *inputSize, THLongTensor *outputSize,
THLongTensor *filterSize, THLongTensor *filterStride, void **m,
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
THTensor *d_bias, long filterVolume, THITensor *rulesBuffer)""")
dim_typed_fn("""
double scn_ARCH_REAL_DIMENSIONDeconvolution_updateOutput(
THLongTensor *inputSize, THLongTensor *outputSize,
THLongTensor *filterSize, THLongTensor *filterStride, void **m,
THTensor *input_features, THTensor *output_features, THTensor *weight,
THTensor *bias, long filterVolume, THITensor *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONDeconvolution_backward(
THLongTensor *inputSize, THLongTensor *outputSize,
THLongTensor *filterSize, THLongTensor *filterStride, void **m,
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
THTensor *d_bias, long filterVolume, THITensor *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONMaxPooling_updateOutput(
THLongTensor *inputSize, THLongTensor *outputSize,
THLongTensor *poolSize, THLongTensor *poolStride, void **m,
THTensor *input_features, THTensor *output_features, long nFeaturesToDrop,
THITensor *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONMaxPooling_updateGradInput(
THLongTensor * inputSize, THLongTensor * outputSize,
THLongTensor * poolSize, THLongTensor * poolStride, void **m,
THTensor *input_features, THTensor *d_input_features,
THTensor *output_features, THTensor *d_output_features,
long nFeaturesToDrop, THITensor *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONSparseToDense_updateOutput(
THLongTensor *inputSize, void **m, THTensor *input_features,
THTensor *output_features, THITensor *rulesBuffer, long nPlanes)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONSparseToDense_updateGradInput(
THLongTensor *inputSize, void **m, THTensor *input_features,
THTensor *d_input_features, THTensor *d_output_features,
THITensor *rulesBuffer)""")
dim_typed_fn("""
double scn_ARCH_REAL_DIMENSIONSubmanifoldConvolution_updateOutput(
THLongTensor *inputSize, THLongTensor *filterSize, void **m,
THTensor *input_features, THTensor *output_features, THTensor *weight,
THTensor *bias, long filterVolume, THITensor *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONSubmanifoldConvolution_backward(
THLongTensor *inputSize, THLongTensor *filterSize, void **m,
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
THTensor *d_bias, long filterVolume, THITensor *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONInputLayer_updateOutput(
void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
THFloatTensor *input_features, THFloatTensor *output_features, long batchSize,
long mode, void *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONInputLayer_updateGradInput(
void **m, THFloatTensor *d_input_features, THFloatTensor *d_output_features,
void *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONBLInputLayer_updateOutput(
void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
THFloatTensor *input_features, THFloatTensor *output_features, long mode,
void *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONBLInputLayer_updateGradInput(
void **m, THFloatTensor *d_input_features,THFloatTensor *d_output_features,
void *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONBLOutputLayer_updateOutput(
void **m, THFloatTensor *input_features, THFloatTensor *output_features,
void *rulesBuffer)""")
dim_typed_fn("""
void scn_ARCH_REAL_DIMENSIONBLOutputLayer_updateGradInput(
void **m, THFloatTensor *d_input_features, THFloatTensor *d_output_features,
void *rulesBuffer)""")
...@@ -16,6 +16,7 @@ from .denseToSparse import DenseToSparse ...@@ -16,6 +16,7 @@ from .denseToSparse import DenseToSparse
from .dropout import Dropout, BatchwiseDropout from .dropout import Dropout, BatchwiseDropout
from .identity import Identity from .identity import Identity
from .inputBatch import InputBatch from .inputBatch import InputBatch
from .inputLayer import InputLayer, BLInputLayer, BLOutputLayer
from .maxPooling import MaxPooling from .maxPooling import MaxPooling
from .metadata import Metadata from .metadata import Metadata
from .networkArchitectures import * from .networkArchitectures import *
...@@ -26,16 +27,18 @@ from .sparseToDense import SparseToDense ...@@ -26,16 +27,18 @@ from .sparseToDense import SparseToDense
from .submanifoldConvolution import SubmanifoldConvolution, ValidConvolution from .submanifoldConvolution import SubmanifoldConvolution, ValidConvolution
from .tables import * from .tables import *
def concatenate_feature_planes(input): def concatenate_feature_planes(input):
output = SparseConvNetTensor() output = SparseConvNetTensor()
output.metadata = input[0].metadata output.metadata = input[0].metadata
output.spatial_size = input[0].metadata output.spatial_size = input[0].metadata
output.features=torch.cat([i.features for i in input],1) output.features = torch.cat([i.features for i in input], 1)
return output return output
def add_feature_planes(input): def add_feature_planes(input):
output = SparseConvNetTensor() output = SparseConvNetTensor()
output.metadata = input[0].metadata output.metadata = input[0].metadata
output.spatial_size = input[0].metadata output.spatial_size = input[0].metadata
output.features=sum([i.features for i in input]) output.features = sum([i.features for i in input])
return output return output
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment