Commit 1df7b845 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

3d segmentation

parent f2e3800b
...@@ -22,6 +22,7 @@ if torch.cuda.is_available(): ...@@ -22,6 +22,7 @@ if torch.cuda.is_available():
+ '-gencode arch=compute_30,code=sm_30 ' + '-gencode arch=compute_30,code=sm_30 '
+ '-DNVCC ' + '-DNVCC '
+ '-I/usr/local/cuda/include ' + '-I/usr/local/cuda/include '
+ '-I' + '/'.join(torch_dir.split('/')[:-4]) + '/include '
+ '-I' + torch_dir + '/lib/include ' + '-I' + torch_dir + '/lib/include '
+ '-I' + torch_dir + '/lib/include/TH ' + '-I' + torch_dir + '/lib/include/TH '
+ '-I' + torch_dir + '/lib/include/THC ' + '-I' + torch_dir + '/lib/include/THC '
...@@ -43,11 +44,11 @@ if torch.cuda.is_available(): ...@@ -43,11 +44,11 @@ if torch.cuda.is_available():
with_cuda=True) with_cuda=True)
else: else:
r = os.system( r = os.system(
'cd sparseconvnet/SCN; g++ -fopenmp -std=c++11 -O3 -fPIC -c init.cpp -o init.cpp.o -I' + 'cd sparseconvnet/SCN; g++ -fopenmp -std=c++11 -O3 -fPIC -c init.cpp -o init.cpp.o '
torch_dir + + '-I' + '/'.join(torch_dir.split('/')[:-4]) + '/include '
'/lib/include -I' + + '-I' + torch_dir + '/lib/include '
torch_dir + + '-I' + torch_dir + '/lib/include/TH '
'/lib/include/TH -I.') + '-I.')
assert r == 0 assert r == 0
ffi = create_extension( ffi = create_extension(
'sparseconvnet.SCN', 'sparseconvnet.SCN',
......
from torch.utils.ffi import _wrap_function
from ._SCN import lib as _lib, ffi as _ffi
__all__ = []


def _import_symbols(locals):
    """Export every symbol of the compiled ``_SCN`` library into *locals*.

    Callable symbols are wrapped with ``_wrap_function`` so they accept
    torch tensors via the FFI layer; non-callable values are copied
    through unchanged. Every exported name is appended to ``__all__``.
    """
    for name in dir(_lib):
        attr = getattr(_lib, name)
        locals[name] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(name)


_import_symbols(locals())
...@@ -33,7 +33,7 @@ template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) { ...@@ -33,7 +33,7 @@ template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) {
return p; return p;
} }
template <uInt dimension> template <uInt dimension>
Point<2*dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) { Point<2 * dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) {
Point<2 * dimension> p; Point<2 * dimension> p;
long *td; long *td;
td = THLongTensor_data(t0); td = THLongTensor_data(t0);
...@@ -45,8 +45,8 @@ Point<2*dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) { ...@@ -45,8 +45,8 @@ Point<2*dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) {
return p; return p;
} }
template <uInt dimension> template <uInt dimension>
Point<3*dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1, Point<3 * dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1,
THLongTensor *t2) { THLongTensor *t2) {
Point<3 * dimension> p; Point<3 * dimension> p;
long *td; long *td;
td = THLongTensor_data(t0); td = THLongTensor_data(t0);
...@@ -74,6 +74,4 @@ template <uInt dimension> struct IntArrayHash { ...@@ -74,6 +74,4 @@ template <uInt dimension> struct IntArrayHash {
}; };
#define THCITensor THCudaIntTensor #define THCITensor THCudaIntTensor
#define THCITensor_nElement THCudaIntTensor_nElement #define THCITensor_(NAME) TH_CONCAT_3(THCITensor, _, NAME)
#define THCITensor_resize1d THCudaIntTensor_resize1d
#define THCITensor_data THCudaIntTensor_data
...@@ -33,7 +33,7 @@ template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) { ...@@ -33,7 +33,7 @@ template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) {
return p; return p;
} }
template <uInt dimension> template <uInt dimension>
Point<2*dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) { Point<2 * dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) {
Point<2 * dimension> p; Point<2 * dimension> p;
long *td; long *td;
td = THLongTensor_data(t0); td = THLongTensor_data(t0);
...@@ -45,8 +45,8 @@ Point<2*dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) { ...@@ -45,8 +45,8 @@ Point<2*dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) {
return p; return p;
} }
template <uInt dimension> template <uInt dimension>
Point<3*dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1, Point<3 * dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1,
THLongTensor *t2) { THLongTensor *t2) {
Point<3 * dimension> p; Point<3 * dimension> p;
long *td; long *td;
td = THLongTensor_data(t0); td = THLongTensor_data(t0);
...@@ -74,6 +74,4 @@ template <uInt dimension> struct IntArrayHash { ...@@ -74,6 +74,4 @@ template <uInt dimension> struct IntArrayHash {
}; };
#define THCITensor THCudaLongTensor #define THCITensor THCudaLongTensor
#define THCITensor_nElement THCudaLongTensor_nElement #define THCITensor_(NAME) TH_CONCAT_3(THCITensor, _, NAME)
#define THCITensor_resize1d THCudaLongTensor_resize1d
#define THCITensor_data THCudaLongTensor_data
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
extern "C" void scn_DR_(ActivePooling_updateOutput)( extern "C" void scn_DR_(ActivePooling_updateOutput)(
THLongTensor *inputSize, void **m, THTensor *input_features, THLongTensor *inputSize, void **m, THTensor *input_features,
THTensor *output_features, void *rulesBuffer, bool average) { THTensor *output_features, bool average) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
uInt nPlanes = input_features->size[1]; uInt nPlanes = input_features->size[1];
...@@ -27,7 +27,7 @@ extern "C" void scn_DR_(ActivePooling_updateOutput)( ...@@ -27,7 +27,7 @@ extern "C" void scn_DR_(ActivePooling_updateOutput)(
} }
extern "C" void scn_DR_(ActivePooling_updateGradInput)( extern "C" void scn_DR_(ActivePooling_updateGradInput)(
THLongTensor *inputSize, void **m, THTensor *input_features, THLongTensor *inputSize, void **m, THTensor *input_features,
THTensor *d_input_features, THTensor *d_output_features, void *rulesBuffer, THTensor *d_input_features, THTensor *d_output_features,
bool average) { bool average) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
extern "C" void scn_DR_(AveragePooling_updateOutput)( extern "C" void scn_DR_(AveragePooling_updateOutput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize, THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
THLongTensor *poolStride, void **m, THTensor *input_features, THLongTensor *poolStride, void **m, THTensor *input_features,
THTensor *output_features, long nFeaturesToDrop, void *rulesBuffer) { THTensor *output_features, long nFeaturesToDrop) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
uInt nPlanes = input_features->size[1] - nFeaturesToDrop; uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
...@@ -37,7 +37,7 @@ extern "C" void scn_DR_(AveragePooling_updateGradInput)( ...@@ -37,7 +37,7 @@ extern "C" void scn_DR_(AveragePooling_updateGradInput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize, THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
THLongTensor *poolStride, void **m, THTensor *input_features, THLongTensor *poolStride, void **m, THTensor *input_features,
THTensor *d_input_features, THTensor *d_output_features, THTensor *d_input_features, THTensor *d_output_features,
long nFeaturesToDrop, void *rulesBuffer) { long nFeaturesToDrop) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
uInt nPlanes = input_features->size[1] - nFeaturesToDrop; uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
......
...@@ -13,7 +13,7 @@ extern "C" double scn_DR_(Convolution_updateOutput)( ...@@ -13,7 +13,7 @@ extern "C" double scn_DR_(Convolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize, THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features, THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *output_features, THTensor *weight, THTensor *bias, THTensor *output_features, THTensor *weight, THTensor *bias,
long filterVolume, void *rulesBuffer) { long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = auto _rules =
...@@ -43,8 +43,7 @@ extern "C" void scn_DR_(Convolution_backward)( ...@@ -43,8 +43,7 @@ extern "C" void scn_DR_(Convolution_backward)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize, THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features, THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *d_input_features, THTensor *d_output_features, THTensor *weight, THTensor *d_input_features, THTensor *d_output_features, THTensor *weight,
THTensor *d_weight, THTensor *d_bias, long filterVolume, THTensor *d_weight, THTensor *d_bias, long filterVolume) {
void *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = auto _rules =
...@@ -71,7 +70,7 @@ extern "C" void scn_DR_(Convolution_backward)( ...@@ -71,7 +70,7 @@ extern "C" void scn_DR_(Convolution_backward)(
extern "C" double scn_DR_(SubmanifoldConvolution_updateOutput)( extern "C" double scn_DR_(SubmanifoldConvolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *filterSize, void **m, THLongTensor *inputSize, THLongTensor *filterSize, void **m,
THTensor *input_features, THTensor *output_features, THTensor *weight, THTensor *input_features, THTensor *output_features, THTensor *weight,
THTensor *bias, long filterVolume, void *rulesBuffer) { THTensor *bias, long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true); auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
...@@ -101,7 +100,7 @@ extern "C" void scn_DR_(SubmanifoldConvolution_backward)( ...@@ -101,7 +100,7 @@ extern "C" void scn_DR_(SubmanifoldConvolution_backward)(
THLongTensor *inputSize, THLongTensor *filterSize, void **m, THLongTensor *inputSize, THLongTensor *filterSize, void **m,
THTensor *input_features, THTensor *d_input_features, THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *weight, THTensor *d_weight, THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
THTensor *d_bias, long filterVolume, void *rulesBuffer) { THTensor *d_bias, long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true); auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
...@@ -123,4 +122,122 @@ extern "C" void scn_DR_(SubmanifoldConvolution_backward)( ...@@ -123,4 +122,122 @@ extern "C" void scn_DR_(SubmanifoldConvolution_backward)(
nActive, THBlas_(gemm)); nActive, THBlas_(gemm));
} }
} }
// Forward pass of a "full" sparse convolution: builds a rule book mapping the
// active input sites (metadata mIn) onto output sites, populating the output
// metadata (mOut) in the process, then applies the weights via GEMM.
// Returns the number of multiply-accumulate operations performed (flops).
extern "C" double scn_DR_(FullConvolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **mIn, void **mOut,
THTensor *input_features, THTensor *output_features, THTensor *weight,
THTensor *bias, long filterVolume) {
// Bind both metadata handles; _mIn / _mOut references are created here.
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mIn)
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mOut)
auto _rules = _mIn.getFullConvolutionRuleBook(
inputSize, outputSize, filterSize, filterStride, _mOut);
uInt nActive = _mOut.getNActive(outputSize);
// One output row per active site; weight->size[1] is the output plane count.
THTensor_(resize2d)(output_features, nActive, weight->size[1]);
// With a bias, Convolution_ForwardPass initialises each output row from the
// bias values itself; without one the buffer must start at zero.
if (not bias)
THTensor_(zero)(output_features);
double flops = 0;
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto oF = THTensor_(data)(output_features);
auto ip = input_features->size[1];
auto op = output_features->size[1];
auto w = THTensor_(data)(weight);
auto b = THOptionalTensorData(bias); // presumably nullptr when bias absent
Convolution_ForwardPass(iF, ip, ip, oF, op, op, w, b, _rules, nActive,
THBlas_(gemm));
// Each rule stores (input, output) index pairs, so size()/2 is the number
// of contributing site pairs for that filter offset.
for (auto &r : _rules)
flops += r.size() / 2 * ip * op;
}
return flops;
}
// Backward pass of the full sparse convolution: computes d_input_features,
// and accumulates weight/bias gradients into d_weight / d_bias.
// NOTE(review): getFullConvolutionRuleBook is requested again here —
// presumably the rule book is cached inside the metadata; confirm.
extern "C" void scn_DR_(FullConvolution_backward)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **mIn, void **mOut,
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
THTensor *d_bias, long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mIn)
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mOut)
auto _rules = _mIn.getFullConvolutionRuleBook(
inputSize, outputSize, filterSize, filterStride, _mOut);
uInt nActive = _mOut.getNActive(outputSize);
// Input gradient has the same shape as the input and is accumulated, so it
// must be zeroed before the backward GEMMs.
THTensor_(resizeAs)(d_input_features, input_features);
THTensor_(zero)(d_input_features);
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto diF = THTensor_(data)(d_input_features);
auto doF = THTensor_(data)(d_output_features);
auto ip = input_features->size[1];
auto op = d_output_features->size[1];
auto w = THTensor_(data)(weight);
auto dw = THTensor_(data)(d_weight);
auto db = THOptionalTensorData(d_bias); // optional bias gradient buffer
Convolution_BackwardPass(iF, diF, ip, ip, doF, op, op, w, dw, db, _rules,
nActive, THBlas_(gemm));
}
}
// Forward pass of a strided sparse convolution whose stride placement is
// randomized (rule book built by getRandomizedStrideRuleBook with the
// final flag set to true here). Returns the multiply-accumulate count.
extern "C" double scn_DR_(RandomizedStrideConvolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *output_features, THTensor *weight, THTensor *bias,
long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules =
_m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize, filterStride, true);
uInt nActive = _m.getNActive(outputSize);
// One row per active output site, weight->size[1] output planes.
THTensor_(resize2d)(output_features, nActive, weight->size[1]);
// With a bias the forward pass fills the rows itself; otherwise zero first.
if (not bias)
THTensor_(zero)(output_features);
double flops = 0;
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto oF = THTensor_(data)(output_features);
auto ip = input_features->size[1];
auto op = output_features->size[1];
auto w = THTensor_(data)(weight);
auto b = THOptionalTensorData(bias); // presumably nullptr when bias absent
Convolution_ForwardPass(iF, ip, ip, oF, op, op, w, b, _rules, nActive,
THBlas_(gemm));
// size()/2 = number of (input, output) site pairs per filter offset.
for (auto &r : _rules)
flops += r.size() / 2 * ip * op;
}
return flops;
}
// Backward pass of the randomized-stride convolution: fills d_input_features
// and accumulates into d_weight / d_bias.
// NOTE(review): the rule book is rebuilt with the same arguments as in the
// forward pass — presumably cached/deterministic in the metadata despite the
// "randomized" name; confirm, otherwise forward and backward would disagree.
extern "C" void scn_DR_(RandomizedStrideConvolution_backward)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *d_input_features, THTensor *d_output_features, THTensor *weight,
THTensor *d_weight, THTensor *d_bias, long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules =
_m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize, filterStride, true);
uInt nActive = _m.getNActive(outputSize);
// Gradient w.r.t. input matches the input's shape and is accumulated,
// so zero it before the backward GEMMs.
THTensor_(resizeAs)(d_input_features, input_features);
THTensor_(zero)(d_input_features);
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto diF = THTensor_(data)(d_input_features);
auto doF = THTensor_(data)(d_output_features);
auto ip = input_features->size[1];
auto op = d_output_features->size[1];
auto w = THTensor_(data)(weight);
auto dw = THTensor_(data)(d_weight);
auto db = THOptionalTensorData(d_bias); // optional bias gradient buffer
Convolution_BackwardPass(iF, diF, ip, ip, doF, op, op, w, dw, db, _rules,
nActive, THBlas_(gemm));
}
}
#endif #endif
...@@ -12,9 +12,9 @@ ...@@ -12,9 +12,9 @@
template <typename T> template <typename T>
void Convolution_ForwardPass( void Convolution_ForwardPass(
T *input_features, uInt input_nPlanes, uInt input_nPLANES, T *output_features, T *input_features, uInt input_nPlanes, uInt input_nPLANES,
uInt output_nPlanes, uInt output_nPLANES, T *weight, T *bias, RuleBook &rules, T *output_features, uInt output_nPlanes, uInt output_nPLANES, T *weight,
uInt output_nActive, T *bias, RuleBook &rules, uInt output_nActive,
void (*gemm)(char transa, char transb, long m, long n, long k, T alpha, void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) { T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
...@@ -28,10 +28,11 @@ void Convolution_ForwardPass( ...@@ -28,10 +28,11 @@ void Convolution_ForwardPass(
uInt nHot = r.size() / 2; uInt nHot = r.size() / 2;
input_buffer.resize(nHot * input_nPlanes); input_buffer.resize(nHot * input_nPlanes);
output_buffer.resize(nHot * output_nPlanes); output_buffer.resize(nHot * output_nPlanes);
for (uInt row = 0; row < nHot; row++) for (uInt row = 0; row < nHot; row++) {
std::memcpy(&input_buffer[row * input_nPlanes], std::memcpy(&input_buffer[row * input_nPlanes],
input_features + r[2 * row] * input_nPLANES, input_features + r[2 * row] * input_nPLANES,
sizeof(T) * input_nPlanes); sizeof(T) * input_nPlanes);
}
// Do GEMM (note: gemm assumes column-major matrices) // Do GEMM (note: gemm assumes column-major matrices)
// input_buffer is l*m (row-major) // input_buffer is l*m (row-major)
// weight is m*r (row-major) // weight is m*r (row-major)
...@@ -46,7 +47,7 @@ void Convolution_ForwardPass( ...@@ -46,7 +47,7 @@ void Convolution_ForwardPass(
&input_buffer[0], input_nPlanes, // m &input_buffer[0], input_nPlanes, // m
0, // beta 0, // beta
&output_buffer[0], output_nPlanes // r &output_buffer[0], output_nPlanes // r
); );
weight += input_nPlanes * output_nPlanes; weight += input_nPlanes * output_nPlanes;
for (uInt row = 0; row < nHot; row++) { for (uInt row = 0; row < nHot; row++) {
T *b = &output_buffer[row * output_nPlanes]; T *b = &output_buffer[row * output_nPlanes];
...@@ -59,9 +60,10 @@ void Convolution_ForwardPass( ...@@ -59,9 +60,10 @@ void Convolution_ForwardPass(
template <typename T> template <typename T>
void Convolution_BackwardPass( void Convolution_BackwardPass(
T *input_features, T *d_input_features, uInt input_nPlanes,uInt input_nPLANES, T *input_features, T *d_input_features, uInt input_nPlanes,
T *d_output_features, uInt output_nPlanes,uInt output_nPLANES, T *weight, T *d_weight, uInt input_nPLANES, T *d_output_features, uInt output_nPlanes,
T *d_bias, RuleBook &rules, uInt output_nActive, uInt output_nPLANES, T *weight, T *d_weight, T *d_bias, RuleBook &rules,
uInt output_nActive,
void (*gemm)(char transa, char transb, long m, long n, long k, T alpha, void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) { T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
...@@ -93,7 +95,7 @@ void Convolution_BackwardPass( ...@@ -93,7 +95,7 @@ void Convolution_BackwardPass(
&output_buffer[0], output_nPlanes, // m &output_buffer[0], output_nPlanes, // m
0, // beta 0, // beta
&input_buffer[0], input_nPlanes // r &input_buffer[0], input_nPlanes // r
); );
weight += input_nPlanes * output_nPlanes; weight += input_nPlanes * output_nPlanes;
for (uInt row = 0; row < nHot; row++) { for (uInt row = 0; row < nHot; row++) {
T *b = &input_buffer[row * input_nPlanes]; T *b = &input_buffer[row * input_nPlanes];
...@@ -120,8 +122,34 @@ void Convolution_BackwardPass( ...@@ -120,8 +122,34 @@ void Convolution_BackwardPass(
&input_buffer[0], input_nPlanes, // l &input_buffer[0], input_nPlanes, // l
1, // beta 1, // beta
d_weight, output_nPlanes // r d_weight, output_nPlanes // r
); );
d_weight += input_nPlanes * output_nPlanes; d_weight += input_nPlanes * output_nPlanes;
} }
} }
// template <typename T>
// void Convolution_ForwardPass(
// T *input_features, uInt input_nPlanes, uInt input_nPLANES,
// T *output_features, uInt output_nPlanes, uInt output_nPLANES, T *weight,
// T *bias, RuleBook &rules, uInt output_nActive,
// void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
// T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
// if (bias != nullptr) // Set bias
// for (uInt row = 0; row < output_nActive; row++)
// for (uInt column = 0; column < output_nPlanes; column++)
// output_features[row * output_nPLANES + column] = bias[column];
// for (auto &r : rules) {
// uInt nHot = r.size() / 2;
// for (uInt row = 0; row < nHot; row++) {
// T *inp = &input_features[r[2 * row] * input_nPLANES];
// T *out = &output_features[r[2 * row + 1] * output_nPLANES];
// for (uInt i = 0; i < input_nPlanes; i++)
// for (uInt j = 0; j < output_nPlanes; j++)
// out[j] += inp[i] * weight[i * input_nPlanes + j];
// }
// weight += input_nPlanes * output_nPlanes;
// }
// }
#endif /* CPU_CONVOLUTION_H */ #endif /* CPU_CONVOLUTION_H */
...@@ -13,7 +13,7 @@ extern "C" double scn_DR_(Deconvolution_updateOutput)( ...@@ -13,7 +13,7 @@ extern "C" double scn_DR_(Deconvolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize, THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features, THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *output_features, THTensor *weight, THTensor *bias, THTensor *output_features, THTensor *weight, THTensor *bias,
long filterVolume, void *rulesBuffer) { long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = auto _rules =
...@@ -42,8 +42,7 @@ extern "C" void scn_DR_(Deconvolution_backward)( ...@@ -42,8 +42,7 @@ extern "C" void scn_DR_(Deconvolution_backward)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize, THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features, THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *d_input_features, THTensor *d_output_features, THTensor *weight, THTensor *d_input_features, THTensor *d_output_features, THTensor *weight,
THTensor *d_weight, THTensor *d_bias, long filterVolume, THTensor *d_weight, THTensor *d_bias, long filterVolume) {
void *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = auto _rules =
......
...@@ -5,47 +5,96 @@ ...@@ -5,47 +5,96 @@
// LICENSE file in the root directory of this source tree. // LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_ #ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/InputLayer.cpp" #define TH_GENERIC_FILE_ "generic/CPU/IOLayers.cpp"
#else #else
#include "InputLayer.h" #include "IOLayers.h"
extern "C" void scn_DR_(InputLayer_updateOutput)( extern "C" void scn_DR_(InputLayer_updateOutput)(
void **m, THLongTensor *spatialSize, THLongTensor *input_coords, void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
THTensor *input_features, THTensor *output_features, long batchSize, THTensor *input_features, THTensor *output_features, long batchSize,
long mode, void *rulesBuffer) { long mode) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
_m.inputLayer(spatialSize, input_coords, batchSize, mode); _m.inputLayer(spatialSize, input_coords, batchSize, mode);
auto nPlanes = input_features->size[1]; auto nPlanes = input_features->size[1];
auto &rules = _m.inputLayerRuleBook; auto &rules = _m.inputLayerRuleBook;
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][3]; auto nRows = rules[0][3];
THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes); if (mode == 0) {
THTensor_(zero)(output_features); THTensor_(resizeAs)(output_features, input_features);
InputLayer_ForwardPass<real>(THTensor_(data)(input_features), THTensor_(copy)(output_features, input_features);
THTensor_(data)(output_features), nRows, } else {
maxActive, nPlanes, &rules[1][0], mode == 4); THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
THTensor_(zero)(output_features);
InputLayer_ForwardPass<real>(THTensor_(data)(input_features),
THTensor_(data)(output_features), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4);
}
} }
extern "C" void scn_DR_(InputLayer_updateGradInput)(void **m, extern "C" void scn_DR_(InputLayer_updateGradInput)(void **m,
THTensor *d_input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *d_output_features) {
void *rulesBuffer) { SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.inputLayerRuleBook;
auto nPlanes = d_output_features->size[1];
auto mode = rules[0][0];
auto maxActive = rules[0][1];
auto nRows = rules[0][3];
if (mode == 0) {
THTensor_(resizeAs)(d_input_features, d_output_features);
THTensor_(copy)(d_input_features, d_output_features);
} else {
THTensor_(resize2d)(d_input_features, rules[0][2], nPlanes);
THTensor_(zero)(d_input_features);
InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features),
THTensor_(data)(d_output_features), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4);
}
}
extern "C" void scn_DR_(OutputLayer_updateOutput)(void **m,
THTensor *input_features,
THTensor *output_features) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.inputLayerRuleBook;
auto nPlanes = input_features->size[1];
auto mode = rules[0][0];
auto maxActive = rules[0][1];
auto nRows = rules[0][3];
if (mode == 0) {
THTensor_(resizeAs)(output_features, input_features);
THTensor_(copy)(output_features, input_features);
} else {
THTensor_(resize2d)(output_features, rules[0][2], nPlanes);
THTensor_(zero)(output_features);
InputLayer_BackwardPass<real>(THTensor_(data)(output_features),
THTensor_(data)(input_features), nRows,
maxActive, nPlanes, &rules[1][0], false);
}
}
extern "C" void
scn_DR_(OutputLayer_updateGradInput)(void **m, THTensor *d_input_features,
THTensor *d_output_features) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.inputLayerRuleBook; auto &rules = _m.inputLayerRuleBook;
auto nPlanes = d_output_features->size[1]; auto nPlanes = d_output_features->size[1];
THTensor_(resize2d)(d_input_features, rules[0][2], nPlanes);
THTensor_(zero)(d_input_features);
auto mode = rules[0][0]; auto mode = rules[0][0];
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][3]; auto nRows = rules[0][3];
InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features), if (mode == 0) {
THTensor_(data)(d_output_features), nRows, THTensor_(resizeAs)(d_input_features, d_output_features);
maxActive, nPlanes, &rules[1][0], mode == 4); THTensor_(copy)(d_input_features, d_output_features);
} else {
THTensor_(resize2d)(d_input_features, nRows, nPlanes);
THTensor_(zero)(d_input_features);
InputLayer_ForwardPass<real>(THTensor_(data)(d_output_features),
THTensor_(data)(d_input_features), nRows,
maxActive, nPlanes, &rules[1][0], false);
}
} }
extern "C" void scn_DR_(BLInputLayer_updateOutput)( extern "C" void scn_DR_(BLInputLayer_updateOutput)(
void **m, THLongTensor *spatialSize, THLongTensor *input_coords, void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
THTensor *input_features, THTensor *output_features, long mode, THTensor *input_features, THTensor *output_features, long mode) {
void *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
_m.blLayer(spatialSize, input_coords, mode); _m.blLayer(spatialSize, input_coords, mode);
auto nPlanes = input_features->size[2]; auto nPlanes = input_features->size[2];
...@@ -66,8 +115,7 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)( ...@@ -66,8 +115,7 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)(
} }
extern "C" void extern "C" void
scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features, scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features,
THTensor *d_output_features, THTensor *d_output_features) {
void *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
auto nPlanes = d_output_features->size[1]; auto nPlanes = d_output_features->size[1];
...@@ -90,8 +138,7 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features, ...@@ -90,8 +138,7 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features,
extern "C" void scn_DR_(BLOutputLayer_updateOutput)(void **m, extern "C" void scn_DR_(BLOutputLayer_updateOutput)(void **m,
THTensor *input_features, THTensor *input_features,
THTensor *output_features, THTensor *output_features) {
void *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
auto nPlanes = input_features->size[1]; auto nPlanes = input_features->size[1];
...@@ -112,8 +159,7 @@ extern "C" void scn_DR_(BLOutputLayer_updateOutput)(void **m, ...@@ -112,8 +159,7 @@ extern "C" void scn_DR_(BLOutputLayer_updateOutput)(void **m,
} }
extern "C" void extern "C" void
scn_DR_(BLOutputLayer_updateGradInput)(void **m, THTensor *d_input_features, scn_DR_(BLOutputLayer_updateGradInput)(void **m, THTensor *d_input_features,
THTensor *d_output_features, THTensor *d_output_features) {
void *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
auto nPlanes = d_output_features->size[2]; auto nPlanes = d_output_features->size[2];
......
...@@ -4,8 +4,8 @@ ...@@ -4,8 +4,8 @@
// This source code is licensed under the license found in the // This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree. // LICENSE file in the root directory of this source tree.
#ifndef CPU_INPUTLAYER_H #ifndef CPU_IOLAYERS_H
#define CPU_INPUTLAYER_H #define CPU_IOLAYERS_H
#include "../SparseConvNet.h" #include "../SparseConvNet.h"
#include <cstring> #include <cstring>
...@@ -44,4 +44,4 @@ void InputLayer_BackwardPass(T *d_input_features, T *d_output_features, ...@@ -44,4 +44,4 @@ void InputLayer_BackwardPass(T *d_input_features, T *d_output_features,
rules += 1 + maxActive; rules += 1 + maxActive;
} }
} }
#endif /* CPU_INPUTLAYER_H */ #endif /* CPU_IOLAYERS_H */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment