Commit 43d4e998 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

tidy

parent d77687a6
...@@ -12,10 +12,8 @@ torch_dir = os.path.dirname(torch.__file__) ...@@ -12,10 +12,8 @@ torch_dir = os.path.dirname(torch.__file__)
print('Building SCN module') print('Building SCN module')
if torch.cuda.is_available(): if torch.cuda.is_available():
r = os.system( s=('cd sparseconvnet/SCN; nvcc init.cu -c -o init.cu.o -ccbin /usr/bin/cc'
'cd sparseconvnet/SCN; nvcc init.cu -c -o init.cu.o -ccbin /usr/bin/cc' + ' -m64 --std c++11 -Xcompiler \"-fopenmp -fPIC -O3\" '
+ ' -m64 --std c++11 -Xcompiler '
+ ',\"-fopenmp\",\"-fPIC\",\"-O3\" '
+ '-gencode arch=compute_62,code=sm_62 ' + '-gencode arch=compute_62,code=sm_62 '
+ '-gencode arch=compute_61,code=sm_61 ' + '-gencode arch=compute_61,code=sm_61 '
+ '-gencode arch=compute_60,code=sm_60 ' + '-gencode arch=compute_60,code=sm_60 '
...@@ -28,6 +26,7 @@ if torch.cuda.is_available(): ...@@ -28,6 +26,7 @@ if torch.cuda.is_available():
+ '-I' + torch_dir + '/lib/include/TH ' + '-I' + torch_dir + '/lib/include/TH '
+ '-I' + torch_dir + '/lib/include/THC ' + '-I' + torch_dir + '/lib/include/THC '
+ '-I.') + '-I.')
r = os.system(s)
assert r == 0 assert r == 0
ffi = create_extension( ffi = create_extension(
'sparseconvnet.SCN', 'sparseconvnet.SCN',
...@@ -44,7 +43,7 @@ if torch.cuda.is_available(): ...@@ -44,7 +43,7 @@ if torch.cuda.is_available():
with_cuda=True) with_cuda=True)
else: else:
r = os.system( r = os.system(
'cd sparseconvnet/SCN; g++ -std=c++11 -DENABLE_OPENMP -fPIC -c init.cpp -o init.cpp.o -I' + 'cd sparseconvnet/SCN; g++ -fopenmp -std=c++11 -O3 -fPIC -c init.cpp -o init.cpp.o -I' +
torch_dir + torch_dir +
'/lib/include -I' + '/lib/include -I' +
torch_dir + torch_dir +
......
...@@ -16,11 +16,11 @@ extern "C" void scn_DR_(InputLayer_updateOutput)( ...@@ -16,11 +16,11 @@ extern "C" void scn_DR_(InputLayer_updateOutput)(
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
_m.inputLayer(spatialSize, input_coords, batchSize, mode); _m.inputLayer(spatialSize, input_coords, batchSize, mode);
auto nPlanes = input_features->size[1]; auto nPlanes = input_features->size[1];
THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
THTensor_(zero)(output_features);
auto &rules = _m.inputLayerRuleBook; auto &rules = _m.inputLayerRuleBook;
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][3]; auto nRows = rules[0][3];
THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
THTensor_(zero)(output_features);
InputLayer_ForwardPass<real>(THTensor_(data)(input_features), InputLayer_ForwardPass<real>(THTensor_(data)(input_features),
THTensor_(data)(output_features), nRows, THTensor_(data)(output_features), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4); maxActive, nPlanes, &rules[1][0], mode == 4);
...@@ -49,14 +49,20 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)( ...@@ -49,14 +49,20 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)(
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
_m.blLayer(spatialSize, input_coords, mode); _m.blLayer(spatialSize, input_coords, mode);
auto nPlanes = input_features->size[2]; auto nPlanes = input_features->size[2];
THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
THTensor_(zero)(output_features);
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][4]; auto nRows = rules[0][4];
InputLayer_ForwardPass<real>(THTensor_(data)(input_features), if (mode == 0) {
THTensor_(data)(output_features), nRows, THTensor_(resizeAs)(output_features, input_features);
maxActive, nPlanes, &rules[1][0], mode == 4); THTensor_(copy)(output_features, input_features);
THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
} else {
THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
THTensor_(zero)(output_features);
InputLayer_ForwardPass<real>(THTensor_(data)(input_features),
THTensor_(data)(output_features), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4);
}
} }
extern "C" void extern "C" void
scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features, scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features,
...@@ -65,32 +71,44 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features, ...@@ -65,32 +71,44 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THTensor *d_input_features,
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
auto nPlanes = d_output_features->size[1]; auto nPlanes = d_output_features->size[1];
THTensor_(resize3d)(d_input_features, rules[0][2], rules[0][3], nPlanes);
THTensor_(zero)(d_input_features);
auto mode = rules[0][0]; auto mode = rules[0][0];
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][4]; auto nRows = rules[0][4];
InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features), if (mode == 0) {
THTensor_(data)(d_output_features), nRows, THTensor_(resizeAs)(d_input_features, d_output_features);
maxActive, nPlanes, &rules[1][0], mode == 4); THTensor_(copy)(d_input_features, d_output_features);
THTensor_(resize3d)(d_input_features, rules[0][2], rules[0][3], nPlanes);
} else {
THTensor_(resize3d)(d_input_features, rules[0][2], rules[0][3], nPlanes);
THTensor_(zero)(d_input_features);
InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features),
THTensor_(data)(d_output_features), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4);
}
} }
extern "C" void scn_DR_(BLOutputLayer_updateOutput)( extern "C" void scn_DR_(BLOutputLayer_updateOutput)(void **m,
void **m, THTensor *input_features,
THTensor *input_features, THTensor *output_features, THTensor *output_features,
void *rulesBuffer) { void *rulesBuffer) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
auto nPlanes = input_features->size[1]; auto nPlanes = input_features->size[1];
THTensor_(resize3d)(output_features, rules[0][2], rules[0][3], nPlanes);
THTensor_(zero)(output_features);
auto mode = rules[0][0]; auto mode = rules[0][0];
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][4]; auto nRows = rules[0][4];
InputLayer_BackwardPass<real>(THTensor_(data)(output_features), if (mode == 0) {
THTensor_(data)(input_features), nRows, THTensor_(resizeAs)(output_features, input_features);
maxActive, nPlanes, &rules[1][0], false); THTensor_(copy)(output_features, input_features);
THTensor_(resize3d)(output_features, rules[0][2], rules[0][3], nPlanes);
} else {
THTensor_(resize3d)(output_features, rules[0][2], rules[0][3], nPlanes);
THTensor_(zero)(output_features);
InputLayer_BackwardPass<real>(THTensor_(data)(output_features),
THTensor_(data)(input_features), nRows,
maxActive, nPlanes, &rules[1][0], false);
}
} }
extern "C" void extern "C" void
scn_DR_(BLOutputLayer_updateGradInput)(void **m, THTensor *d_input_features, scn_DR_(BLOutputLayer_updateGradInput)(void **m, THTensor *d_input_features,
...@@ -102,11 +120,16 @@ scn_DR_(BLOutputLayer_updateGradInput)(void **m, THTensor *d_input_features, ...@@ -102,11 +120,16 @@ scn_DR_(BLOutputLayer_updateGradInput)(void **m, THTensor *d_input_features,
auto mode = rules[0][0]; auto mode = rules[0][0];
auto maxActive = rules[0][1]; auto maxActive = rules[0][1];
auto nRows = rules[0][4]; auto nRows = rules[0][4];
THTensor_(resize2d)(d_input_features, nRows, nPlanes); if (mode == 0) {
THTensor_(zero)(d_input_features); THTensor_(resizeAs)(d_input_features, d_output_features);
THTensor_(copy)(d_input_features, d_output_features);
InputLayer_ForwardPass<real>(THTensor_(data)(d_output_features), THTensor_(resize2d)(d_input_features, nRows, nPlanes);
THTensor_(data)(d_input_features), nRows, } else {
maxActive, nPlanes, &rules[1][0], false); THTensor_(resize2d)(d_input_features, nRows, nPlanes);
THTensor_(zero)(d_input_features);
InputLayer_ForwardPass<real>(THTensor_(data)(d_output_features),
THTensor_(data)(d_input_features), nRows,
maxActive, nPlanes, &rules[1][0], false);
}
} }
#endif #endif
...@@ -22,15 +22,15 @@ extern "C" void scn_DR_(InputLayer_updateOutput)( ...@@ -22,15 +22,15 @@ extern "C" void scn_DR_(InputLayer_updateOutput)(
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][3]; uInt nRows = rules[0][3];
THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size()); THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
auto iF = THCTensor_(data)(state, input_features); auto iF = THCTensor_(data)(state, input_features);
auto oF = THCTensor_(data)(state, output_features); auto oF = THCTensor_(data)(state, output_features);
auto rb = (uInt*) THCITensor_data(state, rulesBuffer); auto rb = (uInt*) THCITensor_data(state, rulesBuffer);
cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(), cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
cudaMemcpyHostToDevice); cudaMemcpyHostToDevice);
InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0, InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
THCState_getCurrentStream(state)>>>( THCState_getCurrentStream(state)>>>(
iF, oF, nRows, maxActive, nPlanes, rb, mode == 4); iF, oF, nRows, maxActive, nPlanes, rb, mode == 4);
} }
extern "C" void extern "C" void
scn_DR_(InputLayer_updateGradInput)(void **m, THCTensor *d_input_features, scn_DR_(InputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
...@@ -41,7 +41,7 @@ scn_DR_(InputLayer_updateGradInput)(void **m, THCTensor *d_input_features, ...@@ -41,7 +41,7 @@ scn_DR_(InputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
uInt nPlanes = d_output_features->size[1]; uInt nPlanes = d_output_features->size[1];
THCTensor_(resize2d)(state, d_input_features, rules[0][2], nPlanes); THCTensor_(resize2d)(state, d_input_features, rules[0][2], nPlanes);
THCTensor_(zero)(state, d_input_features); THCTensor_(zero)(state, d_input_features);
uInt mode = rules[0][0]; auto mode = rules[0][0];
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][3]; uInt nRows = rules[0][3];
...@@ -69,6 +69,11 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)( ...@@ -69,6 +69,11 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)(
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][4]; uInt nRows = rules[0][4];
if (mode == 0) {
THCTensor_(resizeAs)(state, output_features, input_features);
THCTensor_(copy)(state, output_features, input_features);
THCTensor_(resize2d)(state, output_features, *_m.inputNActive, nPlanes);
} else {
THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size()); THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
auto iF = THCTensor_(data)(state, input_features); auto iF = THCTensor_(data)(state, input_features);
auto oF = THCTensor_(data)(state, output_features); auto oF = THCTensor_(data)(state, output_features);
...@@ -78,6 +83,7 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)( ...@@ -78,6 +83,7 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)(
InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0, InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
THCState_getCurrentStream(state)>>>( THCState_getCurrentStream(state)>>>(
iF, oF, nRows, maxActive, nPlanes, rb, mode == 4); iF, oF, nRows, maxActive, nPlanes, rb, mode == 4);
}
} }
extern "C" void extern "C" void
scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features, scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
...@@ -86,11 +92,17 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features, ...@@ -86,11 +92,17 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
uInt nPlanes = d_output_features->size[1]; uInt nPlanes = d_output_features->size[1];
THCTensor_(resize3d)(state, d_input_features, rules[0][2], rules[0][3], nPlanes);
THCTensor_(zero)(state, d_input_features);
uInt mode = rules[0][0]; uInt mode = rules[0][0];
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][4]; uInt nRows = rules[0][4];
if (mode == 0) {
THCTensor_(resizeAs)(state, d_input_features, d_output_features);
THCTensor_(copy)(state, d_input_features, d_output_features);
THCTensor_(resize3d)(state, d_input_features, rules[0][2], rules[0][3], nPlanes);
} else {
THCTensor_(resize3d)(state, d_input_features, rules[0][2], rules[0][3], nPlanes);
THCTensor_(zero)(state, d_input_features);
THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size()); THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
auto diF = THCTensor_(data)(state, d_input_features); auto diF = THCTensor_(data)(state, d_input_features);
auto doF = THCTensor_(data)(state, d_output_features); auto doF = THCTensor_(data)(state, d_output_features);
...@@ -100,6 +112,7 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features, ...@@ -100,6 +112,7 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0, InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
THCState_getCurrentStream(state)>>>( THCState_getCurrentStream(state)>>>(
diF, doF, nRows, maxActive, nPlanes, rb, mode == 4); diF, doF, nRows, maxActive, nPlanes, rb, mode == 4);
}
} }
extern "C" void scn_DR_(BLOutputLayer_updateOutput)( extern "C" void scn_DR_(BLOutputLayer_updateOutput)(
...@@ -109,20 +122,26 @@ extern "C" void scn_DR_(BLOutputLayer_updateOutput)( ...@@ -109,20 +122,26 @@ extern "C" void scn_DR_(BLOutputLayer_updateOutput)(
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m) SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto &rules = _m.blLayerRuleBook; auto &rules = _m.blLayerRuleBook;
uInt nPlanes = input_features->size[1]; uInt nPlanes = input_features->size[1];
THCTensor_(resize3d)(state, output_features, rules[0][2], rules[0][3], nPlanes);
THCTensor_(zero)(state, output_features);
auto mode = rules[0][0]; auto mode = rules[0][0];
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][4]; uInt nRows = rules[0][4];
THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size()); if (mode==0) {
auto iF = THCTensor_(data)(state, input_features); THCTensor_(resizeAs)(state, output_features, input_features);
auto oF = THCTensor_(data)(state, output_features); THCTensor_(copy)(state, output_features, input_features);
auto rb = (uInt*) THCITensor_data(state, rulesBuffer); THCTensor_(resize3d)(state, output_features, rules[0][2], rules[0][3], nPlanes);
cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(), } else {
cudaMemcpyHostToDevice); THCTensor_(resize3d)(state, output_features, rules[0][2], rules[0][3], nPlanes);
InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0, THCTensor_(zero)(state, output_features);
THCState_getCurrentStream(state)>>>( THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
oF, iF, nRows, maxActive, nPlanes, rb, false); auto iF = THCTensor_(data)(state, input_features);
auto oF = THCTensor_(data)(state, output_features);
auto rb = (uInt*) THCITensor_data(state, rulesBuffer);
cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
cudaMemcpyHostToDevice);
InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
THCState_getCurrentStream(state)>>>(
oF, iF, nRows, maxActive, nPlanes, rb, false);
}
} }
extern "C" void extern "C" void
scn_DR_(BLOutputLayer_updateGradInput)(void **m, THCTensor *d_input_features, scn_DR_(BLOutputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
...@@ -134,16 +153,22 @@ scn_DR_(BLOutputLayer_updateGradInput)(void **m, THCTensor *d_input_features, ...@@ -134,16 +153,22 @@ scn_DR_(BLOutputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
uInt mode = rules[0][0]; uInt mode = rules[0][0];
uInt maxActive = rules[0][1]; uInt maxActive = rules[0][1];
uInt nRows = rules[0][4]; uInt nRows = rules[0][4];
THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes); if (mode==0) {
THCTensor_(zero)(state, d_input_features); THCTensor_(resizeAs)(state, d_input_features, d_output_features);
THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size()); THCTensor_(copy)(state, d_input_features, d_output_features);
auto diF = THCTensor_(data)(state, d_input_features); THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes);
auto doF = THCTensor_(data)(state, d_output_features); } else {
auto rb = (uInt*)THCITensor_data(state, rulesBuffer); THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes);
cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(), THCTensor_(zero)(state, d_input_features);
cudaMemcpyHostToDevice); THCITensor_resize1d(state, rulesBuffer, sizeof(uInt) * rules[1].size());
InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0, auto diF = THCTensor_(data)(state, d_input_features);
THCState_getCurrentStream(state)>>>( auto doF = THCTensor_(data)(state, d_output_features);
doF, diF, nRows, maxActive, nPlanes, rb, false); auto rb = (uInt*)THCITensor_data(state, rulesBuffer);
cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
cudaMemcpyHostToDevice);
InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
THCState_getCurrentStream(state)>>>(
doF, diF, nRows, maxActive, nPlanes, rb, false);
}
} }
#endif #endif
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
#ifndef INPUTLAYER_H #ifndef INPUTLAYER_H
#define INPUTLAYER_H #define INPUTLAYER_H
#include "../SparseConvNet.h" #include "../SparseConvNet.h"
#include <omp.h>
// mode 1==overwrite, 2=keep, 3=sum, 4=mean // mode 1==overwrite, 2=keep, 3=sum, 4=mean
template <uInt dimension> template <uInt dimension>
...@@ -100,6 +99,12 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -100,6 +99,12 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
if (mode == 0) { if (mode == 0) {
nActive = batchSize * length; nActive = batchSize * length;
rules.resize(1);
rules[0].push_back(mode);
rules[0].push_back(1);
rules[0].push_back(batchSize);
rules[0].push_back(length);
rules[0].push_back(nActive);
#pragma omp parallel for private(I) #pragma omp parallel for private(I)
for (I = 0; I < batchSize; I++) { for (I = 0; I < batchSize; I++) {
auto &sg = SGs[I]; auto &sg = SGs[I];
...@@ -113,20 +118,6 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -113,20 +118,6 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
sg.mp[p] = l; sg.mp[p] = l;
} }
} }
rules.resize(2);
rules[0].push_back(0);
rules[0].push_back(1);
rules[0].push_back(batchSize);
rules[0].push_back(length);
rules[0].push_back(nActive);
auto &rule = rules[1];
int ll = 0;
for (I = 0; I < batchSize; I++) {
for (int l = 0; l < length; ++l, ++ll) {
rule.push_back(1);
rule.push_back(ll);
}
}
return; return;
} }
...@@ -199,10 +190,10 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -199,10 +190,10 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
} }
} }
if (mode == 3 or mode == 4) { if (mode == 3 or mode == 4) {
std::cout << omp_get_num_threads() << std::endl;
rule.resize((maxActive + 1) * nActive); rule.resize((maxActive + 1) * nActive);
#pragma omp parallel for private(I) #pragma omp parallel for private(I)
for (I = 0; I < batchSize; I++) { for (I = 0; I < batchSize; I++) {
std::cout << omp_get_num_threads() << "\n";
auto &ors = outputRows[I]; auto &ors = outputRows[I];
auto rr = &rule[SGs[I].ctr * (maxActive + 1)]; auto rr = &rule[SGs[I].ctr * (maxActive + 1)];
for (auto &row : ors) { for (auto &row : ors) {
......
...@@ -101,17 +101,17 @@ class AveragePooling(Module): ...@@ -101,17 +101,17 @@ class AveragePooling(Module):
def __repr__(self): def __repr__(self):
s = 'AveragePooling' s = 'AveragePooling'
if self.pool_size.max() == self.pool_size.min() and\ if self.pool_size.max().item() == self.pool_size.min().item() and\
self.pool_stride.max() == self.pool_stride.min(): self.pool_stride.max().item() == self.pool_stride.min().item():
s = s + str(self.pool_size[0].item()) + \ s = s + str(self.pool_size[0].item()) + \
'/' + str(self.pool_stride[0].item()) '/' + str(self.pool_stride[0].item())
else: else:
s = s + '(' + str(self.pool_size[0].item()) s = s + '(' + str(self.pool_size[0].item())
for i in self.pool_size[1:]: for i in self.pool_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')/(' + str(self.pool_stride[0].item()) s = s + ')/(' + str(self.pool_stride[0].item())
for i in self.pool_stride[1:]: for i in self.pool_stride[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')' s = s + ')'
if self.nFeaturesToDrop > 0: if self.nFeaturesToDrop > 0:
......
...@@ -127,17 +127,17 @@ class Convolution(Module): ...@@ -127,17 +127,17 @@ class Convolution(Module):
def __repr__(self): def __repr__(self):
s = 'Convolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C' s = 'Convolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
if self.filter_size.max() == self.filter_size.min() and\ if self.filter_size.max().item() == self.filter_size.min().item() and\
self.filter_stride.max() == self.filter_stride.min(): self.filter_stride.max().item() == self.filter_stride.min().item():
s = s + str(self.filter_size[0].item()) + \ s = s + str(self.filter_size[0].item()) + \
'/' + str(self.filter_stride[0].item()) '/' + str(self.filter_stride[0].item())
else: else:
s = s + '(' + str(self.filter_size[0].item()) s = s + '(' + str(self.filter_size[0].item())
for i in self.filter_size[1:]: for i in self.filter_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')/(' + str(self.filter_stride[0].item()) s = s + ')/(' + str(self.filter_stride[0].item())
for i in self.filter_stride[1:]: for i in self.filter_stride[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')' s = s + ')'
return s return s
......
...@@ -127,17 +127,17 @@ class Deconvolution(Module): ...@@ -127,17 +127,17 @@ class Deconvolution(Module):
def __repr__(self): def __repr__(self):
s = 'Deconvolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C' s = 'Deconvolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
if self.filter_size.max() == self.filter_size.min() and\ if self.filter_size.max().item() == self.filter_size.min().item() and\
self.filter_stride.max() == self.filter_stride.min(): self.filter_stride.max().item() == self.filter_stride.min().item():
s = s + str(self.filter_size[0].item()) + \ s = s + str(self.filter_size[0].item()) + \
'/' + str(self.filter_stride[0].item()) '/' + str(self.filter_stride[0].item())
else: else:
s = s + '(' + str(self.filter_size[0].item()) s = s + '(' + str(self.filter_size[0].item())
for i in self.filter_size[1:]: for i in self.filter_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')/(' + str(self.filter_stride[0].item()) s = s + ')/(' + str(self.filter_stride[0].item())
for i in self.filter_stride[1:]: for i in self.filter_stride[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')' s = s + ')'
return s return s
......
...@@ -72,12 +72,14 @@ class InputBatch(SparseConvNetTensor): ...@@ -72,12 +72,14 @@ class InputBatch(SparseConvNetTensor):
self.spatial_size, self.spatial_size,
threshold) threshold)
def precomputeMetadata(self, stride): def precomputeMetadata(self, size):
if stride == 2: """
Optional.
Allows precomputation of 'rulebooks' in data loading threads.
Use size == 2 if downsizing with size-2 stride-2 operations
Use size == 3 if downsizing with size-3 stride-2 operations
"""
if size == 2:
dim_fn(self.dimension, 'generateRuleBooks2s2')(self.metadata.ffi) dim_fn(self.dimension, 'generateRuleBooks2s2')(self.metadata.ffi)
else: if size == 3 :
dim_fn(self.dimension, 'generateRuleBooks3s2')(self.metadata.ffi) dim_fn(self.dimension, 'generateRuleBooks3s2')(self.metadata.ffi)
def __repr__(self):
return 'InputBatch<<' + repr(self.features) + repr(self.metadata) + \
repr(self.spatial_size) + '>>'
...@@ -10,6 +10,118 @@ from .utils import * ...@@ -10,6 +10,118 @@ from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor from .sparseConvNetTensor import SparseConvNetTensor
from .metadata import Metadata from .metadata import Metadata
class InputLayer(Module):
    """
    Entry point turning raw point data into a SparseConvNetTensor.

    Input is a tuple (coords, features, batch_size [optional]):
    * coords: 2d LongTensor, either N x dimension (single sample) or
      N x (dimension+1) where the final column is the batch index.
    * features: tensor of size N x n_feature_planes.
    * batch_size: optional lower bound on the number of samples in the
      output; normally inferred from the last coords column, but inference
      fails when some batch items contain no points at all.

    Duplicate coordinates are resolved according to mode:
      1 - overwrite with the last item at each location
      2 - keep the first item at each location
      3 - sum feature vectors sharing a location
      4 - average feature vectors sharing a location
    """

    def __init__(self, dimension, spatial_size, mode=3):
        Module.__init__(self)
        self.dimension = dimension
        self.spatial_size = toLongTensor(dimension, spatial_size)
        self.mode = mode

    def forward(self, input):
        coords, features = input[0], input[1]
        # Third element, when supplied, is an explicit batch size.
        batch_size = 0 if len(input) == 2 else input[2]
        out = SparseConvNetTensor(
            metadata=Metadata(self.dimension),
            spatial_size=self.spatial_size)
        out.features = InputLayerFunction.apply(
            self.dimension,
            out.metadata,
            self.spatial_size,
            coords,
            features,
            batch_size,
            self.mode)
        return out
class BLInputLayer(Module):
    """
    Batch/length-layout input layer producing a SparseConvNetTensor.

    Input is a tuple (coords, features):
    * coords: 3d LongTensor of size batch_size x length x dimension.
      Entries must be >= 0; a value of -1 marks an 'empty' slot.
    * features: 3d float Tensor of size batch_size x length x n_feature_planes.

    Duplicate handling is selected by mode:
      0 - assume each coords[i, :] is unique and never 'empty'
      1 - use the last item at each spatial location
      2 - keep the first item at each spatial location
      3 - sum feature vectors sharing a location
      4 - average feature vectors sharing a location
    """

    def __init__(self, dimension, spatial_size, mode=3):
        Module.__init__(self)
        self.dimension = dimension
        self.spatial_size = toLongTensor(dimension, spatial_size)
        self.mode = mode

    def forward(self, input):
        # input unpacks as (coords, input_features)
        coords, features = input[0], input[1]
        out = SparseConvNetTensor(
            metadata=Metadata(self.dimension),
            spatial_size=self.spatial_size)
        out.features = BLInputLayerFunction.apply(
            self.dimension,
            out.metadata,
            self.spatial_size,
            coords,
            features,
            self.mode)
        return out
class BLOutputLayer(Module):
    """
    Counterpart of BLInputLayer for 'autoencoder' style networks.

    Consumes a SparseConvNetTensor and emits a dense float Tensor of size
    batch_size x length x n_feature_planes, where batch_size and length
    were fixed by the matching BLInputLayer. Forward/backward behavior
    follows the mode chosen on that BLInputLayer (stored in the metadata).
    """

    def __init__(self, dimension):
        Module.__init__(self)
        self.dimension = dimension

    def forward(self, input):
        # Delegate straight to the autograd function; metadata carries the
        # rulebook built by the paired BLInputLayer.
        return BLOutputLayerFunction.apply(
            self.dimension,
            input.metadata,
            input.features)
class InputLayerFunction(Function): class InputLayerFunction(Function):
@staticmethod @staticmethod
...@@ -30,7 +142,7 @@ class InputLayerFunction(Function): ...@@ -30,7 +142,7 @@ class InputLayerFunction(Function):
metadata.ffi, metadata.ffi,
spatial_size, spatial_size,
coords, coords,
input_features, input_features.contiguous(),
output_features, output_features,
batch_size, batch_size,
mode, mode,
...@@ -52,29 +164,6 @@ class InputLayerFunction(Function): ...@@ -52,29 +164,6 @@ class InputLayerFunction(Function):
return None, None, None, None, grad_input, None, None return None, None, None, None, grad_input, None, None
class InputLayer(Module):
    """
    Converts a (coords, features, batch_size [optional]) tuple into a
    SparseConvNetTensor with freshly-built metadata.

    Parameters of __init__:
    * dimension: number of spatial dimensions.
    * spatial_size: int or sequence; broadcast via toLongTensor.
    * mode: duplicate-coordinate policy passed to the C backend
      (1=overwrite, 2=keep first, 3=sum, 4=mean).
    """
    def __init__(self, dimension, spatial_size, mode=3):
        Module.__init__(self)
        self.dimension = dimension
        self.spatial_size = toLongTensor(dimension, spatial_size)
        self.mode = mode
    # (coords, input_features, batch_size or None) = input
    def forward(self, input):
        output = SparseConvNetTensor(
            metadata=Metadata(
                self.dimension),
            spatial_size=self.spatial_size)
        output.features = InputLayerFunction.apply(
            self.dimension,
            output.metadata,
            self.spatial_size,
            input[0],
            input[1],
            # BUG FIX: was `len(input == 2)` — that compares the whole tuple
            # to the integer 2 (always False) and then calls len() on a bool,
            # raising TypeError on every 3-element input. The intended check
            # is on the tuple's length.
            0 if len(input) == 2 else input[2],
            self.mode
        )
        return output
class BLInputLayerFunction(Function): class BLInputLayerFunction(Function):
...@@ -94,7 +183,7 @@ class BLInputLayerFunction(Function): ...@@ -94,7 +183,7 @@ class BLInputLayerFunction(Function):
metadata.ffi, metadata.ffi,
spatial_size, spatial_size,
coords, coords,
input_features, input_features.contiguous(),
output_features, output_features,
mode, mode,
torch.cuda.IntTensor() if input_features.is_cuda else nullptr torch.cuda.IntTensor() if input_features.is_cuda else nullptr
...@@ -115,28 +204,7 @@ class BLInputLayerFunction(Function): ...@@ -115,28 +204,7 @@ class BLInputLayerFunction(Function):
return None, None, None, None, grad_input, None return None, None, None, None, grad_input, None
class BLInputLayer(Module):
    """
    Input layer for batch x length coordinate layout.

    Expects input as a (coords, input_features) pair and returns a
    SparseConvNetTensor whose features are produced by
    BLInputLayerFunction using this layer's mode and spatial size.
    """
    def __init__(self, dimension, spatial_size, mode=3):
        Module.__init__(self)
        self.dimension = dimension
        self.spatial_size = toLongTensor(dimension, spatial_size)
        self.mode = mode

    def forward(self, input):
        # input is the pair (coords, input_features)
        coords, features = input[0], input[1]
        result = SparseConvNetTensor(
            metadata=Metadata(self.dimension),
            spatial_size=self.spatial_size)
        result.features = BLInputLayerFunction.apply(
            self.dimension,
            result.metadata,
            self.spatial_size,
            coords,
            features,
            self.mode)
        return result
class BLOutputLayerFunction(Function): class BLOutputLayerFunction(Function):
...@@ -151,7 +219,7 @@ class BLOutputLayerFunction(Function): ...@@ -151,7 +219,7 @@ class BLOutputLayerFunction(Function):
ctx.dimension = dimension ctx.dimension = dimension
dim_typed_fn(dimension, input_features, 'BLOutputLayer_updateOutput')( dim_typed_fn(dimension, input_features, 'BLOutputLayer_updateOutput')(
metadata.ffi, metadata.ffi,
input_features, input_features.contiguous(),
output_features, output_features,
torch.cuda.IntTensor() if input_features.is_cuda else nullptr torch.cuda.IntTensor() if input_features.is_cuda else nullptr
) )
...@@ -169,17 +237,3 @@ class BLOutputLayerFunction(Function): ...@@ -169,17 +237,3 @@ class BLOutputLayerFunction(Function):
grad_output.contiguous().data, grad_output.contiguous().data,
torch.cuda.IntTensor() if grad_output.data.is_cuda else nullptr) torch.cuda.IntTensor() if grad_output.data.is_cuda else nullptr)
return None, None, grad_input return None, None, grad_input
class BLOutputLayer(Module):
    """
    Maps a SparseConvNetTensor back to the dense batch x length layout
    established by the corresponding BLInputLayer, by applying
    BLOutputLayerFunction to the tensor's metadata and features.
    """
    def __init__(self, dimension):
        Module.__init__(self)
        self.dimension = dimension

    def forward(self, input):
        # No state beyond `dimension`; everything else lives in the
        # metadata attached to the incoming tensor.
        return BLOutputLayerFunction.apply(
            self.dimension,
            input.metadata,
            input.features)
...@@ -101,17 +101,17 @@ class MaxPooling(Module): ...@@ -101,17 +101,17 @@ class MaxPooling(Module):
def __repr__(self): def __repr__(self):
s = 'MaxPooling' s = 'MaxPooling'
if self.pool_size.max() == self.pool_size.min() and\ if self.pool_size.max().item() == self.pool_size.min().item() and\
self.pool_stride.max() == self.pool_stride.min(): self.pool_stride.max().item() == self.pool_stride.min().item():
s = s + str(self.pool_size[0].item()) + \ s = s + str(self.pool_size[0].item()) + \
'/' + str(self.pool_stride[0].item()) '/' + str(self.pool_stride[0].item())
else: else:
s = s + '(' + str(self.pool_size[0].item()) s = s + '(' + str(self.pool_size[0].item())
for i in self.pool_size[1:]: for i in self.pool_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')/(' + str(self.pool_stride[0].item()) s = s + ')/(' + str(self.pool_stride[0].item())
for i in self.pool_stride[1:]: for i in self.pool_stride[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')' s = s + ')'
if self.nFeaturesToDrop > 0: if self.nFeaturesToDrop > 0:
......
...@@ -16,7 +16,7 @@ class SparseConvNetTensor(object): ...@@ -16,7 +16,7 @@ class SparseConvNetTensor(object):
self.metadata = metadata self.metadata = metadata
self.spatial_size = spatial_size self.spatial_size = spatial_size
def getSpatialLocations(self, spatial_size=None): def get_spatial_locations(self, spatial_size=None):
"Coordinates and batch index for the active spatial locations" "Coordinates and batch index for the active spatial locations"
if spatial_size is None: if spatial_size is None:
spatial_size = self.spatial_size spatial_size = self.spatial_size
...@@ -51,7 +51,10 @@ class SparseConvNetTensor(object): ...@@ -51,7 +51,10 @@ class SparseConvNetTensor(object):
def __repr__(self): def __repr__(self):
return 'SparseConvNetTensor<<' + \ return 'SparseConvNetTensor<<' + \
repr(self.features) + repr(self.metadata) + repr(self.spatial_size) + '>>' repr(self.features) + \
repr(self.get_spatial_locations() if self.metadata else None) + \
repr(self.spatial_size) + \
'>>'
def to_variable(self, requires_grad=False, volatile=False): def to_variable(self, requires_grad=False, volatile=False):
"Convert self.features to a variable for use with modern PyTorch interface." "Convert self.features to a variable for use with modern PyTorch interface."
......
...@@ -118,7 +118,7 @@ class SubmanifoldConvolution(Module): ...@@ -118,7 +118,7 @@ class SubmanifoldConvolution(Module):
else: else:
s = s + '(' + str(self.filter_size[0].item()) s = s + '(' + str(self.filter_size[0].item())
for i in self.filter_size[1:]: for i in self.filter_size[1:]:
s = s + ',' + str(i) s = s + ',' + str(i.item())
s = s + ')' s = s + ')'
return s return s
......
...@@ -19,7 +19,7 @@ if not os.path.exists('pickle/'): ...@@ -19,7 +19,7 @@ if not os.path.exists('pickle/'):
import process import process
def train(spatial_size, Scale, precomputeStride): def train(spatial_size, Scale, precomputeSize):
d = pickle.load(open('pickle/train.pickle', 'rb')) d = pickle.load(open('pickle/train.pickle', 'rb'))
print('Replicating training set 10 times (1 epoch = 10 iterations through the training set = 10x6588 training samples)') print('Replicating training set 10 times (1 epoch = 10 iterations through the training set = 10x6588 training samples)')
for i in range(9): for i in range(9):
...@@ -82,7 +82,7 @@ def train(spatial_size, Scale, precomputeStride): ...@@ -82,7 +82,7 @@ def train(spatial_size, Scale, precomputeStride):
# p[1]=math.floor(y1*j+y2*(1-j)) # p[1]=math.floor(y1*j+y2*(1-j))
# inp.setLocation(p,v,False) # inp.setLocation(p,v,False)
############################################################### ###############################################################
inp.precomputeMetadata(precomputeStride) inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1} return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1}
bd = torchnet.dataset.BatchDataset(d, 108, perm=perm, merge=merge) bd = torchnet.dataset.BatchDataset(d, 108, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd) tdi = scn.threadDatasetIterator(bd)
...@@ -93,7 +93,7 @@ def train(spatial_size, Scale, precomputeStride): ...@@ -93,7 +93,7 @@ def train(spatial_size, Scale, precomputeStride):
return iter return iter
def val(spatial_size, Scale, precomputeStride): def val(spatial_size, Scale, precomputeSize):
d = pickle.load(open('pickle/test.pickle', 'rb')) d = pickle.load(open('pickle/test.pickle', 'rb'))
d = torchnet.dataset.ListDataset(d) d = torchnet.dataset.ListDataset(d)
randperm = torch.randperm(len(d)) randperm = torch.randperm(len(d))
...@@ -117,7 +117,7 @@ def val(spatial_size, Scale, precomputeStride): ...@@ -117,7 +117,7 @@ def val(spatial_size, Scale, precomputeStride):
inp.metadata.ffi, inp.metadata.ffi,
inp.features, inp.features,
stroke) stroke)
inp.precomputeMetadata(precomputeStride) inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1} return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1}
bd = torchnet.dataset.BatchDataset(d, 183, perm=perm, merge=merge) bd = torchnet.dataset.BatchDataset(d, 183, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd) tdi = scn.threadDatasetIterator(bd)
......
...@@ -25,7 +25,7 @@ if not os.path.exists('pickle/'): ...@@ -25,7 +25,7 @@ if not os.path.exists('pickle/'):
os.system('python readPotFiles2.py') os.system('python readPotFiles2.py')
def train(spatial_size, Scale, precomputeStride): def train(spatial_size, Scale, precomputeSize):
d = pickle.load(open('pickle/train.pickle', 'rb')) d = pickle.load(open('pickle/train.pickle', 'rb'))
d = torchnet.dataset.ListDataset(d) d = torchnet.dataset.ListDataset(d)
randperm = torch.randperm(len(d)) randperm = torch.randperm(len(d))
...@@ -68,7 +68,7 @@ def train(spatial_size, Scale, precomputeStride): ...@@ -68,7 +68,7 @@ def train(spatial_size, Scale, precomputeStride):
# p[1]=math.floor(y1*j+y2*(1-j)) # p[1]=math.floor(y1*j+y2*(1-j))
# inp.setLocation(p,v,False) # inp.setLocation(p,v,False)
############################################################### ###############################################################
inp.precomputeMetadata(precomputeStride) inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target'])} return {'input': inp, 'target': torch.LongTensor(tbl['target'])}
bd = torchnet.dataset.BatchDataset(d, 100, perm=perm, merge=merge) bd = torchnet.dataset.BatchDataset(d, 100, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd) tdi = scn.threadDatasetIterator(bd)
...@@ -79,7 +79,7 @@ def train(spatial_size, Scale, precomputeStride): ...@@ -79,7 +79,7 @@ def train(spatial_size, Scale, precomputeStride):
return iter return iter
def val(spatial_size, Scale, precomputeStride): def val(spatial_size, Scale, precomputeSize):
d = pickle.load(open('pickle/test.pickle', 'rb')) d = pickle.load(open('pickle/test.pickle', 'rb'))
d = torchnet.dataset.ListDataset(d) d = torchnet.dataset.ListDataset(d)
randperm = torch.randperm(len(d)) randperm = torch.randperm(len(d))
...@@ -103,7 +103,7 @@ def val(spatial_size, Scale, precomputeStride): ...@@ -103,7 +103,7 @@ def val(spatial_size, Scale, precomputeStride):
inp.metadata.ffi, inp.metadata.ffi,
inp.features, inp.features,
stroke) stroke)
inp.precomputeMetadata(precomputeStride) inp.precomputeMetadata(precomputeSize)
return {'input': inp, 'target': torch.LongTensor(tbl['target'])} return {'input': inp, 'target': torch.LongTensor(tbl['target'])}
bd = torchnet.dataset.BatchDataset(d, 100, perm=perm, merge=merge) bd = torchnet.dataset.BatchDataset(d, 100, perm=perm, merge=merge)
tdi = scn.threadDatasetIterator(bd) tdi = scn.threadDatasetIterator(bd)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment