Commit d27491d4 authored by Benjamin Thomas Graham

group convolutions

parent 8422a6f5
......@@ -24,9 +24,9 @@ Higher dimensional input is more likely to be sparse because of the 'curse of di
Dimension|Name in 'torch.nn'|Use cases
:--:|:--:|:--:
1|TemporalConvolution| Text, audio
2|SpatialConvolution|Lines in 2D space, e.g. handwriting
3|VolumetricConvolution|Lines and surfaces in 3D space or (2+1)D space-time
1|Conv1d| Text, audio
2|Conv2d|Lines in 2D space, e.g. handwriting
3|Conv3d|Lines and surfaces in 3D space or (2+1)D space-time
4| - |Lines, etc., in (3+1)D space-time
We use the term 'submanifold' to refer to input data that is sparse because it has a lower effective dimension than the space in which it lives, for example a one-dimensional curve in 2+ dimensional space, or a two-dimensional surface in 3+ dimensional space.
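For intuition, here is a minimal sketch of a submanifold input, assuming the usual InputLayer convention of integer coordinates with a trailing batch-index column: a one-dimensional helix activates only ~100 of the 100x64x64 sites of a 3D grid, and a SubmanifoldConvolution only does work at those sites.
```python
import torch
import sparseconvnet as scn

t = torch.arange(100, dtype=torch.float32)
coords = torch.stack([t,
                      32 + 20 * torch.sin(t / 8),
                      32 + 20 * torch.cos(t / 8),
                      torch.zeros(100)], 1).long()  # (x, y, z, batch index)
features = torch.ones(100, 1)                       # one input feature plane

net = scn.Sequential(
    scn.InputLayer(3, torch.LongTensor([100, 64, 64])),
    scn.SubmanifoldConvolution(3, 1, 8, 3, False))  # 3D, 1->8 planes, 3^3 filter
y = net([coords, features])  # SparseConvNetTensor; output sites == input sites
```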
......@@ -137,7 +137,7 @@ apt-get install unrar
```
## License
SparseConvNet is Attribution-NonCommercial 4.0 International licensed, as found in the LICENSE file.
SparseConvNet is BSD licensed, as found in the LICENSE file.
## Links
1. [ICDAR 2013 Chinese Handwriting Recognition Competition 2013](http://www.nlpr.ia.ac.cn/events/CHRcompetition2013/competition/Home.html) First place in task 3, with test error of 2.61%. Human performance on the test set was 4.81%. [Report](http://www.nlpr.ia.ac.cn/events/CHRcompetition2013/competition/ICDAR%202013%20CHR%20competition.pdf)
......
......@@ -15,29 +15,26 @@ double cpu_Deconvolution_updateOutput(
auto _rules =
m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, weight.size(2)});
output_features.resize_({nActive, weight.size(1) * weight.size(3)});
if (bias.numel() and nActive)
output_features.copy_(bias);
else
output_features.zero_();
double flops = 0;
auto ip = weight.size(1);
auto op = weight.size(2);
for (Int i = 0; i < (Int)_rules.size(); i++) {
auto groups = weight.size(1);
auto ip = weight.size(2);
auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
int nRules = r.size() / 2;
Int nRules = r.size() / 2;
if (nRules) {
flops += nRules * ip * op;
// auto rt = torch::CPU(at_kINT).tensorFromBlob(&r[0], {nRules, 2});
// auto input_rows = input_features.index_select(0, rt.select(1, 1));
// auto w = weight.select(0, i);
// auto output_rows = at::mm(input_rows, w);
// output_features.index_add_(0, rt.select(1, 0), output_rows);
auto input_rows = rule_index_select<T>(input_features, nRules, &r[1]);
flops += nRules * ip * op * groups;
auto w = weight.select(0, i);
auto output_rows = at::mm(input_rows, w);
rule_index_add_<T>(output_features, output_rows, nRules, &r[0]);
auto input_rows =
rule_index_select<T>(input_features, nRules, &r[1], groups);
auto output_rows = at::matmul(input_rows, w);
rule_index_add_<T>(output_features, output_rows, nRules, &r[0], groups);
}
}
return flops;
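Read as PyTorch, the grouped product in this hunk is a batched matrix multiply over the group dimension. A sketch, assuming rule_index_select gathers the selected rows into shape (groups, nRules, ip) so that at::matmul batches per group:
```python
import torch

groups, nRules, ip, op = 2, 5, 3, 4
features = torch.randn(10, groups * ip)    # all active input sites
rows = torch.randint(0, 10, (nRules,))     # input half of one rulebook entry

x = features[rows].view(nRules, groups, ip).transpose(0, 1)  # (groups, nRules, ip)
w_i = torch.randn(groups, ip, op)          # weight for one filter offset
y = torch.matmul(x, w_i)                   # (groups, nRules, op)
out_rows = y.transpose(0, 1).reshape(nRules, groups * op)    # scattered by rule_index_add_
```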
......@@ -59,26 +56,22 @@ void cpu_Deconvolution_backward(
d_input_features.resize_as_(input_features);
d_input_features.zero_();
auto groups = weight.size(1);
if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); i++) {
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
int nRules = r.size() / 2;
Int nRules = r.size() / 2;
if (nRules) {
auto w = weight.select(0, i);
auto dw = d_weight.select(0, i);
// auto rt = torch::CPU(at_kINT).tensorFromBlob(&r[0], {nRules, 2});
// auto input_rows = input_features.index_select(0, rt.select(1, 1));
// auto d_output_rows = d_output_features.index_select(0, rt.select(1,
// 0));
// at::mm_out(dw, input_rows.t(), d_output_rows);
// auto d_input_rows = at::mm(d_output_rows, w.t());
// d_input_features.index_add_(0, rt.select(1, 1), d_input_rows);
auto input_rows = rule_index_select<T>(input_features, nRules, &r[1]);
auto d_output_rows = rule_index_select<T>(d_output_features, nRules, &r[0]);
at::mm_out(dw, input_rows.t(), d_output_rows);
auto d_input_rows = at::mm(d_output_rows, w.t());
rule_index_add_<T>(d_input_features, d_input_rows, nRules, &r[1]);
auto input_rows =
rule_index_select<T>(input_features, nRules, &r[1], groups);
auto d_output_rows =
rule_index_select<T>(d_output_features, nRules, &r[0], groups);
at::matmul_out(dw, input_rows.transpose(1, 2), d_output_rows);
auto d_input_rows = at::matmul(d_output_rows, w.transpose(1, 2));
rule_index_add_<T>(d_input_features, d_input_rows, nRules, &r[1], groups);
}
}
}
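The backward hunk computes, per group, Input^T x dOutput for dW and dOutput x W^T for dInput; a sketch with the same assumed shapes as the forward sketch:
```python
import torch

groups, nRules, ip, op = 2, 5, 3, 4
x   = torch.randn(groups, nRules, ip)   # gathered input rows
dy  = torch.randn(groups, nRules, op)   # gathered output gradients
w_i = torch.randn(groups, ip, op)       # weight for one filter offset

dw_i = torch.matmul(x.transpose(1, 2), dy)    # (groups, ip, op)
dx   = torch.matmul(dy, w_i.transpose(1, 2))  # (groups, nRules, ip), scattered back
```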
......@@ -12,13 +12,13 @@ template <typename T>
double dConvolution_forward2(T *inFeatures, T *outFeatures, T *w,
RuleBook _rules, Int input_nPlanes,
Int input_stride, Int output_nPlanes,
Int output_stride);
Int output_stride, Int nGroups);
template <typename T>
void dConvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
T *w, T *dw, RuleBook _rules, Int input_nPlanes,
Int input_stride, Int output_nPlanes,
Int output_stride);
Int output_stride, Int nGroups);
template <typename T, Int Dimension>
double cuda_Convolution_updateOutput(
......@@ -32,9 +32,10 @@ double cuda_Convolution_updateOutput(
auto _rules =
m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
Int nActiveOut = m.getNActive(outputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
output_features.resize_({nActiveOut, op});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
output_features.resize_({nActiveOut, op * nGroups});
if (nActiveOut) {
auto iF = input_features.data<T>();
......@@ -46,7 +47,8 @@ double cuda_Convolution_updateOutput(
else
output_features.zero_();
return dConvolution_forward2<T>(iF, oF, w, _rules, ip, ip, op, op);
return dConvolution_forward2<T>(iF, oF, w, _rules, ip, ip * nGroups, op,
op * nGroups, nGroups);
} else {
return 0;
}
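The extra stride arguments encode the grouped feature layout: each active site stores its nGroups*ip input values contiguously, so the kernels see ip planes per group with a row stride of ip*nGroups (likewise op and op*nGroups on the output side). A sketch of the addressing this implies (a hypothetical helper, not library code):
```python
def feature_value(buf, row, g, c, planes_per_group, nGroups):
    # plane c of group g at active site `row`, for a flat row-major buffer
    row_stride = planes_per_group * nGroups
    return buf[row * row_stride + g * planes_per_group + c]
```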
......@@ -67,9 +69,10 @@ void cuda_Convolution_backward(
m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
Int nActiveIn = m.getNActive(inputSize);
Int nActiveOut = m.getNActive(outputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
d_input_features.resize_({nActiveIn, ip});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
d_input_features.resize_({nActiveIn, ip * nGroups});
d_input_features.zero_();
if (nActiveOut) {
......@@ -79,7 +82,8 @@ void cuda_Convolution_backward(
auto w = weight.data<T>();
auto dw = d_weight.data<T>();
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip, op, op);
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
op, op * nGroups, nGroups);
if (d_bias.numel()) {
auto db = d_bias.data<T>();
......@@ -98,9 +102,10 @@ double cuda_SubmanifoldConvolution_updateOutput(
auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
Int nActive = m.getNActive(inputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
output_features.resize_({nActive, op});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
output_features.resize_({nActive, op * nGroups});
if (nActive) {
auto iF = input_features.data<T>();
......@@ -112,7 +117,8 @@ double cuda_SubmanifoldConvolution_updateOutput(
else
output_features.zero_();
return dConvolution_forward2<T>(iF, oF, w, _rules, ip, ip, op, op);
return dConvolution_forward2<T>(iF, oF, w, _rules, ip, ip * nGroups, op,
op * nGroups, nGroups);
} else {
return 0;
}
......@@ -130,9 +136,10 @@ void cuda_SubmanifoldConvolution_backward(
auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
Int nActive = m.getNActive(inputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
d_input_features.resize_({nActive, ip});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
d_input_features.resize_({nActive, ip * nGroups});
d_input_features.zero_();
if (nActive) {
......@@ -142,7 +149,8 @@ void cuda_SubmanifoldConvolution_backward(
auto w = weight.data<T>();
auto dw = d_weight.data<T>();
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip, op, op);
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
op, op * nGroups, nGroups);
if (d_bias.numel()) {
auto db = d_bias.data<T>();
......@@ -160,9 +168,10 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
Int nActive = m.getNActive(inputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
output_features.resize_({nActive, op});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
output_features.resize_({nActive, op * nGroups});
if (nActive) {
auto iF = input_features.data<T>();
......@@ -174,7 +183,8 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
else
output_features.zero_();
return dConvolution_forward2<T>(iF, oF, w, _rules, ip, ip, op, op);
return dConvolution_forward2<T>(iF, oF, w, _rules, ip, ip * nGroups, op,
op * nGroups, nGroups);
} else {
return 0;
}
......@@ -191,9 +201,10 @@ void cuda_PermutohedralSubmanifoldConvolution_backward(
auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
Int nActive = m.getNActive(inputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
d_input_features.resize_({nActive, ip});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
d_input_features.resize_({nActive, ip * nGroups});
d_input_features.zero_();
if (nActive) {
......@@ -203,7 +214,8 @@ void cuda_PermutohedralSubmanifoldConvolution_backward(
auto w = weight.data<T>();
auto dw = d_weight.data<T>();
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip, op, op);
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
op, op * nGroups, nGroups);
if (d_bias.numel()) {
auto db = d_bias.data<T>();
......@@ -225,9 +237,10 @@ double cuda_FullConvolution_updateOutput(
auto _rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
filterSize, filterStride, mOut);
Int nActiveOut = mOut.getNActive(outputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
output_features.resize_({nActiveOut, op});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
output_features.resize_({nActiveOut, op * nGroups});
if (nActiveOut) {
auto iF = input_features.data<T>();
......@@ -239,7 +252,8 @@ double cuda_FullConvolution_updateOutput(
else
output_features.zero_();
return dConvolution_forward2<T>(iF, oF, w, _rules, ip, ip, op, op);
return dConvolution_forward2<T>(iF, oF, w, _rules, ip, ip * nGroups, op,
op * nGroups, nGroups);
} else {
return 0;
}
......@@ -261,9 +275,10 @@ void cuda_FullConvolution_backward(
filterSize, filterStride, mOut);
Int nActiveIn = mIn.getNActive(inputSize);
Int nActiveOut = mOut.getNActive(outputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
d_input_features.resize_({nActiveIn, ip});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
d_input_features.resize_({nActiveIn, ip * nGroups});
d_input_features.zero_();
if (nActiveOut) {
......@@ -273,7 +288,8 @@ void cuda_FullConvolution_backward(
auto w = weight.data<T>();
auto dw = d_weight.data<T>();
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip, op, op);
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
op, op * nGroups, nGroups);
if (d_bias.numel()) {
auto db = d_bias.data<T>();
......@@ -293,9 +309,10 @@ double cuda_RandomizedStrideConvolution_updateOutput(
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
filterStride, true);
Int nActiveOut = m.getNActive(outputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
output_features.resize_({nActiveOut, op});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
output_features.resize_({nActiveOut, op * nGroups});
if (nActiveOut) {
auto iF = input_features.data<T>();
......@@ -307,7 +324,8 @@ double cuda_RandomizedStrideConvolution_updateOutput(
else
output_features.zero_();
return dConvolution_forward2<T>(iF, oF, w, _rules, ip, ip, op, op);
return dConvolution_forward2<T>(iF, oF, w, _rules, ip, ip * nGroups, op,
op * nGroups, nGroups);
} else {
return 0;
}
......@@ -328,9 +346,10 @@ void cuda_RandomizedStrideConvolution_backward(
filterStride, true);
Int nActiveIn = m.getNActive(inputSize);
Int nActiveOut = m.getNActive(outputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
d_input_features.resize_({nActiveIn, ip});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
d_input_features.resize_({nActiveIn, ip * nGroups});
d_input_features.zero_();
if (nActiveOut) {
......@@ -340,7 +359,8 @@ void cuda_RandomizedStrideConvolution_backward(
auto w = weight.data<T>();
auto dw = d_weight.data<T>();
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip, op, op);
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
op, op * nGroups, nGroups);
if (d_bias.numel()) {
auto db = d_bias.data<T>();
......
......@@ -49,6 +49,11 @@ void Convolution_bp_bias(T *d_oF, T *d_b, Int nPlanes, Int nActive) {
}
}
// input_nPlanes, output_nPlanes == planes per group
// weight = nGroups x input_nPlanes x output_nPlanes
//        = nGroups x M*K x N*K
template <typename T, Int K, Int V>
__global__ void
dConvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules,
......@@ -57,7 +62,7 @@ dConvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules,
// nHot must be a multiple of K!!
// Input x Weight -> Output
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,1) Volkov-blocks
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,nGroups) Volkov-blocks
// K is a multiple of V,
// nHot x KM -> nHot x KN - parallel over N,nHot - loop over M
......@@ -65,8 +70,10 @@ dConvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int M = input_nPlanes / K;
// N = gridDim.y == output_nPlanes/K
Int n = blockIdx.y;
outFeatures += n * K;
w += n * K;
Int g = blockIdx.z;
inFeatures += g * input_nPlanes;
outFeatures += n * K + g * output_nPlanes;
w += n * K + g * input_nPlanes * output_nPlanes;
TACC O[V];
__shared__ T W[K][K];
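Each block now carries a group index in blockIdx.z alongside the output K-block index in blockIdx.y; the pointer bumps above amount to the following base offsets (a sketch, assuming the weight for one filter offset is stored as nGroups contiguous ip x op row-major matrices):
```python
def block_bases(n, g, K, ip, op):
    in_base  = g * ip               # offset within a row of ip*nGroups values
    out_base = n * K + g * op       # offset within a row of op*nGroups values
    w_base   = n * K + g * ip * op  # column block n of group g's weight matrix
    return in_base, out_base, w_base
```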
......@@ -125,7 +132,7 @@ dConvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int nHot, Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride) {
// Input x Weight -> Output
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,1) Volkov-blocks
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,nGroups) Volkov-blocks
// K is a multiple of V,
// nHot x KM -> nHot x KN - parallel over N,nHot - loop over M
......@@ -133,8 +140,10 @@ dConvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int M = input_nPlanes / K;
// N = gridDim.y == output_nPlanes/K
Int n = blockIdx.y;
outFeatures += n * K;
w += n * K;
Int g = blockIdx.z;
inFeatures += g * input_nPlanes;
outFeatures += n * K + g * output_nPlanes;
w += n * K + g * input_nPlanes * output_nPlanes;
TACC O[V];
__shared__ T W[K][K];
......@@ -199,13 +208,13 @@ dConvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int o = (nHot / K) * K; \
if (o >= K) \
dConvolution_KMxKN_forwardA< \
T, K, V><<<dim3(std::min(o / K, (Int)512), output_nPlanes / K), \
T, K, V><<<dim3(std::min(o / K, (Int)512), output_nPlanes / K, nGroups), \
dim3(K, K / V)>>>(inFeatures, outFeatures, w, rules, o, \
input_nPlanes, input_stride, \
output_nPlanes, output_stride); \
if (nHot > o) \
dConvolution_KMxKN_forwardB< \
T, K, V><<<dim3(1, output_nPlanes / K), dim3(K, K / V)>>>( \
T, K, V><<<dim3(1, output_nPlanes / K, nGroups), dim3(K, K / V)>>>( \
inFeatures, outFeatures, w, rules + 2 * o, nHot - o, \
input_nPlanes, input_stride, output_nPlanes, output_stride); \
return; \
......@@ -215,7 +224,7 @@ dConvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
template <typename T>
void dConvolution_forward(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int nHot, Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride) {
Int output_nPlanes, Int output_stride, Int nGroups) {
FOO(T, 64, 16)
FOO(T, 32, 8)
FOO(T, 16, 4)
......@@ -226,7 +235,7 @@ template <>
void dConvolution_forward<double>(double *inFeatures, double *outFeatures,
double *w, Int *rules, Int nHot,
Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride) {
Int output_nPlanes, Int output_stride, Int nGroups) {
FOO(double, 32, 8)
FOO(double, 16, 4)
FOO(double, 8, 2)
......@@ -236,7 +245,7 @@ void dConvolution_forward<double>(double *inFeatures, double *outFeatures,
// dOutput x W^T -> dInput and
// Input^T x dOutput -> dW
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,1)
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,nGroups)
template <typename T, Int K, Int V>
__global__ void
dConvolution_KMxKN_backward_dW_A(T *inFeatures, T *dInFeatures, T *dOutFeatures,
......@@ -246,10 +255,12 @@ dConvolution_KMxKN_backward_dW_A(T *inFeatures, T *dInFeatures, T *dOutFeatures,
// M = gridDim.y == input_nPlanes / K
Int N = output_nPlanes / K;
Int m = blockIdx.y;
inFeatures += m * K;
dInFeatures += m * K;
w += m * K * output_nPlanes;
dw += m * K * output_nPlanes;
Int g = blockIdx.z;
inFeatures += m * K + g * input_nPlanes;
dInFeatures += m * K + g * input_nPlanes;
dOutFeatures += g * output_nPlanes;
w += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
dw += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
TACC dI[V];
TACC dW[V];
......@@ -313,7 +324,7 @@ dConvolution_KMxKN_backward_dW_A(T *inFeatures, T *dInFeatures, T *dOutFeatures,
// dOutput x W^T -> dInput and
// Input^T x dOutput -> dW
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,1)
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,nGroups)
template <typename T, Int K, Int V>
__global__ void
dConvolution_KMxKN_backward_dW_B(T *inFeatures, T *dInFeatures, T *dOutFeatures,
......@@ -323,10 +334,12 @@ dConvolution_KMxKN_backward_dW_B(T *inFeatures, T *dInFeatures, T *dOutFeatures,
// M = gridDim.y == input_nPlanes / K
Int N = output_nPlanes / K;
Int m = blockIdx.y;
inFeatures += m * K;
dInFeatures += m * K;
w += m * K * output_nPlanes;
dw += m * K * output_nPlanes;
Int g = blockIdx.z;
inFeatures += m * K + g * input_nPlanes;
dInFeatures += m * K + g * input_nPlanes;
dOutFeatures += g * output_nPlanes;
w += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
dw += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
TACC dI[V];
TACC dW[V];
......@@ -402,13 +415,13 @@ dConvolution_KMxKN_backward_dW_B(T *inFeatures, T *dInFeatures, T *dOutFeatures,
Int o = (nHot / K) * K; \
if (o >= K) \
dConvolution_KMxKN_backward_dW_A< \
T, K, V><<<dim3(std::min(o / K, (Int)512), input_nPlanes / K), \
T, K, V><<<dim3(std::min(o / K, (Int)512), input_nPlanes / K, nGroups), \
dim3(K, K / V)>>>( \
inFeatures, dInFeatures, dOutFeatures, w, dw, rules, o, \
input_nPlanes, input_stride, output_nPlanes, output_stride); \
if (nHot > o) \
dConvolution_KMxKN_backward_dW_B< \
T, K, V><<<dim3(1, input_nPlanes / K), dim3(K, K / V)>>>( \
T, K, V><<<dim3(1, input_nPlanes / K, nGroups), dim3(K, K / V)>>>( \
inFeatures, dInFeatures, dOutFeatures, w, dw, rules + 2 * o, \
nHot - o, input_nPlanes, input_stride, output_nPlanes, \
output_stride); \
......@@ -420,7 +433,7 @@ template <typename T>
void dConvolution_backward_dW(T *inFeatures, T *dInFeatures, T *dOutFeatures,
T *w, T *dw, Int *rules, Int nHot,
Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride) {
Int output_nPlanes, Int output_stride, Int nGroups) {
FOO(T, 32, 8)
FOO(T, 16, 4)
FOO(T, 8, 2)
......@@ -434,7 +447,7 @@ dConvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int nHot, Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride) {
// Input x Weight -> Output
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,1) Volkov-blocks
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,nGroups) Volkov-blocks
// K is a multiple of V,
// nHot x input_nplanes<=KM -> nHot x output_nPlanes<=KN
......@@ -443,8 +456,10 @@ dConvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int M = (input_nPlanes + K - 1) / K;
// N = gridDim.y ~ output_nPlanes/K
Int n = blockIdx.y;
outFeatures += n * K;
w += n * K;
Int g = blockIdx.z;
inFeatures += g * input_nPlanes;
outFeatures += n * K + g * output_nPlanes;
w += n * K + g * input_nPlanes * output_nPlanes;
Int KO = min(K, output_nPlanes - K * n);
TACC O[V];
......@@ -507,7 +522,7 @@ dConvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules,
// dOutput x W^T -> dInput and
// Input^T x dOutput -> dW
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,1)
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,nGroups)
template <typename T, Int K, Int V>
__global__ void
dConvolution_KMxKN_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
......@@ -517,10 +532,12 @@ dConvolution_KMxKN_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
// M = gridDim.y == input_nPlanes / K
Int N = (output_nPlanes + K - 1) / K;
Int m = blockIdx.y;
inFeatures += m * K;
dInFeatures += m * K;
w += m * K * output_nPlanes;
dw += m * K * output_nPlanes;
Int g = blockIdx.z;
inFeatures += m * K + g * input_nPlanes;
dInFeatures += m * K + g * input_nPlanes;
dOutFeatures += g * output_nPlanes;
w += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
dw += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
Int KI = min(K, input_nPlanes - K * m);
TACC dI[V];
......@@ -602,8 +619,8 @@ template <typename T>
double dConvolution_forward2(T *inFeatures, T *outFeatures, T *w,
RuleBook _rules, Int input_nPlanes,
Int input_stride, Int output_nPlanes,
Int output_stride) {
Int c = input_nPlanes * output_nPlanes;
Int output_stride, Int nGroups) {
Int c = input_nPlanes * output_nPlanes * nGroups;
double flops = 0;
if (input_nPlanes % 8 != 0 or output_nPlanes % 8 != 0) {
const int K = 16;
......@@ -611,14 +628,14 @@ double dConvolution_forward2(T *inFeatures, T *outFeatures, T *w,
RULEBOOKITERATOR(
(dConvolution_KMxKN_forward2<
T, K,
V><<<dim3(128, (output_nPlanes + K - 1) / K), dim3(K, K / V)>>>(
V><<<dim3(128, (output_nPlanes + K - 1) / K, nGroups), dim3(K, K / V)>>>(
inFeatures, outFeatures, w, rbB, nHotB, input_nPlanes, input_stride,
output_nPlanes, output_stride));
, w += c; flops += nHotB * c;)
} else {
RULEBOOKITERATOR(dConvolution_forward(inFeatures, outFeatures, w, rbB,
nHotB, input_nPlanes, input_stride,
output_nPlanes, output_stride);
output_nPlanes, output_stride, nGroups);
, w += c; flops += nHotB * c;)
}
return flops;
......@@ -628,15 +645,15 @@ template <typename T>
void dConvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
T *w, T *dw, RuleBook _rules, Int input_nPlanes,
Int input_stride, Int output_nPlanes,
Int output_stride) {
Int c = input_nPlanes * output_nPlanes;
Int output_stride, Int nGroups) {
Int c = input_nPlanes * output_nPlanes * nGroups;
if (input_nPlanes % 8 != 0 or output_nPlanes % 8 != 0) {
const int K = 16;
const int V = 4;
RULEBOOKITERATOR(
(dConvolution_KMxKN_backward_dW2<
T, K,
V><<<dim3(128, (input_nPlanes + K - 1) / K), dim3(K, K / V)>>>(
V><<<dim3(128, (input_nPlanes + K - 1) / K, nGroups), dim3(K, K / V)>>>(
inFeatures, dInFeatures, dOutFeatures, w, dw, rbB, nHotB,
input_nPlanes, input_stride, output_nPlanes, output_stride));
, w += c; dw += c;)
......@@ -644,7 +661,7 @@ void dConvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
RULEBOOKITERATOR(dConvolution_backward_dW(inFeatures, dInFeatures,
dOutFeatures, w, dw, rbB, nHotB,
input_nPlanes, input_stride,
output_nPlanes, output_stride);
output_nPlanes, output_stride, nGroups);
, w += c; dw += c;)
}
}
......
......@@ -8,13 +8,14 @@ template <typename T>
double dDeconvolution_forward2(T *inFeatures, T *outFeatures, T *w,
RuleBook _rules, Int input_nPlanes,
Int input_stride, Int output_nPlanes,
Int output_stride);
Int output_stride, Int nGroups);
template <typename T>
void dDeconvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
T *w, T *dw, RuleBook _rules,
Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride);
Int output_nPlanes, Int output_stride,
Int nGroups);
template <typename T, Int Dimension>
double cuda_Deconvolution_updateOutput(
......@@ -28,9 +29,10 @@ double cuda_Deconvolution_updateOutput(
auto _rules =
m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
Int nActiveOut = m.getNActive(outputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
output_features.resize_({nActiveOut, op});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
output_features.resize_({nActiveOut, op * nGroups});
if (nActiveOut) {
auto iF = input_features.data<T>();
......@@ -42,7 +44,8 @@ double cuda_Deconvolution_updateOutput(
else
output_features.zero_();
return dDeconvolution_forward2<T>(iF, oF, w, _rules, ip, ip, op, op);
return dDeconvolution_forward2<T>(iF, oF, w, _rules, ip, ip * nGroups, op,
op * nGroups, nGroups);
} else {
return 0;
}
......@@ -63,9 +66,10 @@ void cuda_Deconvolution_backward(
m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
Int nActiveIn = m.getNActive(inputSize);
Int nActiveOut = m.getNActive(outputSize);
Int ip = weight.size(1);
Int op = weight.size(2);
d_input_features.resize_({nActiveIn, ip});
Int nGroups = weight.size(1);
Int ip = weight.size(2);
Int op = weight.size(3);
d_input_features.resize_({nActiveIn, ip * nGroups});
d_input_features.zero_();
if (nActiveOut) {
......@@ -75,7 +79,8 @@ void cuda_Deconvolution_backward(
auto w = weight.data<T>();
auto dw = d_weight.data<T>();
dDeconvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip, op, op);
dDeconvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip,
ip * nGroups, op, op * nGroups, nGroups);
if (d_bias.numel()) {
auto db = d_bias.data<T>();
Convolution_bp_bias(doF, db, op, nActiveOut);
......
......@@ -14,7 +14,7 @@ dDeconvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules,
// nHot must be a multiple of K!!
// Input x Weight -> Output
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,1) Volkov-blocks
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,nGroups) Volkov-blocks
// K is a multiple of V,
// nHot x KM -> nHot x KN - parallel over N,nHot - loop over M
......@@ -22,8 +22,10 @@ dDeconvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int M = input_nPlanes / K;
// N = gridDim.y == output_nPlanes/K
Int n = blockIdx.y;
outFeatures += n * K;
w += n * K;
Int g = blockIdx.z;
inFeatures += g * input_nPlanes;
outFeatures += n * K + g * output_nPlanes;
w += n * K + g * input_nPlanes * output_nPlanes;
TACC O[V];
__shared__ T W[K][K];
......@@ -82,7 +84,7 @@ dDeconvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int nHot, Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride) {
// Input x Weight -> Output
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,1) Volkov-blocks
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,nGroups) Volkov-blocks
// K is a multiple of V,
// nHot x KM -> nHot x KN - parallel over N,nHot - loop over M
......@@ -90,8 +92,10 @@ dDeconvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int M = input_nPlanes / K;
// N = gridDim.y == output_nPlanes/K
Int n = blockIdx.y;
outFeatures += n * K;
w += n * K;
Int g = blockIdx.z;
inFeatures += g * input_nPlanes;
outFeatures += n * K + g * output_nPlanes;
w += n * K + g * input_nPlanes * output_nPlanes;
TACC O[V];
__shared__ T W[K][K];
......@@ -156,13 +160,13 @@ dDeconvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int o = (nHot / K) * K; \
if (o >= K) \
dDeconvolution_KMxKN_forwardA< \
T, K, V><<<dim3(std::min(o / K, (Int)512), output_nPlanes / K), \
T, K, V><<<dim3(std::min(o / K, (Int)512), output_nPlanes / K, nGroups), \
dim3(K, K / V)>>>(inFeatures, outFeatures, w, rules, o, \
input_nPlanes, input_stride, \
output_nPlanes, output_stride); \
if (nHot > o) \
dDeconvolution_KMxKN_forwardB< \
T, K, V><<<dim3(1, output_nPlanes / K), dim3(K, K / V)>>>( \
T, K, V><<<dim3(1, output_nPlanes / K, nGroups), dim3(K, K / V)>>>( \
inFeatures, outFeatures, w, rules + 2 * o, nHot - o, \
input_nPlanes, input_stride, output_nPlanes, output_stride); \
return; \
......@@ -172,7 +176,7 @@ dDeconvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
template <typename T>
void dDeconvolution_forward(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int nHot, Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride) {
Int output_nPlanes, Int output_stride, Int nGroups) {
FOO(T, 64, 16)
FOO(T, 32, 8)
FOO(T, 16, 4)
......@@ -183,7 +187,7 @@ template <>
void dDeconvolution_forward<double>(double *inFeatures, double *outFeatures,
double *w, Int *rules, Int nHot,
Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride) {
Int output_nPlanes, Int output_stride, Int nGroups) {
FOO(double, 32, 8)
FOO(double, 16, 4)
FOO(double, 8, 2)
......@@ -193,7 +197,7 @@ void dDeconvolution_forward<double>(double *inFeatures, double *outFeatures,
// dOutput x W^T -> dInput and
// Input^T x dOutput -> dW
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,1)
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,nGroups)
template <typename T, Int K, Int V>
__global__ void dDeconvolution_KMxKN_backward_dW_A(
T *inFeatures, T *dInFeatures, T *dOutFeatures, T *w, T *dw, Int *rules,
......@@ -202,10 +206,12 @@ __global__ void dDeconvolution_KMxKN_backward_dW_A(
// M = gridDim.y == input_nPlanes / K
Int N = output_nPlanes / K;
Int m = blockIdx.y;
inFeatures += m * K;
dInFeatures += m * K;
w += m * K * output_nPlanes;
dw += m * K * output_nPlanes;
Int g = blockIdx.z;
inFeatures += m * K + g * input_nPlanes;
dInFeatures += m * K + g * input_nPlanes;
dOutFeatures += g * output_nPlanes;
w += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
dw += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
TACC dI[V];
TACC dW[V];
......@@ -269,7 +275,7 @@ __global__ void dDeconvolution_KMxKN_backward_dW_A(
// dOutput x W^T -> dInput and
// Input^T x dOutput -> dW
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,1)
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,nGroups)
template <typename T, Int K, Int V>
__global__ void dDeconvolution_KMxKN_backward_dW_B(
T *inFeatures, T *dInFeatures, T *dOutFeatures, T *w, T *dw, Int *rules,
......@@ -278,10 +284,12 @@ __global__ void dDeconvolution_KMxKN_backward_dW_B(
// M = gridDim.y == input_nPlanes / K
Int N = output_nPlanes / K;
Int m = blockIdx.y;
inFeatures += m * K;
dInFeatures += m * K;
w += m * K * output_nPlanes;
dw += m * K * output_nPlanes;
Int g = blockIdx.z;
inFeatures += m * K + g * input_nPlanes;
dInFeatures += m * K + g * input_nPlanes;
dOutFeatures += g * output_nPlanes;
w += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
dw += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
TACC dI[V];
TACC dW[V];
......@@ -357,13 +365,13 @@ __global__ void dDeconvolution_KMxKN_backward_dW_B(
Int o = (nHot / K) * K; \
if (o >= K) \
dDeconvolution_KMxKN_backward_dW_A< \
T, K, V><<<dim3(std::min(o / K, (Int)512), input_nPlanes / K), \
T, K, V><<<dim3(std::min(o / K, (Int)512), input_nPlanes / K, nGroups), \
dim3(K, K / V)>>>( \
inFeatures, dInFeatures, dOutFeatures, w, dw, rules, o, \
input_nPlanes, input_stride, output_nPlanes, output_stride); \
if (nHot > o) \
dDeconvolution_KMxKN_backward_dW_B< \
T, K, V><<<dim3(1, input_nPlanes / K), dim3(K, K / V)>>>( \
T, K, V><<<dim3(1, input_nPlanes / K, nGroups), dim3(K, K / V)>>>( \
inFeatures, dInFeatures, dOutFeatures, w, dw, rules + 2 * o, \
nHot - o, input_nPlanes, input_stride, output_nPlanes, \
output_stride); \
......@@ -375,7 +383,7 @@ template <typename T>
void dDeconvolution_backward_dW(T *inFeatures, T *dInFeatures, T *dOutFeatures,
T *w, T *dw, Int *rules, Int nHot,
Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride) {
Int output_nPlanes, Int output_stride, Int nGroups) {
FOO(T, 32, 8)
FOO(T, 16, 4)
FOO(T, 8, 2)
......@@ -389,7 +397,7 @@ dDeconvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int nHot, Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride) {
// Input x Weight -> Output
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,1) Volkov-blocks
// blockDim=(K,K/V,1), gridDim=(nBlocks,N,nGroups) Volkov-blocks
// K is a multiple of V,
// nHot x input_nplanes<=KM -> nHot x output_nPlanes<=KN
......@@ -398,8 +406,10 @@ dDeconvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules,
Int M = (input_nPlanes + K - 1) / K;
// N = gridDim.y ~ output_nPlanes/K
Int n = blockIdx.y;
outFeatures += n * K;
w += n * K;
Int g = blockIdx.z;
inFeatures += g * input_nPlanes;
outFeatures += n * K + g * output_nPlanes;
w += n * K + g * input_nPlanes * output_nPlanes;
Int KO = min(K, output_nPlanes - K * n);
TACC O[V];
......@@ -462,7 +472,7 @@ dDeconvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules,
// dOutput x W^T -> dInput and
// Input^T x dOutput -> dW
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,1)
// blockDim=(K,K/V,1), gridDim=(nBlocks,M,nGroups)
template <typename T, Int K, Int V>
__global__ void
dDeconvolution_KMxKN_backward_dW2(T *inFeatures, T *dInFeatures,
......@@ -472,10 +482,12 @@ dDeconvolution_KMxKN_backward_dW2(T *inFeatures, T *dInFeatures,
// M = gridDim.y == input_nPlanes / K
Int N = (output_nPlanes + K - 1) / K;
Int m = blockIdx.y;
inFeatures += m * K;
dInFeatures += m * K;
w += m * K * output_nPlanes;
dw += m * K * output_nPlanes;
Int g = blockIdx.z;
inFeatures += m * K + g * input_nPlanes;
dInFeatures += m * K + g * input_nPlanes;
dOutFeatures += g * output_nPlanes;
w += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
dw += m * K * output_nPlanes + g * input_nPlanes * output_nPlanes;
Int KI = min(K, input_nPlanes - K * m);
TACC dI[V];
......@@ -557,8 +569,8 @@ template <typename T>
double dDeconvolution_forward2(T *inFeatures, T *outFeatures, T *w,
RuleBook _rules, Int input_nPlanes,
Int input_stride, Int output_nPlanes,
Int output_stride) {
Int c = input_nPlanes * output_nPlanes;
Int output_stride, Int nGroups) {
Int c = input_nPlanes * output_nPlanes * nGroups;
double flops = 0;
if (input_nPlanes % 8 != 0 or output_nPlanes % 8 != 0) {
const int K = 16;
......@@ -566,14 +578,14 @@ double dDeconvolution_forward2(T *inFeatures, T *outFeatures, T *w,
RULEBOOKITERATOR(
(dDeconvolution_KMxKN_forward2<
T, K,
V><<<dim3(128, (output_nPlanes + K - 1) / K), dim3(K, K / V)>>>(
V><<<dim3(128, (output_nPlanes + K - 1) / K, nGroups), dim3(K, K / V)>>>(
inFeatures, outFeatures, w, rbB, nHotB, input_nPlanes, input_stride,
output_nPlanes, output_stride));
, w += c; flops += nHotB * c;)
} else {
RULEBOOKITERATOR(dDeconvolution_forward(inFeatures, outFeatures, w, rbB,
nHotB, input_nPlanes, input_stride,
output_nPlanes, output_stride);
output_nPlanes, output_stride, nGroups);
, w += c; flops += nHotB * c;)
}
return flops;
......@@ -583,15 +595,15 @@ template <typename T>
void dDeconvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
T *w, T *dw, RuleBook _rules,
Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride) {
Int c = input_nPlanes * output_nPlanes;
Int output_nPlanes, Int output_stride, Int nGroups) {
Int c = input_nPlanes * output_nPlanes * nGroups;
if (input_nPlanes % 8 != 0 or output_nPlanes % 8 != 0) {
const int K = 16;
const int V = 4;
RULEBOOKITERATOR(
(dDeconvolution_KMxKN_backward_dW2<
T, K,
V><<<dim3(128, (input_nPlanes + K - 1) / K), dim3(K, K / V)>>>(
V><<<dim3(128, (input_nPlanes + K - 1) / K, nGroups), dim3(K, K / V)>>>(
inFeatures, dInFeatures, dOutFeatures, w, dw, rbB, nHotB,
input_nPlanes, input_stride, output_nPlanes, output_stride));
, w += c; dw += c;)
......@@ -599,9 +611,8 @@ void dDeconvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
RULEBOOKITERATOR(dDeconvolution_backward_dW(inFeatures, dInFeatures,
dOutFeatures, w, dw, rbB, nHotB,
input_nPlanes, input_stride,
output_nPlanes, output_stride);
output_nPlanes, output_stride, nGroups);
, w += c; dw += c;)
}
}
#undef TACC
\ No newline at end of file
......@@ -55,21 +55,21 @@ template void Convolution_bp_bias<float>(float *d_oF, float *d_b,
Int nPlanes, Int nActive);
template double dConvolution_forward2<float>(
float *inFeatures, float *outFeatures, float *w, RuleBook _rules,
Int input_nPlanes, Int input_stride, Int output_nPlanes, Int output_stride);
Int input_nPlanes, Int input_stride, Int output_nPlanes, Int output_stride, Int nGroups);
template void dConvolution_backward_dW2<float>(
float *inFeatures, float *dInFeatures, float *dOutFeatures, float *w,
float *dw, RuleBook _rules, Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride);
Int output_nPlanes, Int output_stride, Int nGroups);
template double dDeconvolution_forward2<float>(
float *inFeatures, float *outFeatures, float *w, RuleBook _rules,
Int input_nPlanes, Int input_stride, Int output_nPlanes, Int output_stride);
Int input_nPlanes, Int input_stride, Int output_nPlanes, Int output_stride, Int nGroups);
template void dDeconvolution_backward_dW2<float>(
float *inFeatures, float *dInFeatures, float *dOutFeatures, float *w,
float *dw, RuleBook _rules, Int input_nPlanes, Int input_stride,
Int output_nPlanes, Int output_stride);
Int output_nPlanes, Int output_stride, Int nGroups);
template void InputLayer_fp<float>(float *input_features,
float *output_features, Int nRows,
......
......@@ -25,10 +25,10 @@ from .networkInNetwork import NetworkInNetwork
from .permutohedralSubmanifoldConvolution import PermutohedralSubmanifoldConvolution, permutohedral_basis
from .randomizedStrideConvolution import RandomizedStrideConvolution
from .randomizedStrideMaxPooling import RandomizedStrideMaxPooling
from .sequential import Sequential
from .sequential import Sequential, CheckpointedSequential
from .sparseConvNetTensor import SparseConvNetTensor
from .sparseToDense import SparseToDense
from .sparsify import Sparsify
from .sparsify import Sparsify, SparsifyFCS
from .spectral_norm import spectral_norm
from .submanifoldConvolution import SubmanifoldConvolution, ValidConvolution
from .tables import *
......
......@@ -41,7 +41,9 @@ class BatchNormalization(Module):
        self.bias = Parameter(torch.Tensor(nPlanes).fill_(0))
    def forward(self, input):
        assert input.features.nelement() == 0 or input.features.size(1) == self.nPlanes, (self.nPlanes, input.features.shape)
        if input.features.nelement() == 0:
            return input
        assert input.features.size(1) == self.nPlanes, (self.nPlanes, input.features.shape)
        output = SparseConvNetTensor()
        output.metadata = input.metadata
        output.spatial_size = input.spatial_size
......
......@@ -11,17 +11,18 @@ from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor
class Convolution(Module):
    def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias):
    def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias, groups=1):
        Module.__init__(self)
        self.dimension = dimension
        self.groups = groups
        self.nIn = nIn
        self.nOut = nOut
        self.filter_size = toLongTensor(dimension, filter_size)
        self.filter_volume = self.filter_size.prod().item()
        self.filter_stride = toLongTensor(dimension, filter_stride)
        std = (2.0 / nIn / self.filter_volume)**0.5
        std = (2.0 * groups / nIn / self.filter_volume)**0.5
        self.weight = Parameter(torch.Tensor(
            self.filter_volume, nIn, nOut).normal_(
            self.filter_volume, groups, nIn//groups, nOut//groups).normal_(
            0,
            std))
        if bias:
......
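A hedged usage sketch of the new argument: 64->64 planes split into 4 groups, so the weight parameter has shape (filter_volume, groups, nIn//groups, nOut//groups). Deconvolution, SubmanifoldConvolution and RandomizedStrideConvolution below accept the same groups argument.
```python
import sparseconvnet as scn

conv = scn.Convolution(3, 64, 64, filter_size=3, filter_stride=2,
                       bias=False, groups=4)
print(conv.weight.shape)  # torch.Size([27, 4, 16, 16])
```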
......@@ -11,17 +11,18 @@ from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor
class Deconvolution(Module):
    def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias):
    def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias, groups=1):
        Module.__init__(self)
        self.dimension = dimension
        self.groups = groups
        self.nIn = nIn
        self.nOut = nOut
        self.filter_size = toLongTensor(dimension, filter_size)
        self.filter_volume = self.filter_size.prod().item()
        self.filter_stride = toLongTensor(dimension, filter_stride)
        std = (2.0 / nIn / self.filter_volume)**0.5
        std = (2.0 * groups / nIn / self.filter_volume)**0.5
        self.weight = Parameter(torch.Tensor(
            self.filter_volume, nIn, nOut).normal_(
            self.filter_volume, groups, nIn//groups, nOut//groups).normal_(
            0,
            std))
        if bias:
......
......@@ -12,17 +12,18 @@ from .sparseConvNetTensor import SparseConvNetTensor
from .metadata import Metadata
class FullConvolution(Module):
    def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias):
    def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias, groups=1):
        Module.__init__(self)
        self.dimension = dimension
        self.groups = groups
        self.nIn = nIn
        self.nOut = nOut
        self.filter_size = toLongTensor(dimension, filter_size)
        self.filter_volume = self.filter_size.prod().item()
        self.filter_stride = toLongTensor(dimension, filter_stride)
        std = (2.0 / nIn / self.filter_volume)**0.5
        std = (2.0 * groups / nIn / self.filter_volume)**0.5
        self.weight = Parameter(torch.Tensor(
            self.filter_volume, nIn, nOut).normal_(
            self.filter_volume, groups, nIn//groups, nOut//groups).normal_(
            0,
            std))
        if bias:
......@@ -68,16 +69,16 @@ class FullConvolution(Module):
    def __repr__(self):
        s = 'FullConvolution ' + str(self.nIn) + '->' + str(self.nOut) + ' C'
        if self.filter_size.max() == self.filter_size.min() and\
                self.filter_stride.max() == self.filter_stride.min():
            s = s + str(self.filter_size[0]) + '/' + str(self.filter_stride[0])
        if self.filter_size.max().item() == self.filter_size.min().item() and\
                self.filter_stride.max().item() == self.filter_stride.min().item():
            s = s + str(self.filter_size[0].item()) + '/' + str(self.filter_stride[0].item())
        else:
            s = s + '(' + str(self.filter_size[0])
            s = s + '(' + str(self.filter_size[0].item())
            for i in self.filter_size[1:]:
                s = s + ',' + str(i)
            s = s + ')/(' + str(self.filter_stride[0])
                s = s + ',' + str(i.item())
            s = s + ')/(' + str(self.filter_stride[0].item())
            for i in self.filter_stride[1:]:
                s = s + ',' + str(i)
                s = s + ',' + str(i.item())
            s = s + ')'
        return s
......
......@@ -310,3 +310,42 @@ def FullyConvolutionalNet(dimension, reps, nPlanes, residual_blocks=False, downs
        return m
    m = U(nPlanes)
    return m
def FullConvolutionalNetIntegratedLinear(dimension, reps, nPlanes, nClasses=-1, residual_blocks=False, downsample=[2,2], leakiness=0):
    if nClasses==-1:
        nClasses=nPlanes[0]
    def l(x):
        return x+nPlanes
    def foo(m,np):
        for _ in range(reps):
            if residual_blocks: #ResNet style blocks
                m.add(scn.ConcatTable()
                      .add(scn.Identity())
                      .add(scn.Sequential()
                           .add(scn.BatchNormLeakyReLU(np,leakiness=leakiness))
                           .add(scn.SubmanifoldConvolution(dimension, np, np, 3, False))
                           .add(scn.BatchNormLeakyReLU(np,leakiness=leakiness))
                           .add(scn.SubmanifoldConvolution(dimension, np, np, 3, False)))
                ).add(scn.AddTable())
            else: #VGG style blocks
                m.add(scn.BatchNormLeakyReLU(np,leakiness=leakiness)
                ).add(scn.SubmanifoldConvolution(dimension, np, np, 3, False))
    def bar(m,nPlanes,bias):
        m.add(scn.BatchNormLeakyReLU(nPlanes,leakiness=leakiness))
        m.add(scn.NetworkInNetwork(nPlanes,nClasses,bias)) #accumulate softmax input, only one set of biases
    def baz(nPlanes):
        m=scn.Sequential()
        foo(m,nPlanes[0])
        if len(nPlanes)==1:
            bar(m,nPlanes[0],True)
        else:
            a=scn.Sequential()
            bar(a,nPlanes[0],False)
            b=scn.Sequential(
                scn.BatchNormLeakyReLU(nPlanes[0],leakiness=leakiness),
                scn.Convolution(dimension, nPlanes[0], nPlanes[1], downsample[0], downsample[1], False),
                baz(nPlanes[1:]),
                scn.UnPooling(dimension, downsample[0], downsample[1]))
            m.add(scn.ConcatTable().add(a).add(b))
            m.add(scn.AddTable())
        return m
    return baz(nPlanes)
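A hedged construction example for the function above (argument values are illustrative, and it is assumed the function is exported alongside the other architectures):
```python
import sparseconvnet as scn

net = scn.FullConvolutionalNetIntegratedLinear(
    dimension=3, reps=2, nPlanes=[32, 64, 96], nClasses=20)
```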
......@@ -22,17 +22,18 @@ class RandomizedStrideConvolution(Module):
    sparseconvnet.Deconvolution module in a UNet style network, to restore
    the input sparsity pattern.
    """
    def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias):
    def __init__(self, dimension, nIn, nOut, filter_size, filter_stride, bias, groups=1):
        Module.__init__(self)
        self.dimension = dimension
        self.groups = groups
        self.nIn = nIn
        self.nOut = nOut
        self.filter_size = toLongTensor(dimension, filter_size)
        self.filter_volume = self.filter_size.prod().item()
        self.filter_stride = toLongTensor(dimension, filter_stride)
        std = (2.0 / nIn / self.filter_volume)**0.5
        std = (2.0 * groups / nIn / self.filter_volume)**0.5
        self.weight = Parameter(torch.Tensor(
            self.filter_volume, nIn, nOut).normal_(
            self.filter_volume, groups, nIn//groups, nOut//groups).normal_(
            0,
            std))
        if bias:
......
......@@ -4,7 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch
import torch, torch.utils.checkpoint
class Sequential(torch.nn.Sequential):
    def input_spatial_size(self, out_size):
......@@ -15,6 +15,15 @@ class Sequential(torch.nn.Sequential):
    def add(self, module):
        self._modules[str(len(self._modules))] = module
        return self
    def insert(self, index, module):
        for i in range(len(self._modules), index, -1):
            self._modules[str(i)] = self._modules[str(i - 1)]
        self._modules[str(index)] = module
    def append(self, module):
        self._modules[str(len(self._modules))] = module
        return self
    def reweight(self, input):
        for module in self._modules.values():
......@@ -44,3 +53,9 @@ class Sequential(torch.nn.Sequential):
            else:
                input = module(input)
        return input
class CheckpointedSequential(Sequential):
    def forward(self, x):
        def run(x):
            return Sequential.forward(self,x)
        return torch.utils.checkpoint.checkpoint(run, x)
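A sketch of the new class: CheckpointedSequential recomputes its body during backward via torch.utils.checkpoint instead of storing intermediate activations (the module choices here are illustrative only):
```python
import sparseconvnet as scn

block = scn.CheckpointedSequential(
    scn.SubmanifoldConvolution(3, 32, 32, 3, False),
    scn.BatchNormReLU(32),
    scn.SubmanifoldConvolution(3, 32, 32, 3, False))
# used like any scn.Sequential; its activations are recomputed in backward
```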
......@@ -80,13 +80,16 @@ class Sparsify(Module):
            self.net = Sequential(NetworkInNetwork(nIn,1,True),Sigmoid())
        else:
            self.net = NetworkInNetwork(nIn,1,True)
        self.threshold=0.5 if activation else 0
    def forward(self,input):
        if input.features.numel():
            output = SparseConvNetTensor()
            output.spatial_size = input.spatial_size
            output.metadata = Metadata(self.dimension)
            output.mask = self.net(input).features.view(-1)
            active = output.mask>(0.5 if self.activation else 0)
            if self.threshold<0:
                print(output.mask.mean(),output.mask.std())
            active = output.mask>self.threshold
            output.features=input.features[active]
            active=active.cpu()
            input.metadata.sparsifyMetadata(
......
......@@ -4,8 +4,6 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# 'SubmanifoldConvolution == SubmanifoldConvolution'
import sparseconvnet
import sparseconvnet.SCN
from torch.autograd import Function
......@@ -14,16 +12,17 @@ from .utils import *
from .sparseConvNetTensor import SparseConvNetTensor
class SubmanifoldConvolution(Module):
    def __init__(self, dimension, nIn, nOut, filter_size, bias):
    def __init__(self, dimension, nIn, nOut, filter_size, bias, groups=1):
        Module.__init__(self)
        self.dimension = dimension
        self.groups = groups
        self.nIn = nIn
        self.nOut = nOut
        self.filter_size = toLongTensor(dimension, filter_size)
        self.filter_volume = self.filter_size.prod().item()
        std = (2.0 / nIn / self.filter_volume)**0.5
        std = (2.0 * groups / nIn / self.filter_volume)**0.5
        self.weight = Parameter(torch.Tensor(
            self.filter_volume, nIn, nOut
            self.filter_volume, groups, nIn//groups, nOut//groups
        ).normal_(0, std))
        if bias:
            self.bias = Parameter(torch.Tensor(nOut).zero_())
......
......@@ -132,7 +132,7 @@ def prepare_BLInput(l,f):
    for i, (ll, ff) in enumerate(zip(l,f)):
        L[i,:ll.size(0),:].copy_(ll)
        F[i,:ff.size(0),:].copy_(ff)
    return (L,F)
    return [L,F]
def checkpoint_restore(model,exp_name,name2,use_cuda=True,epoch=0):
    if use_cuda:
......@@ -155,8 +155,10 @@ def checkpoint_restore(model,exp_name,name2,use_cuda=True,epoch=0):
def is_power2(num):
    return num != 0 and ((num & (num - 1)) == 0)
def has_only_one_nonzero_digit(num): #https://oeis.org/A037124
    return num != 0 and (num/10**math.floor(math.log(num,10))).is_integer()
def checkpoint_save(model,exp_name,name2,epoch, use_cuda=True):
    f=exp_name+'-%09d-'%epoch+name2+'.pth'
    model.cpu()
......@@ -170,24 +172,128 @@ def checkpoint_save(model,exp_name,name2,epoch, use_cuda=True):
    if not is_power2(epoch):
        os.remove(f)
def random_rotation(dimension=3):
    return torch.qr(torch.randn(dimension,dimension))[0]
class LayerNormLeakyReLU(torch.nn.Module):
    def __init__(self,num_features,leakiness):
        torch.nn.Module.__init__(self)
        self.leakiness=leakiness
        self.in1d=torch.nn.LayerNorm(num_features)
    def forward(self,x):
        if x.features.numel():
            x.features=self.in1d(x.features)
            x.features=torch.nn.functional.leaky_relu(x.features,self.leakiness,inplace=True)
        return x
def random_rotation(dimension=3,allow_mirror=False):
    r=torch.qr(torch.randn(dimension,dimension))[0]
    f=torch.randint(2,(dimension,))
    if f.sum()%2==0 and not allow_mirror:
        f=1-f
    return r*(2*f-1).float()
def squareroot_rotation(a):
    import scipy.spatial
    b=scipy.spatial.transform.Slerp(
        [0,1],
        scipy.spatial.transform.Rotation.from_dcm(torch.stack([torch.eye(3),a])))([0.5]).as_dcm()
    return torch.from_numpy(b).float()[0]
def voxelize_pointcloud(xyz,rgb,average=True,accumulate=False):
    if xyz.numel()==0:
        return xyz, rgb
    if average or accumulate:
        xyz,inv,counts=np.unique(xyz.numpy(),axis=0,return_inverse=True,return_counts=True)
        xyz=torch.from_numpy(xyz)
        inv=torch.from_numpy(inv)
        rgb_out=torch.zeros(xyz.size(0),rgb.size(1),dtype=torch.float32)
        rgb_out.index_add_(0,inv,rgb)
        if average:
            rgb=rgb_out/torch.from_numpy(counts[:,None]).float()
        else:
            rgb=rgb_out
        return xyz, rgb
    else:
        xyz,idxs=np.unique(xyz,axis=0,return_index=True)
        xyz=torch.from_numpy(xyz)
        rgb=rgb[idxs]
        return xyz, rgb
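A usage sketch (voxel_size is a hypothetical scale factor, not part of the library): quantize coordinates, then average the colours of points falling in the same voxel:
```python
xyz_v, rgb_v = voxelize_pointcloud((xyz / voxel_size).floor(), rgb, average=True)
```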
class checkpointFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, run_function, x_features, x_metadata, x_spatial_size):
        ctx.run_function = run_function
        ctx.save_for_backward(x_features, x_spatial_size)
        ctx.x_metadata=x_metadata
        with torch.no_grad():
            y = run_function(
                scn.SparseConvNetTensor
                (x_features, x_metadata, x_spatial_size))
        return y.features
    @staticmethod
    def backward(ctx, grad_y_features):
        x_features, x_spatial_size = ctx.saved_tensors
        x_features = x_features.detach()
        x_features.requires_grad = True
        with torch.enable_grad():
            y = ctx.run_function(
                scn.SparseConvNetTensor
                (x_features, ctx.x_metadata, x_spatial_size))
        torch.autograd.backward(y.features, grad_y_features,retain_graph=False)
        return None, x_features.grad, None, None
def checkpoint101(run_function, x, down=1):
    f=checkpointFunction.apply(run_function, x.features, x.metadata, x.spatial_size)
    s=x.spatial_size//down
    return scn.SparseConvNetTensor(f, x.metadata, s)
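A sketch of use for checkpoint101: wrap a block that divides the spatial size by `down`, so only the block's input features are stored for backward (`block` here stands for any scn module chain; an assumption for illustration):
```python
y = checkpoint101(block, x, down=2)   # x, y: SparseConvNetTensor
```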
def matplotlib_cubes(ax, positions,colors):
    from mpl_toolkits.mplot3d import Axes3D
    from mpl_toolkits.mplot3d.art3d import Poly3DCollection
    """
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(15,15))
    ax = fig.gca(projection='3d')
    ...
    plt.show()
    """
    try:
        positions=positions.numpy()
        colors=colors.numpy()
        X = np.array([[[0, 1, 0], [0, 0, 0], [1, 0, 0], [1, 1, 0]],
                      [[0, 0, 0], [0, 0, 1], [1, 0, 1], [1, 0, 0]],
                      [[1, 0, 1], [1, 0, 0], [1, 1, 0], [1, 1, 1]],
                      [[0, 0, 1], [0, 0, 0], [0, 1, 0], [0, 1, 1]],
                      [[0, 1, 0], [0, 1, 1], [1, 1, 1], [1, 1, 0]],
                      [[0, 1, 1], [0, 0, 1], [1, 0, 1], [1, 1, 1]]]).astype(np.float32)[None]-0.5
        X=X+positions[:,None,None,:]
        X.resize(X.shape[0]*6,4,3)
        m=positions.min(0)
        M=positions.max(0)+1
        ax.set_xlim([m[0],M[0]])
        ax.set_ylim([m[1],M[1]])
        ax.set_zlim([m[2],M[2]])
        ax.add_collection3d(Poly3DCollection(X,
            facecolors=np.repeat(colors,6, axis=0)))
    except:
        print('matplotlibcubes fail!?!')
        pass
    ax.set_axis_off()
def matplotlib_planes(ax, positions,colors):
    from mpl_toolkits.mplot3d import Axes3D
    from mpl_toolkits.mplot3d.art3d import Poly3DCollection
    """
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(15,15))
    ax = fig.gca(projection='3d')
    ...
    plt.show()
    """
    try:
        positions=positions.numpy()
        colors=colors.numpy()
        X = np.array([[[0, -0.5, 0.5], [0, -0.5, -0.5], [0, 0.5, -0.5], [0, 0.5, 0.5]]]).astype(np.float32)[None]
        X=X+positions[:,None,None,:]
        X.resize(X.shape[0]*1,4,3)
        m=positions.min(0)
        M=positions.max(0)+1
        ax.set_xlim([m[0],M[0]])
        ax.set_ylim([m[1],M[1]])
        ax.set_zlim([m[2],M[2]])
        ax.add_collection3d(Poly3DCollection(X,
            facecolors=np.repeat(colors,1, axis=0)))
    except:
        pass
    ax.set_axis_off()
def visdom_scatter(vis, xyz, rgb, win='3d', markersize=3):
    vis.scatter(
        xyz,
        opts={'markersize': markersize,'markercolor': rgb},
        win=win)
def voxelize_pointcloud(xyz,rgb):
    xyz,inv,counts=np.unique(xyz.long().numpy(),axis=0,return_inverse=True,return_counts=True)
    xyz=torch.from_numpy(xyz)
    inv=torch.from_numpy(inv)
    rgb_out=torch.zeros(xyz.size(0),rgb.size(1),dtype=torch.float32)
    rgb_out.index_add_(0,inv,rgb)
    return xyz, rgb_out/torch.from_numpy(counts[:,None]).float()