Commit 2082f213 (unverified), authored by Ben Graham, committed by GitHub
Browse files

Merge pull request #118 from facebookresearch/references

Use references where possible
Parents: 1171aae3, d8c8a060
...@@ -8,12 +8,12 @@ ...@@ -8,12 +8,12 @@
template <typename T> template <typename T>
void ActivePooling_ForwardPass(T *input_features, T *output_features, void ActivePooling_ForwardPass(T *input_features, T *output_features,
Int batchSize, Int maxActive, Int nPlanes, Int batchSize, Int maxActive, Int nPlanes,
RuleBook &rules, bool average) { const RuleBook &rules, bool average) {
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
for (outSite = 0; outSite < batchSize; outSite++) { for (outSite = 0; outSite < batchSize; outSite++) {
T *out = &output_features[outSite * nPlanes]; T *out = &output_features[outSite * nPlanes];
Int *r = &rules[0][outSite * (maxActive + 1)]; const Int *r = &rules[0][outSite * (maxActive + 1)];
Int nActive = *r++; Int nActive = *r++;
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
while (nActive-- > 0) { while (nActive-- > 0) {
...@@ -26,12 +26,12 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features, ...@@ -26,12 +26,12 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features,
template <typename T> template <typename T>
void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features, void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int batchSize, Int maxActive, Int nPlanes, Int batchSize, Int maxActive, Int nPlanes,
RuleBook &rules, bool average) { const RuleBook &rules, bool average) {
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
for (outSite = 0; outSite < batchSize; outSite++) { for (outSite = 0; outSite < batchSize; outSite++) {
T *out = &d_output_features[outSite * nPlanes]; T *out = &d_output_features[outSite * nPlanes];
Int *r = &rules[0][outSite * (maxActive + 1)]; const Int *r = &rules[0][outSite * (maxActive + 1)];
Int nActive = *r++; Int nActive = *r++;
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
while (nActive-- > 0) { while (nActive-- > 0) {
...@@ -44,12 +44,12 @@ void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features, ...@@ -44,12 +44,12 @@ void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_ActivePooling_updateOutput( void cpu_ActivePooling_updateOutput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m, /*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, bool average) { /*float*/ at::Tensor &output_features, bool average) {
Int nPlanes = input_features.size(1); Int nPlanes = input_features.size(1);
auto _rules = m.getActivePoolingRuleBook(inputSize); const auto &_rules = m.getActivePoolingRuleBook(inputSize);
Int batchSize = _rules[1][0]; Int batchSize = _rules[1][0];
Int maxActive = _rules[1][1]; Int maxActive = _rules[1][1];
output_features.resize_({batchSize, nPlanes}); output_features.resize_({batchSize, nPlanes});
...@@ -62,13 +62,13 @@ void cpu_ActivePooling_updateOutput( ...@@ -62,13 +62,13 @@ void cpu_ActivePooling_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_ActivePooling_updateGradInput( void cpu_ActivePooling_updateGradInput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m, /*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, bool average) { /*float*/ at::Tensor &d_output_features, bool average) {
Int nPlanes = input_features.size(1); Int nPlanes = input_features.size(1);
auto _rules = m.getActivePoolingRuleBook(inputSize); const auto &_rules = m.getActivePoolingRuleBook(inputSize);
Int batchSize = _rules[1][0]; Int batchSize = _rules[1][0];
Int maxActive = _rules[1][1]; Int maxActive = _rules[1][1];
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
......
...@@ -69,9 +69,9 @@ void AffineReluTrivialConvolution_BackwardPass( ...@@ -69,9 +69,9 @@ void AffineReluTrivialConvolution_BackwardPass(
template <typename T> template <typename T>
double cpu_AffineReluTrivialConvolution_updateOutput( double cpu_AffineReluTrivialConvolution_updateOutput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features, /*float*/ at::Tensor &input_features, /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor affineWeight, /*float*/ at::Tensor &affineWeight,
/*float*/ at::Tensor affineBias, /*float*/ at::Tensor convWeight) { /*float*/ at::Tensor &affineBias, /*float*/ at::Tensor &convWeight) {
output_features.resize_({input_features.size(0), convWeight.size(1)}); output_features.resize_({input_features.size(0), convWeight.size(1)});
AffineReluTrivialConvolution_ForwardPass( AffineReluTrivialConvolution_ForwardPass(
input_features.data<T>(), convWeight.size(0), input_features.stride(0), input_features.data<T>(), convWeight.size(0), input_features.stride(0),
...@@ -84,11 +84,12 @@ double cpu_AffineReluTrivialConvolution_updateOutput( ...@@ -84,11 +84,12 @@ double cpu_AffineReluTrivialConvolution_updateOutput(
template <typename T> template <typename T>
void cpu_AffineReluTrivialConvolution_backward( void cpu_AffineReluTrivialConvolution_backward(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor affineWeight, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_affineWeight, /*float*/ at::Tensor affineBias, /*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &affineWeight,
/*float*/ at::Tensor d_affineBias, /*float*/ at::Tensor &d_affineWeight, /*float*/ at::Tensor &affineBias,
/*float*/ at::Tensor convWeight, /*float*/ at::Tensor d_convWeight, /*float*/ at::Tensor &d_affineBias,
/*float*/ at::Tensor &convWeight, /*float*/ at::Tensor &d_convWeight,
bool additiveGrad) { bool additiveGrad) {
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
template <typename T> template <typename T>
void AveragePooling_ForwardPass(T *input_features, T *output_features, void AveragePooling_ForwardPass(T *input_features, T *output_features,
Int nPlanes, Int input_stride, Int nPlanes, Int input_stride,
Int output_stride, Int *rules, Int nHot, Int output_stride, const Int *rules, Int nHot,
Int filterVolume) { Int filterVolume) {
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
...@@ -21,7 +21,7 @@ void AveragePooling_ForwardPass(T *input_features, T *output_features, ...@@ -21,7 +21,7 @@ void AveragePooling_ForwardPass(T *input_features, T *output_features,
template <typename T> template <typename T>
void AveragePooling_BackwardPass(T *d_input_features, T *d_output_features, void AveragePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int nPlanes, Int input_stride, Int nPlanes, Int input_stride,
Int output_stride, Int *rules, Int nHot, Int output_stride, const Int *rules, Int nHot,
Int filterVolume) { Int filterVolume) {
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
...@@ -36,14 +36,14 @@ void AveragePooling_BackwardPass(T *d_input_features, T *d_output_features, ...@@ -36,14 +36,14 @@ void AveragePooling_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_AveragePooling_updateOutput( void cpu_AveragePooling_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor poolSize, /*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m, /*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, long nFeaturesToDrop) { /*float*/ at::Tensor &output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop; Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules = const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true); m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
Int nActive = m.getNActive(outputSize); Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop}); output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
...@@ -52,7 +52,7 @@ void cpu_AveragePooling_updateOutput( ...@@ -52,7 +52,7 @@ void cpu_AveragePooling_updateOutput(
auto iF = input_features.data<T>() + nFeaturesToDrop; auto iF = input_features.data<T>() + nFeaturesToDrop;
auto oF = output_features.data<T>(); auto oF = output_features.data<T>();
for (auto &r : _rules) { for (const auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
AveragePooling_ForwardPass<T>(iF, oF, nPlanes, input_features.stride(0), AveragePooling_ForwardPass<T>(iF, oF, nPlanes, input_features.stride(0),
output_features.stride(0), &r[0], nHot, output_features.stride(0), &r[0], nHot,
...@@ -61,15 +61,15 @@ void cpu_AveragePooling_updateOutput( ...@@ -61,15 +61,15 @@ void cpu_AveragePooling_updateOutput(
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_AveragePooling_updateGradInput( void cpu_AveragePooling_updateGradInput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor poolSize, /*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m, /*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, long nFeaturesToDrop) { /*float*/ at::Tensor &d_output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop; Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules = const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true); m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
...@@ -77,7 +77,7 @@ void cpu_AveragePooling_updateGradInput( ...@@ -77,7 +77,7 @@ void cpu_AveragePooling_updateGradInput(
auto diF = d_input_features.data<T>() + nFeaturesToDrop; auto diF = d_input_features.data<T>() + nFeaturesToDrop;
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data<T>();
for (auto &r : _rules) { for (const auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
AveragePooling_BackwardPass<T>(diF, doF, nPlanes, input_features.stride(0), AveragePooling_BackwardPass<T>(diF, doF, nPlanes, input_features.stride(0),
d_output_features.stride(0), &r[0], nHot, d_output_features.stride(0), &r[0], nHot,
...@@ -86,8 +86,8 @@ void cpu_AveragePooling_updateGradInput( ...@@ -86,8 +86,8 @@ void cpu_AveragePooling_updateGradInput(
} }
template <typename T> template <typename T>
void cpu_CopyFeaturesHelper_updateOutput(at::Tensor rules, at::Tensor context, void cpu_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context,
at::Tensor Context) { at::Tensor &Context) {
Int nHot = rules.size(0) / 2; Int nHot = rules.size(0) / 2;
Int nPlanes = context.size(1); Int nPlanes = context.size(1);
auto iF = context.data<T>(); auto iF = context.data<T>();
...@@ -102,9 +102,9 @@ void cpu_CopyFeaturesHelper_updateOutput(at::Tensor rules, at::Tensor context, ...@@ -102,9 +102,9 @@ void cpu_CopyFeaturesHelper_updateOutput(at::Tensor rules, at::Tensor context,
} }
} }
template <typename T> template <typename T>
void cpu_CopyFeaturesHelper_updateGradInput(at::Tensor rules, void cpu_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
at::Tensor dcontext, at::Tensor &dcontext,
at::Tensor dContext) { at::Tensor &dContext) {
Int nHot = rules.size(0) / 2; Int nHot = rules.size(0) / 2;
Int nPlanes = dcontext.size(1); Int nPlanes = dcontext.size(1);
auto iF = dcontext.data<T>(); auto iF = dcontext.data<T>();
......
...@@ -108,11 +108,11 @@ void BatchNormalization_BackwardPass(T *input_features, T *d_input_features, ...@@ -108,11 +108,11 @@ void BatchNormalization_BackwardPass(T *input_features, T *d_input_features,
template <typename T> template <typename T>
void cpu_BatchNormalization_updateOutput( void cpu_BatchNormalization_updateOutput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features, /*float*/ at::Tensor &input_features, /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor saveMean, /*float*/ at::Tensor &saveMean,
/*float*/ at::Tensor saveInvStd, /*float*/ at::Tensor runningMean, /*float*/ at::Tensor &saveInvStd, /*float*/ at::Tensor &runningMean,
/*float*/ at::Tensor runningVar, /*float*/ at::Tensor &runningVar,
/*float*/ at::Tensor weight, /*float*/ at::Tensor bias, T eps, T momentum, /*float*/ at::Tensor &weight, /*float*/ at::Tensor &bias, T eps, T momentum,
bool train, T leakiness) { bool train, T leakiness) {
output_features.resize_as_(input_features); output_features.resize_as_(input_features);
if (input_features.ndimension() == 2) { if (input_features.ndimension() == 2) {
...@@ -131,13 +131,14 @@ void cpu_BatchNormalization_updateOutput( ...@@ -131,13 +131,14 @@ void cpu_BatchNormalization_updateOutput(
template <typename T> template <typename T>
void cpu_BatchNormalization_backward( void cpu_BatchNormalization_backward(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor saveMean, /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor saveInvStd, /*float*/ at::Tensor runningMean, /*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &saveMean,
/*float*/ at::Tensor runningVar, /*float*/ at::Tensor &saveInvStd, /*float*/ at::Tensor &runningMean,
/*float*/ at::Tensor weight, /*float*/ at::Tensor bias, /*float*/ at::Tensor &runningVar,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias, T leakiness) { /*float*/ at::Tensor &weight, /*float*/ at::Tensor &bias,
/*float*/ at::Tensor &d_weight, /*float*/ at::Tensor &d_bias, T leakiness) {
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
if (input_features.ndimension() == 2) { if (input_features.ndimension() == 2) {
...@@ -155,4 +156,3 @@ void cpu_BatchNormalization_backward( ...@@ -155,4 +156,3 @@ void cpu_BatchNormalization_backward(
leakiness); leakiness);
} }
} }
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
template <typename T> template <typename T>
void cpu_BatchwiseMultiplicativeDropout_updateOutput( void cpu_BatchwiseMultiplicativeDropout_updateOutput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features, /*float*/ at::Tensor &input_features, /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor noise, T alpha) { /*float*/ at::Tensor &noise, T alpha) {
output_features.resize_as_(input_features); output_features.resize_as_(input_features);
auto nActive = input_features.size(0); auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1); auto nPlanes = input_features.size(1);
...@@ -21,8 +21,9 @@ void cpu_BatchwiseMultiplicativeDropout_updateOutput( ...@@ -21,8 +21,9 @@ void cpu_BatchwiseMultiplicativeDropout_updateOutput(
} }
template <typename T> template <typename T>
void cpu_BatchwiseMultiplicativeDropout_updateGradInput( void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor noise, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &noise,
T alpha) { T alpha) {
d_input_features.resize_as_(d_output_features); d_input_features.resize_as_(d_output_features);
auto nActive = input_features.size(0); auto nActive = input_features.size(0);
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
// rows x groups x planes -> groups x rows x planes // rows x groups x planes -> groups x rows x planes
template <typename T> template <typename T>
at::Tensor rule_index_select(at::Tensor src, Int nRules, Int *rules, at::Tensor rule_index_select(at::Tensor &src, Int nRules, const Int *rules,
Int groups) { Int groups) {
auto planes = src.size(1) / groups; auto planes = src.size(1) / groups;
auto target = at::empty({groups, nRules, planes}, src.options()); auto target = at::empty({groups, nRules, planes}, src.options());
auto s_ptr = src.data<T>(); auto s_ptr = src.data<T>();
...@@ -26,8 +26,8 @@ at::Tensor rule_index_select(at::Tensor src, Int nRules, Int *rules, ...@@ -26,8 +26,8 @@ at::Tensor rule_index_select(at::Tensor src, Int nRules, Int *rules,
// groups x rows x planes -> rows x groups x planes // groups x rows x planes -> rows x groups x planes
template <typename T> template <typename T>
void rule_index_add_(at::Tensor target, at::Tensor src, Int nRules, Int *rules, void rule_index_add_(at::Tensor &target, at::Tensor &src, Int nRules,
Int groups) { const Int *rules, Int groups) {
auto planes = target.size(1) / groups; auto planes = target.size(1) / groups;
auto s_ptr = src.data<T>(); auto s_ptr = src.data<T>();
auto t_ptr = target.data<T>(); auto t_ptr = target.data<T>();
...@@ -44,13 +44,13 @@ void rule_index_add_(at::Tensor target, at::Tensor src, Int nRules, Int *rules, ...@@ -44,13 +44,13 @@ void rule_index_add_(at::Tensor target, at::Tensor src, Int nRules, Int *rules,
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cpu_Convolution_updateOutput( double cpu_Convolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor weight, /*float*/ at::Tensor &output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor bias) { /*float*/ at::Tensor &bias) {
auto _rules = const auto &_rules =
m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true); m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
Int nActive = m.getNActive(outputSize); Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)}); output_features.resize_({nActive, weight.size(1) * weight.size(3)});
...@@ -64,7 +64,7 @@ double cpu_Convolution_updateOutput( ...@@ -64,7 +64,7 @@ double cpu_Convolution_updateOutput(
auto ip = weight.size(2); auto ip = weight.size(2);
auto op = weight.size(3); auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
flops += nRules * ip * op * groups; flops += nRules * ip * op * groups;
...@@ -80,15 +80,15 @@ double cpu_Convolution_updateOutput( ...@@ -80,15 +80,15 @@ double cpu_Convolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_Convolution_backward( void cpu_Convolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight, /*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias) { /*float*/ at::Tensor &d_weight, /*float*/ at::Tensor &d_bias) {
auto _rules = const auto &_rules =
m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true); m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
Int nActive = m.getNActive(inputSize); Int nActive = m.getNActive(inputSize);
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
...@@ -98,7 +98,7 @@ void cpu_Convolution_backward( ...@@ -98,7 +98,7 @@ void cpu_Convolution_backward(
if (nActive and d_bias.numel()) if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false); at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
auto w = weight.select(0, i); auto w = weight.select(0, i);
...@@ -116,13 +116,13 @@ void cpu_Convolution_backward( ...@@ -116,13 +116,13 @@ void cpu_Convolution_backward(
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cpu_SubmanifoldConvolution_updateOutput( double cpu_SubmanifoldConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor filterSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &filterSize,
Metadata<Dimension> &m, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor weight, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor bias) { /*float*/ at::Tensor &bias) {
auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true); const auto &_rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
Int nActive = m.getNActive(inputSize); Int nActive = m.getNActive(inputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)}); output_features.resize_({nActive, weight.size(1) * weight.size(3)});
if (bias.numel() and nActive) if (bias.numel() and nActive)
...@@ -135,7 +135,7 @@ double cpu_SubmanifoldConvolution_updateOutput( ...@@ -135,7 +135,7 @@ double cpu_SubmanifoldConvolution_updateOutput(
auto ip = weight.size(2); auto ip = weight.size(2);
auto op = weight.size(3); auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
flops += nRules * ip * op * groups; flops += nRules * ip * op * groups;
...@@ -151,15 +151,15 @@ double cpu_SubmanifoldConvolution_updateOutput( ...@@ -151,15 +151,15 @@ double cpu_SubmanifoldConvolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_SubmanifoldConvolution_backward( void cpu_SubmanifoldConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor filterSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &filterSize,
Metadata<Dimension> &m, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight, /*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor &d_weight,
/*float*/ at::Tensor d_bias) { /*float*/ at::Tensor &d_bias) {
auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true); const auto &_rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
Int nActive = m.getNActive(inputSize); Int nActive = m.getNActive(inputSize);
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
...@@ -168,7 +168,7 @@ void cpu_SubmanifoldConvolution_backward( ...@@ -168,7 +168,7 @@ void cpu_SubmanifoldConvolution_backward(
if (nActive and d_bias.numel()) if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false); at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
auto w = weight.select(0, i); auto w = weight.select(0, i);
...@@ -186,12 +186,12 @@ void cpu_SubmanifoldConvolution_backward( ...@@ -186,12 +186,12 @@ void cpu_SubmanifoldConvolution_backward(
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cpu_PermutohedralSubmanifoldConvolution_updateOutput( double cpu_PermutohedralSubmanifoldConvolution_updateOutput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m, /*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor weight, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor bias) { /*float*/ at::Tensor &bias) {
auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true); const auto &_rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
Int nActive = m.getNActive(inputSize); Int nActive = m.getNActive(inputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)}); output_features.resize_({nActive, weight.size(1) * weight.size(3)});
if (bias.numel() and nActive) if (bias.numel() and nActive)
...@@ -204,7 +204,7 @@ double cpu_PermutohedralSubmanifoldConvolution_updateOutput( ...@@ -204,7 +204,7 @@ double cpu_PermutohedralSubmanifoldConvolution_updateOutput(
auto ip = weight.size(2); auto ip = weight.size(2);
auto op = weight.size(3); auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
flops += nRules * ip * op * groups; flops += nRules * ip * op * groups;
...@@ -220,14 +220,14 @@ double cpu_PermutohedralSubmanifoldConvolution_updateOutput( ...@@ -220,14 +220,14 @@ double cpu_PermutohedralSubmanifoldConvolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_PermutohedralSubmanifoldConvolution_backward( void cpu_PermutohedralSubmanifoldConvolution_backward(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m, /*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight, /*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor &d_weight,
/*float*/ at::Tensor d_bias) { /*float*/ at::Tensor &d_bias) {
auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true); const auto &_rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
Int nActive = m.getNActive(inputSize); Int nActive = m.getNActive(inputSize);
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
...@@ -236,7 +236,7 @@ void cpu_PermutohedralSubmanifoldConvolution_backward( ...@@ -236,7 +236,7 @@ void cpu_PermutohedralSubmanifoldConvolution_backward(
if (nActive and d_bias.numel()) if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false); at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
auto w = weight.select(0, i); auto w = weight.select(0, i);
...@@ -254,15 +254,15 @@ void cpu_PermutohedralSubmanifoldConvolution_backward( ...@@ -254,15 +254,15 @@ void cpu_PermutohedralSubmanifoldConvolution_backward(
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cpu_FullConvolution_updateOutput( double cpu_FullConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &mIn, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &mIn,
Metadata<Dimension> &mOut, Metadata<Dimension> &mOut,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor weight, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor bias) { /*float*/ at::Tensor &bias) {
auto _rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize, const auto &_rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
filterSize, filterStride, mOut); filterSize, filterStride, mOut);
Int nActive = mOut.getNActive(outputSize); Int nActive = mOut.getNActive(outputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)}); output_features.resize_({nActive, weight.size(1) * weight.size(3)});
...@@ -276,7 +276,7 @@ double cpu_FullConvolution_updateOutput( ...@@ -276,7 +276,7 @@ double cpu_FullConvolution_updateOutput(
auto ip = weight.size(2); auto ip = weight.size(2);
auto op = weight.size(3); auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
flops += nRules * ip * op * groups; flops += nRules * ip * op * groups;
...@@ -292,17 +292,17 @@ double cpu_FullConvolution_updateOutput( ...@@ -292,17 +292,17 @@ double cpu_FullConvolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_FullConvolution_backward( void cpu_FullConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &mIn, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &mIn,
Metadata<Dimension> &mOut, Metadata<Dimension> &mOut,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight, /*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor &d_weight,
/*float*/ at::Tensor d_bias) { /*float*/ at::Tensor &d_bias) {
auto _rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize, const auto &_rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
filterSize, filterStride, mOut); filterSize, filterStride, mOut);
Int nActive = mOut.getNActive(inputSize); Int nActive = mOut.getNActive(inputSize);
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
...@@ -312,7 +312,7 @@ void cpu_FullConvolution_backward( ...@@ -312,7 +312,7 @@ void cpu_FullConvolution_backward(
if (nActive and d_bias.numel()) if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false); at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
auto w = weight.select(0, i); auto w = weight.select(0, i);
...@@ -330,13 +330,13 @@ void cpu_FullConvolution_backward( ...@@ -330,13 +330,13 @@ void cpu_FullConvolution_backward(
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cpu_RandomizedStrideConvolution_updateOutput( double cpu_RandomizedStrideConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor weight, /*float*/ at::Tensor &output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor bias) { /*float*/ at::Tensor &bias) {
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize, const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
filterStride, true); filterStride, true);
Int nActive = m.getNActive(outputSize); Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)}); output_features.resize_({nActive, weight.size(1) * weight.size(3)});
...@@ -350,7 +350,7 @@ double cpu_RandomizedStrideConvolution_updateOutput( ...@@ -350,7 +350,7 @@ double cpu_RandomizedStrideConvolution_updateOutput(
auto ip = weight.size(2); auto ip = weight.size(2);
auto op = weight.size(3); auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
flops += nRules * ip * op * groups; flops += nRules * ip * op * groups;
...@@ -366,15 +366,15 @@ double cpu_RandomizedStrideConvolution_updateOutput( ...@@ -366,15 +366,15 @@ double cpu_RandomizedStrideConvolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_RandomizedStrideConvolution_backward( void cpu_RandomizedStrideConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight, /*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias) { /*float*/ at::Tensor &d_weight, /*float*/ at::Tensor &d_bias) {
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize, const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
filterStride, true); filterStride, true);
Int nActive = m.getNActive(inputSize); Int nActive = m.getNActive(inputSize);
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
...@@ -384,7 +384,7 @@ void cpu_RandomizedStrideConvolution_backward( ...@@ -384,7 +384,7 @@ void cpu_RandomizedStrideConvolution_backward(
if (nActive and d_bias.numel()) if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false); at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
auto w = weight.select(0, i); auto w = weight.select(0, i);
......
...@@ -6,13 +6,13 @@ ...@@ -6,13 +6,13 @@
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cpu_Deconvolution_updateOutput( double cpu_Deconvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor weight, /*float*/ at::Tensor &output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor bias) { /*float*/ at::Tensor &bias) {
auto _rules = const auto &_rules =
m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true); m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
Int nActive = m.getNActive(outputSize); Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)}); output_features.resize_({nActive, weight.size(1) * weight.size(3)});
...@@ -26,7 +26,7 @@ double cpu_Deconvolution_updateOutput( ...@@ -26,7 +26,7 @@ double cpu_Deconvolution_updateOutput(
auto ip = weight.size(2); auto ip = weight.size(2);
auto op = weight.size(3); auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
flops += nRules * ip * op * groups; flops += nRules * ip * op * groups;
...@@ -42,15 +42,15 @@ double cpu_Deconvolution_updateOutput( ...@@ -42,15 +42,15 @@ double cpu_Deconvolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_Deconvolution_backward( void cpu_Deconvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight, /*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias) { /*float*/ at::Tensor &d_weight, /*float*/ at::Tensor &d_bias) {
auto _rules = const auto &_rules =
m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true); m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
Int nActive = m.getNActive(inputSize); Int nActive = m.getNActive(inputSize);
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
...@@ -60,7 +60,7 @@ void cpu_Deconvolution_backward( ...@@ -60,7 +60,7 @@ void cpu_Deconvolution_backward(
if (nActive and d_bias.numel()) if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false); at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) { for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i]; const auto &r = _rules[i];
Int nRules = r.size() / 2; Int nRules = r.size() / 2;
if (nRules) { if (nRules) {
auto w = weight.select(0, i); auto w = weight.select(0, i);
......
...@@ -48,10 +48,10 @@ void InputLayer_BackwardPass(T *d_input_features, T *d_output_features, ...@@ -48,10 +48,10 @@ void InputLayer_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_InputLayer_updateOutput(Metadata<Dimension> &m, void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
/*long*/ at::Tensor spatialSize, /*long*/ at::Tensor &spatialSize,
/*long*/ at::Tensor input_coords, /*long*/ at::Tensor &input_coords,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor &output_features,
long batchSize, long mode) { long batchSize, long mode) {
m.inputLayer(spatialSize, input_coords, batchSize, mode); m.inputLayer(spatialSize, input_coords, batchSize, mode);
...@@ -72,8 +72,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -72,8 +72,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m, void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features) { /*float*/ at::Tensor &d_output_features) {
auto &rules = m.inputLayerRuleBook; auto &rules = m.inputLayerRuleBook;
auto nPlanes = d_output_features.size(1); auto nPlanes = d_output_features.size(1);
...@@ -94,8 +94,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m, ...@@ -94,8 +94,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m, void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features) { /*float*/ at::Tensor &output_features) {
auto &rules = m.inputLayerRuleBook; auto &rules = m.inputLayerRuleBook;
auto nPlanes = input_features.size(1); auto nPlanes = input_features.size(1);
...@@ -115,8 +115,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -115,8 +115,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m, void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features) { /*float*/ at::Tensor &d_output_features) {
auto &rules = m.inputLayerRuleBook; auto &rules = m.inputLayerRuleBook;
auto nPlanes = d_output_features.size(1); auto nPlanes = d_output_features.size(1);
...@@ -137,10 +137,10 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m, ...@@ -137,10 +137,10 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m, void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
/*long*/ at::Tensor spatialSize, /*long*/ at::Tensor &spatialSize,
/*long*/ at::Tensor input_coords, /*long*/ at::Tensor &input_coords,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor &output_features,
long mode) { long mode) {
m.blLayer(spatialSize, input_coords, mode); m.blLayer(spatialSize, input_coords, mode);
...@@ -162,8 +162,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -162,8 +162,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m, void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features) { /*float*/ at::Tensor &d_output_features) {
auto &rules = m.blLayerRuleBook; auto &rules = m.blLayerRuleBook;
auto nPlanes = d_output_features.size(1); auto nPlanes = d_output_features.size(1);
...@@ -186,8 +186,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m, ...@@ -186,8 +186,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m, void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features) { /*float*/ at::Tensor &output_features) {
auto &rules = m.blLayerRuleBook; auto &rules = m.blLayerRuleBook;
auto nPlanes = input_features.size(1); auto nPlanes = input_features.size(1);
...@@ -207,9 +207,10 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -207,9 +207,10 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
} }
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_BLOutputLayer_updateGradInput(Metadata<Dimension> &m, void cpu_BLOutputLayer_updateGradInput(
/*float*/ at::Tensor d_input_features, Metadata<Dimension> &m,
/*float*/ at::Tensor d_output_features) { /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features) {
auto &rules = m.blLayerRuleBook; auto &rules = m.blLayerRuleBook;
auto nPlanes = d_output_features.size(2); auto nPlanes = d_output_features.size(2);
......
...@@ -5,8 +5,9 @@ ...@@ -5,8 +5,9 @@
// LICENSE file in the root directory of this source tree. // LICENSE file in the root directory of this source tree.
template <typename T> template <typename T>
void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor input_features, void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, T alpha) { /*float*/ at::Tensor &output_features,
T alpha) {
output_features.resize_as_(input_features); output_features.resize_as_(input_features);
auto iF = input_features.data<T>(); auto iF = input_features.data<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data<T>();
...@@ -19,9 +20,9 @@ void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor input_features, ...@@ -19,9 +20,9 @@ void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor input_features,
} }
} }
template <typename T> template <typename T>
void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor input_features, void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor &d_output_features,
T alpha) { T alpha) {
d_input_features.resize_as_(d_output_features); d_input_features.resize_as_(d_output_features);
auto iF = input_features.data<T>(); auto iF = input_features.data<T>();
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
template <typename T> template <typename T>
void MaxPooling_ForwardPass(T *input_features, T *output_features, Int nPlanes, void MaxPooling_ForwardPass(T *input_features, T *output_features, Int nPlanes,
Int input_stride, Int output_stride, Int *rules, Int input_stride, Int output_stride, const Int *rules,
Int nHot) { Int nHot) {
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
...@@ -22,7 +22,7 @@ template <typename T> ...@@ -22,7 +22,7 @@ template <typename T>
void MaxPooling_BackwardPass(T *input_features, T *d_input_features, void MaxPooling_BackwardPass(T *input_features, T *d_input_features,
T *output_features, T *d_output_features, T *output_features, T *d_output_features,
Int nPlanes, Int input_stride, Int output_stride, Int nPlanes, Int input_stride, Int output_stride,
Int *rules, Int nHot) { const Int *rules, Int nHot) {
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
for (outSite = 0; outSite < nHot; outSite++) { for (outSite = 0; outSite < nHot; outSite++) {
...@@ -36,14 +36,14 @@ void MaxPooling_BackwardPass(T *input_features, T *d_input_features, ...@@ -36,14 +36,14 @@ void MaxPooling_BackwardPass(T *input_features, T *d_input_features,
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_MaxPooling_updateOutput( void cpu_MaxPooling_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor poolSize, /*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m, /*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, long nFeaturesToDrop) { /*float*/ at::Tensor &output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop; Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules = const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true); m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
Int nActive = m.getNActive(outputSize); Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop}); output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
...@@ -60,15 +60,16 @@ void cpu_MaxPooling_updateOutput( ...@@ -60,15 +60,16 @@ void cpu_MaxPooling_updateOutput(
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_MaxPooling_updateGradInput( void cpu_MaxPooling_updateGradInput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor poolSize, /*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m, /*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor output_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, long nFeaturesToDrop) { /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &d_output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop; Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules = const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true); m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
...@@ -87,14 +88,14 @@ void cpu_MaxPooling_updateGradInput( ...@@ -87,14 +88,14 @@ void cpu_MaxPooling_updateGradInput(
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_RandomizedStrideMaxPooling_updateOutput( void cpu_RandomizedStrideMaxPooling_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor poolSize, /*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m, /*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, long nFeaturesToDrop) { /*float*/ at::Tensor &output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop; Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize, const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
poolStride, true); poolStride, true);
Int nActive = m.getNActive(outputSize); Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop}); output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
...@@ -111,15 +112,16 @@ void cpu_RandomizedStrideMaxPooling_updateOutput( ...@@ -111,15 +112,16 @@ void cpu_RandomizedStrideMaxPooling_updateOutput(
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_RandomizedStrideMaxPooling_updateGradInput( void cpu_RandomizedStrideMaxPooling_updateGradInput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor poolSize, /*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m, /*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor output_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, long nFeaturesToDrop) { /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &d_output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop; Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize, const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
poolStride, true); poolStride, true);
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
......
...@@ -5,10 +5,10 @@ ...@@ -5,10 +5,10 @@
// LICENSE file in the root directory of this source tree. // LICENSE file in the root directory of this source tree.
template <typename T> template <typename T>
double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor input_features, double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor weight, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor bias) { /*float*/ at::Tensor &bias) {
auto nActive = input_features.size(0); auto nActive = input_features.size(0);
auto input_nPlanes = weight.size(0); auto input_nPlanes = weight.size(0);
auto output_nPlanes = weight.size(1); auto output_nPlanes = weight.size(1);
...@@ -23,9 +23,9 @@ double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor input_features, ...@@ -23,9 +23,9 @@ double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor input_features,
} }
template <typename T> template <typename T>
void cpu_NetworkInNetwork_updateGradInput( void cpu_NetworkInNetwork_updateGradInput(
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor &d_output_features,
/*float*/ at::Tensor weight) { /*float*/ at::Tensor &weight) {
int nActive = d_output_features.size(0); int nActive = d_output_features.size(0);
d_input_features.resize_({nActive, weight.size(0)}); d_input_features.resize_({nActive, weight.size(0)});
...@@ -35,9 +35,9 @@ void cpu_NetworkInNetwork_updateGradInput( ...@@ -35,9 +35,9 @@ void cpu_NetworkInNetwork_updateGradInput(
} }
template <typename T> template <typename T>
void cpu_NetworkInNetwork_accGradParameters( void cpu_NetworkInNetwork_accGradParameters(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor &d_output_features,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias) { /*float*/ at::Tensor &d_weight, /*float*/ at::Tensor &d_bias) {
auto nActive = input_features.size(0); auto nActive = input_features.size(0);
if (nActive and d_bias.numel()) if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false); at::sum_out(d_bias, d_output_features, {0}, false);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
template <typename T> template <typename T>
void SparseToDense_ForwardPass(T *input_features, T *output_features, void SparseToDense_ForwardPass(T *input_features, T *output_features,
Int nPlanes, Int spatialVolume, Int *rules, Int nPlanes, Int spatialVolume, const Int *rules,
int nHot) { int nHot) {
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
...@@ -20,7 +20,7 @@ void SparseToDense_ForwardPass(T *input_features, T *output_features, ...@@ -20,7 +20,7 @@ void SparseToDense_ForwardPass(T *input_features, T *output_features,
template <typename T> template <typename T>
void SparseToDense_BackwardPass(T *d_input_features, T *d_output_features, void SparseToDense_BackwardPass(T *d_input_features, T *d_output_features,
Int nPlanes, Int spatialVolume, Int *rules, Int nPlanes, Int spatialVolume, const Int *rules,
int nHot) { int nHot) {
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
...@@ -34,9 +34,9 @@ void SparseToDense_BackwardPass(T *d_input_features, T *d_output_features, ...@@ -34,9 +34,9 @@ void SparseToDense_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_SparseToDense_updateOutput( void cpu_SparseToDense_updateOutput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m, /*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, long nPlanes) { /*float*/ at::Tensor &output_features, long nPlanes) {
{ {
std::array<long, Dimension + 2> sz; std::array<long, Dimension + 2> sz;
...@@ -49,7 +49,7 @@ void cpu_SparseToDense_updateOutput( ...@@ -49,7 +49,7 @@ void cpu_SparseToDense_updateOutput(
output_features.zero_(); output_features.zero_();
} }
if (input_features.ndimension() == 2) { if (input_features.ndimension() == 2) {
auto _rules = m.getSparseToDenseRuleBook(inputSize, true); const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true);
Int _nPlanes = input_features.size(1); Int _nPlanes = input_features.size(1);
auto iF = input_features.data<T>(); auto iF = input_features.data<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data<T>();
...@@ -64,15 +64,15 @@ void cpu_SparseToDense_updateOutput( ...@@ -64,15 +64,15 @@ void cpu_SparseToDense_updateOutput(
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_SparseToDense_updateGradInput( void cpu_SparseToDense_updateGradInput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m, /*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features) { /*float*/ at::Tensor &d_output_features) {
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
if (input_features.ndimension() == 2) { if (input_features.ndimension() == 2) {
auto _rules = m.getSparseToDenseRuleBook(inputSize, true); const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true);
long spatialVolume = inputSize.prod().data<long>()[0]; long spatialVolume = inputSize.prod().data<long>()[0];
Int _nPlanes = d_input_features.size(1); Int _nPlanes = d_input_features.size(1);
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data<T>();
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
template <typename T> template <typename T>
void UnPooling_ForwardPass(T *input_features, T *output_features, Int nPlanes, void UnPooling_ForwardPass(T *input_features, T *output_features, Int nPlanes,
Int input_stride, Int output_stride, Int *rules, Int input_stride, Int output_stride, const Int *rules,
Int nHot) { Int nHot) {
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
...@@ -20,7 +20,7 @@ void UnPooling_ForwardPass(T *input_features, T *output_features, Int nPlanes, ...@@ -20,7 +20,7 @@ void UnPooling_ForwardPass(T *input_features, T *output_features, Int nPlanes,
template <typename T> template <typename T>
void UnPooling_BackwardPass(T *d_input_features, T *d_output_features, void UnPooling_BackwardPass(T *d_input_features, T *d_output_features,
Int nPlanes, Int input_stride, Int output_stride, Int nPlanes, Int input_stride, Int output_stride,
Int *rules, Int nHot) { const Int *rules, Int nHot) {
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
for (outSite = 0; outSite < nHot; outSite++) { for (outSite = 0; outSite < nHot; outSite++) {
...@@ -33,14 +33,14 @@ void UnPooling_BackwardPass(T *d_input_features, T *d_output_features, ...@@ -33,14 +33,14 @@ void UnPooling_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_UnPooling_updateOutput( void cpu_UnPooling_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor poolSize, /*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m, /*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features, /*float*/ at::Tensor &input_features,
/*float*/ at::Tensor output_features, long nFeaturesToDrop) { /*float*/ at::Tensor &output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop; Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules = const auto &_rules =
m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true); m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
Int nActive = m.getNActive(outputSize); Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop}); output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
...@@ -57,14 +57,14 @@ void cpu_UnPooling_updateOutput( ...@@ -57,14 +57,14 @@ void cpu_UnPooling_updateOutput(
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cpu_UnPooling_updateGradInput( void cpu_UnPooling_updateGradInput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor poolSize, /*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m, /*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor d_output_features, long nFeaturesToDrop) { /*float*/ at::Tensor &d_output_features, long nFeaturesToDrop) {
Int nPlanes = d_input_features.size(1) - nFeaturesToDrop; Int nPlanes = d_input_features.size(1) - nFeaturesToDrop;
auto _rules = const auto &_rules =
m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true); m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
auto diF = d_input_features.data<T>() + nFeaturesToDrop; auto diF = d_input_features.data<T>() + nFeaturesToDrop;
......
...@@ -7,21 +7,21 @@ ...@@ -7,21 +7,21 @@
template <typename T> template <typename T>
void ActivePooling_ForwardPass(T *input_features, T *output_features, void ActivePooling_ForwardPass(T *input_features, T *output_features,
Int batchSize, Int maxActive, Int nPlanes, Int batchSize, Int maxActive, Int nPlanes,
Int *rules, bool average); const Int *rules, bool average);
template <typename T> template <typename T>
void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features, void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int batchSize, Int maxActive, Int nPlanes, Int batchSize, Int maxActive, Int nPlanes,
Int *rules, bool average); const Int *rules, bool average);
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cuda_ActivePooling_updateOutput( void cuda_ActivePooling_updateOutput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m, /*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor output_features, bool average) { /*cuda float*/ at::Tensor &output_features, bool average) {
Int nPlanes = input_features.size(1); Int nPlanes = input_features.size(1);
auto _rules = m.getActivePoolingRuleBook(inputSize); const auto &_rules = m.getActivePoolingRuleBook(inputSize);
Int batchSize = _rules[1][0]; Int batchSize = _rules[1][0];
Int maxActive = _rules[1][1]; Int maxActive = _rules[1][1];
output_features.resize_({batchSize, nPlanes}); output_features.resize_({batchSize, nPlanes});
...@@ -34,13 +34,13 @@ void cuda_ActivePooling_updateOutput( ...@@ -34,13 +34,13 @@ void cuda_ActivePooling_updateOutput(
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cuda_ActivePooling_updateGradInput( void cuda_ActivePooling_updateGradInput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m, /*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor d_input_features, /*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor d_output_features, bool average) { /*cuda float*/ at::Tensor &d_output_features, bool average) {
Int nPlanes = input_features.size(1); Int nPlanes = input_features.size(1);
auto _rules = m.getActivePoolingRuleBook(inputSize); const auto &_rules = m.getActivePoolingRuleBook(inputSize);
Int batchSize = _rules[1][0]; Int batchSize = _rules[1][0];
Int maxActive = _rules[1][1]; Int maxActive = _rules[1][1];
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
......
...@@ -6,10 +6,10 @@ ...@@ -6,10 +6,10 @@
template <typename T> template <typename T>
__global__ void ActivePooling_fp(T *input_features, T *output_features, __global__ void ActivePooling_fp(T *input_features, T *output_features,
Int maxActive, Int nPlanes, Int *rules, Int maxActive, Int nPlanes, const Int *rules,
bool average) { bool average) {
T *out = &output_features[blockIdx.x * nPlanes]; T *out = &output_features[blockIdx.x * nPlanes];
Int *r = &rules[blockIdx.x * (maxActive + 1)]; const Int *r = &rules[blockIdx.x * (maxActive + 1)];
Int nActive = *r++; Int nActive = *r++;
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
while (nActive-- > 0) { while (nActive-- > 0) {
...@@ -21,7 +21,7 @@ __global__ void ActivePooling_fp(T *input_features, T *output_features, ...@@ -21,7 +21,7 @@ __global__ void ActivePooling_fp(T *input_features, T *output_features,
template <typename T> template <typename T>
void ActivePooling_ForwardPass(T *input_features, T *output_features, void ActivePooling_ForwardPass(T *input_features, T *output_features,
Int batchSize, Int maxActive, Int nPlanes, Int batchSize, Int maxActive, Int nPlanes,
Int *rules, bool average) { const Int *rules, bool average) {
auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT));
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
...@@ -41,10 +41,10 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features, ...@@ -41,10 +41,10 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features,
} }
template <typename T> template <typename T>
__global__ void ActivePooling_bp(T *d_input_features, T *d_output_features, __global__ void ActivePooling_bp(T *d_input_features, T *d_output_features,
Int maxActive, Int nPlanes, Int *rules, Int maxActive, Int nPlanes, const Int *rules,
bool average) { bool average) {
T *out = &d_output_features[blockIdx.x * nPlanes]; T *out = &d_output_features[blockIdx.x * nPlanes];
Int *r = &rules[blockIdx.x * (maxActive + 1)]; const Int *r = &rules[blockIdx.x * (maxActive + 1)];
Int nActive = *r++; Int nActive = *r++;
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
while (nActive-- > 0) { while (nActive-- > 0) {
...@@ -57,7 +57,7 @@ __global__ void ActivePooling_bp(T *d_input_features, T *d_output_features, ...@@ -57,7 +57,7 @@ __global__ void ActivePooling_bp(T *d_input_features, T *d_output_features,
template <typename T> template <typename T>
void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features, void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int batchSize, Int maxActive, Int nPlanes, Int batchSize, Int maxActive, Int nPlanes,
Int *rules, bool average) { const Int *rules, bool average) {
auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT));
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data<Int>();
Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1)); Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1));
......
...@@ -24,11 +24,11 @@ void dAffineReluTrivialConvolution_backward_dW( ...@@ -24,11 +24,11 @@ void dAffineReluTrivialConvolution_backward_dW(
template <typename T> template <typename T>
double cuda_AffineReluTrivialConvolution_updateOutput( double cuda_AffineReluTrivialConvolution_updateOutput(
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor &output_features,
/*cuda float*/ at::Tensor affineWeight, /*cuda float*/ at::Tensor &affineWeight,
/*cuda float*/ at::Tensor affineBias, /*cuda float*/ at::Tensor &affineBias,
/*cuda float*/ at::Tensor convWeight) { /*cuda float*/ at::Tensor &convWeight) {
output_features.resize_({input_features.size(0), convWeight.size(1)}); output_features.resize_({input_features.size(0), convWeight.size(1)});
dAffineReluTrivialConvolution_forward<T>( dAffineReluTrivialConvolution_forward<T>(
...@@ -42,15 +42,15 @@ double cuda_AffineReluTrivialConvolution_updateOutput( ...@@ -42,15 +42,15 @@ double cuda_AffineReluTrivialConvolution_updateOutput(
template <typename T> template <typename T>
void cuda_AffineReluTrivialConvolution_backward( void cuda_AffineReluTrivialConvolution_backward(
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor d_input_features, /*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor d_output_features, /*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor affineWeight, /*cuda float*/ at::Tensor &affineWeight,
/*cuda float*/ at::Tensor d_affineWeight, /*cuda float*/ at::Tensor &d_affineWeight,
/*cuda float*/ at::Tensor affineBias, /*cuda float*/ at::Tensor &affineBias,
/*cuda float*/ at::Tensor d_affineBias, /*cuda float*/ at::Tensor &d_affineBias,
/*cuda float*/ at::Tensor convWeight, /*cuda float*/ at::Tensor &convWeight,
/*cuda float*/ at::Tensor d_convWeight, bool additiveGrad) { /*cuda float*/ at::Tensor &d_convWeight, bool additiveGrad) {
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
dAffineReluTrivialConvolution_backward_dW<T>( dAffineReluTrivialConvolution_backward_dW<T>(
......
...@@ -18,14 +18,14 @@ void cuda_AveragePooling_BackwardPass(T *d_input_features, T *d_output_features, ...@@ -18,14 +18,14 @@ void cuda_AveragePooling_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cuda_AveragePooling_updateOutput( void cuda_AveragePooling_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor poolSize, /*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m, /*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor output_features, long nFeaturesToDrop) { /*cuda float*/ at::Tensor &output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop; Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules = const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true); m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
Int nActive = m.getNActive(outputSize); Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop}); output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
...@@ -40,15 +40,15 @@ void cuda_AveragePooling_updateOutput( ...@@ -40,15 +40,15 @@ void cuda_AveragePooling_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cuda_AveragePooling_updateGradInput( void cuda_AveragePooling_updateGradInput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor poolSize, /*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m, /*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor d_input_features, /*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor d_output_features, long nFeaturesToDrop) { /*cuda float*/ at::Tensor &d_output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop; Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules = const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true); m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
...@@ -70,8 +70,8 @@ void cuda_CopyFeaturesHelper_BackwardPass(T *d_input_features, ...@@ -70,8 +70,8 @@ void cuda_CopyFeaturesHelper_BackwardPass(T *d_input_features,
Int nPlanes, Int nHot); Int nPlanes, Int nHot);
template <typename T> template <typename T>
void cuda_CopyFeaturesHelper_updateOutput(at::Tensor rules, at::Tensor context, void cuda_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context,
at::Tensor Context) { at::Tensor &Context) {
Int nPlanes = context.size(1); Int nPlanes = context.size(1);
Int nHot = rules.size(0) / 2; Int nHot = rules.size(0) / 2;
...@@ -80,9 +80,9 @@ void cuda_CopyFeaturesHelper_updateOutput(at::Tensor rules, at::Tensor context, ...@@ -80,9 +80,9 @@ void cuda_CopyFeaturesHelper_updateOutput(at::Tensor rules, at::Tensor context,
} }
template <typename T> template <typename T>
void cuda_CopyFeaturesHelper_updateGradInput(at::Tensor rules, void cuda_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
at::Tensor dcontext, at::Tensor &dcontext,
at::Tensor dContext) { at::Tensor &dContext) {
Int nPlanes = dcontext.size(1); Int nPlanes = dcontext.size(1);
Int nHot = rules.size(0) / 2; Int nHot = rules.size(0) / 2;
......
...@@ -13,8 +13,8 @@ void bmd_b(T *input_features, T *d_input_features, T *d_output_features, ...@@ -13,8 +13,8 @@ void bmd_b(T *input_features, T *d_input_features, T *d_output_features,
template <typename T> template <typename T>
void cuda_BatchwiseMultiplicativeDropout_updateOutput( void cuda_BatchwiseMultiplicativeDropout_updateOutput(
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor noise, /*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &noise,
T alpha) { T alpha) {
output_features.resize_as_(input_features); output_features.resize_as_(input_features);
auto nActive = input_features.size(0); auto nActive = input_features.size(0);
...@@ -25,10 +25,10 @@ void cuda_BatchwiseMultiplicativeDropout_updateOutput( ...@@ -25,10 +25,10 @@ void cuda_BatchwiseMultiplicativeDropout_updateOutput(
template <typename T> template <typename T>
void cuda_BatchwiseMultiplicativeDropout_updateGradInput( void cuda_BatchwiseMultiplicativeDropout_updateGradInput(
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor d_input_features, /*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor d_output_features, /*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor noise, T alpha) { /*cuda float*/ at::Tensor &noise, T alpha) {
d_input_features.resize_as_(d_output_features); d_input_features.resize_as_(d_output_features);
auto nActive = input_features.size(0); auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1); auto nPlanes = input_features.size(1);
......
...@@ -22,14 +22,14 @@ void dConvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures, ...@@ -22,14 +22,14 @@ void dConvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cuda_Convolution_updateOutput( double cuda_Convolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &weight,
/*cuda float*/ at::Tensor bias) { /*cuda float*/ at::Tensor &bias) {
auto _rules = const auto &_rules =
m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true); m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
Int nActiveOut = m.getNActive(outputSize); Int nActiveOut = m.getNActive(outputSize);
Int nGroups = weight.size(1); Int nGroups = weight.size(1);
...@@ -56,16 +56,16 @@ double cuda_Convolution_updateOutput( ...@@ -56,16 +56,16 @@ double cuda_Convolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cuda_Convolution_backward( void cuda_Convolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor d_input_features, /*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor d_output_features, /*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight, /*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor d_bias) { /*cuda float*/ at::Tensor &d_bias) {
auto _rules = const auto &_rules =
m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true); m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
Int nActiveIn = m.getNActive(inputSize); Int nActiveIn = m.getNActive(inputSize);
Int nActiveOut = m.getNActive(outputSize); Int nActiveOut = m.getNActive(outputSize);
...@@ -94,13 +94,13 @@ void cuda_Convolution_backward( ...@@ -94,13 +94,13 @@ void cuda_Convolution_backward(
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cuda_SubmanifoldConvolution_updateOutput( double cuda_SubmanifoldConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor filterSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &filterSize,
Metadata<Dimension> &m, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &weight,
/*cuda float*/ at::Tensor bias) { /*cuda float*/ at::Tensor &bias) {
auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true); const auto &_rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
Int nActive = m.getNActive(inputSize); Int nActive = m.getNActive(inputSize);
Int nGroups = weight.size(1); Int nGroups = weight.size(1);
Int ip = weight.size(2); Int ip = weight.size(2);
...@@ -126,15 +126,15 @@ double cuda_SubmanifoldConvolution_updateOutput( ...@@ -126,15 +126,15 @@ double cuda_SubmanifoldConvolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cuda_SubmanifoldConvolution_backward( void cuda_SubmanifoldConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor filterSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &filterSize,
Metadata<Dimension> &m, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor d_input_features, /*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor d_output_features, /*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight, /*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor d_bias) { /*cuda float*/ at::Tensor &d_bias) {
auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true); const auto &_rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
Int nActive = m.getNActive(inputSize); Int nActive = m.getNActive(inputSize);
Int nGroups = weight.size(1); Int nGroups = weight.size(1);
Int ip = weight.size(2); Int ip = weight.size(2);
...@@ -161,12 +161,12 @@ void cuda_SubmanifoldConvolution_backward( ...@@ -161,12 +161,12 @@ void cuda_SubmanifoldConvolution_backward(
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cuda_PermutohedralSubmanifoldConvolution_updateOutput( double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m, /*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &weight,
/*cuda float*/ at::Tensor bias) { /*cuda float*/ at::Tensor &bias) {
auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true); const auto &_rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
Int nActive = m.getNActive(inputSize); Int nActive = m.getNActive(inputSize);
Int nGroups = weight.size(1); Int nGroups = weight.size(1);
Int ip = weight.size(2); Int ip = weight.size(2);
...@@ -192,14 +192,14 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput( ...@@ -192,14 +192,14 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cuda_PermutohedralSubmanifoldConvolution_backward( void cuda_PermutohedralSubmanifoldConvolution_backward(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m, /*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor d_input_features, /*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor d_output_features, /*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight, /*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor d_bias) { /*cuda float*/ at::Tensor &d_bias) {
auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true); const auto &_rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
Int nActive = m.getNActive(inputSize); Int nActive = m.getNActive(inputSize);
Int nGroups = weight.size(1); Int nGroups = weight.size(1);
Int ip = weight.size(2); Int ip = weight.size(2);
...@@ -226,15 +226,15 @@ void cuda_PermutohedralSubmanifoldConvolution_backward( ...@@ -226,15 +226,15 @@ void cuda_PermutohedralSubmanifoldConvolution_backward(
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cuda_FullConvolution_updateOutput( double cuda_FullConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &mIn, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &mIn,
Metadata<Dimension> &mOut, Metadata<Dimension> &mOut,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &weight,
/*cuda float*/ at::Tensor bias) { /*cuda float*/ at::Tensor &bias) {
auto _rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize, const auto &_rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
filterSize, filterStride, mOut); filterSize, filterStride, mOut);
Int nActiveOut = mOut.getNActive(outputSize); Int nActiveOut = mOut.getNActive(outputSize);
Int nGroups = weight.size(1); Int nGroups = weight.size(1);
...@@ -261,17 +261,17 @@ double cuda_FullConvolution_updateOutput( ...@@ -261,17 +261,17 @@ double cuda_FullConvolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cuda_FullConvolution_backward( void cuda_FullConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &mIn, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &mIn,
Metadata<Dimension> &mOut, Metadata<Dimension> &mOut,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor d_input_features, /*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor d_output_features, /*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight, /*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor d_bias) { /*cuda float*/ at::Tensor &d_bias) {
auto _rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize, const auto &_rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
filterSize, filterStride, mOut); filterSize, filterStride, mOut);
Int nActiveIn = mIn.getNActive(inputSize); Int nActiveIn = mIn.getNActive(inputSize);
Int nActiveOut = mOut.getNActive(outputSize); Int nActiveOut = mOut.getNActive(outputSize);
...@@ -299,14 +299,14 @@ void cuda_FullConvolution_backward( ...@@ -299,14 +299,14 @@ void cuda_FullConvolution_backward(
} }
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cuda_RandomizedStrideConvolution_updateOutput( double cuda_RandomizedStrideConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor &output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor bias) { /*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &bias) {
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize, const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
filterStride, true); filterStride, true);
Int nActiveOut = m.getNActive(outputSize); Int nActiveOut = m.getNActive(outputSize);
Int nGroups = weight.size(1); Int nGroups = weight.size(1);
...@@ -333,16 +333,16 @@ double cuda_RandomizedStrideConvolution_updateOutput( ...@@ -333,16 +333,16 @@ double cuda_RandomizedStrideConvolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cuda_RandomizedStrideConvolution_backward( void cuda_RandomizedStrideConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor d_input_features, /*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor d_output_features, /*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight, /*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor d_bias) { /*cuda float*/ at::Tensor &d_bias) {
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize, const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
filterStride, true); filterStride, true);
Int nActiveIn = m.getNActive(inputSize); Int nActiveIn = m.getNActive(inputSize);
Int nActiveOut = m.getNActive(outputSize); Int nActiveOut = m.getNActive(outputSize);
......
...@@ -19,14 +19,14 @@ void dDeconvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures, ...@@ -19,14 +19,14 @@ void dDeconvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
template <typename T, Int Dimension> template <typename T, Int Dimension>
double cuda_Deconvolution_updateOutput( double cuda_Deconvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &weight,
/*cuda float*/ at::Tensor bias) { /*cuda float*/ at::Tensor &bias) {
auto _rules = const auto &_rules =
m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true); m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
Int nActiveOut = m.getNActive(outputSize); Int nActiveOut = m.getNActive(outputSize);
Int nGroups = weight.size(1); Int nGroups = weight.size(1);
...@@ -53,16 +53,16 @@ double cuda_Deconvolution_updateOutput( ...@@ -53,16 +53,16 @@ double cuda_Deconvolution_updateOutput(
template <typename T, Int Dimension> template <typename T, Int Dimension>
void cuda_Deconvolution_backward( void cuda_Deconvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize, /*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor filterSize, /*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m, /*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features, /*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor d_input_features, /*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor d_output_features, /*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight, /*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor d_bias) { /*cuda float*/ at::Tensor &d_bias) {
auto _rules = const auto &_rules =
m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true); m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
Int nActiveIn = m.getNActive(inputSize); Int nActiveIn = m.getNActive(inputSize);
Int nActiveOut = m.getNActive(outputSize); Int nActiveOut = m.getNActive(outputSize);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment