Commit d8c8a060 authored by Michal Pandy

Use references where possible

parent 1171aae3
......@@ -8,12 +8,12 @@
template <typename T>
void ActivePooling_ForwardPass(T *input_features, T *output_features,
Int batchSize, Int maxActive, Int nPlanes,
RuleBook &rules, bool average) {
const RuleBook &rules, bool average) {
Int outSite;
#pragma omp parallel for private(outSite)
for (outSite = 0; outSite < batchSize; outSite++) {
T *out = &output_features[outSite * nPlanes];
Int *r = &rules[0][outSite * (maxActive + 1)];
const Int *r = &rules[0][outSite * (maxActive + 1)];
Int nActive = *r++;
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
while (nActive-- > 0) {
......@@ -26,12 +26,12 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features,
template <typename T>
void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int batchSize, Int maxActive, Int nPlanes,
RuleBook &rules, bool average) {
const RuleBook &rules, bool average) {
Int outSite;
#pragma omp parallel for private(outSite)
for (outSite = 0; outSite < batchSize; outSite++) {
T *out = &d_output_features[outSite * nPlanes];
Int *r = &rules[0][outSite * (maxActive + 1)];
const Int *r = &rules[0][outSite * (maxActive + 1)];
Int nActive = *r++;
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
while (nActive-- > 0) {
......@@ -44,12 +44,12 @@ void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension>
void cpu_ActivePooling_updateOutput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features, bool average) {
/*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features, bool average) {
Int nPlanes = input_features.size(1);
auto _rules = m.getActivePoolingRuleBook(inputSize);
const auto &_rules = m.getActivePoolingRuleBook(inputSize);
Int batchSize = _rules[1][0];
Int maxActive = _rules[1][1];
output_features.resize_({batchSize, nPlanes});
......@@ -62,13 +62,13 @@ void cpu_ActivePooling_updateOutput(
template <typename T, Int Dimension>
void cpu_ActivePooling_updateGradInput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, bool average) {
/*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, bool average) {
Int nPlanes = input_features.size(1);
auto _rules = m.getActivePoolingRuleBook(inputSize);
const auto &_rules = m.getActivePoolingRuleBook(inputSize);
Int batchSize = _rules[1][0];
Int maxActive = _rules[1][1];
d_input_features.resize_as_(input_features);
......
......@@ -69,9 +69,9 @@ void AffineReluTrivialConvolution_BackwardPass(
template <typename T>
double cpu_AffineReluTrivialConvolution_updateOutput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features,
/*float*/ at::Tensor affineWeight,
/*float*/ at::Tensor affineBias, /*float*/ at::Tensor convWeight) {
/*float*/ at::Tensor &input_features, /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &affineWeight,
/*float*/ at::Tensor &affineBias, /*float*/ at::Tensor &convWeight) {
output_features.resize_({input_features.size(0), convWeight.size(1)});
AffineReluTrivialConvolution_ForwardPass(
input_features.data<T>(), convWeight.size(0), input_features.stride(0),
......@@ -84,11 +84,12 @@ double cpu_AffineReluTrivialConvolution_updateOutput(
template <typename T>
void cpu_AffineReluTrivialConvolution_backward(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor affineWeight,
/*float*/ at::Tensor d_affineWeight, /*float*/ at::Tensor affineBias,
/*float*/ at::Tensor d_affineBias,
/*float*/ at::Tensor convWeight, /*float*/ at::Tensor d_convWeight,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &affineWeight,
/*float*/ at::Tensor &d_affineWeight, /*float*/ at::Tensor &affineBias,
/*float*/ at::Tensor &d_affineBias,
/*float*/ at::Tensor &convWeight, /*float*/ at::Tensor &d_convWeight,
bool additiveGrad) {
d_input_features.resize_as_(input_features);
......
......@@ -7,7 +7,7 @@
template <typename T>
void AveragePooling_ForwardPass(T *input_features, T *output_features,
Int nPlanes, Int input_stride,
Int output_stride, Int *rules, Int nHot,
Int output_stride, const Int *rules, Int nHot,
Int filterVolume) {
Int outSite;
#pragma omp parallel for private(outSite)
......@@ -21,7 +21,7 @@ void AveragePooling_ForwardPass(T *input_features, T *output_features,
template <typename T>
void AveragePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int nPlanes, Int input_stride,
Int output_stride, Int *rules, Int nHot,
Int output_stride, const Int *rules, Int nHot,
Int filterVolume) {
Int outSite;
#pragma omp parallel for private(outSite)
......@@ -36,14 +36,14 @@ void AveragePooling_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension>
void cpu_AveragePooling_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features, long nFeaturesToDrop) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules =
const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
......@@ -52,7 +52,7 @@ void cpu_AveragePooling_updateOutput(
auto iF = input_features.data<T>() + nFeaturesToDrop;
auto oF = output_features.data<T>();
for (auto &r : _rules) {
for (const auto &r : _rules) {
Int nHot = r.size() / 2;
AveragePooling_ForwardPass<T>(iF, oF, nPlanes, input_features.stride(0),
output_features.stride(0), &r[0], nHot,
......@@ -61,15 +61,15 @@ void cpu_AveragePooling_updateOutput(
}
template <typename T, Int Dimension>
void cpu_AveragePooling_updateGradInput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, long nFeaturesToDrop) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules =
const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
d_input_features.resize_as_(input_features);
d_input_features.zero_();
......@@ -77,7 +77,7 @@ void cpu_AveragePooling_updateGradInput(
auto diF = d_input_features.data<T>() + nFeaturesToDrop;
auto doF = d_output_features.data<T>();
for (auto &r : _rules) {
for (const auto &r : _rules) {
Int nHot = r.size() / 2;
AveragePooling_BackwardPass<T>(diF, doF, nPlanes, input_features.stride(0),
d_output_features.stride(0), &r[0], nHot,
......@@ -86,8 +86,8 @@ void cpu_AveragePooling_updateGradInput(
}
template <typename T>
void cpu_CopyFeaturesHelper_updateOutput(at::Tensor rules, at::Tensor context,
at::Tensor Context) {
void cpu_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context,
at::Tensor &Context) {
Int nHot = rules.size(0) / 2;
Int nPlanes = context.size(1);
auto iF = context.data<T>();
......@@ -102,9 +102,9 @@ void cpu_CopyFeaturesHelper_updateOutput(at::Tensor rules, at::Tensor context,
}
}
template <typename T>
void cpu_CopyFeaturesHelper_updateGradInput(at::Tensor rules,
at::Tensor dcontext,
at::Tensor dContext) {
void cpu_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
at::Tensor &dcontext,
at::Tensor &dContext) {
Int nHot = rules.size(0) / 2;
Int nPlanes = dcontext.size(1);
auto iF = dcontext.data<T>();
......
......@@ -108,11 +108,11 @@ void BatchNormalization_BackwardPass(T *input_features, T *d_input_features,
template <typename T>
void cpu_BatchNormalization_updateOutput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features,
/*float*/ at::Tensor saveMean,
/*float*/ at::Tensor saveInvStd, /*float*/ at::Tensor runningMean,
/*float*/ at::Tensor runningVar,
/*float*/ at::Tensor weight, /*float*/ at::Tensor bias, T eps, T momentum,
/*float*/ at::Tensor &input_features, /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &saveMean,
/*float*/ at::Tensor &saveInvStd, /*float*/ at::Tensor &runningMean,
/*float*/ at::Tensor &runningVar,
/*float*/ at::Tensor &weight, /*float*/ at::Tensor &bias, T eps, T momentum,
bool train, T leakiness) {
output_features.resize_as_(input_features);
if (input_features.ndimension() == 2) {
......@@ -131,13 +131,14 @@ void cpu_BatchNormalization_updateOutput(
template <typename T>
void cpu_BatchNormalization_backward(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor output_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor saveMean,
/*float*/ at::Tensor saveInvStd, /*float*/ at::Tensor runningMean,
/*float*/ at::Tensor runningVar,
/*float*/ at::Tensor weight, /*float*/ at::Tensor bias,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias, T leakiness) {
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &saveMean,
/*float*/ at::Tensor &saveInvStd, /*float*/ at::Tensor &runningMean,
/*float*/ at::Tensor &runningVar,
/*float*/ at::Tensor &weight, /*float*/ at::Tensor &bias,
/*float*/ at::Tensor &d_weight, /*float*/ at::Tensor &d_bias, T leakiness) {
d_input_features.resize_as_(input_features);
if (input_features.ndimension() == 2) {
......@@ -155,4 +156,3 @@ void cpu_BatchNormalization_backward(
leakiness);
}
}
......@@ -6,8 +6,8 @@
template <typename T>
void cpu_BatchwiseMultiplicativeDropout_updateOutput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features,
/*float*/ at::Tensor noise, T alpha) {
/*float*/ at::Tensor &input_features, /*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &noise, T alpha) {
output_features.resize_as_(input_features);
auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1);
......@@ -21,8 +21,9 @@ void cpu_BatchwiseMultiplicativeDropout_updateOutput(
}
template <typename T>
void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor noise,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &noise,
T alpha) {
d_input_features.resize_as_(d_output_features);
auto nActive = input_features.size(0);
......
......@@ -6,8 +6,8 @@
// rows x groups x planes -> groups x rows x planes
template <typename T>
at::Tensor rule_index_select(at::Tensor src, Int nRules, Int *rules,
Int groups) {
at::Tensor rule_index_select(at::Tensor &src, Int nRules, const Int *rules,
Int groups) {
auto planes = src.size(1) / groups;
auto target = at::empty({groups, nRules, planes}, src.options());
auto s_ptr = src.data<T>();
......@@ -26,8 +26,8 @@ at::Tensor rule_index_select(at::Tensor src, Int nRules, Int *rules,
// groups x rows x planes -> rows x groups x planes
template <typename T>
void rule_index_add_(at::Tensor target, at::Tensor src, Int nRules, Int *rules,
Int groups) {
void rule_index_add_(at::Tensor &target, at::Tensor &src, Int nRules,
const Int *rules, Int groups) {
auto planes = target.size(1) / groups;
auto s_ptr = src.data<T>();
auto t_ptr = target.data<T>();
......@@ -44,13 +44,13 @@ void rule_index_add_(at::Tensor target, at::Tensor src, Int nRules, Int *rules,
template <typename T, Int Dimension>
double cpu_Convolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor weight,
/*float*/ at::Tensor bias) {
auto _rules =
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor &bias) {
const auto &_rules =
m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)});
......@@ -64,7 +64,7 @@ double cpu_Convolution_updateOutput(
auto ip = weight.size(2);
auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
flops += nRules * ip * op * groups;
......@@ -80,15 +80,15 @@ double cpu_Convolution_updateOutput(
template <typename T, Int Dimension>
void cpu_Convolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor &d_weight, /*float*/ at::Tensor &d_bias) {
auto _rules =
const auto &_rules =
m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
Int nActive = m.getNActive(inputSize);
d_input_features.resize_as_(input_features);
......@@ -98,7 +98,7 @@ void cpu_Convolution_backward(
if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
auto w = weight.select(0, i);
......@@ -116,13 +116,13 @@ void cpu_Convolution_backward(
template <typename T, Int Dimension>
double cpu_SubmanifoldConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor filterSize,
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &filterSize,
Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features,
/*float*/ at::Tensor weight,
/*float*/ at::Tensor bias) {
auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &weight,
/*float*/ at::Tensor &bias) {
const auto &_rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
Int nActive = m.getNActive(inputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)});
if (bias.numel() and nActive)
......@@ -135,7 +135,7 @@ double cpu_SubmanifoldConvolution_updateOutput(
auto ip = weight.size(2);
auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
flops += nRules * ip * op * groups;
......@@ -151,15 +151,15 @@ double cpu_SubmanifoldConvolution_updateOutput(
template <typename T, Int Dimension>
void cpu_SubmanifoldConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor filterSize,
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &filterSize,
Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight,
/*float*/ at::Tensor d_weight,
/*float*/ at::Tensor d_bias) {
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor &d_weight,
/*float*/ at::Tensor &d_bias) {
auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
const auto &_rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
Int nActive = m.getNActive(inputSize);
d_input_features.resize_as_(input_features);
d_input_features.zero_();
......@@ -168,7 +168,7 @@ void cpu_SubmanifoldConvolution_backward(
if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
auto w = weight.select(0, i);
......@@ -186,12 +186,12 @@ void cpu_SubmanifoldConvolution_backward(
template <typename T, Int Dimension>
double cpu_PermutohedralSubmanifoldConvolution_updateOutput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features,
/*float*/ at::Tensor weight,
/*float*/ at::Tensor bias) {
auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
/*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &weight,
/*float*/ at::Tensor &bias) {
const auto &_rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
Int nActive = m.getNActive(inputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)});
if (bias.numel() and nActive)
......@@ -204,7 +204,7 @@ double cpu_PermutohedralSubmanifoldConvolution_updateOutput(
auto ip = weight.size(2);
auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
flops += nRules * ip * op * groups;
......@@ -220,14 +220,14 @@ double cpu_PermutohedralSubmanifoldConvolution_updateOutput(
template <typename T, Int Dimension>
void cpu_PermutohedralSubmanifoldConvolution_backward(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight,
/*float*/ at::Tensor d_weight,
/*float*/ at::Tensor d_bias) {
/*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor &d_weight,
/*float*/ at::Tensor &d_bias) {
auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
const auto &_rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
Int nActive = m.getNActive(inputSize);
d_input_features.resize_as_(input_features);
d_input_features.zero_();
......@@ -236,7 +236,7 @@ void cpu_PermutohedralSubmanifoldConvolution_backward(
if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
auto w = weight.select(0, i);
......@@ -254,15 +254,15 @@ void cpu_PermutohedralSubmanifoldConvolution_backward(
template <typename T, Int Dimension>
double cpu_FullConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &mIn,
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &mIn,
Metadata<Dimension> &mOut,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features,
/*float*/ at::Tensor weight,
/*float*/ at::Tensor bias) {
auto _rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &weight,
/*float*/ at::Tensor &bias) {
const auto &_rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
filterSize, filterStride, mOut);
Int nActive = mOut.getNActive(outputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)});
......@@ -276,7 +276,7 @@ double cpu_FullConvolution_updateOutput(
auto ip = weight.size(2);
auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
flops += nRules * ip * op * groups;
......@@ -292,17 +292,17 @@ double cpu_FullConvolution_updateOutput(
template <typename T, Int Dimension>
void cpu_FullConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &mIn,
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &mIn,
Metadata<Dimension> &mOut,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight,
/*float*/ at::Tensor d_weight,
/*float*/ at::Tensor d_bias) {
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor &d_weight,
/*float*/ at::Tensor &d_bias) {
auto _rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
const auto &_rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
filterSize, filterStride, mOut);
Int nActive = mOut.getNActive(inputSize);
d_input_features.resize_as_(input_features);
......@@ -312,7 +312,7 @@ void cpu_FullConvolution_backward(
if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
auto w = weight.select(0, i);
......@@ -330,13 +330,13 @@ void cpu_FullConvolution_backward(
template <typename T, Int Dimension>
double cpu_RandomizedStrideConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor weight,
/*float*/ at::Tensor bias) {
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor &bias) {
const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
filterStride, true);
Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)});
......@@ -350,7 +350,7 @@ double cpu_RandomizedStrideConvolution_updateOutput(
auto ip = weight.size(2);
auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
flops += nRules * ip * op * groups;
......@@ -366,15 +366,15 @@ double cpu_RandomizedStrideConvolution_updateOutput(
template <typename T, Int Dimension>
void cpu_RandomizedStrideConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor &d_weight, /*float*/ at::Tensor &d_bias) {
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
filterStride, true);
Int nActive = m.getNActive(inputSize);
d_input_features.resize_as_(input_features);
......@@ -384,7 +384,7 @@ void cpu_RandomizedStrideConvolution_backward(
if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
auto w = weight.select(0, i);
......
......@@ -6,13 +6,13 @@
template <typename T, Int Dimension>
double cpu_Deconvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features, /*float*/ at::Tensor weight,
/*float*/ at::Tensor bias) {
auto _rules =
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor &bias) {
const auto &_rules =
m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, weight.size(1) * weight.size(3)});
......@@ -26,7 +26,7 @@ double cpu_Deconvolution_updateOutput(
auto ip = weight.size(2);
auto op = weight.size(3);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
flops += nRules * ip * op * groups;
......@@ -42,15 +42,15 @@ double cpu_Deconvolution_updateOutput(
template <typename T, Int Dimension>
void cpu_Deconvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor weight,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &weight,
/*float*/ at::Tensor &d_weight, /*float*/ at::Tensor &d_bias) {
auto _rules =
const auto &_rules =
m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
Int nActive = m.getNActive(inputSize);
d_input_features.resize_as_(input_features);
......@@ -60,7 +60,7 @@ void cpu_Deconvolution_backward(
if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false);
for (Int i = 0; i < (Int)_rules.size(); ++i) {
auto r = _rules[i];
const auto &r = _rules[i];
Int nRules = r.size() / 2;
if (nRules) {
auto w = weight.select(0, i);
......
......@@ -48,10 +48,10 @@ void InputLayer_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension>
void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
/*long*/ at::Tensor spatialSize,
/*long*/ at::Tensor input_coords,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features,
/*long*/ at::Tensor &spatialSize,
/*long*/ at::Tensor &input_coords,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features,
long batchSize, long mode) {
m.inputLayer(spatialSize, input_coords, batchSize, mode);
......@@ -72,8 +72,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
}
template <typename T, Int Dimension>
void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features) {
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features) {
auto &rules = m.inputLayerRuleBook;
auto nPlanes = d_output_features.size(1);
......@@ -94,8 +94,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
template <typename T, Int Dimension>
void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features) {
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features) {
auto &rules = m.inputLayerRuleBook;
auto nPlanes = input_features.size(1);
......@@ -115,8 +115,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
}
template <typename T, Int Dimension>
void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features) {
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features) {
auto &rules = m.inputLayerRuleBook;
auto nPlanes = d_output_features.size(1);
......@@ -137,10 +137,10 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
template <typename T, Int Dimension>
void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
/*long*/ at::Tensor spatialSize,
/*long*/ at::Tensor input_coords,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features,
/*long*/ at::Tensor &spatialSize,
/*long*/ at::Tensor &input_coords,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features,
long mode) {
m.blLayer(spatialSize, input_coords, mode);
......@@ -162,8 +162,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
}
template <typename T, Int Dimension>
void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features) {
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features) {
auto &rules = m.blLayerRuleBook;
auto nPlanes = d_output_features.size(1);
......@@ -186,8 +186,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
template <typename T, Int Dimension>
void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features) {
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features) {
auto &rules = m.blLayerRuleBook;
auto nPlanes = input_features.size(1);
......@@ -207,9 +207,10 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
}
}
template <typename T, Int Dimension>
void cpu_BLOutputLayer_updateGradInput(Metadata<Dimension> &m,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features) {
void cpu_BLOutputLayer_updateGradInput(
Metadata<Dimension> &m,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features) {
auto &rules = m.blLayerRuleBook;
auto nPlanes = d_output_features.size(2);
......
......@@ -5,8 +5,9 @@
// LICENSE file in the root directory of this source tree.
template <typename T>
void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features, T alpha) {
void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features,
T alpha) {
output_features.resize_as_(input_features);
auto iF = input_features.data<T>();
auto oF = output_features.data<T>();
......@@ -19,9 +20,9 @@ void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor input_features,
}
}
template <typename T>
void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features,
void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features,
T alpha) {
d_input_features.resize_as_(d_output_features);
auto iF = input_features.data<T>();
......
......@@ -6,7 +6,7 @@
template <typename T>
void MaxPooling_ForwardPass(T *input_features, T *output_features, Int nPlanes,
Int input_stride, Int output_stride, Int *rules,
Int input_stride, Int output_stride, const Int *rules,
Int nHot) {
Int outSite;
#pragma omp parallel for private(outSite)
......@@ -22,7 +22,7 @@ template <typename T>
void MaxPooling_BackwardPass(T *input_features, T *d_input_features,
T *output_features, T *d_output_features,
Int nPlanes, Int input_stride, Int output_stride,
Int *rules, Int nHot) {
const Int *rules, Int nHot) {
Int outSite;
#pragma omp parallel for private(outSite)
for (outSite = 0; outSite < nHot; outSite++) {
......@@ -36,14 +36,14 @@ void MaxPooling_BackwardPass(T *input_features, T *d_input_features,
template <typename T, Int Dimension>
void cpu_MaxPooling_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features, long nFeaturesToDrop) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules =
const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
......@@ -60,15 +60,16 @@ void cpu_MaxPooling_updateOutput(
}
template <typename T, Int Dimension>
void cpu_MaxPooling_updateGradInput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor output_features,
/*float*/ at::Tensor d_output_features, long nFeaturesToDrop) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &d_output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules =
const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
d_input_features.resize_as_(input_features);
d_input_features.zero_();
......@@ -87,14 +88,14 @@ void cpu_MaxPooling_updateGradInput(
}
template <typename T, Int Dimension>
void cpu_RandomizedStrideMaxPooling_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features, long nFeaturesToDrop) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
poolStride, true);
Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
......@@ -111,15 +112,16 @@ void cpu_RandomizedStrideMaxPooling_updateOutput(
}
template <typename T, Int Dimension>
void cpu_RandomizedStrideMaxPooling_updateGradInput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features, /*float*/ at::Tensor output_features,
/*float*/ at::Tensor d_output_features, long nFeaturesToDrop) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &d_output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
poolStride, true);
d_input_features.resize_as_(input_features);
d_input_features.zero_();
......
......@@ -5,10 +5,10 @@
// LICENSE file in the root directory of this source tree.
template <typename T>
double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features,
/*float*/ at::Tensor weight,
/*float*/ at::Tensor bias) {
double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features,
/*float*/ at::Tensor &weight,
/*float*/ at::Tensor &bias) {
auto nActive = input_features.size(0);
auto input_nPlanes = weight.size(0);
auto output_nPlanes = weight.size(1);
......@@ -23,9 +23,9 @@ double cpu_NetworkInNetwork_updateOutput(/*float*/ at::Tensor input_features,
}
template <typename T>
void cpu_NetworkInNetwork_updateGradInput(
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features,
/*float*/ at::Tensor weight) {
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features,
/*float*/ at::Tensor &weight) {
int nActive = d_output_features.size(0);
d_input_features.resize_({nActive, weight.size(0)});
......@@ -35,9 +35,9 @@ void cpu_NetworkInNetwork_updateGradInput(
}
template <typename T>
void cpu_NetworkInNetwork_accGradParameters(
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_output_features,
/*float*/ at::Tensor d_weight, /*float*/ at::Tensor d_bias) {
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_output_features,
/*float*/ at::Tensor &d_weight, /*float*/ at::Tensor &d_bias) {
auto nActive = input_features.size(0);
if (nActive and d_bias.numel())
at::sum_out(d_bias, d_output_features, {0}, false);
......
......@@ -6,7 +6,7 @@
template <typename T>
void SparseToDense_ForwardPass(T *input_features, T *output_features,
Int nPlanes, Int spatialVolume, Int *rules,
Int nPlanes, Int spatialVolume, const Int *rules,
int nHot) {
Int outSite;
#pragma omp parallel for private(outSite)
......@@ -20,7 +20,7 @@ void SparseToDense_ForwardPass(T *input_features, T *output_features,
template <typename T>
void SparseToDense_BackwardPass(T *d_input_features, T *d_output_features,
Int nPlanes, Int spatialVolume, Int *rules,
Int nPlanes, Int spatialVolume, const Int *rules,
int nHot) {
Int outSite;
#pragma omp parallel for private(outSite)
......@@ -34,9 +34,9 @@ void SparseToDense_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension>
void cpu_SparseToDense_updateOutput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features, long nPlanes) {
/*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features, long nPlanes) {
{
std::array<long, Dimension + 2> sz;
......@@ -49,7 +49,7 @@ void cpu_SparseToDense_updateOutput(
output_features.zero_();
}
if (input_features.ndimension() == 2) {
auto _rules = m.getSparseToDenseRuleBook(inputSize, true);
const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true);
Int _nPlanes = input_features.size(1);
auto iF = input_features.data<T>();
auto oF = output_features.data<T>();
......@@ -64,15 +64,15 @@ void cpu_SparseToDense_updateOutput(
}
template <typename T, Int Dimension>
void cpu_SparseToDense_updateGradInput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features) {
/*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features) {
d_input_features.resize_as_(input_features);
d_input_features.zero_();
if (input_features.ndimension() == 2) {
auto _rules = m.getSparseToDenseRuleBook(inputSize, true);
const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true);
long spatialVolume = inputSize.prod().data<long>()[0];
Int _nPlanes = d_input_features.size(1);
auto diF = d_input_features.data<T>();
......
......@@ -6,7 +6,7 @@
template <typename T>
void UnPooling_ForwardPass(T *input_features, T *output_features, Int nPlanes,
Int input_stride, Int output_stride, Int *rules,
Int input_stride, Int output_stride, const Int *rules,
Int nHot) {
Int outSite;
#pragma omp parallel for private(outSite)
......@@ -20,7 +20,7 @@ void UnPooling_ForwardPass(T *input_features, T *output_features, Int nPlanes,
template <typename T>
void UnPooling_BackwardPass(T *d_input_features, T *d_output_features,
Int nPlanes, Int input_stride, Int output_stride,
Int *rules, Int nHot) {
const Int *rules, Int nHot) {
Int outSite;
#pragma omp parallel for private(outSite)
for (outSite = 0; outSite < nHot; outSite++) {
......@@ -33,14 +33,14 @@ void UnPooling_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension>
void cpu_UnPooling_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features, long nFeaturesToDrop) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules =
const auto &_rules =
m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
......@@ -57,14 +57,14 @@ void cpu_UnPooling_updateOutput(
}
template <typename T, Int Dimension>
void cpu_UnPooling_updateGradInput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, long nFeaturesToDrop) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*float*/ at::Tensor &d_input_features,
/*float*/ at::Tensor &d_output_features, long nFeaturesToDrop) {
Int nPlanes = d_input_features.size(1) - nFeaturesToDrop;
auto _rules =
const auto &_rules =
m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
auto diF = d_input_features.data<T>() + nFeaturesToDrop;
......
......@@ -7,21 +7,21 @@
template <typename T>
void ActivePooling_ForwardPass(T *input_features, T *output_features,
Int batchSize, Int maxActive, Int nPlanes,
Int *rules, bool average);
const Int *rules, bool average);
template <typename T>
void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int batchSize, Int maxActive, Int nPlanes,
Int *rules, bool average);
const Int *rules, bool average);
template <typename T, Int Dimension>
void cuda_ActivePooling_updateOutput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor output_features, bool average) {
/*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &output_features, bool average) {
Int nPlanes = input_features.size(1);
auto _rules = m.getActivePoolingRuleBook(inputSize);
const auto &_rules = m.getActivePoolingRuleBook(inputSize);
Int batchSize = _rules[1][0];
Int maxActive = _rules[1][1];
output_features.resize_({batchSize, nPlanes});
......@@ -34,13 +34,13 @@ void cuda_ActivePooling_updateOutput(
}
template <typename T, Int Dimension>
void cuda_ActivePooling_updateGradInput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor d_input_features,
/*cuda float*/ at::Tensor d_output_features, bool average) {
/*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor &d_output_features, bool average) {
Int nPlanes = input_features.size(1);
auto _rules = m.getActivePoolingRuleBook(inputSize);
const auto &_rules = m.getActivePoolingRuleBook(inputSize);
Int batchSize = _rules[1][0];
Int maxActive = _rules[1][1];
d_input_features.resize_as_(input_features);
......
......@@ -6,10 +6,10 @@
template <typename T>
__global__ void ActivePooling_fp(T *input_features, T *output_features,
Int maxActive, Int nPlanes, Int *rules,
Int maxActive, Int nPlanes, const Int *rules,
bool average) {
T *out = &output_features[blockIdx.x * nPlanes];
Int *r = &rules[blockIdx.x * (maxActive + 1)];
const Int *r = &rules[blockIdx.x * (maxActive + 1)];
Int nActive = *r++;
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
while (nActive-- > 0) {
......@@ -21,7 +21,7 @@ __global__ void ActivePooling_fp(T *input_features, T *output_features,
template <typename T>
void ActivePooling_ForwardPass(T *input_features, T *output_features,
Int batchSize, Int maxActive, Int nPlanes,
Int *rules, bool average) {
const Int *rules, bool average) {
auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT));
Int *rb = rulesBuffer.data<Int>();
......@@ -41,10 +41,10 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features,
}
template <typename T>
__global__ void ActivePooling_bp(T *d_input_features, T *d_output_features,
Int maxActive, Int nPlanes, Int *rules,
Int maxActive, Int nPlanes, const Int *rules,
bool average) {
T *out = &d_output_features[blockIdx.x * nPlanes];
Int *r = &rules[blockIdx.x * (maxActive + 1)];
const Int *r = &rules[blockIdx.x * (maxActive + 1)];
Int nActive = *r++;
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
while (nActive-- > 0) {
......@@ -57,7 +57,7 @@ __global__ void ActivePooling_bp(T *d_input_features, T *d_output_features,
template <typename T>
void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int batchSize, Int maxActive, Int nPlanes,
Int *rules, bool average) {
const Int *rules, bool average) {
auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT));
Int *rb = rulesBuffer.data<Int>();
Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1));
......
......@@ -24,11 +24,11 @@ void dAffineReluTrivialConvolution_backward_dW(
template <typename T>
double cuda_AffineReluTrivialConvolution_updateOutput(
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor output_features,
/*cuda float*/ at::Tensor affineWeight,
/*cuda float*/ at::Tensor affineBias,
/*cuda float*/ at::Tensor convWeight) {
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &output_features,
/*cuda float*/ at::Tensor &affineWeight,
/*cuda float*/ at::Tensor &affineBias,
/*cuda float*/ at::Tensor &convWeight) {
output_features.resize_({input_features.size(0), convWeight.size(1)});
dAffineReluTrivialConvolution_forward<T>(
......@@ -42,15 +42,15 @@ double cuda_AffineReluTrivialConvolution_updateOutput(
template <typename T>
void cuda_AffineReluTrivialConvolution_backward(
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor d_input_features,
/*cuda float*/ at::Tensor d_output_features,
/*cuda float*/ at::Tensor affineWeight,
/*cuda float*/ at::Tensor d_affineWeight,
/*cuda float*/ at::Tensor affineBias,
/*cuda float*/ at::Tensor d_affineBias,
/*cuda float*/ at::Tensor convWeight,
/*cuda float*/ at::Tensor d_convWeight, bool additiveGrad) {
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor &affineWeight,
/*cuda float*/ at::Tensor &d_affineWeight,
/*cuda float*/ at::Tensor &affineBias,
/*cuda float*/ at::Tensor &d_affineBias,
/*cuda float*/ at::Tensor &convWeight,
/*cuda float*/ at::Tensor &d_convWeight, bool additiveGrad) {
d_input_features.resize_as_(input_features);
dAffineReluTrivialConvolution_backward_dW<T>(
......
......@@ -18,14 +18,14 @@ void cuda_AveragePooling_BackwardPass(T *d_input_features, T *d_output_features,
template <typename T, Int Dimension>
void cuda_AveragePooling_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor output_features, long nFeaturesToDrop) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules =
const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
Int nActive = m.getNActive(outputSize);
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
......@@ -40,15 +40,15 @@ void cuda_AveragePooling_updateOutput(
template <typename T, Int Dimension>
void cuda_AveragePooling_updateGradInput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor poolSize,
/*long*/ at::Tensor poolStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor d_input_features,
/*cuda float*/ at::Tensor d_output_features, long nFeaturesToDrop) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &poolSize,
/*long*/ at::Tensor &poolStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor &d_output_features, long nFeaturesToDrop) {
Int nPlanes = input_features.size(1) - nFeaturesToDrop;
auto _rules =
const auto &_rules =
m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
d_input_features.resize_as_(input_features);
d_input_features.zero_();
......@@ -70,8 +70,8 @@ void cuda_CopyFeaturesHelper_BackwardPass(T *d_input_features,
Int nPlanes, Int nHot);
template <typename T>
void cuda_CopyFeaturesHelper_updateOutput(at::Tensor rules, at::Tensor context,
at::Tensor Context) {
void cuda_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context,
at::Tensor &Context) {
Int nPlanes = context.size(1);
Int nHot = rules.size(0) / 2;
......@@ -80,9 +80,9 @@ void cuda_CopyFeaturesHelper_updateOutput(at::Tensor rules, at::Tensor context,
}
template <typename T>
void cuda_CopyFeaturesHelper_updateGradInput(at::Tensor rules,
at::Tensor dcontext,
at::Tensor dContext) {
void cuda_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
at::Tensor &dcontext,
at::Tensor &dContext) {
Int nPlanes = dcontext.size(1);
Int nHot = rules.size(0) / 2;
......
......@@ -13,8 +13,8 @@ void bmd_b(T *input_features, T *d_input_features, T *d_output_features,
template <typename T>
void cuda_BatchwiseMultiplicativeDropout_updateOutput(
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor noise,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &noise,
T alpha) {
output_features.resize_as_(input_features);
auto nActive = input_features.size(0);
......@@ -25,10 +25,10 @@ void cuda_BatchwiseMultiplicativeDropout_updateOutput(
template <typename T>
void cuda_BatchwiseMultiplicativeDropout_updateGradInput(
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor d_input_features,
/*cuda float*/ at::Tensor d_output_features,
/*cuda float*/ at::Tensor noise, T alpha) {
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor &noise, T alpha) {
d_input_features.resize_as_(d_output_features);
auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1);
......
......@@ -22,14 +22,14 @@ void dConvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
template <typename T, Int Dimension>
double cuda_Convolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor weight,
/*cuda float*/ at::Tensor bias) {
auto _rules =
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &weight,
/*cuda float*/ at::Tensor &bias) {
const auto &_rules =
m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
Int nActiveOut = m.getNActive(outputSize);
Int nGroups = weight.size(1);
......@@ -56,16 +56,16 @@ double cuda_Convolution_updateOutput(
template <typename T, Int Dimension>
void cuda_Convolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor d_input_features,
/*cuda float*/ at::Tensor d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight,
/*cuda float*/ at::Tensor d_bias) {
auto _rules =
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor &d_bias) {
const auto &_rules =
m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
Int nActiveIn = m.getNActive(inputSize);
Int nActiveOut = m.getNActive(outputSize);
......@@ -94,13 +94,13 @@ void cuda_Convolution_backward(
template <typename T, Int Dimension>
double cuda_SubmanifoldConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor filterSize,
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &filterSize,
Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor weight,
/*cuda float*/ at::Tensor bias) {
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &weight,
/*cuda float*/ at::Tensor &bias) {
auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
const auto &_rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
Int nActive = m.getNActive(inputSize);
Int nGroups = weight.size(1);
Int ip = weight.size(2);
......@@ -126,15 +126,15 @@ double cuda_SubmanifoldConvolution_updateOutput(
template <typename T, Int Dimension>
void cuda_SubmanifoldConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor filterSize,
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &filterSize,
Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor d_input_features,
/*cuda float*/ at::Tensor d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight,
/*cuda float*/ at::Tensor d_bias) {
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor &d_bias) {
auto _rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
const auto &_rules = m.getSubmanifoldRuleBook(inputSize, filterSize, true);
Int nActive = m.getNActive(inputSize);
Int nGroups = weight.size(1);
Int ip = weight.size(2);
......@@ -161,12 +161,12 @@ void cuda_SubmanifoldConvolution_backward(
template <typename T, Int Dimension>
double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor weight,
/*cuda float*/ at::Tensor bias) {
/*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &weight,
/*cuda float*/ at::Tensor &bias) {
auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
const auto &_rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
Int nActive = m.getNActive(inputSize);
Int nGroups = weight.size(1);
Int ip = weight.size(2);
......@@ -192,14 +192,14 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
template <typename T, Int Dimension>
void cuda_PermutohedralSubmanifoldConvolution_backward(
/*long*/ at::Tensor inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor d_input_features,
/*cuda float*/ at::Tensor d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight,
/*cuda float*/ at::Tensor d_bias) {
auto _rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
/*long*/ at::Tensor &inputSize, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor &d_bias) {
const auto &_rules = m.getPermutohedralSubmanifoldRuleBook(inputSize, true);
Int nActive = m.getNActive(inputSize);
Int nGroups = weight.size(1);
Int ip = weight.size(2);
......@@ -226,15 +226,15 @@ void cuda_PermutohedralSubmanifoldConvolution_backward(
template <typename T, Int Dimension>
double cuda_FullConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &mIn,
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &mIn,
Metadata<Dimension> &mOut,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor weight,
/*cuda float*/ at::Tensor bias) {
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &weight,
/*cuda float*/ at::Tensor &bias) {
auto _rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
const auto &_rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
filterSize, filterStride, mOut);
Int nActiveOut = mOut.getNActive(outputSize);
Int nGroups = weight.size(1);
......@@ -261,17 +261,17 @@ double cuda_FullConvolution_updateOutput(
template <typename T, Int Dimension>
void cuda_FullConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &mIn,
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &mIn,
Metadata<Dimension> &mOut,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor d_input_features,
/*cuda float*/ at::Tensor d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight,
/*cuda float*/ at::Tensor d_bias) {
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor &d_bias) {
auto _rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
const auto &_rules = mIn.getFullConvolutionRuleBook(inputSize, outputSize,
filterSize, filterStride, mOut);
Int nActiveIn = mIn.getNActive(inputSize);
Int nActiveOut = mOut.getNActive(outputSize);
......@@ -299,14 +299,14 @@ void cuda_FullConvolution_backward(
}
template <typename T, Int Dimension>
double cuda_RandomizedStrideConvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor bias) {
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &output_features,
/*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &bias) {
const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
filterStride, true);
Int nActiveOut = m.getNActive(outputSize);
Int nGroups = weight.size(1);
......@@ -333,16 +333,16 @@ double cuda_RandomizedStrideConvolution_updateOutput(
template <typename T, Int Dimension>
void cuda_RandomizedStrideConvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor d_input_features,
/*cuda float*/ at::Tensor d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight,
/*cuda float*/ at::Tensor d_bias) {
auto _rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor &d_bias) {
const auto &_rules = m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
filterStride, true);
Int nActiveIn = m.getNActive(inputSize);
Int nActiveOut = m.getNActive(outputSize);
......
......@@ -19,14 +19,14 @@ void dDeconvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
template <typename T, Int Dimension>
double cuda_Deconvolution_updateOutput(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor output_features, /*cuda float*/ at::Tensor weight,
/*cuda float*/ at::Tensor bias) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &output_features, /*cuda float*/ at::Tensor &weight,
/*cuda float*/ at::Tensor &bias) {
auto _rules =
const auto &_rules =
m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
Int nActiveOut = m.getNActive(outputSize);
Int nGroups = weight.size(1);
......@@ -53,16 +53,16 @@ double cuda_Deconvolution_updateOutput(
template <typename T, Int Dimension>
void cuda_Deconvolution_backward(
/*long*/ at::Tensor inputSize, /*long*/ at::Tensor outputSize,
/*long*/ at::Tensor filterSize,
/*long*/ at::Tensor filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor input_features,
/*cuda float*/ at::Tensor d_input_features,
/*cuda float*/ at::Tensor d_output_features,
/*cuda float*/ at::Tensor weight, /*cuda float*/ at::Tensor d_weight,
/*cuda float*/ at::Tensor d_bias) {
/*long*/ at::Tensor &inputSize, /*long*/ at::Tensor &outputSize,
/*long*/ at::Tensor &filterSize,
/*long*/ at::Tensor &filterStride, Metadata<Dimension> &m,
/*cuda float*/ at::Tensor &input_features,
/*cuda float*/ at::Tensor &d_input_features,
/*cuda float*/ at::Tensor &d_output_features,
/*cuda float*/ at::Tensor &weight, /*cuda float*/ at::Tensor &d_weight,
/*cuda float*/ at::Tensor &d_bias) {
auto _rules =
const auto &_rules =
m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
Int nActiveIn = m.getNActive(inputSize);
Int nActiveOut = m.getNActive(outputSize);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment