Commit 2ad7baf8 authored by Benjamin Thomas Graham
Browse files

detach; batch_size; data_ptr

parent 16e4df34
...@@ -55,8 +55,8 @@ void cpu_ActivePooling_updateOutput( ...@@ -55,8 +55,8 @@ void cpu_ActivePooling_updateOutput(
output_features.resize_({batchSize, nPlanes}); output_features.resize_({batchSize, nPlanes});
output_features.zero_(); output_features.zero_();
ActivePooling_ForwardPass<T>(input_features.data<T>(), ActivePooling_ForwardPass<T>(input_features.data_ptr<T>(),
output_features.data<T>(), batchSize, maxActive, output_features.data_ptr<T>(), batchSize, maxActive,
nPlanes, _rules, average); nPlanes, _rules, average);
} }
...@@ -74,7 +74,7 @@ void cpu_ActivePooling_updateGradInput( ...@@ -74,7 +74,7 @@ void cpu_ActivePooling_updateGradInput(
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
ActivePooling_BackwardPass<T>(d_input_features.data<T>(), ActivePooling_BackwardPass<T>(d_input_features.data_ptr<T>(),
d_output_features.data<T>(), batchSize, d_output_features.data_ptr<T>(), batchSize,
maxActive, nPlanes, _rules, average); maxActive, nPlanes, _rules, average);
} }
...@@ -74,9 +74,9 @@ double cpu_AffineReluTrivialConvolution_updateOutput( ...@@ -74,9 +74,9 @@ double cpu_AffineReluTrivialConvolution_updateOutput(
/*float*/ at::Tensor &affineBias, /*float*/ at::Tensor &convWeight) { /*float*/ at::Tensor &affineBias, /*float*/ at::Tensor &convWeight) {
output_features.resize_({input_features.size(0), convWeight.size(1)}); output_features.resize_({input_features.size(0), convWeight.size(1)});
AffineReluTrivialConvolution_ForwardPass( AffineReluTrivialConvolution_ForwardPass(
input_features.data<T>(), convWeight.size(0), input_features.stride(0), input_features.data_ptr<T>(), convWeight.size(0), input_features.stride(0),
output_features.data<T>(), convWeight.size(1), output_features.stride(0), output_features.data_ptr<T>(), convWeight.size(1), output_features.stride(0),
affineWeight.data<T>(), affineBias.data<T>(), convWeight.data<T>(), affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), convWeight.data_ptr<T>(),
input_features.size(0)); input_features.size(0));
return input_features.size(0) * input_features.size(1) * return input_features.size(0) * input_features.size(1) *
output_features.size(1); output_features.size(1);
...@@ -94,10 +94,10 @@ void cpu_AffineReluTrivialConvolution_backward( ...@@ -94,10 +94,10 @@ void cpu_AffineReluTrivialConvolution_backward(
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
AffineReluTrivialConvolution_BackwardPass( AffineReluTrivialConvolution_BackwardPass(
input_features.data<T>(), d_input_features.data<T>(), convWeight.size(0), input_features.data_ptr<T>(), d_input_features.data_ptr<T>(), convWeight.size(0),
input_features.stride(0), d_output_features.data<T>(), convWeight.size(1), input_features.stride(0), d_output_features.data_ptr<T>(), convWeight.size(1),
d_output_features.stride(0), affineWeight.data<T>(), d_output_features.stride(0), affineWeight.data_ptr<T>(),
d_affineWeight.data<T>(), affineBias.data<T>(), d_affineBias.data<T>(), d_affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), d_affineBias.data_ptr<T>(),
convWeight.data<T>(), d_convWeight.data<T>(), input_features.size(0), convWeight.data_ptr<T>(), d_convWeight.data_ptr<T>(), input_features.size(0),
additiveGrad); additiveGrad);
} }
...@@ -49,8 +49,8 @@ void cpu_AveragePooling_updateOutput( ...@@ -49,8 +49,8 @@ void cpu_AveragePooling_updateOutput(
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop}); output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
output_features.zero_(); output_features.zero_();
auto iF = input_features.data<T>() + nFeaturesToDrop; auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
for (const auto &r : _rules) { for (const auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
...@@ -74,8 +74,8 @@ void cpu_AveragePooling_updateGradInput( ...@@ -74,8 +74,8 @@ void cpu_AveragePooling_updateGradInput(
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
auto diF = d_input_features.data<T>() + nFeaturesToDrop; auto diF = d_input_features.data_ptr<T>() + nFeaturesToDrop;
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
for (const auto &r : _rules) { for (const auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
...@@ -90,9 +90,9 @@ void cpu_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context, ...@@ -90,9 +90,9 @@ void cpu_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context,
at::Tensor &Context) { at::Tensor &Context) {
Int nHot = rules.size(0) / 2; Int nHot = rules.size(0) / 2;
Int nPlanes = context.size(1); Int nPlanes = context.size(1);
auto iF = context.data<T>(); auto iF = context.data_ptr<T>();
auto oF = Context.data<T>(); auto oF = Context.data_ptr<T>();
auto r = rules.data<Int>(); auto r = rules.data_ptr<Int>();
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
for (outSite = 0; outSite < nHot; outSite++) { for (outSite = 0; outSite < nHot; outSite++) {
...@@ -107,9 +107,9 @@ void cpu_CopyFeaturesHelper_updateGradInput(at::Tensor &rules, ...@@ -107,9 +107,9 @@ void cpu_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
at::Tensor &dContext) { at::Tensor &dContext) {
Int nHot = rules.size(0) / 2; Int nHot = rules.size(0) / 2;
Int nPlanes = dcontext.size(1); Int nPlanes = dcontext.size(1);
auto iF = dcontext.data<T>(); auto iF = dcontext.data_ptr<T>();
auto oF = dContext.data<T>(); auto oF = dContext.data_ptr<T>();
auto r = rules.data<Int>(); auto r = rules.data_ptr<Int>();
Int outSite; Int outSite;
#pragma omp parallel for private(outSite) #pragma omp parallel for private(outSite)
for (outSite = 0; outSite < nHot; outSite++) { for (outSite = 0; outSite < nHot; outSite++) {
......
...@@ -121,9 +121,9 @@ void cpu_BatchNormalization_updateOutput( ...@@ -121,9 +121,9 @@ void cpu_BatchNormalization_updateOutput(
auto input_stride = input_features.stride(0); auto input_stride = input_features.stride(0);
auto output_stride = output_features.stride(0); auto output_stride = output_features.stride(0);
BatchNormalization_ForwardPass<T>( BatchNormalization_ForwardPass<T>(
input_features.data<T>(), output_features.data<T>(), nPlanes, input_features.data_ptr<T>(), output_features.data_ptr<T>(), nPlanes,
input_stride, output_stride, nActive, saveMean.data<T>(), input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(), saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), eps, OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), eps,
momentum, train, leakiness); momentum, train, leakiness);
} }
...@@ -147,10 +147,10 @@ void cpu_BatchNormalization_backward( ...@@ -147,10 +147,10 @@ void cpu_BatchNormalization_backward(
auto input_stride = input_features.stride(0); auto input_stride = input_features.stride(0);
auto output_stride = output_features.stride(0); auto output_stride = output_features.stride(0);
BatchNormalization_BackwardPass<T>( BatchNormalization_BackwardPass<T>(
input_features.data<T>(), d_input_features.data<T>(), input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
output_features.data<T>(), d_output_features.data<T>(), nPlanes, output_features.data_ptr<T>(), d_output_features.data_ptr<T>(), nPlanes,
input_stride, output_stride, nActive, saveMean.data<T>(), input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(), saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), OptionalTensorData<T>(weight), OptionalTensorData<T>(bias),
OptionalTensorData<T>(d_weight), OptionalTensorData<T>(d_bias), OptionalTensorData<T>(d_weight), OptionalTensorData<T>(d_bias),
leakiness); leakiness);
......
...@@ -11,9 +11,9 @@ void cpu_BatchwiseMultiplicativeDropout_updateOutput( ...@@ -11,9 +11,9 @@ void cpu_BatchwiseMultiplicativeDropout_updateOutput(
output_features.resize_as_(input_features); output_features.resize_as_(input_features);
auto nActive = input_features.size(0); auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1); auto nPlanes = input_features.size(1);
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
auto nz = noise.data<T>(); auto nz = noise.data_ptr<T>();
for (Int row = 0; row < nActive; row++) for (Int row = 0; row < nActive; row++)
for (Int plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes; for (Int plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes;
plane++, o++, i++) plane++, o++, i++)
...@@ -28,10 +28,10 @@ void cpu_BatchwiseMultiplicativeDropout_updateGradInput( ...@@ -28,10 +28,10 @@ void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
d_input_features.resize_as_(d_output_features); d_input_features.resize_as_(d_output_features);
auto nActive = input_features.size(0); auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1); auto nPlanes = input_features.size(1);
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
auto nz = noise.data<T>(); auto nz = noise.data_ptr<T>();
for (Int row = 0; row < nActive; row++) for (Int row = 0; row < nActive; row++)
for (Int plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes; for (Int plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes;
plane++, o++, i++) plane++, o++, i++)
......
...@@ -10,8 +10,8 @@ at::Tensor rule_index_select(at::Tensor &src, Int nRules, const Int *rules, ...@@ -10,8 +10,8 @@ at::Tensor rule_index_select(at::Tensor &src, Int nRules, const Int *rules,
Int groups) { Int groups) {
auto planes = src.size(1) / groups; auto planes = src.size(1) / groups;
auto target = at::empty({groups, nRules, planes}, src.options()); auto target = at::empty({groups, nRules, planes}, src.options());
auto s_ptr = src.data<T>(); auto s_ptr = src.data_ptr<T>();
auto t_ptr = target.data<T>(); auto t_ptr = target.data_ptr<T>();
#pragma omp parallel for #pragma omp parallel for
for (Int i = 0; i < nRules; ++i) { for (Int i = 0; i < nRules; ++i) {
for (Int g = 0; g < groups; ++g) { for (Int g = 0; g < groups; ++g) {
...@@ -29,8 +29,8 @@ template <typename T> ...@@ -29,8 +29,8 @@ template <typename T>
void rule_index_add_(at::Tensor &target, at::Tensor &src, Int nRules, void rule_index_add_(at::Tensor &target, at::Tensor &src, Int nRules,
const Int *rules, Int groups) { const Int *rules, Int groups) {
auto planes = target.size(1) / groups; auto planes = target.size(1) / groups;
auto s_ptr = src.data<T>(); auto s_ptr = src.data_ptr<T>();
auto t_ptr = target.data<T>(); auto t_ptr = target.data_ptr<T>();
#pragma omp parallel for #pragma omp parallel for
for (Int i = 0; i < nRules; ++i) { for (Int i = 0; i < nRules; ++i) {
for (Int g = 0; g < groups; ++g) { for (Int g = 0; g < groups; ++g) {
......
...@@ -65,8 +65,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -65,8 +65,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
} else { } else {
output_features.resize_({*m.inputNActive, nPlanes}); output_features.resize_({*m.inputNActive, nPlanes});
output_features.zero_(); output_features.zero_();
InputLayer_ForwardPass<T>(input_features.data<T>(), InputLayer_ForwardPass<T>(input_features.data_ptr<T>(),
output_features.data<T>(), nRows, maxActive, output_features.data_ptr<T>(), nRows, maxActive,
nPlanes, &rules[1][0], mode == 4); nPlanes, &rules[1][0], mode == 4);
} }
} }
...@@ -86,8 +86,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m, ...@@ -86,8 +86,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
} else { } else {
d_input_features.resize_({rules[0][2], nPlanes}); d_input_features.resize_({rules[0][2], nPlanes});
d_input_features.zero_(); d_input_features.zero_();
InputLayer_BackwardPass<T>(d_input_features.data<T>(), InputLayer_BackwardPass<T>(d_input_features.data_ptr<T>(),
d_output_features.data<T>(), nRows, maxActive, d_output_features.data_ptr<T>(), nRows, maxActive,
nPlanes, &rules[1][0], mode == 4); nPlanes, &rules[1][0], mode == 4);
} }
} }
...@@ -108,8 +108,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -108,8 +108,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
} else { } else {
output_features.resize_({rules[0][2], nPlanes}); output_features.resize_({rules[0][2], nPlanes});
output_features.zero_(); output_features.zero_();
InputLayer_BackwardPass<T>(output_features.data<T>(), InputLayer_BackwardPass<T>(output_features.data_ptr<T>(),
input_features.data<T>(), nRows, maxActive, input_features.data_ptr<T>(), nRows, maxActive,
nPlanes, &rules[1][0], false); nPlanes, &rules[1][0], false);
} }
} }
...@@ -129,8 +129,8 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m, ...@@ -129,8 +129,8 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
} else { } else {
d_input_features.resize_({nRows, nPlanes}); d_input_features.resize_({nRows, nPlanes});
d_input_features.zero_(); d_input_features.zero_();
InputLayer_ForwardPass<T>(d_output_features.data<T>(), InputLayer_ForwardPass<T>(d_output_features.data_ptr<T>(),
d_input_features.data<T>(), nRows, maxActive, d_input_features.data_ptr<T>(), nRows, maxActive,
nPlanes, &rules[1][0], false); nPlanes, &rules[1][0], false);
} }
} }
...@@ -155,8 +155,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -155,8 +155,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
} else { } else {
output_features.resize_({*m.inputNActive, nPlanes}); output_features.resize_({*m.inputNActive, nPlanes});
output_features.zero_(); output_features.zero_();
InputLayer_ForwardPass<T>(input_features.data<T>(), InputLayer_ForwardPass<T>(input_features.data_ptr<T>(),
output_features.data<T>(), nRows, maxActive, output_features.data_ptr<T>(), nRows, maxActive,
nPlanes, &rules[1][0], mode == 4); nPlanes, &rules[1][0], mode == 4);
} }
} }
...@@ -178,8 +178,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m, ...@@ -178,8 +178,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
} else { } else {
d_input_features.resize_({rules[0][2], rules[0][3], nPlanes}); d_input_features.resize_({rules[0][2], rules[0][3], nPlanes});
d_input_features.zero_(); d_input_features.zero_();
InputLayer_BackwardPass<T>(d_input_features.data<T>(), InputLayer_BackwardPass<T>(d_input_features.data_ptr<T>(),
d_output_features.data<T>(), nRows, maxActive, d_output_features.data_ptr<T>(), nRows, maxActive,
nPlanes, &rules[1][0], mode == 4); nPlanes, &rules[1][0], mode == 4);
} }
} }
...@@ -201,8 +201,8 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -201,8 +201,8 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
} else { } else {
output_features.resize_({rules[0][2], rules[0][3], nPlanes}); output_features.resize_({rules[0][2], rules[0][3], nPlanes});
output_features.zero_(); output_features.zero_();
InputLayer_BackwardPass<T>(output_features.data<T>(), InputLayer_BackwardPass<T>(output_features.data_ptr<T>(),
input_features.data<T>(), nRows, maxActive, input_features.data_ptr<T>(), nRows, maxActive,
nPlanes, &rules[1][0], false); nPlanes, &rules[1][0], false);
} }
} }
...@@ -224,8 +224,8 @@ void cpu_BLOutputLayer_updateGradInput( ...@@ -224,8 +224,8 @@ void cpu_BLOutputLayer_updateGradInput(
} else { } else {
d_input_features.resize_({nRows, nPlanes}); d_input_features.resize_({nRows, nPlanes});
d_input_features.zero_(); d_input_features.zero_();
InputLayer_ForwardPass<T>(d_output_features.data<T>(), InputLayer_ForwardPass<T>(d_output_features.data_ptr<T>(),
d_input_features.data<T>(), nRows, maxActive, d_input_features.data_ptr<T>(), nRows, maxActive,
nPlanes, &rules[1][0], false); nPlanes, &rules[1][0], false);
} }
} }
...@@ -9,8 +9,8 @@ void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor &input_features, ...@@ -9,8 +9,8 @@ void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &output_features, /*float*/ at::Tensor &output_features,
T alpha) { T alpha) {
output_features.resize_as_(input_features); output_features.resize_as_(input_features);
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
auto n = input_features.numel(); auto n = input_features.numel();
for (Int i = 0; i < n; i++) { for (Int i = 0; i < n; i++) {
...@@ -25,9 +25,9 @@ void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor &input_features, ...@@ -25,9 +25,9 @@ void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor &input_features,
/*float*/ at::Tensor &d_output_features, /*float*/ at::Tensor &d_output_features,
T alpha) { T alpha) {
d_input_features.resize_as_(d_output_features); d_input_features.resize_as_(d_output_features);
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
auto n = d_input_features.numel(); auto n = d_input_features.numel();
for (Int i = 0; i < n; i++) { for (Int i = 0; i < n; i++) {
......
...@@ -49,8 +49,8 @@ void cpu_MaxPooling_updateOutput( ...@@ -49,8 +49,8 @@ void cpu_MaxPooling_updateOutput(
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop}); output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
output_features.zero_(); output_features.zero_();
auto iF = input_features.data<T>() + nFeaturesToDrop; auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
for (auto &r : _rules) { for (auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
...@@ -74,10 +74,10 @@ void cpu_MaxPooling_updateGradInput( ...@@ -74,10 +74,10 @@ void cpu_MaxPooling_updateGradInput(
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
for (auto &r : _rules) { for (auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
...@@ -101,8 +101,8 @@ void cpu_RandomizedStrideMaxPooling_updateOutput( ...@@ -101,8 +101,8 @@ void cpu_RandomizedStrideMaxPooling_updateOutput(
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop}); output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
output_features.zero_(); output_features.zero_();
auto iF = input_features.data<T>() + nFeaturesToDrop; auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
for (auto &r : _rules) { for (auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
...@@ -126,10 +126,10 @@ void cpu_RandomizedStrideMaxPooling_updateGradInput( ...@@ -126,10 +126,10 @@ void cpu_RandomizedStrideMaxPooling_updateGradInput(
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
for (auto &r : _rules) { for (auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
......
...@@ -42,7 +42,7 @@ void cpu_SparseToDense_updateOutput( ...@@ -42,7 +42,7 @@ void cpu_SparseToDense_updateOutput(
std::array<long, Dimension + 2> sz; std::array<long, Dimension + 2> sz;
sz[0] = m.grids.begin()->second.size(); // batch size sz[0] = m.grids.begin()->second.size(); // batch size
sz[1] = nPlanes; sz[1] = nPlanes;
long *in_sz = inputSize.data<long>(); long *in_sz = inputSize.data_ptr<long>();
for (Int i = 0; i < Dimension; ++i) for (Int i = 0; i < Dimension; ++i)
sz[i + 2] = in_sz[i]; sz[i + 2] = in_sz[i];
output_features.resize_(sz); output_features.resize_(sz);
...@@ -51,9 +51,9 @@ void cpu_SparseToDense_updateOutput( ...@@ -51,9 +51,9 @@ void cpu_SparseToDense_updateOutput(
if (input_features.ndimension() == 2) { if (input_features.ndimension() == 2) {
const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true); const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true);
Int _nPlanes = input_features.size(1); Int _nPlanes = input_features.size(1);
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
long spatialVolume = inputSize.prod().data<long>()[0]; long spatialVolume = inputSize.prod().data_ptr<long>()[0];
for (auto &r : _rules) { for (auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
SparseToDense_ForwardPass<T>(iF, oF, _nPlanes, spatialVolume, &r[0], SparseToDense_ForwardPass<T>(iF, oF, _nPlanes, spatialVolume, &r[0],
...@@ -73,10 +73,10 @@ void cpu_SparseToDense_updateGradInput( ...@@ -73,10 +73,10 @@ void cpu_SparseToDense_updateGradInput(
d_input_features.zero_(); d_input_features.zero_();
if (input_features.ndimension() == 2) { if (input_features.ndimension() == 2) {
const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true); const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true);
long spatialVolume = inputSize.prod().data<long>()[0]; long spatialVolume = inputSize.prod().data_ptr<long>()[0];
Int _nPlanes = d_input_features.size(1); Int _nPlanes = d_input_features.size(1);
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
for (auto &r : _rules) { for (auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
SparseToDense_BackwardPass<T>(diF, doF, _nPlanes, spatialVolume, &r[0], SparseToDense_BackwardPass<T>(diF, doF, _nPlanes, spatialVolume, &r[0],
......
...@@ -46,8 +46,8 @@ void cpu_UnPooling_updateOutput( ...@@ -46,8 +46,8 @@ void cpu_UnPooling_updateOutput(
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop}); output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
output_features.zero_(); output_features.zero_();
auto iF = input_features.data<T>() + nFeaturesToDrop; auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
for (auto &r : _rules) { for (auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
...@@ -67,8 +67,8 @@ void cpu_UnPooling_updateGradInput( ...@@ -67,8 +67,8 @@ void cpu_UnPooling_updateGradInput(
const auto &_rules = const auto &_rules =
m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true); m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
auto diF = d_input_features.data<T>() + nFeaturesToDrop; auto diF = d_input_features.data_ptr<T>() + nFeaturesToDrop;
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
for (auto &r : _rules) { for (auto &r : _rules) {
Int nHot = r.size() / 2; Int nHot = r.size() / 2;
......
...@@ -27,8 +27,8 @@ void cuda_ActivePooling_updateOutput( ...@@ -27,8 +27,8 @@ void cuda_ActivePooling_updateOutput(
output_features.resize_({batchSize, nPlanes}); output_features.resize_({batchSize, nPlanes});
output_features.zero_(); output_features.zero_();
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
ActivePooling_ForwardPass<T>(iF, oF, batchSize, maxActive, nPlanes, ActivePooling_ForwardPass<T>(iF, oF, batchSize, maxActive, nPlanes,
&_rules[0][0], average); &_rules[0][0], average);
} }
...@@ -46,8 +46,8 @@ void cuda_ActivePooling_updateGradInput( ...@@ -46,8 +46,8 @@ void cuda_ActivePooling_updateGradInput(
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
ActivePooling_BackwardPass<T>(diF, doF, batchSize, maxActive, nPlanes, ActivePooling_BackwardPass<T>(diF, doF, batchSize, maxActive, nPlanes,
&_rules[0][0], average); &_rules[0][0], average);
......
...@@ -24,7 +24,7 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features, ...@@ -24,7 +24,7 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features,
const Int *rules, bool average) { const Int *rules, bool average) {
auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT));
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data_ptr<Int>();
Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1)); Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1));
assert(rowBatchSize > 0); assert(rowBatchSize > 0);
Int kernelBlockDim = std::min(nPlanes, (Int)32); Int kernelBlockDim = std::min(nPlanes, (Int)32);
...@@ -59,7 +59,7 @@ void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features, ...@@ -59,7 +59,7 @@ void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int batchSize, Int maxActive, Int nPlanes, Int batchSize, Int maxActive, Int nPlanes,
const Int *rules, bool average) { const Int *rules, bool average) {
auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({1<<22}, at::CUDA(at_kINT));
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data_ptr<Int>();
Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1)); Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1));
assert(rowBatchSize > 0); assert(rowBatchSize > 0);
Int kernelBlockDim = std::min(nPlanes, (Int)32); Int kernelBlockDim = std::min(nPlanes, (Int)32);
......
...@@ -32,8 +32,8 @@ double cuda_AffineReluTrivialConvolution_updateOutput( ...@@ -32,8 +32,8 @@ double cuda_AffineReluTrivialConvolution_updateOutput(
output_features.resize_({input_features.size(0), convWeight.size(1)}); output_features.resize_({input_features.size(0), convWeight.size(1)});
dAffineReluTrivialConvolution_forward<T>( dAffineReluTrivialConvolution_forward<T>(
input_features.data<T>(), output_features.data<T>(), input_features.data_ptr<T>(), output_features.data_ptr<T>(),
affineWeight.data<T>(), affineBias.data<T>(), convWeight.data<T>(), affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), convWeight.data_ptr<T>(),
convWeight.size(0), input_features.stride(0), convWeight.size(1), convWeight.size(0), input_features.stride(0), convWeight.size(1),
output_features.size(1), input_features.size(0)); output_features.size(1), input_features.size(0));
return input_features.size(0) * input_features.size(1) * return input_features.size(0) * input_features.size(1) *
...@@ -54,10 +54,10 @@ void cuda_AffineReluTrivialConvolution_backward( ...@@ -54,10 +54,10 @@ void cuda_AffineReluTrivialConvolution_backward(
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
dAffineReluTrivialConvolution_backward_dW<T>( dAffineReluTrivialConvolution_backward_dW<T>(
input_features.data<T>(), d_input_features.data<T>(), input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
d_output_features.data<T>(), affineWeight.data<T>(), d_output_features.data_ptr<T>(), affineWeight.data_ptr<T>(),
d_affineWeight.data<T>(), affineBias.data<T>(), d_affineBias.data<T>(), d_affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), d_affineBias.data_ptr<T>(),
convWeight.data<T>(), d_convWeight.data<T>(), convWeight.size(0), convWeight.data_ptr<T>(), d_convWeight.data_ptr<T>(), convWeight.size(0),
input_features.stride(0), convWeight.size(1), d_output_features.stride(0), input_features.stride(0), convWeight.size(1), d_output_features.stride(0),
input_features.size(0), additiveGrad); input_features.size(0), additiveGrad);
} }
...@@ -31,8 +31,8 @@ void cuda_AveragePooling_updateOutput( ...@@ -31,8 +31,8 @@ void cuda_AveragePooling_updateOutput(
output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop}); output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
output_features.zero_(); output_features.zero_();
auto iF = input_features.data<T>() + nFeaturesToDrop; auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
cuda_AveragePooling_ForwardPass<T>(iF, oF, nPlanes, input_features.size(1), cuda_AveragePooling_ForwardPass<T>(iF, oF, nPlanes, input_features.size(1),
output_features.size(1), _rules, output_features.size(1), _rules,
_rules.size()); _rules.size());
...@@ -53,8 +53,8 @@ void cuda_AveragePooling_updateGradInput( ...@@ -53,8 +53,8 @@ void cuda_AveragePooling_updateGradInput(
d_input_features.resize_as_(input_features); d_input_features.resize_as_(input_features);
d_input_features.zero_(); d_input_features.zero_();
auto diF = d_input_features.data<T>() + nFeaturesToDrop; auto diF = d_input_features.data_ptr<T>() + nFeaturesToDrop;
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
cuda_AveragePooling_BackwardPass<T>(diF, doF, nPlanes, input_features.size(1), cuda_AveragePooling_BackwardPass<T>(diF, doF, nPlanes, input_features.size(1),
d_output_features.size(1), _rules, d_output_features.size(1), _rules,
_rules.size()); _rules.size());
...@@ -75,8 +75,8 @@ void cuda_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context ...@@ -75,8 +75,8 @@ void cuda_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context
Int nPlanes = context.size(1); Int nPlanes = context.size(1);
Int nHot = rules.size(0) / 2; Int nHot = rules.size(0) / 2;
cuda_CopyFeaturesHelper_ForwardPass<T>(context.data<T>(), Context.data<T>(), cuda_CopyFeaturesHelper_ForwardPass<T>(context.data_ptr<T>(), Context.data_ptr<T>(),
rules.data<Int>(), nPlanes, nHot); rules.data_ptr<Int>(), nPlanes, nHot);
} }
template <typename T> template <typename T>
...@@ -87,5 +87,5 @@ void cuda_CopyFeaturesHelper_updateGradInput(at::Tensor &rules, ...@@ -87,5 +87,5 @@ void cuda_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
Int nPlanes = dcontext.size(1); Int nPlanes = dcontext.size(1);
Int nHot = rules.size(0) / 2; Int nHot = rules.size(0) / 2;
cuda_CopyFeaturesHelper_BackwardPass<T>( cuda_CopyFeaturesHelper_BackwardPass<T>(
dcontext.data<T>(), dContext.data<T>(), rules.data<Int>(), nPlanes, nHot); dcontext.data_ptr<T>(), dContext.data_ptr<T>(), rules.data_ptr<Int>(), nPlanes, nHot);
} }
...@@ -33,9 +33,9 @@ void cuda_BatchNormalization_updateOutput( ...@@ -33,9 +33,9 @@ void cuda_BatchNormalization_updateOutput(
auto nPlanes = input_features.size(1); auto nPlanes = input_features.size(1);
auto input_stride = input_features.stride(0); auto input_stride = input_features.stride(0);
auto output_stride = output_features.stride(0); auto output_stride = output_features.stride(0);
bn_f(input_features.data<T>(), output_features.data<T>(), nPlanes, bn_f(input_features.data_ptr<T>(), output_features.data_ptr<T>(), nPlanes,
input_stride, output_stride, nActive, saveMean.data<T>(), input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(), saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), eps, OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), eps,
momentum, train, leakiness); momentum, train, leakiness);
} }
...@@ -60,10 +60,10 @@ void cuda_BatchNormalization_backward( ...@@ -60,10 +60,10 @@ void cuda_BatchNormalization_backward(
auto nPlanes = input_features.size(1); auto nPlanes = input_features.size(1);
auto input_stride = input_features.stride(0); auto input_stride = input_features.stride(0);
auto output_stride = output_features.stride(0); auto output_stride = output_features.stride(0);
bn_b(input_features.data<T>(), d_input_features.data<T>(), bn_b(input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
output_features.data<T>(), d_output_features.data<T>(), nPlanes, output_features.data_ptr<T>(), d_output_features.data_ptr<T>(), nPlanes,
input_stride, output_stride, nActive, saveMean.data<T>(), input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(), saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), OptionalTensorData<T>(weight), OptionalTensorData<T>(bias),
OptionalTensorData<T>(d_weight), OptionalTensorData<T>(d_bias), OptionalTensorData<T>(d_weight), OptionalTensorData<T>(d_bias),
leakiness); leakiness);
......
...@@ -19,7 +19,7 @@ void cuda_BatchwiseMultiplicativeDropout_updateOutput( ...@@ -19,7 +19,7 @@ void cuda_BatchwiseMultiplicativeDropout_updateOutput(
output_features.resize_as_(input_features); output_features.resize_as_(input_features);
auto nActive = input_features.size(0); auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1); auto nPlanes = input_features.size(1);
bmd_f(input_features.data<T>(), output_features.data<T>(), noise.data<T>(), bmd_f(input_features.data_ptr<T>(), output_features.data_ptr<T>(), noise.data_ptr<T>(),
nActive, nPlanes, alpha); nActive, nPlanes, alpha);
} }
...@@ -32,6 +32,6 @@ void cuda_BatchwiseMultiplicativeDropout_updateGradInput( ...@@ -32,6 +32,6 @@ void cuda_BatchwiseMultiplicativeDropout_updateGradInput(
d_input_features.resize_as_(d_output_features); d_input_features.resize_as_(d_output_features);
auto nActive = input_features.size(0); auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1); auto nPlanes = input_features.size(1);
bmd_b(input_features.data<T>(), d_input_features.data<T>(), bmd_b(input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
d_output_features.data<T>(), noise.data<T>(), nActive, nPlanes, alpha); d_output_features.data_ptr<T>(), noise.data_ptr<T>(), nActive, nPlanes, alpha);
} }
...@@ -38,12 +38,12 @@ double cuda_Convolution_updateOutput( ...@@ -38,12 +38,12 @@ double cuda_Convolution_updateOutput(
output_features.resize_({nActiveOut, op * nGroups}); output_features.resize_({nActiveOut, op * nGroups});
if (nActiveOut) { if (nActiveOut) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
if (bias.numel()) if (bias.numel())
Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut); Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
else else
output_features.zero_(); output_features.zero_();
...@@ -76,17 +76,17 @@ void cuda_Convolution_backward( ...@@ -76,17 +76,17 @@ void cuda_Convolution_backward(
d_input_features.zero_(); d_input_features.zero_();
if (nActiveOut) { if (nActiveOut) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
auto dw = d_weight.data<T>(); auto dw = d_weight.data_ptr<T>();
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups, dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
op, op * nGroups, nGroups); op, op * nGroups, nGroups);
if (d_bias.numel()) { if (d_bias.numel()) {
auto db = d_bias.data<T>(); auto db = d_bias.data_ptr<T>();
Convolution_bp_bias(doF, db, op, nActiveOut); Convolution_bp_bias(doF, db, op, nActiveOut);
} }
} }
...@@ -108,12 +108,12 @@ double cuda_SubmanifoldConvolution_updateOutput( ...@@ -108,12 +108,12 @@ double cuda_SubmanifoldConvolution_updateOutput(
output_features.resize_({nActive, op * nGroups}); output_features.resize_({nActive, op * nGroups});
if (nActive) { if (nActive) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
if (bias.numel()) if (bias.numel())
Convolution_fp_bias(oF, bias.data<T>(), op, nActive); Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActive);
else else
output_features.zero_(); output_features.zero_();
...@@ -143,17 +143,17 @@ void cuda_SubmanifoldConvolution_backward( ...@@ -143,17 +143,17 @@ void cuda_SubmanifoldConvolution_backward(
d_input_features.zero_(); d_input_features.zero_();
if (nActive) { if (nActive) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
auto dw = d_weight.data<T>(); auto dw = d_weight.data_ptr<T>();
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups, dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
op, op * nGroups, nGroups); op, op * nGroups, nGroups);
if (d_bias.numel()) { if (d_bias.numel()) {
auto db = d_bias.data<T>(); auto db = d_bias.data_ptr<T>();
Convolution_bp_bias(doF, db, op, nActive); Convolution_bp_bias(doF, db, op, nActive);
} }
} }
...@@ -174,12 +174,12 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput( ...@@ -174,12 +174,12 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
output_features.resize_({nActive, op * nGroups}); output_features.resize_({nActive, op * nGroups});
if (nActive) { if (nActive) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
if (bias.numel()) if (bias.numel())
Convolution_fp_bias(oF, bias.data<T>(), op, nActive); Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActive);
else else
output_features.zero_(); output_features.zero_();
...@@ -208,17 +208,17 @@ void cuda_PermutohedralSubmanifoldConvolution_backward( ...@@ -208,17 +208,17 @@ void cuda_PermutohedralSubmanifoldConvolution_backward(
d_input_features.zero_(); d_input_features.zero_();
if (nActive) { if (nActive) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
auto dw = d_weight.data<T>(); auto dw = d_weight.data_ptr<T>();
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups, dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
op, op * nGroups, nGroups); op, op * nGroups, nGroups);
if (d_bias.numel()) { if (d_bias.numel()) {
auto db = d_bias.data<T>(); auto db = d_bias.data_ptr<T>();
Convolution_bp_bias(doF, db, op, nActive); Convolution_bp_bias(doF, db, op, nActive);
} }
} }
...@@ -243,12 +243,12 @@ double cuda_FullConvolution_updateOutput( ...@@ -243,12 +243,12 @@ double cuda_FullConvolution_updateOutput(
output_features.resize_({nActiveOut, op * nGroups}); output_features.resize_({nActiveOut, op * nGroups});
if (nActiveOut) { if (nActiveOut) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
if (bias.numel()) if (bias.numel())
Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut); Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
else else
output_features.zero_(); output_features.zero_();
...@@ -282,17 +282,17 @@ void cuda_FullConvolution_backward( ...@@ -282,17 +282,17 @@ void cuda_FullConvolution_backward(
d_input_features.zero_(); d_input_features.zero_();
if (nActiveOut) { if (nActiveOut) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
auto dw = d_weight.data<T>(); auto dw = d_weight.data_ptr<T>();
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups, dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
op, op * nGroups, nGroups); op, op * nGroups, nGroups);
if (d_bias.numel()) { if (d_bias.numel()) {
auto db = d_bias.data<T>(); auto db = d_bias.data_ptr<T>();
Convolution_bp_bias(doF, db, op, nActiveOut); Convolution_bp_bias(doF, db, op, nActiveOut);
} }
} }
...@@ -315,12 +315,12 @@ double cuda_RandomizedStrideConvolution_updateOutput( ...@@ -315,12 +315,12 @@ double cuda_RandomizedStrideConvolution_updateOutput(
output_features.resize_({nActiveOut, op * nGroups}); output_features.resize_({nActiveOut, op * nGroups});
if (nActiveOut) { if (nActiveOut) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
if (bias.numel()) if (bias.numel())
Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut); Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
else else
output_features.zero_(); output_features.zero_();
...@@ -353,17 +353,17 @@ void cuda_RandomizedStrideConvolution_backward( ...@@ -353,17 +353,17 @@ void cuda_RandomizedStrideConvolution_backward(
d_input_features.zero_(); d_input_features.zero_();
if (nActiveOut) { if (nActiveOut) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
auto dw = d_weight.data<T>(); auto dw = d_weight.data_ptr<T>();
dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups, dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
op, op * nGroups, nGroups); op, op * nGroups, nGroups);
if (d_bias.numel()) { if (d_bias.numel()) {
auto db = d_bias.data<T>(); auto db = d_bias.data_ptr<T>();
Convolution_bp_bias(doF, db, op, nActiveOut); Convolution_bp_bias(doF, db, op, nActiveOut);
} }
} }
......
...@@ -35,12 +35,12 @@ double cuda_Deconvolution_updateOutput( ...@@ -35,12 +35,12 @@ double cuda_Deconvolution_updateOutput(
output_features.resize_({nActiveOut, op * nGroups}); output_features.resize_({nActiveOut, op * nGroups});
if (nActiveOut) { if (nActiveOut) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
if (bias.numel()) if (bias.numel())
Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut); Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
else else
output_features.zero_(); output_features.zero_();
...@@ -73,16 +73,16 @@ void cuda_Deconvolution_backward( ...@@ -73,16 +73,16 @@ void cuda_Deconvolution_backward(
d_input_features.zero_(); d_input_features.zero_();
if (nActiveOut) { if (nActiveOut) {
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
auto w = weight.data<T>(); auto w = weight.data_ptr<T>();
auto dw = d_weight.data<T>(); auto dw = d_weight.data_ptr<T>();
dDeconvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, dDeconvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip,
ip * nGroups, op, op * nGroups, nGroups); ip * nGroups, op, op * nGroups, nGroups);
if (d_bias.numel()) { if (d_bias.numel()) {
auto db = d_bias.data<T>(); auto db = d_bias.data_ptr<T>();
Convolution_bp_bias(doF, db, op, nActiveOut); Convolution_bp_bias(doF, db, op, nActiveOut);
} }
} }
......
...@@ -34,9 +34,9 @@ void cuda_InputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -34,9 +34,9 @@ void cuda_InputLayer_updateOutput(Metadata<Dimension> &m,
output_features.resize_({*m.inputNActive, nPlanes}); output_features.resize_({*m.inputNActive, nPlanes});
output_features.zero_(); output_features.zero_();
auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data_ptr<Int>();
InputLayer_fp<T>(iF, oF, nRows, maxActive, nPlanes, &rules[1][0], rb, InputLayer_fp<T>(iF, oF, nRows, maxActive, nPlanes, &rules[1][0], rb,
mode == 4); mode == 4);
} }
...@@ -59,9 +59,9 @@ void cuda_InputLayer_updateGradInput( ...@@ -59,9 +59,9 @@ void cuda_InputLayer_updateGradInput(
d_input_features.resize_({rules[0][2], nPlanes}); d_input_features.resize_({rules[0][2], nPlanes});
d_input_features.zero_(); d_input_features.zero_();
auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data_ptr<Int>();
InputLayer_bp(diF, doF, nRows, maxActive, nPlanes, &rules[1][0], rb, InputLayer_bp(diF, doF, nRows, maxActive, nPlanes, &rules[1][0], rb,
mode == 4); mode == 4);
} }
...@@ -84,9 +84,9 @@ void cuda_OutputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -84,9 +84,9 @@ void cuda_OutputLayer_updateOutput(Metadata<Dimension> &m,
output_features.resize_({rules[0][2], nPlanes}); output_features.resize_({rules[0][2], nPlanes});
output_features.zero_(); output_features.zero_();
auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data_ptr<Int>();
InputLayer_bp(oF, iF, nRows, maxActive, nPlanes, &rules[1][0], rb, false); InputLayer_bp(oF, iF, nRows, maxActive, nPlanes, &rules[1][0], rb, false);
} }
} }
...@@ -108,9 +108,9 @@ void cuda_OutputLayer_updateGradInput( ...@@ -108,9 +108,9 @@ void cuda_OutputLayer_updateGradInput(
d_input_features.resize_({nRows, nPlanes}); d_input_features.resize_({nRows, nPlanes});
d_input_features.zero_(); d_input_features.zero_();
auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data_ptr<Int>();
InputLayer_fp<T>(doF, diF, nRows, maxActive, nPlanes, &rules[1][0], rb, InputLayer_fp<T>(doF, diF, nRows, maxActive, nPlanes, &rules[1][0], rb,
false); false);
} }
...@@ -138,9 +138,9 @@ void cuda_BLInputLayer_updateOutput(Metadata<Dimension> &m, ...@@ -138,9 +138,9 @@ void cuda_BLInputLayer_updateOutput(Metadata<Dimension> &m,
output_features.resize_({*m.inputNActive, nPlanes}); output_features.resize_({*m.inputNActive, nPlanes});
} else { } else {
auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data_ptr<Int>();
InputLayer_fp<T>(iF, oF, nRows, maxActive, nPlanes, &rules[1][0], rb, InputLayer_fp<T>(iF, oF, nRows, maxActive, nPlanes, &rules[1][0], rb,
mode == 4); mode == 4);
} }
...@@ -165,9 +165,9 @@ void cuda_BLInputLayer_updateGradInput( ...@@ -165,9 +165,9 @@ void cuda_BLInputLayer_updateGradInput(
d_input_features.resize_({rules[0][2], rules[0][3], nPlanes}); d_input_features.resize_({rules[0][2], rules[0][3], nPlanes});
d_input_features.zero_(); d_input_features.zero_();
auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data_ptr<Int>();
InputLayer_bp(diF, doF, nRows, maxActive, nPlanes, &rules[1][0], rb, InputLayer_bp(diF, doF, nRows, maxActive, nPlanes, &rules[1][0], rb,
mode == 4); mode == 4);
} }
...@@ -192,9 +192,9 @@ void cuda_BLOutputLayer_updateOutput( ...@@ -192,9 +192,9 @@ void cuda_BLOutputLayer_updateOutput(
output_features.resize_({rules[0][2], rules[0][3], nPlanes}); output_features.resize_({rules[0][2], rules[0][3], nPlanes});
output_features.zero_(); output_features.zero_();
auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto iF = input_features.data<T>(); auto iF = input_features.data_ptr<T>();
auto oF = output_features.data<T>(); auto oF = output_features.data_ptr<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data_ptr<Int>();
InputLayer_bp(oF, iF, nRows, maxActive, nPlanes, &rules[1][0], rb, false); InputLayer_bp(oF, iF, nRows, maxActive, nPlanes, &rules[1][0], rb, false);
} }
} }
...@@ -217,9 +217,9 @@ void cuda_BLOutputLayer_updateGradInput( ...@@ -217,9 +217,9 @@ void cuda_BLOutputLayer_updateGradInput(
d_input_features.resize_({nRows, nPlanes}); d_input_features.resize_({nRows, nPlanes});
d_input_features.zero_(); d_input_features.zero_();
auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT)); auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
auto diF = d_input_features.data<T>(); auto diF = d_input_features.data_ptr<T>();
auto doF = d_output_features.data<T>(); auto doF = d_output_features.data_ptr<T>();
Int *rb = rulesBuffer.data<Int>(); Int *rb = rulesBuffer.data_ptr<Int>();
InputLayer_fp<T>(doF, diF, nRows, maxActive, nPlanes, &rules[1][0], rb, InputLayer_fp<T>(doF, diF, nRows, maxActive, nPlanes, &rules[1][0], rb,
false); false);
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment