Commit 2ad7baf8 authored by Benjamin Thomas Graham

detach; batch_size; data_ptr

parent 16e4df34
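
Context for the diff below: `at::Tensor::data<T>()` was deprecated in PyTorch's C++ API in favour of `data_ptr<T>()`, which returns the same typed pointer to the tensor's storage; every hunk below applies that one-line substitution. (The "detach" and "batch_size" parts of the commit message refer to lines outside the hunks shown here.) A minimal sketch of the pattern, with illustrative names that are not taken from this commit:

    #include <ATen/ATen.h>

    // Scale a tensor through a raw typed pointer -- the accessor being
    // migrated. Assumes a contiguous float CPU tensor.
    void scale_in_place(at::Tensor &t, float k) {
      // Deprecated accessor:  float *p = t.data<float>();
      float *p = t.data_ptr<float>(); // current replacement, same semantics
      for (int64_t i = 0, n = t.numel(); i < n; ++i)
        p[i] *= k;
    }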
@@ -55,8 +55,8 @@ void cpu_ActivePooling_updateOutput(
   output_features.resize_({batchSize, nPlanes});
   output_features.zero_();
-  ActivePooling_ForwardPass<T>(input_features.data<T>(),
-                               output_features.data<T>(), batchSize, maxActive,
+  ActivePooling_ForwardPass<T>(input_features.data_ptr<T>(),
+                               output_features.data_ptr<T>(), batchSize, maxActive,
                                nPlanes, _rules, average);
 }
@@ -74,7 +74,7 @@ void cpu_ActivePooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  ActivePooling_BackwardPass<T>(d_input_features.data<T>(),
-                                d_output_features.data<T>(), batchSize,
+  ActivePooling_BackwardPass<T>(d_input_features.data_ptr<T>(),
+                                d_output_features.data_ptr<T>(), batchSize,
                                 maxActive, nPlanes, _rules, average);
 }
@@ -74,9 +74,9 @@ double cpu_AffineReluTrivialConvolution_updateOutput(
     /*float*/ at::Tensor &affineBias, /*float*/ at::Tensor &convWeight) {
   output_features.resize_({input_features.size(0), convWeight.size(1)});
   AffineReluTrivialConvolution_ForwardPass(
-      input_features.data<T>(), convWeight.size(0), input_features.stride(0),
-      output_features.data<T>(), convWeight.size(1), output_features.stride(0),
-      affineWeight.data<T>(), affineBias.data<T>(), convWeight.data<T>(),
+      input_features.data_ptr<T>(), convWeight.size(0), input_features.stride(0),
+      output_features.data_ptr<T>(), convWeight.size(1), output_features.stride(0),
+      affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), convWeight.data_ptr<T>(),
       input_features.size(0));
   return input_features.size(0) * input_features.size(1) *
          output_features.size(1);
@@ -94,10 +94,10 @@ void cpu_AffineReluTrivialConvolution_backward(
   d_input_features.resize_as_(input_features);
   AffineReluTrivialConvolution_BackwardPass(
-      input_features.data<T>(), d_input_features.data<T>(), convWeight.size(0),
-      input_features.stride(0), d_output_features.data<T>(), convWeight.size(1),
-      d_output_features.stride(0), affineWeight.data<T>(),
-      d_affineWeight.data<T>(), affineBias.data<T>(), d_affineBias.data<T>(),
-      convWeight.data<T>(), d_convWeight.data<T>(), input_features.size(0),
+      input_features.data_ptr<T>(), d_input_features.data_ptr<T>(), convWeight.size(0),
+      input_features.stride(0), d_output_features.data_ptr<T>(), convWeight.size(1),
+      d_output_features.stride(0), affineWeight.data_ptr<T>(),
+      d_affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), d_affineBias.data_ptr<T>(),
+      convWeight.data_ptr<T>(), d_convWeight.data_ptr<T>(), input_features.size(0),
       additiveGrad);
 }
@@ -49,8 +49,8 @@ void cpu_AveragePooling_updateOutput(
   output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
   output_features.zero_();
-  auto iF = input_features.data<T>() + nFeaturesToDrop;
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto oF = output_features.data_ptr<T>();
   for (const auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -74,8 +74,8 @@ void cpu_AveragePooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  auto diF = d_input_features.data<T>() + nFeaturesToDrop;
-  auto doF = d_output_features.data<T>();
+  auto diF = d_input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto doF = d_output_features.data_ptr<T>();
   for (const auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -90,9 +90,9 @@ void cpu_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context,
                                          at::Tensor &Context) {
   Int nHot = rules.size(0) / 2;
   Int nPlanes = context.size(1);
-  auto iF = context.data<T>();
-  auto oF = Context.data<T>();
-  auto r = rules.data<Int>();
+  auto iF = context.data_ptr<T>();
+  auto oF = Context.data_ptr<T>();
+  auto r = rules.data_ptr<Int>();
   Int outSite;
 #pragma omp parallel for private(outSite)
   for (outSite = 0; outSite < nHot; outSite++) {
@@ -107,9 +107,9 @@ void cpu_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
                                             at::Tensor &dContext) {
   Int nHot = rules.size(0) / 2;
   Int nPlanes = dcontext.size(1);
-  auto iF = dcontext.data<T>();
-  auto oF = dContext.data<T>();
-  auto r = rules.data<Int>();
+  auto iF = dcontext.data_ptr<T>();
+  auto oF = dContext.data_ptr<T>();
+  auto r = rules.data_ptr<Int>();
   Int outSite;
 #pragma omp parallel for private(outSite)
   for (outSite = 0; outSite < nHot; outSite++) {
...
@@ -121,9 +121,9 @@ void cpu_BatchNormalization_updateOutput(
   auto input_stride = input_features.stride(0);
   auto output_stride = output_features.stride(0);
   BatchNormalization_ForwardPass<T>(
-      input_features.data<T>(), output_features.data<T>(), nPlanes,
-      input_stride, output_stride, nActive, saveMean.data<T>(),
-      saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
+      input_features.data_ptr<T>(), output_features.data_ptr<T>(), nPlanes,
+      input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
+      saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
       OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), eps,
       momentum, train, leakiness);
 }
@@ -147,10 +147,10 @@ void cpu_BatchNormalization_backward(
   auto input_stride = input_features.stride(0);
   auto output_stride = output_features.stride(0);
   BatchNormalization_BackwardPass<T>(
-      input_features.data<T>(), d_input_features.data<T>(),
-      output_features.data<T>(), d_output_features.data<T>(), nPlanes,
-      input_stride, output_stride, nActive, saveMean.data<T>(),
-      saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
+      input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
+      output_features.data_ptr<T>(), d_output_features.data_ptr<T>(), nPlanes,
+      input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
+      saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
       OptionalTensorData<T>(weight), OptionalTensorData<T>(bias),
       OptionalTensorData<T>(d_weight), OptionalTensorData<T>(d_bias),
       leakiness);
...
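
The BatchNormalization hunks above also pass `OptionalTensorData<T>(weight)` and similar arguments; judging from its use next to `data_ptr<T>()`, it is presumably a small helper that yields a typed pointer for optionally-present tensors (the affine weight/bias). A plausible, hypothetical shape for such a helper -- not the repository's actual definition:

    // Hypothetical helper in the spirit of OptionalTensorData<T>:
    // a typed pointer when the tensor has elements, otherwise nullptr.
    template <typename T> T *optional_tensor_data(at::Tensor &t) {
      return t.numel() ? t.data_ptr<T>() : nullptr;
    }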
@@ -11,9 +11,9 @@ void cpu_BatchwiseMultiplicativeDropout_updateOutput(
   output_features.resize_as_(input_features);
   auto nActive = input_features.size(0);
   auto nPlanes = input_features.size(1);
-  auto iF = input_features.data<T>();
-  auto oF = output_features.data<T>();
-  auto nz = noise.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto oF = output_features.data_ptr<T>();
+  auto nz = noise.data_ptr<T>();
   for (Int row = 0; row < nActive; row++)
     for (Int plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes;
          plane++, o++, i++)
@@ -28,10 +28,10 @@ void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
   d_input_features.resize_as_(d_output_features);
   auto nActive = input_features.size(0);
   auto nPlanes = input_features.size(1);
-  auto iF = input_features.data<T>();
-  auto diF = d_input_features.data<T>();
-  auto doF = d_output_features.data<T>();
-  auto nz = noise.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto diF = d_input_features.data_ptr<T>();
+  auto doF = d_output_features.data_ptr<T>();
+  auto nz = noise.data_ptr<T>();
   for (Int row = 0; row < nActive; row++)
     for (Int plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes;
          plane++, o++, i++)
...
@@ -10,8 +10,8 @@ at::Tensor rule_index_select(at::Tensor &src, Int nRules, const Int *rules,
                              Int groups) {
   auto planes = src.size(1) / groups;
   auto target = at::empty({groups, nRules, planes}, src.options());
-  auto s_ptr = src.data<T>();
-  auto t_ptr = target.data<T>();
+  auto s_ptr = src.data_ptr<T>();
+  auto t_ptr = target.data_ptr<T>();
 #pragma omp parallel for
   for (Int i = 0; i < nRules; ++i) {
     for (Int g = 0; g < groups; ++g) {
@@ -29,8 +29,8 @@ template <typename T>
 void rule_index_add_(at::Tensor &target, at::Tensor &src, Int nRules,
                      const Int *rules, Int groups) {
   auto planes = target.size(1) / groups;
-  auto s_ptr = src.data<T>();
-  auto t_ptr = target.data<T>();
+  auto s_ptr = src.data_ptr<T>();
+  auto t_ptr = target.data_ptr<T>();
 #pragma omp parallel for
   for (Int i = 0; i < nRules; ++i) {
     for (Int g = 0; g < groups; ++g) {
...
@@ -65,8 +65,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
   } else {
     output_features.resize_({*m.inputNActive, nPlanes});
     output_features.zero_();
-    InputLayer_ForwardPass<T>(input_features.data<T>(),
-                              output_features.data<T>(), nRows, maxActive,
+    InputLayer_ForwardPass<T>(input_features.data_ptr<T>(),
+                              output_features.data_ptr<T>(), nRows, maxActive,
                               nPlanes, &rules[1][0], mode == 4);
   }
 }
@@ -86,8 +86,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
   } else {
     d_input_features.resize_({rules[0][2], nPlanes});
     d_input_features.zero_();
-    InputLayer_BackwardPass<T>(d_input_features.data<T>(),
-                               d_output_features.data<T>(), nRows, maxActive,
+    InputLayer_BackwardPass<T>(d_input_features.data_ptr<T>(),
+                               d_output_features.data_ptr<T>(), nRows, maxActive,
                                nPlanes, &rules[1][0], mode == 4);
   }
 }
@@ -108,8 +108,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
   } else {
     output_features.resize_({rules[0][2], nPlanes});
     output_features.zero_();
-    InputLayer_BackwardPass<T>(output_features.data<T>(),
-                               input_features.data<T>(), nRows, maxActive,
+    InputLayer_BackwardPass<T>(output_features.data_ptr<T>(),
+                               input_features.data_ptr<T>(), nRows, maxActive,
                                nPlanes, &rules[1][0], false);
   }
 }
@@ -129,8 +129,8 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
   } else {
     d_input_features.resize_({nRows, nPlanes});
     d_input_features.zero_();
-    InputLayer_ForwardPass<T>(d_output_features.data<T>(),
-                              d_input_features.data<T>(), nRows, maxActive,
+    InputLayer_ForwardPass<T>(d_output_features.data_ptr<T>(),
+                              d_input_features.data_ptr<T>(), nRows, maxActive,
                               nPlanes, &rules[1][0], false);
   }
 }
@@ -155,8 +155,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
   } else {
     output_features.resize_({*m.inputNActive, nPlanes});
     output_features.zero_();
-    InputLayer_ForwardPass<T>(input_features.data<T>(),
-                              output_features.data<T>(), nRows, maxActive,
+    InputLayer_ForwardPass<T>(input_features.data_ptr<T>(),
+                              output_features.data_ptr<T>(), nRows, maxActive,
                               nPlanes, &rules[1][0], mode == 4);
   }
 }
@@ -178,8 +178,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
   } else {
     d_input_features.resize_({rules[0][2], rules[0][3], nPlanes});
     d_input_features.zero_();
-    InputLayer_BackwardPass<T>(d_input_features.data<T>(),
-                               d_output_features.data<T>(), nRows, maxActive,
+    InputLayer_BackwardPass<T>(d_input_features.data_ptr<T>(),
+                               d_output_features.data_ptr<T>(), nRows, maxActive,
                                nPlanes, &rules[1][0], mode == 4);
   }
 }
@@ -201,8 +201,8 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
   } else {
     output_features.resize_({rules[0][2], rules[0][3], nPlanes});
     output_features.zero_();
-    InputLayer_BackwardPass<T>(output_features.data<T>(),
-                               input_features.data<T>(), nRows, maxActive,
+    InputLayer_BackwardPass<T>(output_features.data_ptr<T>(),
+                               input_features.data_ptr<T>(), nRows, maxActive,
                                nPlanes, &rules[1][0], false);
   }
 }
@@ -224,8 +224,8 @@ void cpu_BLOutputLayer_updateGradInput(
   } else {
     d_input_features.resize_({nRows, nPlanes});
     d_input_features.zero_();
-    InputLayer_ForwardPass<T>(d_output_features.data<T>(),
-                              d_input_features.data<T>(), nRows, maxActive,
+    InputLayer_ForwardPass<T>(d_output_features.data_ptr<T>(),
+                              d_input_features.data_ptr<T>(), nRows, maxActive,
                               nPlanes, &rules[1][0], false);
   }
 }
@@ -9,8 +9,8 @@ void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor &input_features,
                                 /*float*/ at::Tensor &output_features,
                                 T alpha) {
   output_features.resize_as_(input_features);
-  auto iF = input_features.data<T>();
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto oF = output_features.data_ptr<T>();
   auto n = input_features.numel();
   for (Int i = 0; i < n; i++) {
@@ -25,9 +25,9 @@ void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor &input_features,
                                    /*float*/ at::Tensor &d_output_features,
                                    T alpha) {
   d_input_features.resize_as_(d_output_features);
-  auto iF = input_features.data<T>();
-  auto diF = d_input_features.data<T>();
-  auto doF = d_output_features.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto diF = d_input_features.data_ptr<T>();
+  auto doF = d_output_features.data_ptr<T>();
   auto n = d_input_features.numel();
   for (Int i = 0; i < n; i++) {
...
@@ -49,8 +49,8 @@ void cpu_MaxPooling_updateOutput(
   output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
   output_features.zero_();
-  auto iF = input_features.data<T>() + nFeaturesToDrop;
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto oF = output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -74,10 +74,10 @@ void cpu_MaxPooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  auto iF = input_features.data<T>();
-  auto oF = output_features.data<T>();
-  auto diF = d_input_features.data<T>();
-  auto doF = d_output_features.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto oF = output_features.data_ptr<T>();
+  auto diF = d_input_features.data_ptr<T>();
+  auto doF = d_output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -101,8 +101,8 @@ void cpu_RandomizedStrideMaxPooling_updateOutput(
   output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
   output_features.zero_();
-  auto iF = input_features.data<T>() + nFeaturesToDrop;
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto oF = output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -126,10 +126,10 @@ void cpu_RandomizedStrideMaxPooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  auto iF = input_features.data<T>();
-  auto oF = output_features.data<T>();
-  auto diF = d_input_features.data<T>();
-  auto doF = d_output_features.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto oF = output_features.data_ptr<T>();
+  auto diF = d_input_features.data_ptr<T>();
+  auto doF = d_output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
...
@@ -42,7 +42,7 @@ void cpu_SparseToDense_updateOutput(
   std::array<long, Dimension + 2> sz;
   sz[0] = m.grids.begin()->second.size(); // batch size
   sz[1] = nPlanes;
-  long *in_sz = inputSize.data<long>();
+  long *in_sz = inputSize.data_ptr<long>();
   for (Int i = 0; i < Dimension; ++i)
     sz[i + 2] = in_sz[i];
   output_features.resize_(sz);
@@ -51,9 +51,9 @@ void cpu_SparseToDense_updateOutput(
   if (input_features.ndimension() == 2) {
     const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true);
     Int _nPlanes = input_features.size(1);
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    long spatialVolume = inputSize.prod().data<long>()[0];
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    long spatialVolume = inputSize.prod().data_ptr<long>()[0];
     for (auto &r : _rules) {
       Int nHot = r.size() / 2;
       SparseToDense_ForwardPass<T>(iF, oF, _nPlanes, spatialVolume, &r[0],
@@ -73,10 +73,10 @@ void cpu_SparseToDense_updateGradInput(
   d_input_features.zero_();
   if (input_features.ndimension() == 2) {
     const auto &_rules = m.getSparseToDenseRuleBook(inputSize, true);
-    long spatialVolume = inputSize.prod().data<long>()[0];
+    long spatialVolume = inputSize.prod().data_ptr<long>()[0];
     Int _nPlanes = d_input_features.size(1);
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
     for (auto &r : _rules) {
       Int nHot = r.size() / 2;
       SparseToDense_BackwardPass<T>(diF, doF, _nPlanes, spatialVolume, &r[0],
...
@@ -46,8 +46,8 @@ void cpu_UnPooling_updateOutput(
   output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
   output_features.zero_();
-  auto iF = input_features.data<T>() + nFeaturesToDrop;
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto oF = output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
@@ -67,8 +67,8 @@ void cpu_UnPooling_updateGradInput(
   const auto &_rules =
       m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
-  auto diF = d_input_features.data<T>() + nFeaturesToDrop;
-  auto doF = d_output_features.data<T>();
+  auto diF = d_input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto doF = d_output_features.data_ptr<T>();
   for (auto &r : _rules) {
     Int nHot = r.size() / 2;
...
@@ -27,8 +27,8 @@ void cuda_ActivePooling_updateOutput(
   output_features.resize_({batchSize, nPlanes});
   output_features.zero_();
-  auto iF = input_features.data<T>();
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>();
+  auto oF = output_features.data_ptr<T>();
   ActivePooling_ForwardPass<T>(iF, oF, batchSize, maxActive, nPlanes,
                                &_rules[0][0], average);
 }
@@ -46,8 +46,8 @@ void cuda_ActivePooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  auto diF = d_input_features.data<T>();
-  auto doF = d_output_features.data<T>();
+  auto diF = d_input_features.data_ptr<T>();
+  auto doF = d_output_features.data_ptr<T>();
   ActivePooling_BackwardPass<T>(diF, doF, batchSize, maxActive, nPlanes,
                                 &_rules[0][0], average);
...
@@ -24,7 +24,7 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features,
                                const Int *rules, bool average) {
   auto rulesBuffer = at::empty({1 << 22}, at::CUDA(at_kINT));
-  Int *rb = rulesBuffer.data<Int>();
+  Int *rb = rulesBuffer.data_ptr<Int>();
   Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1));
   assert(rowBatchSize > 0);
   Int kernelBlockDim = std::min(nPlanes, (Int)32);
@@ -59,7 +59,7 @@ void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
                                 Int batchSize, Int maxActive, Int nPlanes,
                                 const Int *rules, bool average) {
   auto rulesBuffer = at::empty({1 << 22}, at::CUDA(at_kINT));
-  Int *rb = rulesBuffer.data<Int>();
+  Int *rb = rulesBuffer.data_ptr<Int>();
   Int rowBatchSize = std::min((Int)32768, (1 << 22) / (maxActive + 1));
   assert(rowBatchSize > 0);
   Int kernelBlockDim = std::min(nPlanes, (Int)32);
...
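
A note on the CUDA paths above: `rulesBuffer` is allocated with CUDA options, so `rulesBuffer.data_ptr<Int>()` returns a device pointer -- valid to hand to kernels, not to dereference on the host. A minimal sketch of that staging pattern in plain ATen (the names and the kernel are illustrative; `at_kINT` is a repository alias not reproduced here):

    #include <ATen/ATen.h>
    #include <vector>

    // Copy host-side rule data to the GPU and take a raw device pointer
    // for a kernel launch. Illustrative only.
    void stage_rules(const std::vector<int> &rules) {
      auto cpu = at::from_blob(const_cast<int *>(rules.data()),
                               {(int64_t)rules.size()}, at::kInt);
      auto gpu = cpu.to(at::kCUDA);  // host -> device copy
      int *rb = gpu.data_ptr<int>(); // device pointer; kernel use only
      // some_kernel<<<grid, block>>>(rb, (int)rules.size()); // hypothetical
    }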
@@ -32,8 +32,8 @@ double cuda_AffineReluTrivialConvolution_updateOutput(
   output_features.resize_({input_features.size(0), convWeight.size(1)});
   dAffineReluTrivialConvolution_forward<T>(
-      input_features.data<T>(), output_features.data<T>(),
-      affineWeight.data<T>(), affineBias.data<T>(), convWeight.data<T>(),
+      input_features.data_ptr<T>(), output_features.data_ptr<T>(),
+      affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), convWeight.data_ptr<T>(),
       convWeight.size(0), input_features.stride(0), convWeight.size(1),
       output_features.size(1), input_features.size(0));
   return input_features.size(0) * input_features.size(1) *
@@ -54,10 +54,10 @@ void cuda_AffineReluTrivialConvolution_backward(
   d_input_features.resize_as_(input_features);
   dAffineReluTrivialConvolution_backward_dW<T>(
-      input_features.data<T>(), d_input_features.data<T>(),
-      d_output_features.data<T>(), affineWeight.data<T>(),
-      d_affineWeight.data<T>(), affineBias.data<T>(), d_affineBias.data<T>(),
-      convWeight.data<T>(), d_convWeight.data<T>(), convWeight.size(0),
+      input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
+      d_output_features.data_ptr<T>(), affineWeight.data_ptr<T>(),
+      d_affineWeight.data_ptr<T>(), affineBias.data_ptr<T>(), d_affineBias.data_ptr<T>(),
+      convWeight.data_ptr<T>(), d_convWeight.data_ptr<T>(), convWeight.size(0),
       input_features.stride(0), convWeight.size(1), d_output_features.stride(0),
       input_features.size(0), additiveGrad);
 }
@@ -31,8 +31,8 @@ void cuda_AveragePooling_updateOutput(
   output_features.resize_({nActive, input_features.size(1) - nFeaturesToDrop});
   output_features.zero_();
-  auto iF = input_features.data<T>() + nFeaturesToDrop;
-  auto oF = output_features.data<T>();
+  auto iF = input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto oF = output_features.data_ptr<T>();
   cuda_AveragePooling_ForwardPass<T>(iF, oF, nPlanes, input_features.size(1),
                                      output_features.size(1), _rules,
                                      _rules.size());
@@ -53,8 +53,8 @@ void cuda_AveragePooling_updateGradInput(
   d_input_features.resize_as_(input_features);
   d_input_features.zero_();
-  auto diF = d_input_features.data<T>() + nFeaturesToDrop;
-  auto doF = d_output_features.data<T>();
+  auto diF = d_input_features.data_ptr<T>() + nFeaturesToDrop;
+  auto doF = d_output_features.data_ptr<T>();
   cuda_AveragePooling_BackwardPass<T>(diF, doF, nPlanes, input_features.size(1),
                                       d_output_features.size(1), _rules,
                                       _rules.size());
@@ -75,8 +75,8 @@ void cuda_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context
   Int nPlanes = context.size(1);
   Int nHot = rules.size(0) / 2;
-  cuda_CopyFeaturesHelper_ForwardPass<T>(context.data<T>(), Context.data<T>(),
-                                         rules.data<Int>(), nPlanes, nHot);
+  cuda_CopyFeaturesHelper_ForwardPass<T>(context.data_ptr<T>(), Context.data_ptr<T>(),
+                                         rules.data_ptr<Int>(), nPlanes, nHot);
 }
 template <typename T>
@@ -87,5 +87,5 @@ void cuda_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
   Int nPlanes = dcontext.size(1);
   Int nHot = rules.size(0) / 2;
   cuda_CopyFeaturesHelper_BackwardPass<T>(
-      dcontext.data<T>(), dContext.data<T>(), rules.data<Int>(), nPlanes, nHot);
+      dcontext.data_ptr<T>(), dContext.data_ptr<T>(), rules.data_ptr<Int>(), nPlanes, nHot);
 }
@@ -33,9 +33,9 @@ void cuda_BatchNormalization_updateOutput(
   auto nPlanes = input_features.size(1);
   auto input_stride = input_features.stride(0);
   auto output_stride = output_features.stride(0);
-  bn_f(input_features.data<T>(), output_features.data<T>(), nPlanes,
-       input_stride, output_stride, nActive, saveMean.data<T>(),
-       saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
+  bn_f(input_features.data_ptr<T>(), output_features.data_ptr<T>(), nPlanes,
+       input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
+       saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
       OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), eps,
       momentum, train, leakiness);
 }
@@ -60,10 +60,10 @@ void cuda_BatchNormalization_backward(
   auto nPlanes = input_features.size(1);
   auto input_stride = input_features.stride(0);
   auto output_stride = output_features.stride(0);
-  bn_b(input_features.data<T>(), d_input_features.data<T>(),
-       output_features.data<T>(), d_output_features.data<T>(), nPlanes,
-       input_stride, output_stride, nActive, saveMean.data<T>(),
-       saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
+  bn_b(input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
+       output_features.data_ptr<T>(), d_output_features.data_ptr<T>(), nPlanes,
+       input_stride, output_stride, nActive, saveMean.data_ptr<T>(),
+       saveInvStd.data_ptr<T>(), runningMean.data_ptr<T>(), runningVar.data_ptr<T>(),
       OptionalTensorData<T>(weight), OptionalTensorData<T>(bias),
       OptionalTensorData<T>(d_weight), OptionalTensorData<T>(d_bias),
       leakiness);
...
@@ -19,7 +19,7 @@ void cuda_BatchwiseMultiplicativeDropout_updateOutput(
   output_features.resize_as_(input_features);
   auto nActive = input_features.size(0);
   auto nPlanes = input_features.size(1);
-  bmd_f(input_features.data<T>(), output_features.data<T>(), noise.data<T>(),
+  bmd_f(input_features.data_ptr<T>(), output_features.data_ptr<T>(), noise.data_ptr<T>(),
         nActive, nPlanes, alpha);
 }
@@ -32,6 +32,6 @@ void cuda_BatchwiseMultiplicativeDropout_updateGradInput(
   d_input_features.resize_as_(d_output_features);
   auto nActive = input_features.size(0);
   auto nPlanes = input_features.size(1);
-  bmd_b(input_features.data<T>(), d_input_features.data<T>(),
-        d_output_features.data<T>(), noise.data<T>(), nActive, nPlanes, alpha);
+  bmd_b(input_features.data_ptr<T>(), d_input_features.data_ptr<T>(),
+        d_output_features.data_ptr<T>(), noise.data_ptr<T>(), nActive, nPlanes, alpha);
 }
@@ -38,12 +38,12 @@ double cuda_Convolution_updateOutput(
   output_features.resize_({nActiveOut, op * nGroups});
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
     else
       output_features.zero_();
@@ -76,17 +76,17 @@ void cuda_Convolution_backward(
   d_input_features.zero_();
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
                                  op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
       Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
@@ -108,12 +108,12 @@ double cuda_SubmanifoldConvolution_updateOutput(
   output_features.resize_({nActive, op * nGroups});
   if (nActive) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActive);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActive);
     else
       output_features.zero_();
@@ -143,17 +143,17 @@ void cuda_SubmanifoldConvolution_backward(
   d_input_features.zero_();
   if (nActive) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
                                  op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
      Convolution_bp_bias(doF, db, op, nActive);
     }
   }
@@ -174,12 +174,12 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
   output_features.resize_({nActive, op * nGroups});
   if (nActive) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActive);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActive);
     else
       output_features.zero_();
@@ -208,17 +208,17 @@ void cuda_PermutohedralSubmanifoldConvolution_backward(
   d_input_features.zero_();
   if (nActive) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
                                  op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
       Convolution_bp_bias(doF, db, op, nActive);
     }
   }
@@ -243,12 +243,12 @@ double cuda_FullConvolution_updateOutput(
   output_features.resize_({nActiveOut, op * nGroups});
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
     else
       output_features.zero_();
@@ -282,17 +282,17 @@ void cuda_FullConvolution_backward(
   d_input_features.zero_();
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
                                  op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
       Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
@@ -315,12 +315,12 @@ double cuda_RandomizedStrideConvolution_updateOutput(
   output_features.resize_({nActiveOut, op * nGroups});
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
     else
       output_features.zero_();
@@ -353,17 +353,17 @@ void cuda_RandomizedStrideConvolution_backward(
   d_input_features.zero_();
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dConvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip * nGroups,
                                  op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
       Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
...
@@ -35,12 +35,12 @@ double cuda_Deconvolution_updateOutput(
   output_features.resize_({nActiveOut, op * nGroups});
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    auto w = weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
     if (bias.numel())
-      Convolution_fp_bias(oF, bias.data<T>(), op, nActiveOut);
+      Convolution_fp_bias(oF, bias.data_ptr<T>(), op, nActiveOut);
     else
       output_features.zero_();
@@ -73,16 +73,16 @@ void cuda_Deconvolution_backward(
   d_input_features.zero_();
   if (nActiveOut) {
-    auto iF = input_features.data<T>();
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    auto w = weight.data<T>();
-    auto dw = d_weight.data<T>();
+    auto iF = input_features.data_ptr<T>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    auto w = weight.data_ptr<T>();
+    auto dw = d_weight.data_ptr<T>();
     dDeconvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip,
                                    ip * nGroups, op, op * nGroups, nGroups);
     if (d_bias.numel()) {
-      auto db = d_bias.data<T>();
+      auto db = d_bias.data_ptr<T>();
       Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
...
@@ -34,9 +34,9 @@ void cuda_InputLayer_updateOutput(Metadata<Dimension> &m,
     output_features.resize_({*m.inputNActive, nPlanes});
     output_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_fp<T>(iF, oF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                      mode == 4);
   }
@@ -59,9 +59,9 @@ void cuda_InputLayer_updateGradInput(
     d_input_features.resize_({rules[0][2], nPlanes});
     d_input_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_bp(diF, doF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                   mode == 4);
   }
@@ -84,9 +84,9 @@ void cuda_OutputLayer_updateOutput(Metadata<Dimension> &m,
     output_features.resize_({rules[0][2], nPlanes});
     output_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_bp(oF, iF, nRows, maxActive, nPlanes, &rules[1][0], rb, false);
   }
 }
@@ -108,9 +108,9 @@ void cuda_OutputLayer_updateGradInput(
     d_input_features.resize_({nRows, nPlanes});
     d_input_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_fp<T>(doF, diF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                      false);
   }
@@ -138,9 +138,9 @@ void cuda_BLInputLayer_updateOutput(Metadata<Dimension> &m,
     output_features.resize_({*m.inputNActive, nPlanes});
   } else {
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_fp<T>(iF, oF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                      mode == 4);
   }
@@ -165,9 +165,9 @@ void cuda_BLInputLayer_updateGradInput(
     d_input_features.resize_({rules[0][2], rules[0][3], nPlanes});
     d_input_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_bp(diF, doF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                   mode == 4);
   }
@@ -192,9 +192,9 @@ void cuda_BLOutputLayer_updateOutput(
     output_features.resize_({rules[0][2], rules[0][3], nPlanes});
     output_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto iF = input_features.data<T>();
-    auto oF = output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto iF = input_features.data_ptr<T>();
+    auto oF = output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_bp(oF, iF, nRows, maxActive, nPlanes, &rules[1][0], rb, false);
   }
 }
@@ -217,9 +217,9 @@ void cuda_BLOutputLayer_updateGradInput(
     d_input_features.resize_({nRows, nPlanes});
     d_input_features.zero_();
     auto rulesBuffer = at::empty({(int)rules[1].size()}, at::CUDA(at_kINT));
-    auto diF = d_input_features.data<T>();
-    auto doF = d_output_features.data<T>();
-    Int *rb = rulesBuffer.data<Int>();
+    auto diF = d_input_features.data_ptr<T>();
+    auto doF = d_output_features.data_ptr<T>();
+    Int *rb = rulesBuffer.data_ptr<Int>();
     InputLayer_fp<T>(doF, diF, nRows, maxActive, nPlanes, &rules[1][0], rb,
                      false);
   }
...