"git@developer.sourcefind.cn:OpenDAS/openpcdet.git" did not exist on "aaf9cbeb9c9b5753eba821e652c80160c558184d"
Commit de3743f6 authored by Benjamin Thomas Graham

Factor out CUDA code

parent f0407b36
SparseConvNetTorch/build/
*.t7
t7/
*.pth
*.o
*.a
*.so
@@ -11,3 +10,5 @@ pickle
PyTorch/sparseconvnet.egg-info/
PyTorch/sparseconvnet/SCN/__init__.py
sparseconvnet.egg-info
*.zip
*.rar
#!/bin/bash
rm -rf build/ sparseconvnet.egg-info sparseconvnet_SCN*.so
rm -rf build/ dist/ sparseconvnet.egg-info sparseconvnet_SCN*.so
python setup.py install
@@ -47,7 +47,7 @@ p['initial_lr'] = 1e-1
p['lr_decay'] = 4e-2
p['weight_decay'] = 1e-4
p['momentum'] = 0.9
p['check_point'] = True
p['check_point'] = False
p['use_cuda'] = torch.cuda.is_available()
dtype = 'torch.cuda.FloatTensor' if p['use_cuda'] else 'torch.FloatTensor'
dtypei = 'torch.cuda.LongTensor' if p['use_cuda'] else 'torch.LongTensor'
......
@@ -47,7 +47,7 @@ p['initial_lr'] = 1e-1
p['lr_decay'] = 4e-2
p['weight_decay'] = 1e-4
p['momentum'] = 0.9
p['check_point'] = True
p['check_point'] = False
p['use_cuda'] = torch.cuda.is_available()
dtype = 'torch.cuda.FloatTensor' if p['use_cuda'] else 'torch.FloatTensor'
dtypei = 'torch.cuda.LongTensor' if p['use_cuda'] else 'torch.LongTensor'
......
@@ -4,8 +4,7 @@
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import torchnet
import torch, torch.utils.data
import sparseconvnet as scn
import pickle
import math
......
@@ -4,6 +4,7 @@
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#!/bin/bash
set -e
wget https://archive.ics.uci.edu/ml/machine-learning-databases/00208/Online%20Handwritten%20Assamese%20Characters%20Dataset.rar
unrar e -cl -y "Online Handwritten Assamese Characters Dataset.rar"
mkdir tmp
......
@@ -24,12 +24,13 @@ setup(
packages=['sparseconvnet','sparseconvnet.SCN'],
ext_modules=[
CUDAExtension('sparseconvnet_SCN',
['sparseconvnet/SCN/pybind_cuda.cpp', 'sparseconvnet/SCN/instantiate_cpu.cpp', 'sparseconvnet/SCN/instantiate_cuda.cu'],
[
'sparseconvnet/SCN/cuda.cu', 'sparseconvnet/SCN/sparseconvnet_cuda.cpp', 'sparseconvnet/SCN/pybind.cpp'],
include_dirs=[conda_include_dir, this_dir+'/sparseconvnet/SCN/'],
extra_compile_args=extra)
if torch.cuda.is_available() else
CppExtension('sparseconvnet_SCN',
['sparseconvnet/SCN/pybind_cpu.cpp', 'sparseconvnet/SCN/instantiate_cpu.cpp'],
['sparseconvnet/SCN/pybind.cpp', 'sparseconvnet/SCN/sparseconvnet_cpu.cpp'],
include_dirs=[conda_include_dir, this_dir+'/sparseconvnet/SCN/'],
extra_compile_args=extra['cxx'])],
cmdclass={'build_ext': BuildExtension},
......
@@ -4,7 +4,39 @@
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#include "ActivePooling.h"
// Assume output_features and d_input_features have been zeroed
template <typename T>
void ActivePooling_ForwardPass(T *input_features, T *output_features,
Int batchSize, Int maxActive, Int nPlanes,
RuleBook &rules, bool average) {
for (Int outSite = 0; outSite < batchSize; outSite++) {
T *out = &output_features[outSite * nPlanes];
Int *r = &rules[0][outSite * (maxActive + 1)];
Int nActive = *r++;
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
while (nActive-- > 0) {
T *inp = &input_features[(*r++) * nPlanes];
for (Int plane = 0; plane < nPlanes; plane++)
out[plane] += inp[plane] * multiplier;
}
}
}
template <typename T>
void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int batchSize, Int maxActive, Int nPlanes,
RuleBook &rules, bool average) {
for (Int outSite = 0; outSite < batchSize; outSite++) {
T *out = &d_output_features[outSite * nPlanes];
Int *r = &rules[0][outSite * (maxActive + 1)];
Int nActive = *r++;
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
while (nActive-- > 0) {
T *inp = &d_input_features[(*r++) * nPlanes];
for (Int plane = 0; plane < nPlanes; plane++)
inp[plane] = out[plane] * multiplier;
}
}
}
template <typename T, Int Dimension>
void cpu_ActivePooling_updateOutput(
......
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_ACTIVEPOOLING_H
#define CPU_ACTIVEPOOLING_H
// Assume output_features and d_input_features have been zeroed
template <typename T>
void ActivePooling_ForwardPass(T *input_features, T *output_features,
Int batchSize, Int maxActive, Int nPlanes,
RuleBook &rules, bool average) {
for (Int outSite = 0; outSite < batchSize; outSite++) {
T *out = &output_features[outSite * nPlanes];
Int *r = &rules[0][outSite * (maxActive + 1)];
Int nActive = *r++;
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
while (nActive-- > 0) {
T *inp = &input_features[(*r++) * nPlanes];
for (Int plane = 0; plane < nPlanes; plane++)
out[plane] += inp[plane] * multiplier;
}
}
}
template <typename T>
void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int batchSize, Int maxActive, Int nPlanes,
RuleBook &rules, bool average) {
for (Int outSite = 0; outSite < batchSize; outSite++) {
T *out = &d_output_features[outSite * nPlanes];
Int *r = &rules[0][outSite * (maxActive + 1)];
Int nActive = *r++;
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
while (nActive-- > 0) {
T *inp = &d_input_features[(*r++) * nPlanes];
for (Int plane = 0; plane < nPlanes; plane++)
inp[plane] = out[plane] * multiplier;
}
}
}
#endif /* CPU_ACTIVEPOOLING_H */
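To make the rulebook layout concrete, here is a minimal standalone sketch (not part of the commit). It assumes Int is a 32-bit signed integer and RuleBook is std::vector<std::vector<Int>>, matching how the rest of this codebase defines them; each output site owns maxActive + 1 slots in rules[0], the first holding the count of active inputs. As the comment above the kernels says, the caller must zero the output buffer first.

// Sketch only: exercises ActivePooling_ForwardPass with a hand-built rulebook.
#include <cstdint>
#include <iostream>
#include <vector>
using Int = std::int32_t;                     // assumed definition
using RuleBook = std::vector<std::vector<Int>>; // assumed definition

template <typename T>
void ActivePooling_ForwardPass(T *input_features, T *output_features,
                               Int batchSize, Int maxActive, Int nPlanes,
                               RuleBook &rules, bool average) {
  for (Int outSite = 0; outSite < batchSize; outSite++) {
    T *out = &output_features[outSite * nPlanes];
    Int *r = &rules[0][outSite * (maxActive + 1)];
    Int nActive = *r++;
    T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
    while (nActive-- > 0) {
      T *inp = &input_features[(*r++) * nPlanes];
      for (Int plane = 0; plane < nPlanes; plane++)
        out[plane] += inp[plane] * multiplier;
    }
  }
}

int main() {
  const Int batchSize = 2, maxActive = 3, nPlanes = 2;
  // 4 input sites, 2 planes each.
  std::vector<float> in = {1, 2, 3, 4, 5, 6, 7, 8};
  std::vector<float> out(batchSize * nPlanes, 0); // must be pre-zeroed
  // One row per output site: [nActive, idx0, idx1, idx2]; unused slots
  // are never read because nActive bounds the loop.
  RuleBook rules = {{2, 0, 1, -1,    // site 0 averages input rows 0 and 1
                     1, 3, -1, -1}}; // site 1 copies input row 3
  ActivePooling_ForwardPass<float>(in.data(), out.data(), batchSize,
                                   maxActive, nPlanes, rules, true);
  for (float v : out)
    std::cout << v << ' '; // prints: 2 3 7 8
  std::cout << '\n';
}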
@@ -4,7 +4,68 @@
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#include "AffineReluTrivialConvolution.h"
#include <cstring>
template <typename T>
void AffineReluTrivialConvolution_ForwardPass(
T *input_features, Int input_nPlanes, Int input_stride, T *output_features,
Int output_nPlanes, Int output_stride, T *affineWeight, T *affineBias,
T *convWeight, Int nActive) {
for (Int row = 0; row < nActive; row++) {
for (Int column = 0; column < output_nPlanes; column++) {
T sum = 0;
for (Int j = 0; j < input_nPlanes; j++) {
T i = input_features[row * input_stride + j] * affineWeight[j] +
affineBias[j];
i = (i > 0) ? i : 0;
sum += i * convWeight[j * output_nPlanes + column];
}
output_features[row * output_stride + column] = sum;
}
}
}
template <typename T>
void AffineReluTrivialConvolution_BackwardPass(
T *input_features, T *d_input_features, Int input_nPlanes, Int input_stride,
T *d_output_features, Int output_nPlanes, Int output_stride,
T *affineWeight, T *dAffineWeight, T *affineBias, T *dAffineBias,
T *convWeight, T *dConvWeight, Int nActive, bool additiveGrad) {
for (Int row = 0; row < input_nPlanes; row++) {
for (Int column = 0; column < output_nPlanes; column++) {
T sum = 0;
for (Int j = 0; j < nActive; j++) {
T i = input_features[j * input_stride + row] * affineWeight[row] +
affineBias[row];
i = (i > 0) ? i : 0;
sum += i * d_output_features[j * output_stride + column];
}
dConvWeight[row * output_nPlanes + column] += sum;
}
}
for (Int row = 0; row < nActive; row++) {
for (Int column = 0; column < input_nPlanes; column++) {
T sum = 0;
for (Int j = 0; j < output_nPlanes; j++) {
sum += d_output_features[row * output_stride + j] *
convWeight[column * output_nPlanes + j];
}
T i = input_features[row * input_stride + column] * affineWeight[column] +
affineBias[column];
if (i <= 0) // d_ReLU
sum = 0;
dAffineWeight[column] += sum * i;
dAffineBias[column] += sum;
sum *= affineWeight[column];
if (additiveGrad)
d_input_features[row * input_stride + column] += sum;
else
d_input_features[row * input_stride + column] = sum;
}
}
}
template <typename T>
double cpu_AffineReluTrivialConvolution_updateOutput(
......
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_AffineReluTrivialConvolution_H
#define CPU_AffineReluTrivialConvolution_H
#include <cstring>
template <typename T>
void AffineReluTrivialConvolution_ForwardPass(
T *input_features, Int input_nPlanes, Int input_stride, T *output_features,
Int output_nPlanes, Int output_stride, T *affineWeight, T *affineBias,
T *convWeight, Int nActive) {
for (Int row = 0; row < nActive; row++) {
for (Int column = 0; column < output_nPlanes; column++) {
T sum = 0;
for (Int j = 0; j < input_nPlanes; j++) {
T i = input_features[row * input_stride + j] * affineWeight[j] +
affineBias[j];
i = (i > 0) ? i : 0;
sum += i * convWeight[j * output_nPlanes + column];
}
output_features[row * output_stride + column] = sum;
}
}
}
template <typename T>
void AffineReluTrivialConvolution_BackwardPass(
T *input_features, T *d_input_features, Int input_nPlanes, Int input_stride,
T *d_output_features, Int output_nPlanes, Int output_stride,
T *affineWeight, T *dAffineWeight, T *affineBias, T *dAffineBias,
T *convWeight, T *dConvWeight, Int nActive, bool additiveGrad) {
for (Int row = 0; row < input_nPlanes; row++) {
for (Int column = 0; column < output_nPlanes; column++) {
T sum = 0;
for (Int j = 0; j < nActive; j++) {
T i = input_features[j * input_stride + row] * affineWeight[row] +
affineBias[row];
i = (i > 0) ? i : 0;
sum += i * d_output_features[j * output_stride + column];
}
dConvWeight[row * output_nPlanes + column] += sum;
}
}
for (Int row = 0; row < nActive; row++) {
for (Int column = 0; column < input_nPlanes; column++) {
T sum = 0;
for (Int j = 0; j < output_nPlanes; j++) {
sum += d_output_features[row * output_stride + j] *
convWeight[column * output_nPlanes + j];
}
T i = input_features[row * input_stride + column] * affineWeight[column] +
affineBias[column];
if (i <= 0) // d_ReLU
sum = 0;
dAffineWeight[column] += sum * i;
dAffineBias[column] += sum;
sum *= affineWeight[column];
if (additiveGrad)
d_input_features[row * input_stride + column] += sum;
else
d_input_features[row * input_stride + column] = sum;
}
}
}
#endif /* CPU_AffineReluTrivialConvolution_H */
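The forward pass above fuses three steps into one loop nest: a per-plane affine transform, a ReLU, and a 1x1 ("trivial") convolution, i.e. out[row][c] = sum_j max(aW[j] * in[row][j] + aB[j], 0) * W[j][c]. The backward pass recomputes the affine + ReLU activations from input_features rather than caching them, trading a little compute for memory. A minimal sketch (not from the commit) that checks one output against the formula by hand, assuming Int is a signed integer type:

#include <cassert>
#include <cmath>
using Int = int; // assumption

template <typename T>
void AffineReluTrivialConvolution_ForwardPass(
    T *input_features, Int input_nPlanes, Int input_stride, T *output_features,
    Int output_nPlanes, Int output_stride, T *affineWeight, T *affineBias,
    T *convWeight, Int nActive) {
  for (Int row = 0; row < nActive; row++) {
    for (Int column = 0; column < output_nPlanes; column++) {
      T sum = 0;
      for (Int j = 0; j < input_nPlanes; j++) {
        T i = input_features[row * input_stride + j] * affineWeight[j] +
              affineBias[j];
        i = (i > 0) ? i : 0;
        sum += i * convWeight[j * output_nPlanes + column];
      }
      output_features[row * output_stride + column] = sum;
    }
  }
}

int main() {
  // One active site, two input planes, one output plane, dense strides.
  float in[2] = {1.0f, -2.0f};
  float aW[2] = {2.0f, 1.0f}, aB[2] = {0.0f, 1.0f};
  float W[2] = {3.0f, 4.0f}; // input_nPlanes x output_nPlanes
  float out[1];
  AffineReluTrivialConvolution_ForwardPass<float>(in, 2, 2, out, 1, 1, aW, aB,
                                                  W, 1);
  // ReLU(1*2 + 0)*3 + ReLU(-2*1 + 1)*4 = 2*3 + 0 = 6
  assert(std::fabs(out[0] - 6.0f) < 1e-6f);
}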
@@ -4,7 +4,31 @@
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#include "AveragePooling.h"
template <typename T>
void AveragePooling_ForwardPass(T *input_features, T *output_features,
Int nPlanes, Int input_stride,
Int output_stride, Int *rules, Int nHot,
Int filterVolume) {
for (Int outSite = 0; outSite < nHot; outSite++) {
Int i = rules[2 * outSite] * input_stride;
Int o = rules[2 * outSite + 1] * output_stride;
for (Int plane = 0; plane < nPlanes; plane++)
output_features[o + plane] += input_features[i + plane] / filterVolume;
}
}
template <typename T>
void AveragePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int nPlanes, Int input_stride,
Int output_stride, Int *rules, Int nHot,
Int filterVolume) {
for (Int outSite = 0; outSite < nHot; outSite++) {
Int i = rules[2 * outSite] * input_stride;
Int o = rules[2 * outSite + 1] * output_stride;
for (Int plane = 0; plane < nPlanes; plane++)
d_input_features[i + plane] +=
d_output_features[o + plane] / filterVolume;
}
}
template <typename T, Int Dimension>
void cpu_AveragePooling_updateOutput(
......
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_AVERAGEPOOLING_H
#define CPU_AVERAGEPOOLING_H
template <typename T>
void AveragePooling_ForwardPass(T *input_features, T *output_features,
Int nPlanes, Int input_stride,
Int output_stride, Int *rules, Int nHot,
Int filterVolume) {
for (Int outSite = 0; outSite < nHot; outSite++) {
Int i = rules[2 * outSite] * input_stride;
Int o = rules[2 * outSite + 1] * output_stride;
for (Int plane = 0; plane < nPlanes; plane++)
output_features[o + plane] += input_features[i + plane] / filterVolume;
}
}
template <typename T>
void AveragePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int nPlanes, Int input_stride,
Int output_stride, Int *rules, Int nHot,
Int filterVolume) {
for (Int outSite = 0; outSite < nHot; outSite++) {
Int i = rules[2 * outSite] * input_stride;
Int o = rules[2 * outSite + 1] * output_stride;
for (Int plane = 0; plane < nPlanes; plane++)
d_input_features[i + plane] +=
d_output_features[o + plane] / filterVolume;
}
}
#endif /* CPU_AVERAGEPOOLING_H */
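Note the rulebook convention differs from ActivePooling: rules here is a flat array of [inputSite, outputSite] pairs, one per active connection, and every contribution is scaled by the fixed 1/filterVolume rather than by the number of inputs an output actually receives (absent filter taps simply contribute zero). A minimal sketch (illustration only, not from the commit), assuming Int is a signed integer type:

#include <cassert>
#include <vector>
using Int = int; // assumption

template <typename T>
void AveragePooling_ForwardPass(T *input_features, T *output_features,
                                Int nPlanes, Int input_stride,
                                Int output_stride, Int *rules, Int nHot,
                                Int filterVolume) {
  for (Int outSite = 0; outSite < nHot; outSite++) {
    Int i = rules[2 * outSite] * input_stride;
    Int o = rules[2 * outSite + 1] * output_stride;
    for (Int plane = 0; plane < nPlanes; plane++)
      output_features[o + plane] += input_features[i + plane] / filterVolume;
  }
}

int main() {
  const Int nPlanes = 1, filterVolume = 4;
  std::vector<float> in = {4, 8}; // two input sites
  std::vector<float> out = {0};   // one output site, pre-zeroed
  Int rules[] = {0, 0, 1, 0};     // both inputs pool into output 0
  AveragePooling_ForwardPass<float>(in.data(), out.data(), nPlanes,
                                    /*input_stride=*/1, /*output_stride=*/1,
                                    rules, /*nHot=*/2, filterVolume);
  assert(out[0] == 3.0f); // (4 + 8) / 4: fixed 1/filterVolume scaling
}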
@@ -4,46 +4,125 @@
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#include "BatchNormalization.h"
#include <cmath>
#include <cstring>
#include <vector>
// in/output_stride is normally the same as nPlanes; allow other values to act
// on a subset of columns, e.g. for in-place DenseNet blocks
template <typename T>
void cpu_BatchNormalization_updateOutput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features,
/*float*/ at::Tensor saveMean,
/*float*/ at::Tensor saveInvStd, /*float*/ at::Tensor runningMean,
/*float*/ at::Tensor runningVar,
/*float*/ at::Tensor weight, /*float*/ at::Tensor bias, T eps, T momentum,
bool train, T leakiness) {
output_features.resize_as_(input_features);
if (input_features.ndimension() == 2) {
auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1);
auto input_stride = input_features.stride(0);
auto output_stride = output_features.stride(0);
BatchNormalization_ForwardPass<T>(
input_features.data<T>(), output_features.data<T>(), nPlanes,
input_stride, output_stride, nActive, saveMean.data<T>(),
saveInvStd.data<T>(), runningMean.data<T>(), runningVar.data<T>(),
OptionalTensorData<T>(weight), OptionalTensorData<T>(bias), eps,
momentum, train, leakiness);
void BatchNormalization_ForwardPass(T *input_features, T *output_features,
Int nPlanes, Int input_stride,
Int output_stride, Int nActive, T *saveMean,
T *saveInvStd, T *runningMean,
T *runningVar, T *weight, T *bias, T eps,
T momentum, bool train, T leakiness) {
if (train) {
std::memset(saveMean, 0, nPlanes * sizeof(T));
std::memset(saveInvStd, 0, nPlanes * sizeof(T));
for (Int row = 0, ci = 0; row < nActive;
row++, ci += input_stride - nPlanes) {
for (Int plane = 0; plane < nPlanes; plane++, ci++) {
saveMean[plane] += input_features[ci];
}
}
for (Int plane = 0; plane < nPlanes; plane++) {
saveMean[plane] /= nActive;
runningMean[plane] =
momentum * runningMean[plane] + (1 - momentum) * saveMean[plane];
}
for (Int row = 0, ci = 0; row < nActive;
row++, ci += input_stride - nPlanes) {
for (Int plane = 0; plane < nPlanes; plane++, ci++) {
saveInvStd[plane] +=
(input_features[ci] - saveMean[plane]) *
(input_features[ci] - saveMean[plane]); // accumulate sum-squares
// before inverse square
// rooting
}
}
for (Int plane = 0; plane < nPlanes; plane++) {
runningVar[plane] = momentum * runningVar[plane] +
(1 - momentum) * saveInvStd[plane] / (nActive - 1);
saveInvStd[plane] = powf(saveInvStd[plane] / nActive + eps, -0.5);
}
} else {
for (Int plane = 0; plane < nPlanes; plane++) {
saveMean[plane] = runningMean[plane];
saveInvStd[plane] = powf(runningVar[plane] + eps, -0.5);
}
}
std::vector<T> w(nPlanes);
std::vector<T> b(nPlanes);
for (Int plane = 0; plane < nPlanes; plane++) {
w[plane] = saveInvStd[plane] * (weight ? weight[plane] : 1);
b[plane] = -saveMean[plane] * w[plane] + (bias ? bias[plane] : 0);
}
for (Int row = 0, ci = 0, co = 0; row < nActive;
row++, ci += input_stride - nPlanes, co += output_stride - nPlanes) {
for (Int plane = 0; plane < nPlanes; plane++, ci++, co++) {
T out = input_features[ci] * w[plane] + b[plane];
out = (out > 0) ? out : (out * leakiness);
output_features[co] = out;
}
}
}
template <typename T>
void BatchNormalization_BackwardPass(T *input_features, T *d_input_features,
T *output_features, T *d_output_features,
Int nPlanes, Int input_stride,
Int output_stride, Int nActive,
T *saveMean, T *saveInvStd, T *runningMean,
T *runningVar, T *weight, T *bias,
T *d_weight, T *d_bias, T leakiness) {
std::vector<T> gradMean(nPlanes);
std::vector<T> dotp(nPlanes);
std::vector<T> k(nPlanes);
for (Int row = 0, ci = 0, co = 0; row < nActive;
row++, ci += input_stride - nPlanes, co += output_stride - nPlanes) {
for (Int plane = 0; plane < nPlanes; plane++, ci++, co++) {
T d = d_output_features[co];
d = (output_features[co] > 0) ? d : (d * leakiness);
d_output_features[co] = d;
gradMean[plane] += d;
dotp[plane] += (input_features[ci] - saveMean[plane]) * d;
}
}
for (Int plane = 0; plane < nPlanes; plane++) {
if (d_bias)
d_bias[plane] = gradMean[plane]; // sum of grads, really, until ...
gradMean[plane] /= nActive; // ...now
k[plane] = dotp[plane] * saveInvStd[plane] * saveInvStd[plane] / nActive;
}
for (Int row = 0, ci = 0, co = 0; row < nActive;
row++, ci += input_stride - nPlanes, co += output_stride - nPlanes) {
for (Int plane = 0; plane < nPlanes; plane++, ci++, co++) {
d_input_features[ci] =
(d_output_features[co] - gradMean[plane] -
(input_features[ci] - saveMean[plane]) * k[plane]) *
saveInvStd[plane] * (weight ? weight[plane] : 1);
}
}
if (d_weight)
for (Int plane = 0; plane < nPlanes; plane++) {
d_weight[plane] = dotp[plane] * saveInvStd[plane];
}
}
template <typename T>
void cpu_BatchNormalizationInTensor_updateOutput(
void cpu_BatchNormalization_updateOutput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features,
/*float*/ at::Tensor saveMean,
/*float*/ at::Tensor saveInvStd, /*float*/ at::Tensor runningMean,
/*float*/ at::Tensor runningVar,
/*float*/ at::Tensor weight, /*float*/ at::Tensor bias, T eps, T momentum,
bool train, T leakiness) {
output_features.resize_as_(input_features);
if (input_features.ndimension() == 2) {
auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1);
auto input_stride = input_features.stride(0);
auto output_stride = output_features.stride(0);
BatchNormalization_ForwardPass<T>(
input_features.data<T>(), output_features.data<T>(), nPlanes,
input_stride, output_stride, nActive, saveMean.data<T>(),
......
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_BATCHNORMALIZATION_H
#define CPU_BATCHNORMALIZATION_H
#include <cmath>
#include <cstring>
#include <vector>
// in/output_stride is normally the same as nPlanes; allow other values to act
// on a subset of columns, e.g. for in-place DenseNet blocks
template <typename T>
void BatchNormalization_ForwardPass(T *input_features, T *output_features,
Int nPlanes, Int input_stride,
Int output_stride, Int nActive,
T *saveMean, T *saveInvStd, T *runningMean,
T *runningVar, T *weight, T *bias, T eps,
T momentum, bool train, T leakiness) {
if (train) {
std::memset(saveMean, 0, nPlanes * sizeof(T));
std::memset(saveInvStd, 0, nPlanes * sizeof(T));
for (Int row = 0, ci = 0; row < nActive;
row++, ci += input_stride - nPlanes) {
for (Int plane = 0; plane < nPlanes; plane++, ci++) {
saveMean[plane] += input_features[ci];
}
}
for (Int plane = 0; plane < nPlanes; plane++) {
saveMean[plane] /= nActive;
runningMean[plane] =
momentum * runningMean[plane] + (1 - momentum) * saveMean[plane];
}
for (Int row = 0, ci = 0; row < nActive;
row++, ci += input_stride - nPlanes) {
for (Int plane = 0; plane < nPlanes; plane++, ci++) {
saveInvStd[plane] +=
(input_features[ci] - saveMean[plane]) *
(input_features[ci] - saveMean[plane]); // accumulate sum-squares
// before inverse square
// rooting
}
}
for (Int plane = 0; plane < nPlanes; plane++) {
runningVar[plane] = momentum * runningVar[plane] +
(1 - momentum) * saveInvStd[plane] / (nActive - 1);
saveInvStd[plane] = powf(saveInvStd[plane] / nActive + eps, -0.5);
}
} else {
for (Int plane = 0; plane < nPlanes; plane++) {
saveMean[plane] = runningMean[plane];
saveInvStd[plane] = powf(runningVar[plane] + eps, -0.5);
}
}
std::vector<T> w(nPlanes);
std::vector<T> b(nPlanes);
for (Int plane = 0; plane < nPlanes; plane++) {
w[plane] = saveInvStd[plane] * (weight ? weight[plane] : 1);
b[plane] = -saveMean[plane] * w[plane] + (bias ? bias[plane] : 0);
}
for (Int row = 0, ci = 0, co = 0; row < nActive;
row++, ci += input_stride - nPlanes, co += output_stride - nPlanes) {
for (Int plane = 0; plane < nPlanes; plane++, ci++, co++) {
T out = input_features[ci] * w[plane] + b[plane];
out = (out > 0) ? out : (out * leakiness);
output_features[co] = out;
}
}
}
template <typename T>
void BatchNormalization_BackwardPass(T *input_features, T *d_input_features,
T *output_features, T *d_output_features,
Int nPlanes, Int input_stride,
Int output_stride, Int nActive,
T *saveMean, T *saveInvStd, T *runningMean,
T *runningVar, T *weight, T *bias,
T *d_weight, T *d_bias, T leakiness) {
std::vector<T> gradMean(nPlanes);
std::vector<T> dotp(nPlanes);
std::vector<T> k(nPlanes);
for (Int row = 0, ci = 0, co = 0; row < nActive;
row++, ci += input_stride - nPlanes, co += output_stride - nPlanes) {
for (Int plane = 0; plane < nPlanes; plane++, ci++, co++) {
T d = d_output_features[co];
d = (output_features[co] > 0) ? d : (d * leakiness);
d_output_features[co] = d;
gradMean[plane] += d;
dotp[plane] += (input_features[ci] - saveMean[plane]) * d;
}
}
for (Int plane = 0; plane < nPlanes; plane++) {
if (d_bias)
d_bias[plane] = gradMean[plane]; // sum of grads, really, until ...
gradMean[plane] /= nActive; // ...now
k[plane] = dotp[plane] * saveInvStd[plane] * saveInvStd[plane] / nActive;
}
for (Int row = 0, ci = 0, co = 0; row < nActive;
row++, ci += input_stride - nPlanes, co += output_stride - nPlanes) {
for (Int plane = 0; plane < nPlanes; plane++, ci++, co++) {
d_input_features[ci] =
(d_output_features[co] - gradMean[plane] -
(input_features[ci] - saveMean[plane]) * k[plane]) *
saveInvStd[plane] * (weight ? weight[plane] : 1);
}
}
if (d_weight)
for (Int plane = 0; plane < nPlanes; plane++) {
d_weight[plane] = dotp[plane] * saveInvStd[plane];
}
}
#endif /* CPU_BATCHNORMALIZATION_H */
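Two details of the forward pass are worth calling out. First, runningVar is updated with the unbiased divisor (nActive - 1) while saveInvStd uses the biased nActive. Second, normalization and the optional affine are folded into a single per-plane scale and shift, w[p] = saveInvStd[p] * weight[p] and b[p] = -saveMean[p] * w[p] + bias[p], so the hot loop is one multiply-add plus the leaky ReLU. A tiny sketch (not from the commit) checking the fold:

#include <cassert>
#include <cmath>

int main() {
  float mean = 2.0f, invStd = 0.5f; // as computed by the forward pass
  float gamma = 3.0f, beta = 1.0f, x = 4.0f;
  float w = invStd * gamma;         // w[plane] in the code above
  float b = -mean * w + beta;       // b[plane] in the code above
  float folded = x * w + b;
  float unfolded = (x - mean) * invStd * gamma + beta;
  assert(std::fabs(folded - unfolded) < 1e-6f); // both give 4.0
}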
@@ -7,7 +7,7 @@
template <typename T>
void cpu_BatchwiseMultiplicativeDropout_updateOutput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor output_features,
/*float*/ at::Tensor noise, float alpha) {
/*float*/ at::Tensor noise, T alpha) {
output_features.resize_as_(input_features);
auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1);
@@ -23,7 +23,7 @@ template <typename T>
void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
/*float*/ at::Tensor input_features, /*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features, /*float*/ at::Tensor noise,
float alpha) {
T alpha) {
d_input_features.resize_as_(d_output_features);
auto nActive = input_features.size(0);
auto nPlanes = input_features.size(1);
......
@@ -11,7 +11,9 @@ void rule_index_select(at::Tensor target, at::Tensor src, Int nRules,
auto t_ptr = target.data<T>();
auto s_ptr = src.data<T>();
auto n = target.size(1);
for (int i = 0; i < nRules; ++i)
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < nRules; ++i)
std::memcpy(t_ptr + i * n, s_ptr + rules[2 * i] * n, sizeof(T) * n);
}
template <typename T>
@@ -20,7 +22,9 @@ void rule_index_add_(at::Tensor target, at::Tensor src, Int nRules,
auto t_ptr = target.data<T>();
auto s_ptr = src.data<T>();
auto n = target.size(1);
for (int i = 0; i < nRules; ++i) {
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < nRules; ++i) {
auto t = t_ptr + rules[2 * i] * n;
auto s = s_ptr + i * n;
for (int j = 0; j < n; ++j)
......
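A note on the hunks above: the loop index is hoisted out of the for statement and listed in private(), presumably for compilers that only implement OpenMP 2.0 (e.g. MSVC) and are strict about the canonical loop form; with OpenMP 3.0+ the index can be declared in the loop itself, as in the equivalent sketch below (rule_index_select_v2 is a hypothetical name, not part of the commit). Parallelizing rule_index_add_ this way is race-free only if each target row rules[2*i] appears at most once, which the rulebook construction is assumed to guarantee.

#include <cstring>
using Int = int; // assumption: a signed integral type, as OpenMP 2.0 requires

template <typename T>
void rule_index_select_v2(T *t_ptr, const T *s_ptr, Int nRules,
                          const Int *rules, Int n) {
#pragma omp parallel for // OpenMP 3.0+: index declared in the loop itself
  for (Int i = 0; i < nRules; ++i)
    std::memcpy(t_ptr + i * n, s_ptr + rules[2 * i] * n, sizeof(T) * n);
}

int main() {
  float src[4] = {1, 2, 3, 4}, dst[4] = {};
  // Only rules[2*i] (the source row) is read by select; target row is i.
  Int rules[] = {1, 0, 0, 0};
  rule_index_select_v2(dst, src, /*nRules=*/2, rules, /*n=*/2);
  // dst is now {3, 4, 1, 2}
}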
@@ -4,7 +4,43 @@
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#include "IOLayers.h"
#include <cstring>
// Assume output and d_input_features have been zeroed
template <typename T>
void InputLayer_ForwardPass(T *input_features, T *output_features, Int nRows,
Int maxActive, Int nPlanes, Int *rules,
bool average) {
for (Int row = 0; row < nRows; row++) {
auto nActive = rules[0];
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
for (Int i = 1; i <= nActive; ++i) {
auto in_f = input_features + nPlanes * rules[i];
for (Int plane = 0; plane < nPlanes; plane++) {
output_features[plane] += multiplier * in_f[plane];
}
}
output_features += nPlanes;
rules += 1 + maxActive;
}
}
template <typename T>
void InputLayer_BackwardPass(T *d_input_features, T *d_output_features,
Int nRows, Int maxActive, Int nPlanes, Int *rules,
bool average) {
for (Int row = 0; row < nRows; row++) {
auto nActive = rules[0];
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
for (Int i = 1; i <= nActive; ++i) {
auto d_in_f = d_input_features + nPlanes * rules[i];
for (Int plane = 0; plane < nPlanes; plane++)
d_in_f[plane] += multiplier * d_output_features[plane];
}
d_output_features += nPlanes;
rules += 1 + maxActive;
}
}
template <typename T, Int Dimension>
void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
@@ -26,8 +62,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
output_features.resize_({*m.inputNActive, nPlanes});
output_features.zero_();
InputLayer_ForwardPass<T>(input_features.data<T>(),
output_features.data<T>(), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4);
output_features.data<T>(), nRows, maxActive,
nPlanes, &rules[1][0], mode == 4);
}
}
template <typename T, Int Dimension>
@@ -47,8 +83,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
d_input_features.resize_({rules[0][2], nPlanes});
d_input_features.zero_();
InputLayer_BackwardPass<T>(d_input_features.data<T>(),
d_output_features.data<T>(), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4);
d_output_features.data<T>(), nRows, maxActive,
nPlanes, &rules[1][0], mode == 4);
}
}
@@ -69,8 +105,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
output_features.resize_({rules[0][2], nPlanes});
output_features.zero_();
InputLayer_BackwardPass<T>(output_features.data<T>(),
input_features.data<T>(), nRows,
maxActive, nPlanes, &rules[1][0], false);
input_features.data<T>(), nRows, maxActive,
nPlanes, &rules[1][0], false);
}
}
template <typename T, Int Dimension>
@@ -90,8 +126,8 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
d_input_features.resize_({nRows, nPlanes});
d_input_features.zero_();
InputLayer_ForwardPass<T>(d_output_features.data<T>(),
d_input_features.data<T>(), nRows,
maxActive, nPlanes, &rules[1][0], false);
d_input_features.data<T>(), nRows, maxActive,
nPlanes, &rules[1][0], false);
}
}
@@ -116,8 +152,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
output_features.resize_({*m.inputNActive, nPlanes});
output_features.zero_();
InputLayer_ForwardPass<T>(input_features.data<T>(),
output_features.data<T>(), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4);
output_features.data<T>(), nRows, maxActive,
nPlanes, &rules[1][0], mode == 4);
}
}
template <typename T, Int Dimension>
@@ -139,8 +175,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
d_input_features.resize_({rules[0][2], rules[0][3], nPlanes});
d_input_features.zero_();
InputLayer_BackwardPass<T>(d_input_features.data<T>(),
d_output_features.data<T>(), nRows,
maxActive, nPlanes, &rules[1][0], mode == 4);
d_output_features.data<T>(), nRows, maxActive,
nPlanes, &rules[1][0], mode == 4);
}
}
@@ -162,8 +198,8 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
output_features.resize_({rules[0][2], rules[0][3], nPlanes});
output_features.zero_();
InputLayer_BackwardPass<T>(output_features.data<T>(),
input_features.data<T>(), nRows,
maxActive, nPlanes, &rules[1][0], false);
input_features.data<T>(), nRows, maxActive,
nPlanes, &rules[1][0], false);
}
}
template <typename T, Int Dimension>
@@ -184,7 +220,7 @@ void cpu_BLOutputLayer_updateGradInput(Metadata<Dimension> &m,
d_input_features.resize_({nRows, nPlanes});
d_input_features.zero_();
InputLayer_ForwardPass<T>(d_output_features.data<T>(),
d_input_features.data<T>(), nRows,
maxActive, nPlanes, &rules[1][0], false);
d_input_features.data<T>(), nRows, maxActive,
nPlanes, &rules[1][0], false);
}
}
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_IOLAYERS_H
#define CPU_IOLAYERS_H
#include <cstring>
// Assume output and d_input_features have been zeroed
template <typename T>
void InputLayer_ForwardPass(T *input_features, T *output_features, Int nRows,
Int maxActive, Int nPlanes, Int *rules,
bool average) {
for (Int row = 0; row < nRows; row++) {
auto nActive = rules[0];
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
for (Int i = 1; i <= nActive; ++i) {
auto in_f = input_features + nPlanes * rules[i];
for (Int plane = 0; plane < nPlanes; plane++) {
output_features[plane] += multiplier * in_f[plane];
}
}
output_features += nPlanes;
rules += 1 + maxActive;
}
}
template <typename T>
void InputLayer_BackwardPass(T *d_input_features, T *d_output_features,
Int nRows, Int maxActive, Int nPlanes,
Int *rules, bool average) {
for (Int row = 0; row < nRows; row++) {
auto nActive = rules[0];
T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
for (Int i = 1; i <= nActive; ++i) {
auto d_in_f = d_input_features + nPlanes * rules[i];
for (Int plane = 0; plane < nPlanes; plane++)
d_in_f[plane] += multiplier * d_output_features[plane];
}
d_output_features += nPlanes;
rules += 1 + maxActive;
}
}
#endif /* CPU_IOLAYERS_H */
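Notice the reuse across the hunks above: cpu_OutputLayer_updateOutput calls InputLayer_BackwardPass and cpu_OutputLayer_updateGradInput calls InputLayer_ForwardPass, so the output layer is exactly the transpose of the input layer, and the BL variants drive the same two kernels. The rulebook is dense with a stride of 1 + maxActive per row, each row holding [nActive, idx0, idx1, ...]; mode == 4 selects averaging. A minimal sketch (not from the commit) of the averaging path, assuming Int is a signed integer type:

#include <cassert>
#include <vector>
using Int = int; // assumption

template <typename T>
void InputLayer_ForwardPass(T *input_features, T *output_features, Int nRows,
                            Int maxActive, Int nPlanes, Int *rules,
                            bool average) {
  for (Int row = 0; row < nRows; row++) {
    auto nActive = rules[0];
    T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
    for (Int i = 1; i <= nActive; ++i) {
      auto in_f = input_features + nPlanes * rules[i];
      for (Int plane = 0; plane < nPlanes; plane++)
        output_features[plane] += multiplier * in_f[plane];
    }
    output_features += nPlanes;
    rules += 1 + maxActive; // fixed row stride in the rulebook
  }
}

int main() {
  const Int nRows = 1, maxActive = 2, nPlanes = 1;
  std::vector<float> in = {2, 4}; // two raw input points
  std::vector<float> out = {0};   // one active site, pre-zeroed
  Int rules[] = {2, 0, 1};        // both points map to the same site
  InputLayer_ForwardPass<float>(in.data(), out.data(), nRows, maxActive,
                                nPlanes, rules, /*average=*/true);
  assert(out[0] == 3.0f); // (2 + 4) / 2
}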
@@ -6,8 +6,7 @@
template <typename T>
void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor input_features,
/*float*/ at::Tensor output_features,
float alpha) {
/*float*/ at::Tensor output_features, T alpha) {
output_features.resize_as_(input_features);
auto iF = input_features.data<T>();
auto oF = output_features.data<T>();
@@ -20,7 +19,7 @@ template <typename T>
void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor input_features,
/*float*/ at::Tensor d_input_features,
/*float*/ at::Tensor d_output_features,
float alpha) {
T alpha) {
d_input_features.resize_as_(d_output_features);
auto iF = input_features.data<T>();
auto diF = d_input_features.data<T>();
......
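The last two hunks (BatchwiseMultiplicativeDropout and LeakyReLU) make the same signature fix: alpha was typed float even when T is double, silently narrowing the slope. A tiny sketch (not from the commit) of the narrowing the change removes, assuming that was the motivation:

#include <cassert>

template <typename T> T leaky_float_alpha(T x, float alpha) {
  return x > 0 ? x : x * alpha; // slope already narrowed at the call site
}
template <typename T> T leaky_T_alpha(T x, T alpha) {
  return x > 0 ? x : x * alpha; // slope kept in the tensor's precision
}

int main() {
  double alpha = 0.1; // not exactly representable in float
  double x = -1e9;
  assert(leaky_float_alpha(x, (float)alpha) != leaky_T_alpha(x, alpha));
}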