Commit 2c4ed608 authored by Benjamin Thomas Graham

Goodbye THNN. Hello ATen!

parent 6d4475db
......@@ -5,66 +5,58 @@
// LICENSE file in the root directory of this source tree.
#include <array>
#include <tuple>
// Using 32 bit integers for coordinates and memory calculations.
// They could be replaced with 64 bit integers.
// Advantages of 64 bit:
// - support for nFeatures * nActiveSites > 2^32 per hidden layer per batch
// Disadvantages:
// - larger, and therefore slower, data copies from CPU -> GPU
// - more device memory needed to store sparseconvnet 'rulebooks'
// - not really needed until GPUs have >> 32GB RAM
using Int = int32_t;
using uInt = uint32_t; // The max value, uInt_MAX, denotes 'non-existent'
const uInt uInt_MAX = 4294967295; // 2^32-1
const uInt Int_MAX = 2147483647; // 2^31-1
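// Illustrative sketch (not from the commit): the 32-bit limit described
// above can be guarded explicitly. fitsInt32 is a hypothetical helper; the
// product is formed in 64 bits so the check itself cannot wrap.
inline bool fitsInt32(int64_t nFeatures, int64_t nActiveSites) {
  return nFeatures * nActiveSites <= (int64_t)Int_MAX;
}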
// Point<dimension> is a point in the d-dimensional integer lattice
// (i.e. square-grid/cubic-grid, ...)
template <uInt dimension> using Point = std::array<Int, dimension>;
template <Int dimension> using Point = std::array<Int, dimension>;
template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) {
template <Int dimension>
Point<dimension> LongTensorToPoint(/*long*/ at::Tensor &t) {
Point<dimension> p;
long *td = THLongTensor_data(t);
for (int i = 0; i < dimension; i++)
long *td = t.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
return p;
}
template <uInt dimension>
Point<2 * dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) {
template <Int dimension>
Point<2 * dimension> TwoLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1) {
Point<2 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
return p;
}
template <uInt dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1,
THLongTensor *t2) {
template <Int dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1,
/*long*/ at::Tensor &t2) {
Point<3 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
td = THLongTensor_data(t2);
for (int i = 0; i < dimension; i++)
td = t2.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + 2 * dimension] = td[i];
return p;
}
// FNV-style hash function for Point<dimension>
// (note: the FNV prime and offset basis appear here in swapped roles
// relative to canonical FNV-1a)
template <uInt dimension> struct IntArrayHash {
template <Int dimension> struct IntArrayHash {
std::size_t operator()(Point<dimension> const &p) const {
uInt hash = 16777619;
Int hash = 16777619;
for (auto x : p) {
hash *= 2166136261;
hash ^= x;
......@@ -73,5 +65,4 @@ template <uInt dimension> struct IntArrayHash {
}
};
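// Illustrative usage sketch (not from the commit): IntArrayHash makes
// Point<d> usable as a hash-map key, as SparseGridMap in Metadata.h does
// with google::dense_hash_map. Assumes <unordered_map> is included.
inline Int exampleLookup() {
  std::unordered_map<Point<3>, Int, IntArrayHash<3>> sites;
  sites[Point<3>{{1, 2, 3}}] = 0; // active site (1,2,3) stored as row 0
  return sites.at(Point<3>{{1, 2, 3}});
}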
#define THCITensor THCudaIntTensor
#define THCITensor_(NAME) TH_CONCAT_3(THCITensor, _, NAME)
#define at_kINT at::kInt
......@@ -5,66 +5,58 @@
// LICENSE file in the root directory of this source tree.
#include <array>
#include <tuple>
// Using 32 bit integers for coordinates and memory calculations.
// They could be replaced with 64 bit integers.
// Advantages of 64 bit:
// - support for nFeatures * nActiveSites > 2^32 per hidden layer per batch
// Disadvantages:
// - larger, and therefore slower, data copies from CPU -> GPU
// - more device memory needed to store sparseconvnet 'rulebooks'
// - not really needed until GPUs have >> 32GB RAM
// Using 64 bit integers for coordinates and memory calculations.
using Int = int64_t;
using uInt = uint64_t; // The max value, uInt_MAX, denotes 'non-existent'
const uInt uInt_MAX = 18446744073709551615ULL; // 2^64-1
const uInt Int_MAX = 9223372036854775807; // 2^63-1
// Point<dimension> is a point in the d-dimensional integer lattice
// (i.e. square-grid/cubic-grid, ...)
template <uInt dimension> using Point = std::array<Int, dimension>;
template <Int dimension> using Point = std::array<Int, dimension>;
template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) {
template <Int dimension>
Point<dimension> LongTensorToPoint(/*long*/ at::Tensor &t) {
Point<dimension> p;
long *td = THLongTensor_data(t);
for (int i = 0; i < dimension; i++)
long *td = t.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
return p;
}
template <uInt dimension>
Point<2 * dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) {
template <Int dimension>
Point<2 * dimension> TwoLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1) {
Point<2 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
return p;
}
template <uInt dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1,
THLongTensor *t2) {
template <Int dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1,
/*long*/ at::Tensor &t2) {
Point<3 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
td = THLongTensor_data(t2);
for (int i = 0; i < dimension; i++)
td = t2.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + 2 * dimension] = td[i];
return p;
}
// FNV Hash function for Point<dimension>
template <uInt dimension> struct IntArrayHash {
template <Int dimension> struct IntArrayHash {
std::size_t operator()(Point<dimension> const &p) const {
uInt hash = 14695981039346656037;
Int hash = -3750763034362895579; // 14695981039346656037;
for (auto x : p) {
hash *= 1099511628211;
hash ^= x;
......@@ -73,5 +65,4 @@ template <uInt dimension> struct IntArrayHash {
}
};
#define THCITensor THCudaLongTensor
#define THCITensor_(NAME) TH_CONCAT_3(THCITensor, _, NAME)
#define at_kINT at::kLong
......@@ -6,7 +6,6 @@
#ifndef ACTIVEPOOLING_H
#define ACTIVEPOOLING_H
#include "../SparseConvNet.h"
// Return the maximum number of active sites in the batch
// rules has size 1.
......@@ -14,14 +13,14 @@
// First column is number of active sites for that sample (<= maxActive)
// Remaining maxActive columns give the active sites, zero padded.
template <uInt dimension>
template <Int dimension>
void activePoolingRules(SparseGrids<dimension> &SGs, RuleBook &rules) {
rules.clear();
rules.resize(2);
auto &r = rules[0];
uInt maxActive = 0;
Int maxActive = 0;
for (auto &sg : SGs)
maxActive = std::max(maxActive, (uInt)sg.mp.size());
maxActive = std::max(maxActive, (Int)sg.mp.size());
for (auto &sg : SGs) {
r.push_back(sg.mp.size());
for (auto &iter : sg.mp)
......
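// Worked example for activePoolingRules above (illustrative, not from the
// commit): a batch with 2 and 3 active sites gives maxActive == 3, and
// rules[0] holds one zero-padded row per sample:
//   rules[0] = { 2, a0, a1, 0,      // sample 0: 2 sites, one pad slot
//                3, b0, b1, b2 };   // sample 1: 3 sites, no padding
// a*/b* stand for global row indices of active sites; batchSize and
// maxActive are read back from rules[1] (see generic/CPU/ActivePooling.cpp
// further down).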
......@@ -8,7 +8,7 @@
#define CONVOLUTIONRULES_H
#include "RectangularRegions.h"
template <uInt dimension>
template <Int dimension>
void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
SparseGrid<dimension> &outputGrid,
RuleBook &rules, long *size,
......@@ -17,10 +17,11 @@ void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
rules.resize(volume<dimension>(size));
for (auto const &inIter : inputGrid.mp) {
for (auto j : OutputRegionCalculator<dimension>(inIter.first, size, stride,
outputSpatialSize)) {
auto outRegion = OutputRegionCalculator<dimension>(
inIter.first, size, stride, outputSpatialSize);
for (auto j : outRegion) {
auto inRegion = InputRegionCalculator<dimension>(j, size, stride);
uInt rulesOffset = inRegion.offset(inIter.first);
Int rulesOffset = inRegion.offset(inIter.first);
auto outIter = outputGrid.mp.find(j);
if (outIter == outputGrid.mp.end()) {
outIter =
......@@ -32,19 +33,19 @@ void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
}
}
template <uInt dimension>
uInt Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize,
long *filterStride,
long *input_spatialSize,
long *output_spatialSize) {
template <Int dimension>
Int Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize,
long *filterStride,
long *input_spatialSize,
long *output_spatialSize) {
rules.clear();
output_SGs.clear();
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
output_SGs.resize(batchSize);
uInt output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) {
Int output_nActive = 0;
for (Int i = 0; i < batchSize; i++) {
auto &iSG = input_SGs[i];
auto &oSG = output_SGs[i];
oSG.ctr = output_nActive;
......@@ -57,43 +58,43 @@ uInt Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
return output_nActive;
}
template <uInt dimension>
uInt Convolution_InputSgsToRulesAndOutputSgs_OMP(
template <Int dimension>
Int Convolution_InputSgsToRulesAndOutputSgs_OMP(
SparseGrids<dimension> &input_SGs, SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize, long *filterStride,
long *input_spatialSize, long *output_spatialSize) {
rules.clear();
rules.resize(volume<dimension>(filterSize));
output_SGs.clear();
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
output_SGs.resize(batchSize);
std::vector<RuleBook> rbs(batchSize);
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < batchSize; i++)
Convolution_InputSgToRulesAndOutputSg<dimension>(
input_SGs[i], output_SGs[i], rbs[i], filterSize, filterStride,
input_spatialSize, output_spatialSize);
}
uInt output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) {
Int output_nActive = 0;
for (Int i = 0; i < batchSize; i++) {
// Parallel assignment:
// output_nActive <- output_nActive+output_SGs[i].ctr
// output_SGs[i].ctr <- output_nActive
uInt tmp = output_nActive;
Int tmp = output_nActive;
output_nActive += output_SGs[i].ctr;
output_SGs[i].ctr = tmp;
}
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < rules.size(); i++) {
for (i = 0; i < (Int)rules.size(); i++) {
auto &R = rules[i];
for (uInt j = 0; j < batchSize; j++) {
for (Int j = 0; j < batchSize; j++) {
auto &r = rbs[j][i];
auto offset = output_SGs[j].ctr;
for (uInt k = 0; k < r.size();) {
for (Int k = 0; k < (Int)r.size();) {
R.push_back(r[k++]);
R.push_back(r[k++] + offset);
}
......@@ -105,19 +106,19 @@ uInt Convolution_InputSgsToRulesAndOutputSgs_OMP(
// for each active site, list of (inputFeatureNumber, batchIdx, spatialOffset)
// triples
template <uInt dimension>
template <Int dimension>
void SparseToDense_InputSgsToRulesAndOutputSgs(
SparseGrids<dimension> &input_SGs, RuleBook &rules, long *spatialSize) {
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
rules.clear();
rules.resize(batchSize);
Point<dimension> lb, ub;
for (int i = 0; i < dimension; ++i) {
for (Int i = 0; i < dimension; ++i) {
lb[i] = 0;
ub[i] = spatialSize[i] - 1;
}
auto region = RectangularRegion<dimension>(lb, ub);
for (uInt batchIdx = 0; batchIdx < batchSize; batchIdx++) {
for (Int batchIdx = 0; batchIdx < batchSize; batchIdx++) {
auto &iSG = input_SGs[batchIdx];
for (auto const &inIter : iSG.mp) {
rules[batchIdx].push_back(inIter.second + iSG.ctr);
......@@ -126,19 +127,19 @@ void SparseToDense_InputSgsToRulesAndOutputSgs(
}
}
template <uInt dimension>
template <Int dimension>
void SparseToDense_InputSgsToRulesAndOutputSgs_OMP(
SparseGrids<dimension> &input_SGs, RuleBook &rules, long *spatialSize) {
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
rules.clear();
rules.resize(batchSize);
Point<dimension> lb, ub;
for (int i = 0; i < dimension; ++i) {
for (Int i = 0; i < dimension; ++i) {
lb[i] = 0;
ub[i] = spatialSize[i] - 1;
}
auto region = RectangularRegion<dimension>(lb, ub);
uInt batchIdx;
Int batchIdx;
#pragma omp parallel for private(batchIdx)
for (batchIdx = 0; batchIdx < batchSize; batchIdx++) {
auto &iSG = input_SGs[batchIdx];
......
......@@ -8,7 +8,7 @@
#define FULLDECONVOLUTIONRULES_H
#include "RectangularRegions.h"
template <uInt dimension>
template <Int dimension>
void FullConvolution_InputSgToRulesAndOutputSg(
SparseGrid<dimension> &inputGrid, SparseGrid<dimension> &outputGrid,
RuleBook &rules, long *size, long *stride, long *inputSpatialSize,
......@@ -20,9 +20,7 @@ void FullConvolution_InputSgToRulesAndOutputSg(
auto outRegion =
InputRegionCalculator<dimension>(inIter.first, size, stride);
for (auto j : outRegion) {
auto inRegion =
OutputRegionCalculator<dimension>(j, size, stride, outputSpatialSize);
uInt rulesOffset = outRegion.offset(j);
Int rulesOffset = outRegion.offset(j);
auto outIter = outputGrid.mp.find(j);
if (outIter == outputGrid.mp.end()) {
outIter =
......@@ -34,17 +32,17 @@ void FullConvolution_InputSgToRulesAndOutputSg(
}
}
template <uInt dimension>
uInt FullConvolution_InputSgsToRulesAndOutputSgs(
template <Int dimension>
Int FullConvolution_InputSgsToRulesAndOutputSgs(
SparseGrids<dimension> &input_SGs, SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize, long *filterStride,
long *input_spatialSize, long *output_spatialSize) {
rules.clear();
output_SGs.clear();
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
output_SGs.resize(batchSize);
uInt output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) {
Int output_nActive = 0;
for (Int i = 0; i < batchSize; i++) {
auto &iSG = input_SGs[i];
auto &oSG = output_SGs[i];
oSG.ctr = output_nActive;
......@@ -57,43 +55,43 @@ uInt FullConvolution_InputSgsToRulesAndOutputSgs(
return output_nActive;
}
template <uInt dimension>
uInt FullConvolution_InputSgsToRulesAndOutputSgs_OMP(
template <Int dimension>
Int FullConvolution_InputSgsToRulesAndOutputSgs_OMP(
SparseGrids<dimension> &input_SGs, SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize, long *filterStride,
long *input_spatialSize, long *output_spatialSize) {
rules.clear();
rules.resize(volume<dimension>(filterSize));
output_SGs.clear();
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
output_SGs.resize(batchSize);
std::vector<RuleBook> rbs(batchSize);
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < batchSize; i++)
FullConvolution_InputSgToRulesAndOutputSg<dimension>(
input_SGs[i], output_SGs[i], rbs[i], filterSize, filterStride,
input_spatialSize, output_spatialSize);
}
uInt output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) {
Int output_nActive = 0;
for (Int i = 0; i < batchSize; i++) {
// Parallel assignment:
// output_nActive <- output_nActive+output_SGs[i].ctr
// output_SGs[i].ctr <- output_nActive
uInt tmp = output_nActive;
Int tmp = output_nActive;
output_nActive += output_SGs[i].ctr;
output_SGs[i].ctr = tmp;
}
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < rules.size(); i++) {
for (i = 0; i < (Int)rules.size(); i++) {
auto &R = rules[i];
for (uInt j = 0; j < batchSize; j++) {
for (Int j = 0; j < batchSize; j++) {
auto &r = rbs[j][i];
auto offset = output_SGs[j].ctr;
for (uInt k = 0; k < r.size();) {
for (Int k = 0; k < (Int)r.size();) {
R.push_back(r[k++]);
R.push_back(r[k++] + offset);
}
......
......@@ -6,7 +6,7 @@
#ifndef INPUTLAYER_H
#define INPUTLAYER_H
#include "../SparseConvNet.h"
// Rulebook Format
// rules[0][0] == mode
......@@ -16,10 +16,10 @@
// rules[1] nOutputRows x (1+maxActive)
// mode: 0 == guaranteed unique, 1 == overwrite, 2 == keep, 3 == sum, 4 == mean
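// Worked example (illustrative, not from the commit): if input rows 4 and 7
// carry the same coordinate they map to one output row. Per the code below,
// mode 1 emits the rules[1] entry { 1, outputRows[i].front() } == { 1, 4 },
// mode 2 emits { 1, 7 } (.back()), and modes 3/4 emit { 2, 4, 7, ... },
// padded to width 1 + maxActive, before features are summed or averaged.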
template <uInt dimension>
template <Int dimension>
void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
uInt nInputRows, uInt nInputColumns, uInt batchSize,
uInt mode, uInt &nActive) {
Int nInputRows, Int nInputColumns, Int batchSize, Int mode,
Int &nActive) {
assert(nActive == 0);
assert(rules.size() == 0);
assert(SGs.size() == 0);
......@@ -37,20 +37,20 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
if (nInputColumns == dimension) {
SGs.resize(1);
auto &sg = SGs[0];
for (int i = 0; i < nInputRows; ++i) {
for (int j = 0; j < dimension; j++)
for (Int i = 0; i < nInputRows; ++i) {
for (Int j = 0; j < dimension; j++)
p[j] = coords[j];
coords += dimension;
sg.mp[p] = i;
}
} else { // nInputColumns == dimension + 1
uInt idx;
for (int i = 0; i < nInputRows; ++i) {
for (int j = 0; j < dimension; j++)
Int idx;
for (Int i = 0; i < nInputRows; ++i) {
for (Int j = 0; j < dimension; j++)
p[j] = coords[j];
idx = coords[dimension];
coords += dimension + 1;
if (idx + 1 >= SGs.size())
if (idx + 1 >= (Int)SGs.size())
SGs.resize(idx + 1);
SGs[idx].mp[p] = i;
}
......@@ -59,12 +59,12 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
}
// Compile list of how input rows correspond to output rows
std::vector<std::vector<uInt>> outputRows;
std::vector<std::vector<Int>> outputRows;
if (nInputColumns == dimension) {
SGs.resize(1);
auto &sg = SGs[0];
for (int i = 0; i < nInputRows; ++i) {
for (int j = 0; j < dimension; j++)
for (Int i = 0; i < nInputRows; ++i) {
for (Int j = 0; j < dimension; j++)
p[j] = coords[j];
coords += dimension;
auto iter = sg.mp.find(p);
......@@ -75,13 +75,13 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
outputRows[sg.mp[p]].push_back(i);
}
} else { // nInputColumns == dimension + 1
uInt idx;
for (int i = 0; i < nInputRows; ++i) {
for (int j = 0; j < dimension; j++)
Int idx;
for (Int i = 0; i < nInputRows; ++i) {
for (Int j = 0; j < dimension; j++)
p[j] = coords[j];
idx = coords[dimension];
coords += dimension + 1;
if (idx + 1 >= SGs.size())
if (idx + 1 >= (Int)SGs.size())
SGs.resize(idx + 1);
auto &sg = SGs[idx];
auto iter = sg.mp.find(p);
......@@ -99,21 +99,21 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
rules[0].push_back(outputRows.size());
auto &rule = rules[1];
if (mode == 1) {
for (uInt i = 0; i < nActive; ++i) {
for (Int i = 0; i < nActive; ++i) {
rule.push_back(1);
rule.push_back(outputRows[i].front());
}
}
if (mode == 2) {
for (uInt i = 0; i < nActive; ++i) {
for (Int i = 0; i < nActive; ++i) {
rule.push_back(1);
rule.push_back(outputRows[i].back());
}
}
if (mode == 3 or mode == 4) {
uInt maxActive = 0;
Int maxActive = 0;
for (auto &row : outputRows)
maxActive = std::max(maxActive, (uInt)row.size());
maxActive = std::max(maxActive, (Int)row.size());
rules[0][1] = maxActive;
for (auto &row : outputRows) {
rule.push_back(row.size());
......@@ -125,8 +125,6 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
}
}
// Rulebook Format
// rules[0][0] == mode
// rules[0][1] == maxActive per spatial location (==1 for modes 0,1,2)
......@@ -138,14 +136,14 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
// bl is a batchSize x length x dimension long array of coordinates
// mode: 0 == guaranteed unique and all present; 1 == overwrite, 2 == keep,
// 3 == sum, 4 == mean
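// Illustrative note (an inference from the p[0] >= 0 test in blRules, not
// from the commit): since bl is a dense batchSize x length x dimension
// array, variable-length samples can be padded with rows whose first
// coordinate is negative, e.g.
//   coords[b][l] = {-1, -1, -1}; // padding row, skipped when building rules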
template <uInt dimension>
template <Int dimension>
void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
uInt batchSize, uInt length, uInt mode, uInt &nActive) {
Int batchSize, Int length, Int mode, Int &nActive) {
assert(nActive == 0);
assert(rules.size() == 0);
assert(SGs.size() == 0);
SGs.resize(batchSize);
uInt I;
Int I;
if (mode == 0) {
nActive = batchSize * length;
......@@ -161,8 +159,8 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
sg.ctr = I * length;
auto c = coords + I * length * dimension;
Point<dimension> p;
for (int l = 0; l < length; ++l) {
for (int j = 0; j < dimension; ++j)
for (Int l = 0; l < length; ++l) {
for (Int j = 0; j < dimension; ++j)
p[j] = c[j];
c += dimension;
sg.mp[p] = l;
......@@ -172,18 +170,18 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
}
// Compile list of how input rows correspond to output rows
std::vector<std::vector<std::vector<uInt>>> outputRows(batchSize);
std::vector<uInt> nActives(batchSize);
std::vector<std::vector<std::vector<Int>>> outputRows(batchSize);
std::vector<Int> nActives(batchSize);
#pragma omp parallel for private(I)
for (I = 0; I < batchSize; I++) {
auto &sg = SGs[I];
auto &ors = outputRows[I];
auto &nAct = nActives[I];
auto c = coords + I * length * dimension;
uInt i = I * length;
Int i = I * length;
Point<dimension> p;
for (int l = 0; l < length; ++l, ++i) {
for (int j = 0; j < dimension; ++j)
for (Int l = 0; l < length; ++l, ++i) {
for (Int j = 0; j < dimension; ++j)
p[j] = *c++;
if (p[0] >= 0) {
auto iter = sg.mp.find(p);
......@@ -200,11 +198,11 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
SGs[I].ctr = nActive;
nActive += nActives[I];
}
uInt maxActive = 1;
Int maxActive = 1;
if (mode >= 3)
for (auto &ors : outputRows)
for (auto &row : ors)
maxActive = std::max(maxActive, (uInt)row.size());
maxActive = std::max(maxActive, (Int)row.size());
rules.resize(2);
rules[0].push_back(mode);
......@@ -247,7 +245,7 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
auto rr = &rule[SGs[I].ctr * (maxActive + 1)];
for (auto &row : ors) {
rr[0] = row.size();
for (int i = 0; i < row.size(); ++i)
for (Int i = 0; i < (Int)row.size(); ++i)
rr[i + 1] = row[i];
rr += 1 + maxActive;
}
......
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#include "Metadata.h"
#include "ActivePoolingRules.h"
#include "ConvolutionRules.h"
#include "FullConvolutionRules.h"
#include "IOLayersRules.h"
#include "RandomizedStrideRules.h"
#include "SubmanifoldConvolutionRules.h"
template <Int dimension> SparseGrid<dimension>::SparseGrid() : ctr(0) {
// Sparsehash needs a key to be set aside and never used - we use
// (-1,...,-1)
Point<dimension> empty_key;
for (Int i = 0; i < dimension; ++i)
empty_key[i] = -1;
mp.set_empty_key(empty_key);
}
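// Illustrative note (not from the commit): dense_hash_map reserves the
// empty key internally, so (-1,...,-1) must never be inserted as a real
// coordinate. For example:
//   SparseGrid<3> sg;
//   sg.mp[Point<3>{{0, 0, 0}}] = 0; // fine
//   sg.mp[Point<3>{{-1, -1, -1}}];  // would collide with the empty key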
template <typename T> T *OptionalTensorData(at::Tensor tensor) {
return tensor.numel() ? tensor.data<T>() : nullptr;
}
template <Int dimension>
void addPointToSparseGridMapAndFeatures(SparseGridMap<dimension> &mp,
Point<dimension> p, Int &nActive,
long nPlanes,
/*float*/ at::Tensor features,
float *vec, bool overwrite) {
auto iter = mp.find(p);
if (iter == mp.end()) {
iter = mp.insert(std::make_pair(p, nActive++)).first;
features.resize_({(int)nActive, nPlanes});
std::memcpy(features.data<float>() + (nActive - 1) * nPlanes, vec,
sizeof(float) * nPlanes);
} else if (overwrite) {
std::memcpy(features.data<float>() + iter->second * nPlanes, vec,
sizeof(float) * nPlanes);
}
}
template <Int dimension>
Metadata<dimension>::Metadata()
: re(std::chrono::system_clock::now().time_since_epoch().count()) {}
template <Int dimension> void Metadata<dimension>::clear() {
nActive.clear();
grids.clear();
activePoolingRuleBooks.clear();
inputLayerRuleBook.clear();
validRuleBooks.clear();
ruleBooks.clear();
fullConvolutionRuleBooks.clear();
sparseToDenseRuleBooks.clear();
inputSGs = nullptr;
inputSG = nullptr;
inputNActive = nullptr;
inputLayerRuleBook.clear();
blLayerRuleBook.clear();
}
template <Int dimension>
Int Metadata<dimension>::getNActive(/*long*/ at::Tensor spatialSize) {
return nActive[LongTensorToPoint<dimension>(spatialSize)];
};
template <Int dimension>
SparseGrids<dimension> &
Metadata<dimension>::getSparseGrid(/*long*/ at::Tensor spatialSize) {
return grids[LongTensorToPoint<dimension>(spatialSize)];
};
template <Int dimension>
void Metadata<dimension>::setInputSpatialSize(/*long*/ at::Tensor spatialSize) {
inputSpatialSize = LongTensorToPoint<dimension>(spatialSize);
inputSGs = &grids[inputSpatialSize];
inputNActive = &nActive[inputSpatialSize];
}
template <Int dimension> void Metadata<dimension>::batchAddSample() {
assert(inputSGs && "Call setInputSpatialSize first, please!");
inputSGs->resize(inputSGs->size() + 1);
inputSG = &inputSGs->back();
}
template <Int dimension>
void Metadata<dimension>::setInputSpatialLocation(/*float*/ at::Tensor features,
/*long*/ at::Tensor location,
/*float*/ at::Tensor vec,
bool overwrite) {
auto p = LongTensorToPoint<dimension>(location);
SparseGridMap<dimension> &mp = inputSG->mp;
Int &nActive = *inputNActive;
auto nPlanes = vec.size(0);
addPointToSparseGridMapAndFeatures<dimension>(
mp, p, nActive, nPlanes, features, vec.data<float>(), overwrite);
}
template <Int dimension>
void Metadata<dimension>::setInputSpatialLocations(
/*float*/ at::Tensor features,
/*long*/ at::Tensor locations,
/*float*/ at::Tensor vecs, bool overwrite) {
/* assert(locations.ndimension() == 2 and "locations must be 2
* dimensional!"); */
/* assert(vecs.ndimension() == 2 and "vecs must be 2 dimensional!"); */
/* assert(locations.size(0) == vecs.size(0) and */
/* "Location.size(0) and vecs.size(0) must be equal!"); */
/* assert((locations.size(1) == dimension or */
/* locations.size(1) == 1 + dimension) and */
/* "locations.size(0) must be either dimension or dimension+1"); */
Point<dimension> p;
Int &nActive = *inputNActive;
auto nPlanes = vecs.size(1);
long *l = locations.data<long>();
float *v = vecs.data<float>();
if (locations.size(1) == dimension) {
// add points to current sample
assert(inputSG);
SparseGridMap<dimension> &mp = inputSG->mp;
for (Int idx = 0; idx < locations.size(0); ++idx) {
for (Int d = 0; d < dimension; ++d)
p[d] = *l++;
addPointToSparseGridMapAndFeatures<dimension>(mp, p, nActive, nPlanes,
features, v, overwrite);
v += nPlanes;
}
}
if (locations.size(1) == dimension + 1) {
// add new samples to batch as necessary
auto &SGs = *inputSGs;
for (Int idx = 0; idx < locations.size(0); ++idx) {
for (Int d = 0; d < dimension; ++d)
p[d] = *l++;
Int batch = *l++;
if (batch >= (Int)SGs.size()) {
SGs.resize(batch + 1);
}
SparseGridMap<dimension> &mp = SGs[batch].mp;
addPointToSparseGridMapAndFeatures<dimension>(mp, p, nActive, nPlanes,
features, v, overwrite);
v += nPlanes;
}
}
}
template <Int dimension>
void Metadata<dimension>::getSpatialLocations(/*long*/ at::Tensor spatialSize,
/*long*/ at::Tensor locations) {
Int nActive = getNActive(spatialSize);
auto &SGs = getSparseGrid(spatialSize);
Int batchSize = SGs.size();
locations.resize_({(int)nActive, dimension + 1});
locations.zero_();
auto lD = locations.data<long>();
for (Int i = 0; i < batchSize; i++) {
auto mp = SGs[i].mp;
auto offset = SGs[i].ctr;
for (auto it = mp.begin(); it != mp.end(); ++it) {
for (Int d = 0; d < dimension; ++d) {
lD[(it->second + offset) * (dimension + 1) + d] = it->first[d];
}
lD[(it->second + offset) * (dimension + 1) + dimension] = i;
}
}
}
template <Int dimension>
void Metadata<dimension>::createMetadataForDenseToSparse(
/*long*/ at::Tensor spatialSize,
/*long*/ at::Tensor nz_, long batchSize) {
clear();
setInputSpatialSize(spatialSize);
inputSGs->resize(batchSize);
auto &nActive = *inputNActive;
nActive = nz_.size(0);
long *nz = nz_.data<long>();
std::vector<Int> br(batchSize + 1);
if (batchSize == 1) {
br[1] = nActive;
} else {
long b = 0;
for (Int i = 0; i < nActive; i++) {
long B = nz[i * (dimension + 1)];
for (; b < B;)
br[++b] = i;
}
for (; b < batchSize;)
br[++b] = nActive;
}
Int b;
#pragma omp parallel for private(b)
for (b = 0; b < batchSize; b++) {
auto &sg = inputSGs->at(b);
for (Int i = br[b]; i < br[b + 1]; i++) {
Point<dimension> x;
for (Int j = 0; j < dimension; j++) {
x[j] = nz[i * (dimension + 1) + j + 1]; // 0-indexed
}
sg.mp[x] = i;
}
}
}
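// Worked example (illustrative, not from the commit): with batchSize == 3
// and nz_ rows whose leading batch indices are {0, 0, 1, 2, 2, 2}, the loop
// above yields br == {0, 2, 3, 6}, so sample b owns feature rows
// [br[b], br[b+1]); a sample with no rows gets an empty range.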
template <Int dimension>
void Metadata<dimension>::sparsifyMetadata(Metadata<dimension> &mOut,
/*long*/ at::Tensor spatialSize,
/*byte*/ at::Tensor filter,
/*long*/ at::Tensor cuSum) {
// Create a new SparseGrids with fewer entries.
mOut.clear();
auto p = LongTensorToPoint<dimension>(spatialSize);
auto &sgsIn = grids[p];
auto &sgsOut = mOut.grids[p];
sgsOut.resize(sgsIn.size());
if (filter.ndimension() == 1) {
auto f = filter.data<unsigned char>();
auto cs = cuSum.data<long>();
auto nActive = cs[cuSum.numel() - 1];
mOut.nActive[p] = nActive;
Int sample;
#pragma omp parallel for private(sample)
for (sample = 0; sample < (Int)sgsIn.size(); ++sample) {
auto &sgIn = sgsIn[sample];
auto &sgOut = sgsOut[sample];
for (auto const &iter : sgIn.mp) {
auto n = iter.second + sgIn.ctr;
if (f[n])
sgOut.mp[iter.first] = cs[n] - 1;
}
}
} else {
mOut.nActive[p] = 0;
}
}
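// Worked example (illustrative, not from the commit): with
// filter == {1, 0, 1, 1} and inclusive cuSum == {1, 1, 2, 3}, active sites
// 0, 2 and 3 survive and are renumbered cs[n] - 1 == 0, 1, 2; the new
// metadata's nActive is cs.back() == 3.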
// tensor is size[0] x .. x size[dimension-1] x size[dimension]
// size[0] x .. x size[dimension-1] == spatial volume
// size[dimension] == #feature planes
template <Int dimension>
void Metadata<dimension>::addSampleFromThresholdedTensor(
/*float*/ at::Tensor features_,
/*float*/ at::Tensor tensor_,
/*long*/ at::Tensor offset_,
/*long*/ at::Tensor spatialSize_, float threshold) {
auto &nActive = *inputNActive;
auto &SGs = *inputSGs;
SGs.resize(SGs.size() + 1);
auto &sg = SGs.back();
auto tensor = tensor_.data<float>();
auto offset = offset_.data<long>();
auto spatialSize = spatialSize_.data<long>();
long size[dimension + 1]; // IntList?
for (Int i = 0; i <= dimension; ++i)
size[i] = tensor_.size(i); // std::vector<long> size = tensor_.size();
auto nPlanes = size[dimension];
long volume = 1;
for (Int i = 0; i < dimension; ++i)
volume *= size[i];
features_.resize_({(int)(nActive + volume), nPlanes});
// Increment pointers as we work through the data
auto features = features_.data<float>() + nActive * nPlanes;
// Active locations
Point<dimension> point;
for (Int i = 0; i < dimension; i++)
point[i] = offset[i];
for (Int ctr = 0; ctr < volume; ctr++) {
bool active = false;
for (Int i = 0; i < nPlanes; i++) {
if (fabs(tensor[i]) > threshold) {
active = true;
break;
}
}
for (Int i = 0; i < dimension; i++) {
if (point[i] < 0 or point[i] >= spatialSize[i]) {
active = false;
break;
}
}
if (active) {
sg.mp[point] = nActive++;
std::memcpy(features, tensor, sizeof(float) * nPlanes);
features += nPlanes;
}
tensor += nPlanes;
incrementPointInCube<dimension>(point, size, offset);
}
features_.resize_({(int)nActive, nPlanes});
}
// 3x3 submanifold convolutions, 3x3/2x2 pooling or strided convolutions
template <Int dimension> void Metadata<dimension>::generateRuleBooks3s2() {
long sz[dimension], str[dimension], inS[dimension], outS[dimension];
Point<dimension> p1;
Point<2 * dimension> p2;
Point<3 * dimension> p3;
for (Int i = 0; i < dimension; ++i) {
p1[i] = p2[i] = p3[i] = inS[i] = inputSpatialSize[i];
p2[i + dimension] = p3[i + dimension] = sz[i] = 3;
p3[i + 2 * dimension] = str[i] = 2;
}
while (true) {
auto &SGs = grids[p1];
auto &rb = validRuleBooks[p2];
if (rb.empty())
SubmanifoldConvolution_SgsToRules(SGs, rb, sz);
for (Int i = 0; i < dimension; ++i)
if (p1[i] < 3 or p1[i] % 2 != 1)
return;
else
p1[i] = outS[i] = (inS[i] - 1) / 2;
auto &SGs2 = grids[p1];
auto &rb2 = ruleBooks[p3];
if (rb2.empty())
nActive[p1] = Convolution_InputSgsToRulesAndOutputSgs(SGs, SGs2, rb2, sz,
str, inS, outS);
for (Int i = 0; i < dimension; ++i)
p2[i] = p3[i] = inS[i] = outS[i];
}
}
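// Worked example (illustrative, not from the commit): starting from spatial
// size 15 per side, generateRuleBooks3s2 visits scales 15 -> 7 -> 3 -> 1,
// building a 3^d submanifold rulebook at each scale and a size-3/stride-2
// convolution rulebook between scales; it returns once a side is even or
// smaller than 3.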
// 3x3 submanifold convolutions, 2x2 pooling or strided convolutions
template <Int dimension> void Metadata<dimension>::generateRuleBooks2s2() {
long s2[dimension], s3[dimension], inS[dimension], outS[dimension];
Point<dimension> p1;
Point<2 * dimension> p2;
Point<3 * dimension> p3;
for (Int i = 0; i < dimension; ++i) {
p1[i] = p2[i] = p3[i] = inS[i] = inputSpatialSize[i];
p2[i + dimension] = s3[i] = 3;
p3[i + dimension] = p3[i + 2 * dimension] = s2[i] = 2;
}
while (true) {
auto &SGs = grids[p1];
auto &rb = validRuleBooks[p2];
if (rb.empty())
SubmanifoldConvolution_SgsToRules(SGs, rb, s3);
for (Int i = 0; i < dimension; ++i)
if (p1[i] < 2 or p1[i] % 2 != 0)
return;
else
p1[i] = outS[i] = inS[i] / 2;
auto &SGs2 = grids[p1];
auto &rb2 = ruleBooks[p3];
if (rb2.empty())
nActive[p1] = Convolution_InputSgsToRulesAndOutputSgs(SGs, SGs2, rb2, s2,
s2, inS, outS);
for (Int i = 0; i < dimension; ++i)
p2[i] = p3[i] = inS[i] = outS[i];
}
}
template <Int dimension>
void Metadata<dimension>::inputLayer(/*long*/ at::Tensor spatialSize,
/*long*/ at::Tensor coords, Int batchSize,
Int mode) {
assert(spatialSize.ndimension() == 1);
assert(spatialSize.size(0) == dimension);
assert(coords.ndimension() == 2);
assert(coords.size(1) >= dimension and coords.size(1) <= dimension + 1);
setInputSpatialSize(spatialSize);
inputLayerRules<dimension>(*inputSGs, inputLayerRuleBook, coords.data<long>(),
coords.size(0), coords.size(1), batchSize, mode,
*inputNActive);
}
template <Int dimension>
void Metadata<dimension>::blLayer(/*long*/ at::Tensor spatialSize,
/*long*/ at::Tensor coords, Int mode) {
assert(spatialSize.ndimension() == 1);
assert(spatialSize.size(0) == dimension);
assert(coords.ndimension() == 3);
assert(coords.size(2) == dimension);
setInputSpatialSize(spatialSize);
blRules<dimension>(*inputSGs, blLayerRuleBook, coords.data<long>(),
coords.size(0), coords.size(1), mode, *inputNActive);
}
template <Int dimension>
RuleBook &
Metadata<dimension>::getSubmanifoldRuleBook(/*long*/ at::Tensor spatialSize,
/*long*/ at::Tensor size,
bool openMP) {
auto p = TwoLongTensorsToPoint<dimension>(spatialSize, size);
auto &rb = validRuleBooks[p];
if (rb.empty()) {
auto &SGs = grids[LongTensorToPoint<dimension>(spatialSize)];
#if defined(ENABLE_OPENMP)
openMP ? SubmanifoldConvolution_SgsToRules_OMP(SGs, rb, size.data<long>()) :
#endif
SubmanifoldConvolution_SgsToRules(SGs, rb, size.data<long>());
}
return rb;
}
template <Int dimension>
RuleBook &
Metadata<dimension>::getActivePoolingRuleBook(/*long*/ at::Tensor spatialSize) {
auto spatialSz = LongTensorToPoint<dimension>(spatialSize);
auto &SGs = grids[spatialSz];
auto &rb = activePoolingRuleBooks[spatialSz];
if (rb.empty())
activePoolingRules(SGs, rb);
return rb;
}
template <Int dimension>
RuleBook &
Metadata<dimension>::getSparseToDenseRuleBook(/*long*/ at::Tensor spatialSize,
bool openMP) {
auto ss = LongTensorToPoint<dimension>(spatialSize);
auto &SGs = grids[ss];
auto &rb = sparseToDenseRuleBooks[ss];
if (rb.empty())
#if defined(ENABLE_OPENMP)
openMP ? SparseToDense_InputSgsToRulesAndOutputSgs_OMP(
SGs, rb, spatialSize.data<long>())
:
#endif
SparseToDense_InputSgsToRulesAndOutputSgs(SGs, rb,
spatialSize.data<long>());
return rb;
}
template <Int dimension>
RuleBook &
Metadata<dimension>::getRuleBook(/*long*/ at::Tensor inputSpatialSize,
/*long*/ at::Tensor outputSpatialSize,
/*long*/ at::Tensor size,
/*long*/ at::Tensor stride, bool openMP) {
auto p = ThreeLongTensorsToPoint<dimension>(inputSpatialSize, size, stride);
auto &rb = ruleBooks[p];
if (rb.empty()) {
auto iS = LongTensorToPoint<dimension>(inputSpatialSize);
auto oS = LongTensorToPoint<dimension>(outputSpatialSize);
auto &iSGs = grids[iS];
auto &oSGs = grids[oS];
nActive[oS] =
#if defined(ENABLE_OPENMP)
openMP
? Convolution_InputSgsToRulesAndOutputSgs_OMP(
iSGs, oSGs, rb, size.data<long>(), stride.data<long>(),
inputSpatialSize.data<long>(), outputSpatialSize.data<long>())
:
#endif
Convolution_InputSgsToRulesAndOutputSgs(
iSGs, oSGs, rb, size.data<long>(), stride.data<long>(),
inputSpatialSize.data<long>(), outputSpatialSize.data<long>());
}
return rb;
}
template <Int dimension>
RuleBook &Metadata<dimension>::getFullConvolutionRuleBook(
/*long*/ at::Tensor inputSpatialSize,
/*long*/ at::Tensor outputSpatialSize,
/*long*/ at::Tensor size,
/*long*/ at::Tensor stride, Metadata<dimension> &newM) {
auto p = ThreeLongTensorsToPoint<dimension>(inputSpatialSize, size, stride);
auto &rb = fullConvolutionRuleBooks[p];
if (rb.empty()) {
newM.clear();
auto iS = LongTensorToPoint<dimension>(inputSpatialSize);
auto oS = LongTensorToPoint<dimension>(outputSpatialSize);
newM.grids[iS] = grids[iS]; // copy
newM.nActive[iS] = nActive[iS];
auto &iSGs = newM.grids[iS];
auto &oSGs = newM.grids[oS];
newM.nActive[oS] = FullConvolution_InputSgsToRulesAndOutputSgs_OMP(
iSGs, oSGs, rb, size.data<long>(), stride.data<long>(),
inputSpatialSize.data<long>(), outputSpatialSize.data<long>());
}
return rb;
}
template <Int dimension>
RuleBook &Metadata<dimension>::getRandomizedStrideRuleBook(
/*long*/ at::Tensor inputSpatialSize,
/*long*/ at::Tensor outputSpatialSize,
/*long*/ at::Tensor size,
/*long*/ at::Tensor stride, bool openMP) {
auto p = ThreeLongTensorsToPoint<dimension>(inputSpatialSize, size, stride);
auto &rb = ruleBooks[p];
if (rb.empty()) {
auto iS = LongTensorToPoint<dimension>(inputSpatialSize);
auto oS = LongTensorToPoint<dimension>(outputSpatialSize);
auto &iSGs = grids[iS];
auto &oSGs = grids[oS];
nActive[oS] =
#if defined(ENABLE_OPENMP)
openMP
? RSR_InputSgsToRulesAndOutputSgs_OMP(
iSGs, oSGs, rb, size.data<long>(), stride.data<long>(),
inputSpatialSize.data<long>(), outputSpatialSize.data<long>(),
re)
:
#endif
RSR_InputSgsToRulesAndOutputSgs(iSGs, oSGs, rb, size.data<long>(),
stride.data<long>(),
inputSpatialSize.data<long>(),
outputSpatialSize.data<long>(), re);
}
return rb;
}
template <Int dimension> Int volume(long *point) {
Int v = 1;
for (Int i = 0; i < dimension; i++)
v *= point[i];
return v;
}
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef Metadata_H
#define Metadata_H
#include "32bits.h"
#include <array>
#include <chrono>
#include <cstdint>
#include <google/dense_hash_map>
#include <iostream>
#include <random>
#include <string>
#include <unordered_map>
#include <vector>
template <Int dimension>
using SparseGridMap =
google::dense_hash_map<Point<dimension>, Int, IntArrayHash<dimension>,
std::equal_to<Point<dimension>>>;
template <Int dimension> class SparseGrid {
public:
Int ctr;
SparseGridMap<dimension> mp;
SparseGrid();
};
template <Int dimension> using SparseGrids = std::vector<SparseGrid<dimension>>;
using RuleBook = std::vector<std::vector<Int>>;
template <Int dimension>
void addPointToSparseGridMapAndFeatures(SparseGridMap<dimension> &mp,
Point<dimension> p, Int &nActive,
long nPlanes,
/*float*/ at::Tensor features,
float *vec, bool overwrite);
template <Int dimension> class Metadata {
public:
// Count of active sites for each scale
std::unordered_map<Point<dimension>, Int, IntArrayHash<dimension>> nActive;
// Hash tables for each scale locating the active points
std::unordered_map<Point<dimension>, SparseGrids<dimension>,
IntArrayHash<dimension>>
grids;
std::unordered_map<Point<dimension>, RuleBook, IntArrayHash<dimension>>
activePoolingRuleBooks;
RuleBook inputLayerRuleBook;
RuleBook blLayerRuleBook;
std::unordered_map<Point<2 * dimension>, RuleBook,
IntArrayHash<2 * dimension>>
validRuleBooks;
std::unordered_map<Point<3 * dimension>, RuleBook,
IntArrayHash<3 * dimension>>
ruleBooks;
std::unordered_map<Point<3 * dimension>, RuleBook,
IntArrayHash<3 * dimension>>
fullConvolutionRuleBooks;
std::unordered_map<Point<dimension>, RuleBook, IntArrayHash<dimension>>
sparseToDenseRuleBooks;
Point<dimension> inputSpatialSize;
SparseGrids<dimension> *inputSGs;
SparseGrid<dimension> *inputSG;
Int *inputNActive;
std::default_random_engine re;
Metadata();
void clear();
Int getNActive(/*long*/ at::Tensor spatialSize);
SparseGrids<dimension> &getSparseGrid(/*long*/ at::Tensor spatialSize);
void setInputSpatialSize(/*long*/ at::Tensor spatialSize);
void batchAddSample();
void setInputSpatialLocation(/*float*/ at::Tensor features,
/*long*/ at::Tensor location,
/*float*/ at::Tensor vec, bool overwrite);
void setInputSpatialLocations(/*float*/ at::Tensor features,
/*long*/ at::Tensor locations,
/*float*/ at::Tensor vecs, bool overwrite);
void getSpatialLocations(/*long*/ at::Tensor spatialSize,
/*long*/ at::Tensor locations);
void createMetadataForDenseToSparse(/*long*/ at::Tensor spatialSize,
/*long*/ at::Tensor nz_, long batchSize);
void sparsifyMetadata(Metadata<dimension> &mOut,
/*long*/ at::Tensor spatialSize,
/*byte*/ at::Tensor filter,
/*long*/ at::Tensor cuSum);
// tensor is size[0] x .. x size[dimension-1] x size[dimension]
// size[0] x .. x size[dimension-1] == spatial volume
// size[dimension] == #feature planes
void addSampleFromThresholdedTensor(/*float*/ at::Tensor features_,
/*float*/ at::Tensor tensor_,
/*long*/ at::Tensor offset_,
/*long*/ at::Tensor spatialSize_,
float threshold);
// 3x3 submanifold convolutions, 3x3/2x2 pooling or strided convolutions
void generateRuleBooks3s2();
// 3x3 submanifold convolutions, 2x2 pooling or strided convolutions
void generateRuleBooks2s2();
void inputLayer(/*long*/ at::Tensor spatialSize,
/*long*/ at::Tensor coords, Int batchSize, Int mode);
void blLayer(/*long*/ at::Tensor spatialSize, /*long*/ at::Tensor coords,
Int mode);
RuleBook &getSubmanifoldRuleBook(/*long*/ at::Tensor spatialSize,
/*long*/ at::Tensor size, bool openMP);
RuleBook &getActivePoolingRuleBook(/*long*/ at::Tensor spatialSize);
RuleBook &getSparseToDenseRuleBook(/*long*/ at::Tensor spatialSize,
bool openMP);
RuleBook &getRuleBook(/*long*/ at::Tensor inputSpatialSize,
/*long*/ at::Tensor outputSpatialSize,
/*long*/ at::Tensor size,
/*long*/ at::Tensor stride, bool openMP);
RuleBook &getFullConvolutionRuleBook(/*long*/ at::Tensor inputSpatialSize,
/*long*/ at::Tensor outputSpatialSize,
/*long*/ at::Tensor size,
/*long*/ at::Tensor stride,
Metadata<dimension> &newM);
RuleBook &getRandomizedStrideRuleBook(/*long*/ at::Tensor inputSpatialSize,
/*long*/ at::Tensor outputSpatialSize,
/*long*/ at::Tensor size,
/*long*/ at::Tensor stride,
bool openMP);
};
template <typename T> T *OptionalTensorData(at::Tensor tensor);
template <Int dimension> Int volume(long *point);
#endif
......@@ -11,27 +11,28 @@
class RSRTicks {
public:
std::vector<uInt> inputL;
std::vector<uInt> inputR;
std::vector<uInt> outputL;
std::vector<uInt> outputR;
RSRTicks(uInt input_spatialSize, uInt output_spatialSize, uInt size, uInt stride, std::default_random_engine re) {
std::vector<uInt> steps;
//steps.resize(output_spatialSize/3,stride-1);
//steps.resize(output_spatialSize/3*2,stride+1);
steps.resize(output_spatialSize-1,stride);
std::vector<Int> inputL;
std::vector<Int> inputR;
std::vector<Int> outputL;
std::vector<Int> outputR;
RSRTicks(Int input_spatialSize, Int output_spatialSize, Int size, Int stride,
std::default_random_engine re) {
std::vector<Int> steps;
// steps.resize(output_spatialSize/3,stride-1);
// steps.resize(output_spatialSize/3*2,stride+1);
steps.resize(output_spatialSize - 1, stride);
std::shuffle(steps.begin(), steps.end(), re);
inputL.push_back(0);
inputR.push_back(size-1);
inputR.push_back(size - 1);
for (auto step : steps) {
inputL.push_back(inputL.back()+step);
inputR.push_back(inputR.back()+step);
inputL.push_back(inputL.back() + step);
inputR.push_back(inputR.back() + step);
}
assert(inputR.back() == input_spatialSize - 1);
outputL.resize(input_spatialSize, output_spatialSize);
outputR.resize(input_spatialSize, 0);
for (uInt i = 0; i < output_spatialSize; i++) {
for (uInt j = inputL[i]; j <= inputR[i]; j++) {
for (Int i = 0; i < output_spatialSize; i++) {
for (Int j = inputL[i]; j <= inputR[i]; j++) {
outputL[j] = std::min(outputL[j], i);
outputR[j] = std::max(outputR[j], i);
}
......@@ -42,74 +43,76 @@ public:
typedef std::vector<RSRTicks> RSRTicksV;
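// Worked example (illustrative, not from the commit): with size == 3,
// stride == 2 and output_spatialSize == 3, the assert above forces
// input_spatialSize == (3 - 1) * 2 + 3 == 7, and the ticks come out as
//   inputL == {0, 2, 4}, inputR == {2, 4, 6};
// output i pools inputs [inputL[i], inputR[i]]. The commented-out
// steps.resize lines sketch the intended randomization: a shuffled mix of
// stride-1 and stride+1 steps instead of a constant stride.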
RSRTicksV RSRRegions(long *input_spatialSize, long *output_spatialSize,
uInt dimension, long *size, long *stride, std::default_random_engine re) {
Int dimension, long *size, long *stride,
std::default_random_engine re) {
RSRTicksV t;
for (uInt i = 0; i < dimension; i++)
for (Int i = 0; i < dimension; i++)
t.emplace_back(RSRTicks(input_spatialSize[i], output_spatialSize[i],
size[i], stride[i], re));
size[i], stride[i], re));
return t;
}
template <uInt dimension>
template <Int dimension>
RectangularRegion<dimension>
RSRInputRegionCalculator(const Point<dimension> &output, RSRTicksV &t) {
Point<dimension> lb, ub;
for (uInt i = 0; i < dimension; i++) {
for (Int i = 0; i < dimension; i++) {
lb[i] = t[i].inputL[output[i]];
ub[i] = t[i].inputR[output[i]];
}
return RectangularRegion<dimension>(lb, ub);
}
template <uInt dimension>
template <Int dimension>
RectangularRegion<dimension>
RSROutputRegionCalculator(const Point<dimension> &input, RSRTicksV &t) {
Point<dimension> lb, ub;
for (uInt i = 0; i < dimension; i++) {
for (Int i = 0; i < dimension; i++) {
lb[i] = t[i].outputL[input[i]];
ub[i] = t[i].outputR[input[i]];
}
return RectangularRegion<dimension>(lb, ub);
}
template <uInt dimension>
template <Int dimension>
void RSR_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
SparseGrid<dimension> &outputGrid,
RuleBook &rules, RSRTicksV &t, long *size, long *stride) {
RuleBook &rules, RSRTicksV &t, long *size,
long *stride) {
rules.resize(volume<dimension>(size));
for (auto const &inIter : inputGrid.mp) {
for (auto j : RSROutputRegionCalculator<dimension>(inIter.first, t)) {
auto inRegion = RSRInputRegionCalculator<dimension>(j, t);
uInt rulesOffset = inRegion.offset(inIter.first);
Int rulesOffset = inRegion.offset(inIter.first);
auto outIter = outputGrid.mp.find(j);
if (outIter == outputGrid.mp.end()) {
outIter =
outputGrid.mp.insert(std::make_pair(j, outputGrid.ctr++)).first;
}
assert(inIter.second<1e6);
assert(outIter->second<1e6);
assert(inIter.second < 1e6);
assert(outIter->second < 1e6);
rules[rulesOffset].push_back(inIter.second + inputGrid.ctr);
rules[rulesOffset].push_back(outIter->second);
}
}
}
template <uInt dimension>
uInt RSR_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *size,long *stride,
long *input_spatialSize,
long *output_spatialSize,
std::default_random_engine re) {
auto t = RSRRegions(input_spatialSize, output_spatialSize, dimension,
size, stride, re);
template <Int dimension>
Int RSR_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *size, long *stride,
long *input_spatialSize,
long *output_spatialSize,
std::default_random_engine re) {
auto t = RSRRegions(input_spatialSize, output_spatialSize, dimension, size,
stride, re);
rules.clear();
output_SGs.clear();
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
output_SGs.resize(batchSize);
uInt output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) {
Int output_nActive = 0;
for (Int i = 0; i < batchSize; i++) {
auto &iSG = input_SGs[i];
auto &oSG = output_SGs[i];
oSG.ctr = output_nActive;
......@@ -120,47 +123,46 @@ uInt RSR_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
return output_nActive;
}
template <uInt dimension>
uInt RSR_InputSgsToRulesAndOutputSgs_OMP(SparseGrids<dimension> &input_SGs,
SparseGrids<dimension> &output_SGs,
RuleBook &rules,
long *size, long *stride,
long *input_spatialSize,
long *output_spatialSize,
std::default_random_engine re) {
auto t = RSRRegions(input_spatialSize, output_spatialSize, dimension,
size, stride, re);
template <Int dimension>
Int RSR_InputSgsToRulesAndOutputSgs_OMP(SparseGrids<dimension> &input_SGs,
SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *size,
long *stride, long *input_spatialSize,
long *output_spatialSize,
std::default_random_engine re) {
auto t = RSRRegions(input_spatialSize, output_spatialSize, dimension, size,
stride, re);
rules.clear();
rules.resize(volume<dimension>(size));
output_SGs.clear();
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
output_SGs.resize(batchSize);
std::vector<RuleBook> rbs(batchSize);
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < batchSize; i++)
RSR_InputSgToRulesAndOutputSg<dimension>(input_SGs[i], output_SGs[i],
rbs[i], t, size, stride);
}
uInt output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) {
Int output_nActive = 0;
for (Int i = 0; i < batchSize; i++) {
// Parallel assignment:
// output_nActive <- output_nActive+output_SGs[i].ctr
// output_SGs[i].ctr <- output_nActive
uInt tmp = output_nActive;
Int tmp = output_nActive;
output_nActive += output_SGs[i].ctr;
output_SGs[i].ctr = tmp;
}
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < rules.size(); i++) {
for (i = 0; i < (Int)rules.size(); i++) {
auto &R = rules[i];
for (uInt j = 0; j < batchSize; j++) {
for (Int j = 0; j < batchSize; j++) {
auto &r = rbs[j][i];
auto offset = output_SGs[j].ctr;
for (uInt k = 0; k < r.size();) {
for (Int k = 0; k < (Int)r.size();) {
R.push_back(r[k++]);
R.push_back(r[k++] + offset);
}
......
......@@ -6,14 +6,14 @@
#ifndef RECTANGULARREGIONS_H
#define RECTANGULARREGIONS_H
#include "../SparseConvNet.h"
// For iterating over the rectangular region with corners lb and ub.
// The .end() method and operator!= are designed to allow range based for
// loops of the region, but nothing else.
template <uInt dimension> class RectangularRegionIterator;
template <uInt dimension> class RectangularRegion {
template <Int dimension> class RectangularRegionIterator;
template <Int dimension> class RectangularRegion {
public:
Point<dimension> lb;
Point<dimension> ub;
......@@ -27,9 +27,9 @@ public:
// Otherwise it would need to represent a point just outside the region
return RectangularRegionIterator<dimension>(*this, ub);
}
uInt
Int
offset(const Point<dimension> &p) { // Enumerate the points inside the region
uInt of = 0, m = 1;
Int of = 0, m = 1;
for (Int i = dimension - 1; i >= 0; i--) {
of += m * (p[i] - lb[i]);
m *= ub[i] - lb[i] + 1;
......@@ -38,13 +38,13 @@ public:
}
};
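// Worked example (illustrative, not from the commit): offset() enumerates
// the region's points in row-major order, so for lb == (0,0) and
// ub == (2,2) (a 3x3 region), offset({1,2}) == (1-0)*3 + (2-0) == 5.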
template <uInt dimension> class RectangularRegionIterator {
template <Int dimension> class RectangularRegionIterator {
private:
RectangularRegion<dimension> &region;
public:
bool stillLooping;
Point<dimension> point;
bool stillLooping;
RectangularRegionIterator(RectangularRegion<dimension> &region,
Point<dimension> &point)
: region(region), point(point), stillLooping(true) {
......@@ -73,14 +73,14 @@ public:
};
// Only to be used for checking the end point of range based for loops.
template <uInt dimension>
template <Int dimension>
inline bool operator!=(const RectangularRegionIterator<dimension> &lhs,
const RectangularRegionIterator<dimension> &rhs) {
return lhs.stillLooping;
}
// Similar to above but for [ offset[0] ... offset[0]+size[0]-1 ] x ... x [..]
template <uInt dimension>
template <Int dimension>
void incrementPointInCube(Point<dimension> &point, long *size, long *offset) {
for (Int i = dimension - 1; i >= 0; i--) {
point[i]++;
......@@ -92,12 +92,12 @@ void incrementPointInCube(Point<dimension> &point, long *size, long *offset) {
// For a convolutional layer with given filter *size* and *stride*, find the
// subset of the input field corresponding to a point in the output.
template <uInt dimension>
template <Int dimension>
RectangularRegion<dimension>
InputRegionCalculator(const Point<dimension> &output, long *size,
long *stride) {
Point<dimension> lb, ub;
for (uInt i = 0; i < dimension; i++) {
for (Int i = 0; i < dimension; i++) {
lb[i] = output[i] * stride[i];
ub[i] = output[i] * stride[i] + size[i] - 1;
}
......@@ -106,12 +106,12 @@ InputRegionCalculator(const Point<dimension> &output, long *size,
// For a convolutional layer with given filter *size* and *stride*, find the
// subset of the output field corresponding to a point in the input.
template <uInt dimension>
template <Int dimension>
RectangularRegion<dimension>
OutputRegionCalculator(const Point<dimension> &input, long *size, long *stride,
long *outputSpatialSize) {
Point<dimension> lb, ub;
for (uInt i = 0; i < dimension; i++) {
for (Int i = 0; i < dimension; i++) {
lb[i] = std::max(0L, (input[i] - size[i] + stride[i]) / stride[i]);
ub[i] = std::min(outputSpatialSize[i] - 1, input[i] / stride[i]);
}
......
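// Worked example (illustrative, not from the commit): with size == 3 and
// stride == 2, output point 4 reads inputs [4*2, 4*2+3-1] == [8, 10]
// (InputRegionCalculator), while input point 9 feeds outputs
// [(9-3+2)/2, 9/2] == [4, 4] (OutputRegionCalculator): the two calculators
// agree that input 9 contributes to output 4.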
......@@ -8,11 +8,11 @@
#define VALIDCONVOLUTIONRULES_H
// Full input region for an output point
template <uInt dimension>
template <Int dimension>
RectangularRegion<dimension>
InputRegionCalculator_Valid(const Point<dimension> &output, long *size) {
Point<dimension> lb, ub;
for (uInt i = 0; i < dimension; i++) {
for (Int i = 0; i < dimension; i++) {
Int pad = size[i] / 2;
lb[i] = output[i] - pad;
ub[i] = output[i] + size[i] - 1 - pad;
......@@ -23,15 +23,14 @@ InputRegionCalculator_Valid(const Point<dimension> &output, long *size) {
// Call for each convolutional / max-pooling layer, once for each batch item.
// rules is used to carry out the "lowering" whilst carrying out the convolution
template <uInt dimension>
double SubmanifoldConvolution_SgToRules(SparseGrid<dimension> &grid, RuleBook &rules,
long *size) {
uInt sd = volume<dimension>(size);
template <Int dimension>
double SubmanifoldConvolution_SgToRules(SparseGrid<dimension> &grid,
RuleBook &rules, long *size) {
double countActiveInputs = 0;
for (auto const &outputIter : grid.mp) {
auto inRegion =
InputRegionCalculator_Valid<dimension>(outputIter.first, size);
uInt rulesOffset = 0;
Int rulesOffset = 0;
for (auto inputPoint : inRegion) {
auto inputIter = grid.mp.find(inputPoint);
if (inputIter != grid.mp.end()) {
......@@ -45,43 +44,43 @@ double SubmanifoldConvolution_SgToRules(SparseGrid<dimension> &grid, RuleBook &r
return countActiveInputs;
}
template <uInt dimension>
uInt SubmanifoldConvolution_SgsToRules(SparseGrids<dimension> &SGs, RuleBook &rules,
long *size) {
uInt sd = volume<dimension>(size);
uInt countActiveInputs = 0;
template <Int dimension>
Int SubmanifoldConvolution_SgsToRules(SparseGrids<dimension> &SGs,
RuleBook &rules, long *size) {
Int sd = volume<dimension>(size);
Int countActiveInputs = 0;
rules.clear();
rules.resize(sd);
for (uInt i = 0; i < SGs.size(); i++)
for (Int i = 0; i < (Int)SGs.size(); i++)
countActiveInputs +=
SubmanifoldConvolution_SgToRules<dimension>(SGs[i], rules, size);
return countActiveInputs;
}
template <uInt dimension>
uInt SubmanifoldConvolution_SgsToRules_OMP(SparseGrids<dimension> &SGs,
RuleBook &rules, long *size) {
template <Int dimension>
Int SubmanifoldConvolution_SgsToRules_OMP(SparseGrids<dimension> &SGs,
RuleBook &rules, long *size) {
std::vector<RuleBook> rbs(SGs.size());
std::vector<double> countActiveInputs(SGs.size());
rules.clear();
uInt sd = volume<dimension>(size);
Int sd = volume<dimension>(size);
rules.resize(sd);
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < SGs.size(); i++) {
for (i = 0; i < (Int)SGs.size(); i++) {
rbs[i].resize(sd);
countActiveInputs[i] =
SubmanifoldConvolution_SgToRules<dimension>(SGs[i], rbs[i], size);
}
}
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < sd; i++)
for (auto const &rb : rbs)
rules[i].insert(rules[i].end(), rb[i].begin(), rb[i].end());
}
uInt countActiveInputs_ = 0;
Int countActiveInputs_ = 0;
for (auto &i : countActiveInputs)
countActiveInputs_ += i;
return countActiveInputs_;
......
from torch.utils.ffi import _wrap_function
from ._SCN import lib as _lib, ffi as _ffi
__all__ = []
def _import_symbols(locals):
for symbol in dir(_lib):
fn = getattr(_lib, symbol)
if callable(fn):
locals[symbol] = _wrap_function(fn, _ffi)
else:
locals[symbol] = fn
__all__.append(symbol)
_import_symbols(locals())
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/ActivePooling.cpp"
#else
#include "ActivePooling.h"
extern "C" void scn_DR_(ActivePooling_updateOutput)(
THLongTensor *inputSize, void **m, THTensor *input_features,
THTensor *output_features, bool average) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
uInt nPlanes = input_features->size[1];
auto _rules = _m.getActivePoolingRuleBook(inputSize);
uInt batchSize = _rules[1][0];
uInt maxActive = _rules[1][1];
THTensor_(resize2d)(output_features, batchSize, nPlanes);
THTensor_(zero)(output_features);
ActivePooling_ForwardPass<real>(THTensor_(data)(input_features),
THTensor_(data)(output_features), batchSize,
maxActive, nPlanes, _rules, average);
}
extern "C" void scn_DR_(ActivePooling_updateGradInput)(
THLongTensor *inputSize, void **m, THTensor *input_features,
THTensor *d_input_features, THTensor *d_output_features,
bool average) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
uInt nPlanes = input_features->size[1];
auto _rules = _m.getActivePoolingRuleBook(inputSize);
uInt batchSize = _rules[1][0];
uInt maxActive = _rules[1][1];
THTensor_(resizeAs)(d_input_features, input_features);
THTensor_(zero)(d_input_features);
ActivePooling_BackwardPass<real>(
THTensor_(data)(d_input_features), THTensor_(data)(d_output_features),
batchSize, maxActive, nPlanes, _rules, average);
}
#endif
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/AffineReluTrivialConvolution.cpp"
#else
#include "AffineReluTrivialConvolution.h"
extern "C" void scn_R_(AffineReluTrivialConvolution_updateOutput)(
THTensor *input_features, THTensor *output_features, THTensor *affineWeight,
THTensor *affineBias, THTensor *convWeight) {
THTensor_(resize2d)(output_features, input_features->size[0],
convWeight->size[1]);
AffineReluTrivialConvolution_ForwardPass(
THTensor_(data)(input_features), convWeight->size[0],
input_features->stride[0], THTensor_(data)(output_features),
convWeight->size[1], output_features->stride[0],
THTensor_(data)(affineWeight), THTensor_(data)(affineBias),
THTensor_(data)(convWeight), input_features->size[0]);
}
extern "C" void scn_R_(AffineReluTrivialConvolution_backward)(
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *affineWeight,
THTensor *d_affineWeight, THTensor *affineBias, THTensor *d_affineBias,
THTensor *convWeight, THTensor *d_convWeight, bool additiveGrad) {
THTensor_(resizeAs)(d_input_features, input_features);
AffineReluTrivialConvolution_BackwardPass(
THTensor_(data)(input_features), THTensor_(data)(d_input_features),
convWeight->size[0], input_features->stride[0],
THTensor_(data)(d_output_features), convWeight->size[1],
d_output_features->stride[0], THTensor_(data)(affineWeight),
THTensor_(data)(d_affineWeight), THTensor_(data)(affineBias),
THTensor_(data)(d_affineBias), THTensor_(data)(convWeight),
THTensor_(data)(d_convWeight), input_features->size[0], additiveGrad);
}
#endif
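// --- Illustrative sketch (not part of the original file) ---
// What the fused kernel above computes, written naively: for each active row,
// out = max(in * affineWeight + affineBias, 0) . convWeight, with convWeight
// stored row-major as nIn x nOut. Assumes <vector> and <algorithm>; the real
// AffineReluTrivialConvolution_ForwardPass blocks the loops and uses BLAS.
template <typename T>
void AffineReluTrivialConvolution_sketch(const T *in, uInt nIn, const T *aw,
                                         const T *ab, const T *W, T *out,
                                         uInt nOut, uInt nRows) {
  std::vector<T> tmp(nIn);
  for (uInt r = 0; r < nRows; r++) {
    for (uInt i = 0; i < nIn; i++) // per-plane affine, then ReLU
      tmp[i] = std::max<T>(in[r * nIn + i] * aw[i] + ab[i], T(0));
    for (uInt j = 0; j < nOut; j++) { // trivial (1x1) convolution per row
      T s = 0;
      for (uInt i = 0; i < nIn; i++)
        s += tmp[i] * W[i * nOut + j];
      out[r * nOut + j] = s;
    }
  }
}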
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/AveragePooling.cpp"
#else
#include "AveragePooling.h"
extern "C" void scn_DR_(AveragePooling_updateOutput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
THLongTensor *poolStride, void **m, THTensor *input_features,
THTensor *output_features, long nFeaturesToDrop) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
auto _rules =
_m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
uInt nActive = _m.getNActive(outputSize);
THTensor_(resize2d)(output_features, nActive,
input_features->size[1] - nFeaturesToDrop);
THTensor_(zero)(output_features);
auto iF = THTensor_(data)(input_features) + nFeaturesToDrop;
auto oF = THTensor_(data)(output_features);
for (auto &r : _rules) {
uInt nHot = r.size() / 2;
AveragePooling_ForwardPass<real>(iF, oF, nPlanes, input_features->stride[0],
output_features->stride[0], &r[0], nHot,
_rules.size());
}
}
extern "C" void scn_DR_(AveragePooling_updateGradInput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
THLongTensor *poolStride, void **m, THTensor *input_features,
THTensor *d_input_features, THTensor *d_output_features,
long nFeaturesToDrop) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
auto _rules =
_m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
uInt nActive = _m.getNActive(outputSize);
THTensor_(resizeAs)(d_input_features, input_features);
THTensor_(zero)(d_input_features);
auto diF = THTensor_(data)(d_input_features) + nFeaturesToDrop;
auto doF = THTensor_(data)(d_output_features);
for (auto &r : _rules) {
uInt nHot = r.size() / 2;
AveragePooling_BackwardPass<real>(
diF, doF, nPlanes, input_features->stride[0],
d_output_features->stride[0], &r[0], nHot, _rules.size());
}
}
#endif
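// --- Illustrative sketch (not part of the original file) ---
// A plausible AveragePooling_ForwardPass matching the call site above: each
// rule r is a flat list of nHot (inputRow, outputRow) pairs for one pool
// offset, and _rules.size() (the pool volume) is the normaliser. The strides
// differ from nPlanes when nFeaturesToDrop skips leading planes.
template <typename T>
void AveragePooling_ForwardPass_sketch(const T *iF, T *oF, uInt nPlanes,
                                       uInt iStride, uInt oStride,
                                       const uInt *rule, uInt nHot,
                                       uInt filterVolume) {
  for (uInt h = 0; h < nHot; h++) {
    const T *in = iF + rule[2 * h] * iStride;
    T *out = oF + rule[2 * h + 1] * oStride;
    for (uInt p = 0; p < nPlanes; p++)
      out[p] += in[p] / (T)filterVolume;
  }
}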
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/BatchNormalization.cpp"
#else
#include "BatchNormalization.h"
extern "C" void scn_R_(BatchNormalization_updateOutput)(
THTensor *input_features, THTensor *output_features, THTensor *saveMean,
THTensor *saveInvStd, THTensor *runningMean, THTensor *runningVar,
THTensor *weight, THTensor *bias, real eps, real momentum, bool train,
real leakiness) {
THTensor_(resizeAs)(output_features, input_features);
if (input_features->nDimension == 2) {
auto nActive = input_features->size[0];
auto nPlanes = input_features->size[1];
auto input_stride = input_features->stride[0];
auto output_stride = output_features->stride[0];
BatchNormalization_ForwardPass<real>(
THTensor_(data)(input_features), THTensor_(data)(output_features),
nPlanes, input_stride, output_stride, nActive,
THTensor_(data)(saveMean), THTensor_(data)(saveInvStd),
THTensor_(data)(runningMean), THTensor_(data)(runningVar),
THOptionalTensorData(weight), THOptionalTensorData(bias), eps, momentum,
train, leakiness);
}
}
extern "C" void scn_R_(BatchNormalizationInTensor_updateOutput)(
THTensor *input_features, THTensor *output_features, THTensor *saveMean,
THTensor *saveInvStd, THTensor *runningMean, THTensor *runningVar,
THTensor *weight, THTensor *bias, real eps, real momentum, bool train,
real leakiness) {
if (input_features->nDimension == 2) {
auto nActive = input_features->size[0];
auto nPlanes = input_features->size[1];
auto input_stride = input_features->stride[0];
auto output_stride = output_features->stride[0];
BatchNormalization_ForwardPass<real>(
THTensor_(data)(input_features), THTensor_(data)(output_features),
nPlanes, input_stride, output_stride, nActive,
THTensor_(data)(saveMean), THTensor_(data)(saveInvStd),
THTensor_(data)(runningMean), THTensor_(data)(runningVar),
THOptionalTensorData(weight), THOptionalTensorData(bias), eps, momentum,
train, leakiness);
}
}
extern "C" void scn_R_(BatchNormalization_backward)(
THTensor *input_features, THTensor *d_input_features,
THTensor *output_features, THTensor *d_output_features, THTensor *saveMean,
THTensor *saveInvStd, THTensor *runningMean, THTensor *runningVar,
THTensor *weight, THTensor *bias, THTensor *d_weight, THTensor *d_bias,
real leakiness) {
THTensor_(resizeAs)(d_input_features, input_features);
if (input_features->nDimension == 2) {
auto nActive = input_features->size[0];
auto nPlanes = input_features->size[1];
auto input_stride = input_features->stride[0];
auto output_stride = output_features->stride[0];
BatchNormalization_BackwardPass<real>(
THTensor_(data)(input_features), THTensor_(data)(d_input_features),
THTensor_(data)(output_features), THTensor_(data)(d_output_features),
nPlanes, input_stride, output_stride, nActive,
THTensor_(data)(saveMean), THTensor_(data)(saveInvStd),
THTensor_(data)(runningMean), THTensor_(data)(runningVar),
THOptionalTensorData(weight), THOptionalTensorData(bias),
THOptionalTensorData(d_weight), THOptionalTensorData(d_bias),
leakiness);
}
}
#endif
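// --- Illustrative sketch (not part of the original file) ---
// The training-mode statistics BatchNormalization_ForwardPass needs: per-plane
// mean and inverse standard deviation over the nActive rows, after which
// out = leakyReLU(weight * (x - mean) * invStd + bias). Assumes <cmath> and
// biased variance (divide by nActive), a common BN convention.
template <typename T>
void BatchNorm_stats_sketch(const T *iF, uInt nActive, uInt nPlanes,
                            uInt stride, T eps, T *mean, T *invStd) {
  for (uInt p = 0; p < nPlanes; p++)
    mean[p] = invStd[p] = 0;
  for (uInt r = 0; r < nActive; r++)
    for (uInt p = 0; p < nPlanes; p++)
      mean[p] += iF[r * stride + p];
  for (uInt p = 0; p < nPlanes; p++)
    mean[p] /= nActive;
  for (uInt r = 0; r < nActive; r++)
    for (uInt p = 0; p < nPlanes; p++) {
      T d = iF[r * stride + p] - mean[p];
      invStd[p] += d * d; // accumulate squared deviations
    }
  for (uInt p = 0; p < nPlanes; p++)
    invStd[p] = T(1) / std::sqrt(invStd[p] / nActive + eps);
}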
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/BatchwiseMultiplicativeDropout.cpp"
#else
extern "C" void scn_R_(BatchwiseMultiplicativeDropout_updateOutput)(
THTensor *input_features, THTensor *output_features, THTensor *noise,
float alpha) {
if (input_features != output_features)
THTensor_(resizeAs)(output_features, input_features);
auto nActive = input_features->size[0];
auto nPlanes = input_features->size[1];
auto iF = THTensor_(data)(input_features);
auto oF = THTensor_(data)(output_features);
auto nz = THTensor_(data)(noise);
for (uInt row = 0; row < nActive; row++)
for (uInt plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes;
plane++, o++, i++)
oF[o] = (iF[i] > 0) ? iF[i] * nz[plane] : iF[i] * nz[plane] * alpha;
}
extern "C" void scn_R_(BatchwiseMultiplicativeDropout_updateGradInput)(
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *noise, float alpha) {
if (d_input_features != d_output_features)
THTensor_(resizeAs)(d_input_features, d_output_features);
auto nActive = input_features->size[0];
auto nPlanes = input_features->size[1];
auto iF = THTensor_(data)(input_features);
auto diF = THTensor_(data)(d_input_features);
auto doF = THTensor_(data)(d_output_features);
auto nz = THTensor_(data)(noise);
for (uInt row = 0; row < nActive; row++)
for (uInt plane = 0, o = row * nPlanes, i = row * nPlanes; plane < nPlanes;
plane++, o++, i++)
diF[i] = (iF[i] > 0) ? doF[o] * nz[plane] : doF[o] * nz[plane] * alpha;
}
#endif
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/Convolution.cpp"
#else
#include "Convolution.h"
extern "C" double scn_DR_(Convolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *output_features, THTensor *weight, THTensor *bias,
long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules =
_m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
uInt nActive = _m.getNActive(outputSize);
THTensor_(resize2d)(output_features, nActive, weight->size[1]);
if (not bias)
THTensor_(zero)(output_features);
double flops = 0;
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto oF = THTensor_(data)(output_features);
auto ip = input_features->size[1];
auto op = output_features->size[1];
auto w = THTensor_(data)(weight);
auto b = THOptionalTensorData(bias);
Convolution_ForwardPass(iF, ip, ip, oF, op, op, w, b, _rules, nActive,
THBlas_(gemm));
for (auto &r : _rules)
flops += r.size() / 2 * ip * op;
}
return flops;
}
extern "C" void scn_DR_(Convolution_backward)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *d_input_features, THTensor *d_output_features, THTensor *weight,
THTensor *d_weight, THTensor *d_bias, long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules =
_m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
uInt nActive = _m.getNActive(outputSize);
THTensor_(resizeAs)(d_input_features, input_features);
THTensor_(zero)(d_input_features);
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto diF = THTensor_(data)(d_input_features);
auto doF = THTensor_(data)(d_output_features);
auto ip = input_features->size[1];
auto op = d_output_features->size[1];
auto w = THTensor_(data)(weight);
auto dw = THTensor_(data)(d_weight);
auto db = THOptionalTensorData(d_bias);
Convolution_BackwardPass(iF, diF, ip, ip, doF, op, op, w, dw, db, _rules,
nActive, THBlas_(gemm));
}
}
extern "C" double scn_DR_(SubmanifoldConvolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *filterSize, void **m,
THTensor *input_features, THTensor *output_features, THTensor *weight,
THTensor *bias, long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
uInt nActive = _m.getNActive(inputSize);
THTensor_(resize2d)(output_features, nActive, weight->size[1]);
if (not bias)
THTensor_(zero)(output_features);
double flops = 0;
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto oF = THTensor_(data)(output_features);
auto ip = input_features->size[1];
auto op = output_features->size[1];
auto w = THTensor_(data)(weight);
auto b = THOptionalTensorData(bias);
Convolution_ForwardPass(iF, ip, ip, oF, op, op, w, b, _rules, nActive,
THBlas_(gemm));
for (auto &r : _rules)
flops += r.size() / 2 * ip * op;
}
return flops;
}
extern "C" void scn_DR_(SubmanifoldConvolution_backward)(
THLongTensor *inputSize, THLongTensor *filterSize, void **m,
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
THTensor *d_bias, long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
uInt nActive = _m.getNActive(inputSize);
THTensor_(resizeAs)(d_input_features, input_features);
THTensor_(zero)(d_input_features);
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto diF = THTensor_(data)(d_input_features);
auto doF = THTensor_(data)(d_output_features);
auto ip = input_features->size[1];
auto op = d_output_features->size[1];
auto w = THTensor_(data)(weight);
auto dw = THTensor_(data)(d_weight);
auto db = THOptionalTensorData(d_bias);
Convolution_BackwardPass(iF, diF, ip, ip, doF, op, op, w, dw, db, _rules,
nActive, THBlas_(gemm));
}
}
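// Note: the submanifold variants above deliberately reuse the input grid as
// the output grid — getNActive(inputSize) sizes the output — so the set of
// active sites, and hence the sparsity pattern, is preserved through the
// layer; only already-active sites receive output.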
extern "C" double scn_DR_(FullConvolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **mIn, void **mOut,
THTensor *input_features, THTensor *output_features, THTensor *weight,
THTensor *bias, long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mIn)
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mOut)
auto _rules = _mIn.getFullConvolutionRuleBook(
inputSize, outputSize, filterSize, filterStride, _mOut);
uInt nActive = _mOut.getNActive(outputSize);
THTensor_(resize2d)(output_features, nActive, weight->size[1]);
if (not bias)
THTensor_(zero)(output_features);
double flops = 0;
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto oF = THTensor_(data)(output_features);
auto ip = input_features->size[1];
auto op = output_features->size[1];
auto w = THTensor_(data)(weight);
auto b = THOptionalTensorData(bias);
Convolution_ForwardPass(iF, ip, ip, oF, op, op, w, b, _rules, nActive,
THBlas_(gemm));
for (auto &r : _rules)
flops += r.size() / 2 * ip * op;
}
return flops;
}
extern "C" void scn_DR_(FullConvolution_backward)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **mIn, void **mOut,
THTensor *input_features, THTensor *d_input_features,
THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
THTensor *d_bias, long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mIn)
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mOut)
auto _rules = _mIn.getFullConvolutionRuleBook(
inputSize, outputSize, filterSize, filterStride, _mOut);
uInt nActive = _mOut.getNActive(outputSize);
THTensor_(resizeAs)(d_input_features, input_features);
THTensor_(zero)(d_input_features);
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto diF = THTensor_(data)(d_input_features);
auto doF = THTensor_(data)(d_output_features);
auto ip = input_features->size[1];
auto op = d_output_features->size[1];
auto w = THTensor_(data)(weight);
auto dw = THTensor_(data)(d_weight);
auto db = THOptionalTensorData(d_bias);
Convolution_BackwardPass(iF, diF, ip, ip, doF, op, op, w, dw, db, _rules,
nActive, THBlas_(gemm));
}
}
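// Note: FullConvolution is the one layer built from two metadata objects —
// _mIn supplies the input grids and _mOut receives the output grids created
// by getFullConvolutionRuleBook, so the output sparsity pattern can grow
// beyond the input's active set.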
extern "C" double scn_DR_(RandomizedStrideConvolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *output_features, THTensor *weight, THTensor *bias,
long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules =
      _m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
                                     filterStride, true);
uInt nActive = _m.getNActive(outputSize);
THTensor_(resize2d)(output_features, nActive, weight->size[1]);
if (not bias)
THTensor_(zero)(output_features);
double flops = 0;
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto oF = THTensor_(data)(output_features);
auto ip = input_features->size[1];
auto op = output_features->size[1];
auto w = THTensor_(data)(weight);
auto b = THOptionalTensorData(bias);
Convolution_ForwardPass(iF, ip, ip, oF, op, op, w, b, _rules, nActive,
THBlas_(gemm));
for (auto &r : _rules)
flops += r.size() / 2 * ip * op;
}
return flops;
}
extern "C" void scn_DR_(RandomizedStrideConvolution_backward)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *d_input_features, THTensor *d_output_features, THTensor *weight,
THTensor *d_weight, THTensor *d_bias, long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules =
      _m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize,
                                     filterStride, true);
uInt nActive = _m.getNActive(outputSize);
THTensor_(resizeAs)(d_input_features, input_features);
THTensor_(zero)(d_input_features);
if (nActive) {
auto iF = THTensor_(data)(input_features);
auto diF = THTensor_(data)(d_input_features);
auto doF = THTensor_(data)(d_output_features);
auto ip = input_features->size[1];
auto op = d_output_features->size[1];
auto w = THTensor_(data)(weight);
auto dw = THTensor_(data)(d_weight);
auto db = THOptionalTensorData(d_bias);
Convolution_BackwardPass(iF, diF, ip, ip, doF, op, op, w, dw, db, _rules,
nActive, THBlas_(gemm));
}
}
#endif
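// --- Illustrative sketch (not part of the original file) ---
// The flops bookkeeping repeated in each updateOutput above, factored out:
// each rule r holds nHot = r.size()/2 (input,output) pairs and its GEMM costs
// nHot*ip*op multiply-accumulates. E.g. a 3x3 filter with ~1000 active sites
// per offset and 16->32 planes gives about 9 * 1000 * 16 * 32 = 4,608,000
// MACs per forward pass (an upper bound; boundary offsets have fewer pairs).
inline double rulebook_flops_sketch(const RuleBook &rules, uInt ip, uInt op) {
  double flops = 0;
  for (auto const &r : rules)
    flops += (double)(r.size() / 2) * ip * op;
  return flops;
}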
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_CONVOLUTION_H
#define CPU_CONVOLUTION_H
#include "../SparseConvNet.h"
#include <cstring>
// buffer must have size >= nHot * (nIn+nOut)
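// (Naming note: input_nPlanes is the number of live planes per site, while
// input_nPLANES is the row stride of the feature tensor — the two differ when
// nFeaturesToDrop skips leading planes.)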
template <typename T>
void Convolution_ForwardPass(
T *input_features, uInt input_nPlanes, uInt input_nPLANES,
T *output_features, uInt output_nPlanes, uInt output_nPLANES, T *weight,
T *bias, RuleBook &rules, uInt output_nActive,
void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
if (bias != nullptr) // Set bias
for (uInt row = 0; row < output_nActive; row++)
for (uInt column = 0; column < output_nPlanes; column++)
output_features[row * output_nPLANES + column] = bias[column];
std::vector<T> input_buffer, output_buffer;
for (auto &r : rules) {
uInt nHot = r.size() / 2;
input_buffer.resize(nHot * input_nPlanes);
output_buffer.resize(nHot * output_nPlanes);
for (uInt row = 0; row < nHot; row++) {
std::memcpy(&input_buffer[row * input_nPlanes],
input_features + r[2 * row] * input_nPLANES,
sizeof(T) * input_nPlanes);
}
// Do GEMM (note: gemm assumes column-major matrices)
// input_buffer is l*m (row-major)
// weight is m*r (row-major)
// output_buffer is l*r (row-major)
// buffer * weights -> output_buffers
(*gemm)('n', 'n',
output_nPlanes, // r
nHot, // l
input_nPlanes, // m
1, // alpha
weight, output_nPlanes, // r
&input_buffer[0], input_nPlanes, // m
0, // beta
&output_buffer[0], output_nPlanes // r
);
weight += input_nPlanes * output_nPlanes;
for (uInt row = 0; row < nHot; row++) {
T *b = &output_buffer[row * output_nPlanes];
T *o = &output_features[r[2 * row + 1] * output_nPLANES];
for (uInt k = 0; k < output_nPlanes; k++)
o[k] += b[k];
}
}
}
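// Note on the GEMM trick above: a row-major matrix reinterpreted as
// column-major is its transpose, so to get C = A.B with row-major A (l x m),
// B (m x r), C (l x r), one computes C^T = B^T . A^T in column-major:
//   gemm('n', 'n', r /*rows of C^T*/, l /*cols of C^T*/, m, 1,
//        B, r, A, m, 0, C, r);
// which is exactly the argument order used in Convolution_ForwardPass with
// A = input_buffer, B = weight, C = output_buffer.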
template <typename T>
void Convolution_BackwardPass(
T *input_features, T *d_input_features, uInt input_nPlanes,
uInt input_nPLANES, T *d_output_features, uInt output_nPlanes,
uInt output_nPLANES, T *weight, T *d_weight, T *d_bias, RuleBook &rules,
uInt output_nActive,
void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
if (d_bias)
for (uInt row = 0; row < output_nActive; row++)
for (uInt i = 0; i < output_nPlanes; i++)
d_bias[i] += d_output_features[row * output_nPLANES + i];
std::vector<T> input_buffer, output_buffer;
for (auto &r : rules) {
uInt nHot = r.size() / 2;
input_buffer.resize(nHot * input_nPlanes);
output_buffer.resize(nHot * output_nPlanes);
for (uInt row = 0; row < nHot; row++)
std::memcpy(&output_buffer[row * output_nPlanes],
&d_output_features[r[2 * row + 1] * output_nPLANES],
sizeof(T) * output_nPlanes);
// Do GEMM (note: gemm assumes column-major matrices)
// output_buffer is l*m (row-major)
// weights is r*m (row-major)
// input_buffer is l*r (row-major)
// output_buffer * T(weight) -> input_buffer
(*gemm)('t', 'n',
input_nPlanes, // r
nHot, // l
output_nPlanes, // m
1, // alpha
weight, output_nPlanes, // m
&output_buffer[0], output_nPlanes, // m
0, // beta
&input_buffer[0], input_nPlanes // r
);
weight += input_nPlanes * output_nPlanes;
for (uInt row = 0; row < nHot; row++) {
T *b = &input_buffer[row * input_nPlanes];
T *i = &d_input_features[r[2 * row] * input_nPLANES];
for (uInt k = 0; k < input_nPlanes; k++)
i[k] += b[k];
}
for (uInt row = 0; row < nHot; row++)
std::memcpy(&input_buffer[row * input_nPlanes],
input_features + r[2 * row] * input_nPLANES,
sizeof(T) * input_nPlanes);
// Do GEMM (note: gemm assumes column-major matrices)
// input_buffer is m*l (row-major)
// output_buffer is m*r (row-major)
// d_weights is l*r (row-major)
// T(input_buffer) * output_buffer -> d_weight
(*gemm)('n', 't',
output_nPlanes, // r
input_nPlanes, // l
nHot, // m
1, // alpha
&output_buffer[0], output_nPlanes, // r
&input_buffer[0], input_nPlanes, // l
1, // beta
d_weight, output_nPlanes // r
);
d_weight += input_nPlanes * output_nPlanes;
}
}
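// Note: the backward pass above is two GEMMs per rule sharing the gathered
// buffers — d_input_buffer = d_output_buffer . W^T (the 't','n' call) and
// d_W += input_buffer^T . d_output_buffer (the 'n','t' call with beta = 1,
// so weight gradients accumulate across rules and across calls).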
// template <typename T>
// void Convolution_ForwardPass(
// T *input_features, uInt input_nPlanes, uInt input_nPLANES,
// T *output_features, uInt output_nPlanes, uInt output_nPLANES, T *weight,
// T *bias, RuleBook &rules, uInt output_nActive,
// void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
// T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
// if (bias != nullptr) // Set bias
// for (uInt row = 0; row < output_nActive; row++)
// for (uInt column = 0; column < output_nPlanes; column++)
// output_features[row * output_nPLANES + column] = bias[column];
// for (auto &r : rules) {
// uInt nHot = r.size() / 2;
// for (uInt row = 0; row < nHot; row++) {
// T *inp = &input_features[r[2 * row] * input_nPLANES];
// T *out = &output_features[r[2 * row + 1] * output_nPLANES];
// for (uInt i = 0; i < input_nPlanes; i++)
// for (uInt j = 0; j < output_nPlanes; j++)
// out[j] += inp[i] * weight[i * output_nPlanes + j];
// }
// weight += input_nPlanes * output_nPlanes;
// }
// }
#endif /* CPU_CONVOLUTION_H */
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/Deconvolution.cpp"
#else
#include "Deconvolution.h"
extern "C" double scn_DR_(Deconvolution_updateOutput)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *output_features, THTensor *weight, THTensor *bias,
long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules =
_m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
uInt nActive = _m.getNActive(outputSize);
THTensor_(resize2d)(output_features, nActive, weight->size[1]);
if (not bias)
THTensor_(zero)(output_features);
auto iF = THTensor_(data)(input_features);
auto oF = THTensor_(data)(output_features);
auto ip = input_features->size[1];
auto op = output_features->size[1];
auto w = THTensor_(data)(weight);
auto b = THOptionalTensorData(bias);
Deconvolution_ForwardPass(iF, ip, ip, oF, op, op, w, b, _rules, nActive,
THBlas_(gemm));
double flops = 0;
for (auto &r : _rules)
flops += r.size() / 2 * ip * op;
return flops;
}
extern "C" void scn_DR_(Deconvolution_backward)(
THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
THLongTensor *filterStride, void **m, THTensor *input_features,
THTensor *d_input_features, THTensor *d_output_features, THTensor *weight,
THTensor *d_weight, THTensor *d_bias, long filterVolume) {
SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
auto _rules =
_m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
uInt nActive = _m.getNActive(outputSize);
THTensor_(resizeAs)(d_input_features, input_features);
THTensor_(zero)(d_input_features);
auto iF = THTensor_(data)(input_features);
auto diF = THTensor_(data)(d_input_features);
auto doF = THTensor_(data)(d_output_features);
auto ip = input_features->size[1];
auto op = d_output_features->size[1];
auto w = THTensor_(data)(weight);
auto dw = THTensor_(data)(d_weight);
auto db = THOptionalTensorData(d_bias);
Deconvolution_BackwardPass(iF, diF, ip, ip, doF, op, op, w, dw, db, _rules,
nActive, THBlas_(gemm));
}
#endif
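// Note: Deconvolution builds its rulebook with inputSize and outputSize
// swapped (getRuleBook(outputSize, inputSize, ...) above), so the same
// gather/GEMM/scatter machinery runs the convolution "in reverse": the
// output is the larger grid, and Deconvolution_ForwardPass scatters into it.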