Commit 2c4ed608 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

Goodbye THNN. Hello ATen!

parent 6d4475db
...@@ -5,66 +5,58 @@ ...@@ -5,66 +5,58 @@
// LICENSE file in the root directory of this source tree. // LICENSE file in the root directory of this source tree.
#include <array> #include <array>
#include <tuple>
// Using 32 bit integers for coordinates and memory calculations. // Using 32 bit integers for coordinates and memory calculations.
// They could be replaced with 64 bit integers.
// Advantages of 64 bit:
// - support for nFeatures * nActiveSites > 2^32 per hidden layer per batch
// Disadvantages:
// - larger, and therefore slower, data copies from CPU -> GPU
// - more device memory needed to store sparseconvnet 'rulebooks'
// - not really needed until GPUs have >> 32GB RAM
using Int = int32_t; using Int = int32_t;
using uInt = uint32_t; // Max value = uInt_MAX used to denote 'non-existent'
const uInt uInt_MAX = 4294967295; // 2^32-1
const uInt Int_MAX = 2147483647; // 2^31-1
// Point<dimension> is a point in the d-dimensional integer lattice // Point<dimension> is a point in the d-dimensional integer lattice
// (i.e. square-grid/cubic-grid, ...) // (i.e. square-grid/cubic-grid, ...)
template <uInt dimension> using Point = std::array<Int, dimension>; template <Int dimension> using Point = std::array<Int, dimension>;
template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) { template <Int dimension>
Point<dimension> LongTensorToPoint(/*long*/ at::Tensor &t) {
Point<dimension> p; Point<dimension> p;
long *td = THLongTensor_data(t); long *td = t.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i] = td[i]; p[i] = td[i];
return p; return p;
} }
template <uInt dimension> template <Int dimension>
Point<2 * dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) { Point<2 * dimension> TwoLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1) {
Point<2 * dimension> p; Point<2 * dimension> p;
long *td; long *td;
td = THLongTensor_data(t0); td = t0.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i] = td[i]; p[i] = td[i];
td = THLongTensor_data(t1); td = t1.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i]; p[i + dimension] = td[i];
return p; return p;
} }
template <uInt dimension> template <Int dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1, Point<3 * dimension> ThreeLongTensorsToPoint(/*long*/ at::Tensor &t0,
THLongTensor *t2) { /*long*/ at::Tensor &t1,
/*long*/ at::Tensor &t2) {
Point<3 * dimension> p; Point<3 * dimension> p;
long *td; long *td;
td = THLongTensor_data(t0); td = t0.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i] = td[i]; p[i] = td[i];
td = THLongTensor_data(t1); td = t1.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i]; p[i + dimension] = td[i];
td = THLongTensor_data(t2); td = t2.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i + 2 * dimension] = td[i]; p[i + 2 * dimension] = td[i];
return p; return p;
} }
// FNV Hash function for Point<dimension> // FNV Hash function for Point<dimension>
template <uInt dimension> struct IntArrayHash { template <Int dimension> struct IntArrayHash {
std::size_t operator()(Point<dimension> const &p) const { std::size_t operator()(Point<dimension> const &p) const {
uInt hash = 16777619; Int hash = 16777619;
for (auto x : p) { for (auto x : p) {
hash *= 2166136261; hash *= 2166136261;
hash ^= x; hash ^= x;
...@@ -73,5 +65,4 @@ template <uInt dimension> struct IntArrayHash { ...@@ -73,5 +65,4 @@ template <uInt dimension> struct IntArrayHash {
} }
}; };
#define THCITensor THCudaIntTensor #define at_kINT at::kInt
#define THCITensor_(NAME) TH_CONCAT_3(THCITensor, _, NAME)
...@@ -5,66 +5,58 @@ ...@@ -5,66 +5,58 @@
// LICENSE file in the root directory of this source tree. // LICENSE file in the root directory of this source tree.
#include <array> #include <array>
#include <tuple>
// Using 32 bit integers for coordinates and memory calculations. // Using 64 bit integers for coordinates and memory calculations.
// They could be replaced with 64 bit integers.
// Advantages of 64 bit:
// - support for nFeatures * nActiveSites > 2^32 per hidden layer per batch
// Disadvantages:
// - larger, and therefore slower, data copies from CPU -> GPU
// - more device memory needed to store sparseconvnet 'rulebooks'
// - not really needed until GPUs have >> 32GB RAM
using Int = int64_t; using Int = int64_t;
using uInt = uint64_t; // Max value = uInt_MAX used to denote 'non-existent'
const uInt uInt_MAX = 18446744073709551615; // 2^64-1
const uInt Int_MAX = 9223372036854775807; // 2^63-1
// Point<dimension> is a point in the d-dimensional integer lattice // Point<dimension> is a point in the d-dimensional integer lattice
// (i.e. square-grid/cubic-grid, ...) // (i.e. square-grid/cubic-grid, ...)
template <uInt dimension> using Point = std::array<Int, dimension>; template <Int dimension> using Point = std::array<Int, dimension>;
template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) { template <Int dimension>
Point<dimension> LongTensorToPoint(/*long*/ at::Tensor &t) {
Point<dimension> p; Point<dimension> p;
long *td = THLongTensor_data(t); long *td = t.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i] = td[i]; p[i] = td[i];
return p; return p;
} }
template <uInt dimension> template <Int dimension>
Point<2 * dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) { Point<2 * dimension> TwoLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1) {
Point<2 * dimension> p; Point<2 * dimension> p;
long *td; long *td;
td = THLongTensor_data(t0); td = t0.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i] = td[i]; p[i] = td[i];
td = THLongTensor_data(t1); td = t1.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i]; p[i + dimension] = td[i];
return p; return p;
} }
template <uInt dimension> template <Int dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1, Point<3 * dimension> ThreeLongTensorsToPoint(/*long*/ at::Tensor &t0,
THLongTensor *t2) { /*long*/ at::Tensor &t1,
/*long*/ at::Tensor &t2) {
Point<3 * dimension> p; Point<3 * dimension> p;
long *td; long *td;
td = THLongTensor_data(t0); td = t0.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i] = td[i]; p[i] = td[i];
td = THLongTensor_data(t1); td = t1.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i]; p[i + dimension] = td[i];
td = THLongTensor_data(t2); td = t2.data<long>();
for (int i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
p[i + 2 * dimension] = td[i]; p[i + 2 * dimension] = td[i];
return p; return p;
} }
// FNV Hash function for Point<dimension> // FNV Hash function for Point<dimension>
template <uInt dimension> struct IntArrayHash { template <Int dimension> struct IntArrayHash {
std::size_t operator()(Point<dimension> const &p) const { std::size_t operator()(Point<dimension> const &p) const {
uInt hash = 14695981039346656037; Int hash = -3750763034362895579; // 14695981039346656037;
for (auto x : p) { for (auto x : p) {
hash *= 1099511628211; hash *= 1099511628211;
hash ^= x; hash ^= x;
...@@ -73,5 +65,4 @@ template <uInt dimension> struct IntArrayHash { ...@@ -73,5 +65,4 @@ template <uInt dimension> struct IntArrayHash {
} }
}; };
#define THCITensor THCudaLongTensor #define at_kINT at::kLong
#define THCITensor_(NAME) TH_CONCAT_3(THCITensor, _, NAME)
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
#ifndef ACTIVEPOOLING_H #ifndef ACTIVEPOOLING_H
#define ACTIVEPOOLING_H #define ACTIVEPOOLING_H
#include "../SparseConvNet.h"
// Return the maximum number of active sites in the batch // Return the maximum number of active sites in the batch
// rules has size 1. // rules has size 1.
...@@ -14,14 +13,14 @@ ...@@ -14,14 +13,14 @@
// First column is number of active sites for that sample (<= maxActive) // First column is number of active sites for that sample (<= maxActive)
// Remaining maxActive columns give the active sites, zero padded. // Remaining maxActive columns give the active sites, zero padded.
template <uInt dimension> template <Int dimension>
void activePoolingRules(SparseGrids<dimension> &SGs, RuleBook &rules) { void activePoolingRules(SparseGrids<dimension> &SGs, RuleBook &rules) {
rules.clear(); rules.clear();
rules.resize(2); rules.resize(2);
auto &r = rules[0]; auto &r = rules[0];
uInt maxActive = 0; Int maxActive = 0;
for (auto &sg : SGs) for (auto &sg : SGs)
maxActive = std::max(maxActive, (uInt)sg.mp.size()); maxActive = std::max(maxActive, (Int)sg.mp.size());
for (auto &sg : SGs) { for (auto &sg : SGs) {
r.push_back(sg.mp.size()); r.push_back(sg.mp.size());
for (auto &iter : sg.mp) for (auto &iter : sg.mp)
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#define CONVOLUTIONRULES_H #define CONVOLUTIONRULES_H
#include "RectangularRegions.h" #include "RectangularRegions.h"
template <uInt dimension> template <Int dimension>
void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid, void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
SparseGrid<dimension> &outputGrid, SparseGrid<dimension> &outputGrid,
RuleBook &rules, long *size, RuleBook &rules, long *size,
...@@ -17,10 +17,11 @@ void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid, ...@@ -17,10 +17,11 @@ void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
rules.resize(volume<dimension>(size)); rules.resize(volume<dimension>(size));
for (auto const &inIter : inputGrid.mp) { for (auto const &inIter : inputGrid.mp) {
for (auto j : OutputRegionCalculator<dimension>(inIter.first, size, stride, auto outRegion = OutputRegionCalculator<dimension>(
outputSpatialSize)) { inIter.first, size, stride, outputSpatialSize);
for (auto j : outRegion) {
auto inRegion = InputRegionCalculator<dimension>(j, size, stride); auto inRegion = InputRegionCalculator<dimension>(j, size, stride);
uInt rulesOffset = inRegion.offset(inIter.first); Int rulesOffset = inRegion.offset(inIter.first);
auto outIter = outputGrid.mp.find(j); auto outIter = outputGrid.mp.find(j);
if (outIter == outputGrid.mp.end()) { if (outIter == outputGrid.mp.end()) {
outIter = outIter =
...@@ -32,19 +33,19 @@ void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid, ...@@ -32,19 +33,19 @@ void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
} }
} }
template <uInt dimension> template <Int dimension>
uInt Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs, Int Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
SparseGrids<dimension> &output_SGs, SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize, RuleBook &rules, long *filterSize,
long *filterStride, long *filterStride,
long *input_spatialSize, long *input_spatialSize,
long *output_spatialSize) { long *output_spatialSize) {
rules.clear(); rules.clear();
output_SGs.clear(); output_SGs.clear();
uInt batchSize = input_SGs.size(); Int batchSize = input_SGs.size();
output_SGs.resize(batchSize); output_SGs.resize(batchSize);
uInt output_nActive = 0; Int output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) { for (Int i = 0; i < batchSize; i++) {
auto &iSG = input_SGs[i]; auto &iSG = input_SGs[i];
auto &oSG = output_SGs[i]; auto &oSG = output_SGs[i];
oSG.ctr = output_nActive; oSG.ctr = output_nActive;
...@@ -57,43 +58,43 @@ uInt Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs, ...@@ -57,43 +58,43 @@ uInt Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
return output_nActive; return output_nActive;
} }
template <uInt dimension> template <Int dimension>
uInt Convolution_InputSgsToRulesAndOutputSgs_OMP( Int Convolution_InputSgsToRulesAndOutputSgs_OMP(
SparseGrids<dimension> &input_SGs, SparseGrids<dimension> &output_SGs, SparseGrids<dimension> &input_SGs, SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize, long *filterStride, RuleBook &rules, long *filterSize, long *filterStride,
long *input_spatialSize, long *output_spatialSize) { long *input_spatialSize, long *output_spatialSize) {
rules.clear(); rules.clear();
rules.resize(volume<dimension>(filterSize)); rules.resize(volume<dimension>(filterSize));
output_SGs.clear(); output_SGs.clear();
uInt batchSize = input_SGs.size(); Int batchSize = input_SGs.size();
output_SGs.resize(batchSize); output_SGs.resize(batchSize);
std::vector<RuleBook> rbs(batchSize); std::vector<RuleBook> rbs(batchSize);
{ {
uInt i; Int i;
#pragma omp parallel for private(i) #pragma omp parallel for private(i)
for (i = 0; i < batchSize; i++) for (i = 0; i < batchSize; i++)
Convolution_InputSgToRulesAndOutputSg<dimension>( Convolution_InputSgToRulesAndOutputSg<dimension>(
input_SGs[i], output_SGs[i], rbs[i], filterSize, filterStride, input_SGs[i], output_SGs[i], rbs[i], filterSize, filterStride,
input_spatialSize, output_spatialSize); input_spatialSize, output_spatialSize);
} }
uInt output_nActive = 0; Int output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) { for (Int i = 0; i < batchSize; i++) {
// Parallel assignment: // Parallel assignment:
// output_nActive <- output_nActive+output_SGs[i].ctr // output_nActive <- output_nActive+output_SGs[i].ctr
// output_SGs[i].ctr <- output_nActive // output_SGs[i].ctr <- output_nActive
uInt tmp = output_nActive; Int tmp = output_nActive;
output_nActive += output_SGs[i].ctr; output_nActive += output_SGs[i].ctr;
output_SGs[i].ctr = tmp; output_SGs[i].ctr = tmp;
} }
{ {
uInt i; Int i;
#pragma omp parallel for private(i) #pragma omp parallel for private(i)
for (i = 0; i < rules.size(); i++) { for (i = 0; i < (Int)rules.size(); i++) {
auto &R = rules[i]; auto &R = rules[i];
for (uInt j = 0; j < batchSize; j++) { for (Int j = 0; j < batchSize; j++) {
auto &r = rbs[j][i]; auto &r = rbs[j][i];
auto offset = output_SGs[j].ctr; auto offset = output_SGs[j].ctr;
for (uInt k = 0; k < r.size();) { for (Int k = 0; k < (Int)r.size();) {
R.push_back(r[k++]); R.push_back(r[k++]);
R.push_back(r[k++] + offset); R.push_back(r[k++] + offset);
} }
...@@ -105,19 +106,19 @@ uInt Convolution_InputSgsToRulesAndOutputSgs_OMP( ...@@ -105,19 +106,19 @@ uInt Convolution_InputSgsToRulesAndOutputSgs_OMP(
// for each active site, list of (inputFeatureNumber,batchIdx, spatialOffset) // for each active site, list of (inputFeatureNumber,batchIdx, spatialOffset)
// triples // triples
template <uInt dimension> template <Int dimension>
void SparseToDense_InputSgsToRulesAndOutputSgs( void SparseToDense_InputSgsToRulesAndOutputSgs(
SparseGrids<dimension> &input_SGs, RuleBook &rules, long *spatialSize) { SparseGrids<dimension> &input_SGs, RuleBook &rules, long *spatialSize) {
uInt batchSize = input_SGs.size(); Int batchSize = input_SGs.size();
rules.clear(); rules.clear();
rules.resize(batchSize); rules.resize(batchSize);
Point<dimension> lb, ub; Point<dimension> lb, ub;
for (int i = 0; i < dimension; ++i) { for (Int i = 0; i < dimension; ++i) {
lb[i] = 0; lb[i] = 0;
ub[i] = spatialSize[i] - 1; ub[i] = spatialSize[i] - 1;
} }
auto region = RectangularRegion<dimension>(lb, ub); auto region = RectangularRegion<dimension>(lb, ub);
for (uInt batchIdx = 0; batchIdx < batchSize; batchIdx++) { for (Int batchIdx = 0; batchIdx < batchSize; batchIdx++) {
auto &iSG = input_SGs[batchIdx]; auto &iSG = input_SGs[batchIdx];
for (auto const &inIter : iSG.mp) { for (auto const &inIter : iSG.mp) {
rules[batchIdx].push_back(inIter.second + iSG.ctr); rules[batchIdx].push_back(inIter.second + iSG.ctr);
...@@ -126,19 +127,19 @@ void SparseToDense_InputSgsToRulesAndOutputSgs( ...@@ -126,19 +127,19 @@ void SparseToDense_InputSgsToRulesAndOutputSgs(
} }
} }
template <uInt dimension> template <Int dimension>
void SparseToDense_InputSgsToRulesAndOutputSgs_OMP( void SparseToDense_InputSgsToRulesAndOutputSgs_OMP(
SparseGrids<dimension> &input_SGs, RuleBook &rules, long *spatialSize) { SparseGrids<dimension> &input_SGs, RuleBook &rules, long *spatialSize) {
uInt batchSize = input_SGs.size(); Int batchSize = input_SGs.size();
rules.clear(); rules.clear();
rules.resize(batchSize); rules.resize(batchSize);
Point<dimension> lb, ub; Point<dimension> lb, ub;
for (int i = 0; i < dimension; ++i) { for (Int i = 0; i < dimension; ++i) {
lb[i] = 0; lb[i] = 0;
ub[i] = spatialSize[i] - 1; ub[i] = spatialSize[i] - 1;
} }
auto region = RectangularRegion<dimension>(lb, ub); auto region = RectangularRegion<dimension>(lb, ub);
uInt batchIdx; Int batchIdx;
#pragma omp parallel for private(batchIdx) #pragma omp parallel for private(batchIdx)
for (batchIdx = 0; batchIdx < batchSize; batchIdx++) { for (batchIdx = 0; batchIdx < batchSize; batchIdx++) {
auto &iSG = input_SGs[batchIdx]; auto &iSG = input_SGs[batchIdx];
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#define FULLDECONVOLUTIONRULES_H #define FULLDECONVOLUTIONRULES_H
#include "RectangularRegions.h" #include "RectangularRegions.h"
template <uInt dimension> template <Int dimension>
void FullConvolution_InputSgToRulesAndOutputSg( void FullConvolution_InputSgToRulesAndOutputSg(
SparseGrid<dimension> &inputGrid, SparseGrid<dimension> &outputGrid, SparseGrid<dimension> &inputGrid, SparseGrid<dimension> &outputGrid,
RuleBook &rules, long *size, long *stride, long *inputSpatialSize, RuleBook &rules, long *size, long *stride, long *inputSpatialSize,
...@@ -20,9 +20,7 @@ void FullConvolution_InputSgToRulesAndOutputSg( ...@@ -20,9 +20,7 @@ void FullConvolution_InputSgToRulesAndOutputSg(
auto outRegion = auto outRegion =
InputRegionCalculator<dimension>(inIter.first, size, stride); InputRegionCalculator<dimension>(inIter.first, size, stride);
for (auto j : outRegion) { for (auto j : outRegion) {
auto inRegion = Int rulesOffset = outRegion.offset(j);
OutputRegionCalculator<dimension>(j, size, stride, outputSpatialSize);
uInt rulesOffset = outRegion.offset(j);
auto outIter = outputGrid.mp.find(j); auto outIter = outputGrid.mp.find(j);
if (outIter == outputGrid.mp.end()) { if (outIter == outputGrid.mp.end()) {
outIter = outIter =
...@@ -34,17 +32,17 @@ void FullConvolution_InputSgToRulesAndOutputSg( ...@@ -34,17 +32,17 @@ void FullConvolution_InputSgToRulesAndOutputSg(
} }
} }
template <uInt dimension> template <Int dimension>
uInt FullConvolution_InputSgsToRulesAndOutputSgs( Int FullConvolution_InputSgsToRulesAndOutputSgs(
SparseGrids<dimension> &input_SGs, SparseGrids<dimension> &output_SGs, SparseGrids<dimension> &input_SGs, SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize, long *filterStride, RuleBook &rules, long *filterSize, long *filterStride,
long *input_spatialSize, long *output_spatialSize) { long *input_spatialSize, long *output_spatialSize) {
rules.clear(); rules.clear();
output_SGs.clear(); output_SGs.clear();
uInt batchSize = input_SGs.size(); Int batchSize = input_SGs.size();
output_SGs.resize(batchSize); output_SGs.resize(batchSize);
uInt output_nActive = 0; Int output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) { for (Int i = 0; i < batchSize; i++) {
auto &iSG = input_SGs[i]; auto &iSG = input_SGs[i];
auto &oSG = output_SGs[i]; auto &oSG = output_SGs[i];
oSG.ctr = output_nActive; oSG.ctr = output_nActive;
...@@ -57,43 +55,43 @@ uInt FullConvolution_InputSgsToRulesAndOutputSgs( ...@@ -57,43 +55,43 @@ uInt FullConvolution_InputSgsToRulesAndOutputSgs(
return output_nActive; return output_nActive;
} }
template <uInt dimension> template <Int dimension>
uInt FullConvolution_InputSgsToRulesAndOutputSgs_OMP( Int FullConvolution_InputSgsToRulesAndOutputSgs_OMP(
SparseGrids<dimension> &input_SGs, SparseGrids<dimension> &output_SGs, SparseGrids<dimension> &input_SGs, SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize, long *filterStride, RuleBook &rules, long *filterSize, long *filterStride,
long *input_spatialSize, long *output_spatialSize) { long *input_spatialSize, long *output_spatialSize) {
rules.clear(); rules.clear();
rules.resize(volume<dimension>(filterSize)); rules.resize(volume<dimension>(filterSize));
output_SGs.clear(); output_SGs.clear();
uInt batchSize = input_SGs.size(); Int batchSize = input_SGs.size();
output_SGs.resize(batchSize); output_SGs.resize(batchSize);
std::vector<RuleBook> rbs(batchSize); std::vector<RuleBook> rbs(batchSize);
{ {
uInt i; Int i;
#pragma omp parallel for private(i) #pragma omp parallel for private(i)
for (i = 0; i < batchSize; i++) for (i = 0; i < batchSize; i++)
FullConvolution_InputSgToRulesAndOutputSg<dimension>( FullConvolution_InputSgToRulesAndOutputSg<dimension>(
input_SGs[i], output_SGs[i], rbs[i], filterSize, filterStride, input_SGs[i], output_SGs[i], rbs[i], filterSize, filterStride,
input_spatialSize, output_spatialSize); input_spatialSize, output_spatialSize);
} }
uInt output_nActive = 0; Int output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) { for (Int i = 0; i < batchSize; i++) {
// Parallel assignment: // Parallel assignment:
// output_nActive <- output_nActive+output_SGs[i].ctr // output_nActive <- output_nActive+output_SGs[i].ctr
// output_SGs[i].ctr <- output_nActive // output_SGs[i].ctr <- output_nActive
uInt tmp = output_nActive; Int tmp = output_nActive;
output_nActive += output_SGs[i].ctr; output_nActive += output_SGs[i].ctr;
output_SGs[i].ctr = tmp; output_SGs[i].ctr = tmp;
} }
{ {
uInt i; Int i;
#pragma omp parallel for private(i) #pragma omp parallel for private(i)
for (i = 0; i < rules.size(); i++) { for (i = 0; i < (Int)rules.size(); i++) {
auto &R = rules[i]; auto &R = rules[i];
for (uInt j = 0; j < batchSize; j++) { for (Int j = 0; j < batchSize; j++) {
auto &r = rbs[j][i]; auto &r = rbs[j][i];
auto offset = output_SGs[j].ctr; auto offset = output_SGs[j].ctr;
for (uInt k = 0; k < r.size();) { for (Int k = 0; k < (Int)r.size();) {
R.push_back(r[k++]); R.push_back(r[k++]);
R.push_back(r[k++] + offset); R.push_back(r[k++] + offset);
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#ifndef INPUTLAYER_H #ifndef INPUTLAYER_H
#define INPUTLAYER_H #define INPUTLAYER_H
#include "../SparseConvNet.h"
// Rulebook Format // Rulebook Format
// rules[0][0] == mode // rules[0][0] == mode
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
// rules[1] nOutputRows x (1+maxActive) // rules[1] nOutputRows x (1+maxActive)
// mode 0==guaranteed unique 1==overwrite, 2=keep, 3=sum, 4=mean // mode 0==guaranteed unique 1==overwrite, 2=keep, 3=sum, 4=mean
template <uInt dimension> template <Int dimension>
void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
uInt nInputRows, uInt nInputColumns, uInt batchSize, Int nInputRows, Int nInputColumns, Int batchSize, Int mode,
uInt mode, uInt &nActive) { Int &nActive) {
assert(nActive == 0); assert(nActive == 0);
assert(rules.size() == 0); assert(rules.size() == 0);
assert(SGs.size() == 0); assert(SGs.size() == 0);
...@@ -37,20 +37,20 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -37,20 +37,20 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
if (nInputColumns == dimension) { if (nInputColumns == dimension) {
SGs.resize(1); SGs.resize(1);
auto &sg = SGs[0]; auto &sg = SGs[0];
for (int i = 0; i < nInputRows; ++i) { for (Int i = 0; i < nInputRows; ++i) {
for (int j = 0; j < dimension; j++) for (Int j = 0; j < dimension; j++)
p[j] = coords[j]; p[j] = coords[j];
coords += dimension; coords += dimension;
sg.mp[p] = i; sg.mp[p] = i;
} }
} else { // nInputColumns == dimension + 1 } else { // nInputColumns == dimension + 1
uInt idx; Int idx;
for (int i = 0; i < nInputRows; ++i) { for (Int i = 0; i < nInputRows; ++i) {
for (int j = 0; j < dimension; j++) for (Int j = 0; j < dimension; j++)
p[j] = coords[j]; p[j] = coords[j];
idx = coords[dimension]; idx = coords[dimension];
coords += dimension + 1; coords += dimension + 1;
if (idx + 1 >= SGs.size()) if (idx + 1 >= (Int)SGs.size())
SGs.resize(idx + 1); SGs.resize(idx + 1);
SGs[idx].mp[p] = i; SGs[idx].mp[p] = i;
} }
...@@ -59,12 +59,12 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -59,12 +59,12 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
} }
// Compile list of how input rows correspond to output rows // Compile list of how input rows correspond to output rows
std::vector<std::vector<uInt>> outputRows; std::vector<std::vector<Int>> outputRows;
if (nInputColumns == dimension) { if (nInputColumns == dimension) {
SGs.resize(1); SGs.resize(1);
auto &sg = SGs[0]; auto &sg = SGs[0];
for (int i = 0; i < nInputRows; ++i) { for (Int i = 0; i < nInputRows; ++i) {
for (int j = 0; j < dimension; j++) for (Int j = 0; j < dimension; j++)
p[j] = coords[j]; p[j] = coords[j];
coords += dimension; coords += dimension;
auto iter = sg.mp.find(p); auto iter = sg.mp.find(p);
...@@ -75,13 +75,13 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -75,13 +75,13 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
outputRows[sg.mp[p]].push_back(i); outputRows[sg.mp[p]].push_back(i);
} }
} else { // nInputColumns == dimension + 1 } else { // nInputColumns == dimension + 1
uInt idx; Int idx;
for (int i = 0; i < nInputRows; ++i) { for (Int i = 0; i < nInputRows; ++i) {
for (int j = 0; j < dimension; j++) for (Int j = 0; j < dimension; j++)
p[j] = coords[j]; p[j] = coords[j];
idx = coords[dimension]; idx = coords[dimension];
coords += dimension + 1; coords += dimension + 1;
if (idx + 1 >= SGs.size()) if (idx + 1 >= (Int)SGs.size())
SGs.resize(idx + 1); SGs.resize(idx + 1);
auto &sg = SGs[idx]; auto &sg = SGs[idx];
auto iter = sg.mp.find(p); auto iter = sg.mp.find(p);
...@@ -99,21 +99,21 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -99,21 +99,21 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
rules[0].push_back(outputRows.size()); rules[0].push_back(outputRows.size());
auto &rule = rules[1]; auto &rule = rules[1];
if (mode == 1) { if (mode == 1) {
for (uInt i = 0; i < nActive; ++i) { for (Int i = 0; i < nActive; ++i) {
rule.push_back(1); rule.push_back(1);
rule.push_back(outputRows[i].front()); rule.push_back(outputRows[i].front());
} }
} }
if (mode == 2) { if (mode == 2) {
for (uInt i = 0; i < nActive; ++i) { for (Int i = 0; i < nActive; ++i) {
rule.push_back(1); rule.push_back(1);
rule.push_back(outputRows[i].back()); rule.push_back(outputRows[i].back());
} }
} }
if (mode == 3 or mode == 4) { if (mode == 3 or mode == 4) {
uInt maxActive = 0; Int maxActive = 0;
for (auto &row : outputRows) for (auto &row : outputRows)
maxActive = std::max(maxActive, (uInt)row.size()); maxActive = std::max(maxActive, (Int)row.size());
rules[0][1] = maxActive; rules[0][1] = maxActive;
for (auto &row : outputRows) { for (auto &row : outputRows) {
rule.push_back(row.size()); rule.push_back(row.size());
...@@ -125,8 +125,6 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -125,8 +125,6 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
} }
} }
// Rulebook Format // Rulebook Format
// rules[0][0] == mode // rules[0][0] == mode
// rules[0][1] == maxActive per spatial location (==1 for modes 0,1,2) // rules[0][1] == maxActive per spatial location (==1 for modes 0,1,2)
...@@ -138,14 +136,14 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -138,14 +136,14 @@ void inputLayerRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
// bl is a batchSize x length x dimension long array of coordinates // bl is a batchSize x length x dimension long array of coordinates
// mode 0==guaranteed unique and all present; 1==overwrite, 2=keep, 3=sum, // mode 0==guaranteed unique and all present; 1==overwrite, 2=keep, 3=sum,
// 4=mean // 4=mean
template <uInt dimension> template <Int dimension>
void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
uInt batchSize, uInt length, uInt mode, uInt &nActive) { Int batchSize, Int length, Int mode, Int &nActive) {
assert(nActive == 0); assert(nActive == 0);
assert(rules.size() == 0); assert(rules.size() == 0);
assert(SGs.size() == 0); assert(SGs.size() == 0);
SGs.resize(batchSize); SGs.resize(batchSize);
uInt I; Int I;
if (mode == 0) { if (mode == 0) {
nActive = batchSize * length; nActive = batchSize * length;
...@@ -161,8 +159,8 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -161,8 +159,8 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
sg.ctr = I * length; sg.ctr = I * length;
auto c = coords + I * length * dimension; auto c = coords + I * length * dimension;
Point<dimension> p; Point<dimension> p;
for (int l = 0; l < length; ++l) { for (Int l = 0; l < length; ++l) {
for (int j = 0; j < dimension; ++j) for (Int j = 0; j < dimension; ++j)
p[j] = c[j]; p[j] = c[j];
c += dimension; c += dimension;
sg.mp[p] = l; sg.mp[p] = l;
...@@ -172,18 +170,18 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -172,18 +170,18 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
} }
// Compile list of how input rows correspond to output rows // Compile list of how input rows correspond to output rows
std::vector<std::vector<std::vector<uInt>>> outputRows(batchSize); std::vector<std::vector<std::vector<Int>>> outputRows(batchSize);
std::vector<uInt> nActives(batchSize); std::vector<Int> nActives(batchSize);
#pragma omp parallel for private(I) #pragma omp parallel for private(I)
for (I = 0; I < batchSize; I++) { for (I = 0; I < batchSize; I++) {
auto &sg = SGs[I]; auto &sg = SGs[I];
auto &ors = outputRows[I]; auto &ors = outputRows[I];
auto &nAct = nActives[I]; auto &nAct = nActives[I];
auto c = coords + I * length * dimension; auto c = coords + I * length * dimension;
uInt i = I * length; Int i = I * length;
Point<dimension> p; Point<dimension> p;
for (int l = 0; l < length; ++l, ++i) { for (Int l = 0; l < length; ++l, ++i) {
for (int j = 0; j < dimension; ++j) for (Int j = 0; j < dimension; ++j)
p[j] = *c++; p[j] = *c++;
if (p[0] >= 0) { if (p[0] >= 0) {
auto iter = sg.mp.find(p); auto iter = sg.mp.find(p);
...@@ -200,11 +198,11 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -200,11 +198,11 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
SGs[I].ctr = nActive; SGs[I].ctr = nActive;
nActive += nActives[I]; nActive += nActives[I];
} }
uInt maxActive = 1; Int maxActive = 1;
if (mode >= 3) if (mode >= 3)
for (auto &ors : outputRows) for (auto &ors : outputRows)
for (auto &row : ors) for (auto &row : ors)
maxActive = std::max(maxActive, (uInt)row.size()); maxActive = std::max(maxActive, (Int)row.size());
rules.resize(2); rules.resize(2);
rules[0].push_back(mode); rules[0].push_back(mode);
...@@ -247,7 +245,7 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords, ...@@ -247,7 +245,7 @@ void blRules(SparseGrids<dimension> &SGs, RuleBook &rules, long *coords,
auto rr = &rule[SGs[I].ctr * (maxActive + 1)]; auto rr = &rule[SGs[I].ctr * (maxActive + 1)];
for (auto &row : ors) { for (auto &row : ors) {
rr[0] = row.size(); rr[0] = row.size();
for (int i = 0; i < row.size(); ++i) for (Int i = 0; i < (Int)row.size(); ++i)
rr[i + 1] = row[i]; rr[i + 1] = row[i];
rr += 1 + maxActive; rr += 1 + maxActive;
} }
......
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#include "Metadata.h"
#include "ActivePoolingRules.h"
#include "ConvolutionRules.h"
#include "FullConvolutionRules.h"
#include "IOLayersRules.h"
#include "RandomizedStrideRules.h"
#include "SubmanifoldConvolutionRules.h"
// Default-construct an empty per-sample grid with row offset 0.
template <Int dimension> SparseGrid<dimension>::SparseGrid() : ctr(0) {
  // google::dense_hash_map requires one key value that is reserved and
  // never inserted; the all-(-1) point is set aside for that purpose.
  Point<dimension> empty_key;
  empty_key.fill(-1);
  mp.set_empty_key(empty_key);
}
// Typed pointer to the tensor's storage, or nullptr when the tensor is
// empty (optional arguments are passed in as zero-element tensors).
template <typename T> T *OptionalTensorData(at::Tensor tensor) {
  if (tensor.numel() == 0)
    return nullptr;
  return tensor.data<T>();
}
// Insert point p into the sparse-grid hash map, growing the feature matrix
// by one row and copying nPlanes floats from vec into that new row.
// If p is already present, the existing feature row is overwritten from vec
// only when `overwrite` is true; otherwise the call is a no-op.
// nActive is incremented for each newly inserted point.
template <Int dimension>
void addPointToSparseGridMapAndFeatures(SparseGridMap<dimension> &mp,
                                        Point<dimension> p, Int &nActive,
                                        long nPlanes,
                                        /*float*/ at::Tensor features,
                                        float *vec, bool overwrite) {
  auto iter = mp.find(p);
  if (iter == mp.end()) {
    // New point: its row index is the pre-increment value of nActive.
    iter = mp.insert(std::make_pair(p, nActive++)).first;
    // Grow features to nActive rows, then fill the last row.
    // NOTE(review): resize_ per inserted point is O(points) reallocations in
    // the worst case — acceptable here, but confirm ATen over-allocates.
    features.resize_({(int)nActive, nPlanes});
    std::memcpy(features.data<float>() + (nActive - 1) * nPlanes, vec,
                sizeof(float) * nPlanes);
  } else if (overwrite) {
    std::memcpy(features.data<float>() + iter->second * nPlanes, vec,
                sizeof(float) * nPlanes);
  }
}
// Seed the random engine (used by the randomized-stride rule generator)
// from the wall clock so each Metadata instance gets a distinct stream.
template <Int dimension>
Metadata<dimension>::Metadata()
    : re(std::chrono::system_clock::now().time_since_epoch().count()) {}
// Reset the metadata to its freshly-constructed state: drop all cached
// sparse grids, rule books and active-site counts, and null the input-layer
// pointers (they point into the maps being cleared).
// Fix: the original cleared inputLayerRuleBook twice; each container is now
// cleared exactly once.
template <Int dimension> void Metadata<dimension>::clear() {
  nActive.clear();
  grids.clear();
  activePoolingRuleBooks.clear();
  inputLayerRuleBook.clear();
  blLayerRuleBook.clear();
  validRuleBooks.clear();
  ruleBooks.clear();
  fullConvolutionRuleBooks.clear();
  sparseToDenseRuleBooks.clear();
  // Non-owning pointers into grids/nActive; null them so stale use faults
  // loudly instead of reading freed buckets.
  inputSGs = nullptr;
  inputSG = nullptr;
  inputNActive = nullptr;
}
// Number of active sites at the given spatial scale.
// NOTE: map operator[] default-inserts a 0 entry for unseen scales.
template <Int dimension>
Int Metadata<dimension>::getNActive(/*long*/ at::Tensor spatialSize) {
  return nActive[LongTensorToPoint<dimension>(spatialSize)];
};
// Per-sample sparse grids at the given spatial scale.
// NOTE: map operator[] default-inserts an empty entry for unseen scales.
template <Int dimension>
SparseGrids<dimension> &
Metadata<dimension>::getSparseGrid(/*long*/ at::Tensor spatialSize) {
  return grids[LongTensorToPoint<dimension>(spatialSize)];
};
// Declare the spatial size of the network input and cache pointers to the
// input-scale grids and active-site counter (creating map entries if absent).
// Must be called before any of the input-building methods below.
template <Int dimension>
void Metadata<dimension>::setInputSpatialSize(/*long*/ at::Tensor spatialSize) {
  inputSpatialSize = LongTensorToPoint<dimension>(spatialSize);
  inputSGs = &grids[inputSpatialSize];
  inputNActive = &nActive[inputSpatialSize];
}
// Append a fresh, empty sample to the input batch and make it the current
// sample (the target of setInputSpatialLocation).
template <Int dimension> void Metadata<dimension>::batchAddSample() {
  assert(inputSGs && "Call setInputSpatialSize first, please!");
  inputSGs->emplace_back();
  inputSG = &inputSGs->back();
}
// Add a single active point (given by `location`) to the current input
// sample, appending/overwriting its feature row from `vec`.
// vec.size(0) is taken as the number of feature planes.
template <Int dimension>
void Metadata<dimension>::setInputSpatialLocation(/*float*/ at::Tensor features,
                                                  /*long*/ at::Tensor location,
                                                  /*float*/ at::Tensor vec,
                                                  bool overwrite) {
  auto p = LongTensorToPoint<dimension>(location);
  // Current sample's grid; requires batchAddSample()/setInputSpatialSize().
  SparseGridMap<dimension> &mp = inputSG->mp;
  Int &nActive = *inputNActive;
  auto nPlanes = vec.size(0);
  addPointToSparseGridMapAndFeatures<dimension>(
      mp, p, nActive, nPlanes, features, vec.data<float>(), overwrite);
}
// Add many active points at once. Two layouts for `locations`:
//  - nPoints x dimension: coordinates only, all added to the *current*
//    sample (inputSG);
//  - nPoints x (dimension+1): last column is the batch index, and new
//    samples are appended to the batch as needed.
// Each row of `vecs` supplies the feature vector for the matching point.
template <Int dimension>
void Metadata<dimension>::setInputSpatialLocations(
    /*float*/ at::Tensor features,
    /*long*/ at::Tensor locations,
    /*float*/ at::Tensor vecs, bool overwrite) {
  /* assert(locations.ndimension() == 2 and "locations must be 2
   * dimensional!"); */
  /* assert(vecs.ndimension() == 2 and "vecs must be 2 dimensional!"); */
  /* assert(locations.size(0) == vecs.size(0) and */
  /*        "Location.size(0) and vecs.size(0) must be equal!"); */
  /* assert((locations.size(1) == dimension or */
  /*         locations.size(1) == 1 + dimension) and */
  /*        "locations.size(0) must be either dimension or dimension+1"); */
  Point<dimension> p;
  Int &nActive = *inputNActive;
  auto nPlanes = vecs.size(1);
  long *l = locations.data<long>();
  float *v = vecs.data<float>();
  if (locations.size(1) == dimension) {
    // add points to current sample
    assert(inputSG);
    SparseGridMap<dimension> &mp = inputSG->mp;
    for (Int idx = 0; idx < locations.size(0); ++idx) {
      for (Int d = 0; d < dimension; ++d)
        p[d] = *l++;
      addPointToSparseGridMapAndFeatures<dimension>(mp, p, nActive, nPlanes,
                                                    features, v, overwrite);
      v += nPlanes;
    }
  }
  if (locations.size(1) == dimension + 1) {
    // add new samples to batch as necessary
    auto &SGs = *inputSGs;
    for (Int idx = 0; idx < locations.size(0); ++idx) {
      for (Int d = 0; d < dimension; ++d)
        p[d] = *l++;
      // Batch index sits in the last column of the row.
      Int batch = *l++;
      if (batch >= (Int)SGs.size()) {
        SGs.resize(batch + 1);
      }
      SparseGridMap<dimension> &mp = SGs[batch].mp;
      addPointToSparseGridMapAndFeatures<dimension>(mp, p, nActive, nPlanes,
                                                    features, v, overwrite);
      v += nPlanes;
    }
  }
}
// Export the active-site coordinates at the given spatial scale into
// `locations`, one row per active site: (x_0, ..., x_{dimension-1}, sample).
// Rows are placed at each site's global row number (per-sample map value
// plus the sample's ctr offset), matching the feature-matrix row order.
// Fix: the per-sample hash map is now taken by reference — the original
// `auto mp = SGs[i].mp;` deep-copied the whole dense_hash_map per sample.
template <Int dimension>
void Metadata<dimension>::getSpatialLocations(/*long*/ at::Tensor spatialSize,
                                              /*long*/ at::Tensor locations) {
  Int nActive = getNActive(spatialSize);
  auto &SGs = getSparseGrid(spatialSize);
  Int batchSize = SGs.size();
  locations.resize_({(int)nActive, dimension + 1});
  locations.zero_();
  auto lD = locations.data<long>();
  for (Int i = 0; i < batchSize; i++) {
    auto &mp = SGs[i].mp; // by reference: avoid copying the hash map
    auto offset = SGs[i].ctr;
    for (auto it = mp.begin(); it != mp.end(); ++it) {
      for (Int d = 0; d < dimension; ++d) {
        lD[(it->second + offset) * (dimension + 1) + d] = it->first[d];
      }
      // Last column records which sample in the batch the site belongs to.
      lD[(it->second + offset) * (dimension + 1) + dimension] = i;
    }
  }
}
// Build input metadata from a dense tensor's nonzero locations.
// nz_ is an nActive x (dimension+1) long tensor; column 0 is the batch
// index, columns 1..dimension are the spatial coordinates.
// NOTE(review): the boundary scan assumes nz_ rows are sorted by
// non-decreasing batch index — confirm against the caller.
template <Int dimension>
void Metadata<dimension>::createMetadataForDenseToSparse(
    /*long*/ at::Tensor spatialSize,
    /*long*/ at::Tensor nz_, long batchSize) {
  clear();
  setInputSpatialSize(spatialSize);
  inputSGs->resize(batchSize);
  auto &nActive = *inputNActive;
  nActive = nz_.size(0);
  long *nz = nz_.data<long>();
  // br[b]..br[b+1] is the half-open row range belonging to sample b.
  std::vector<Int> br(batchSize + 1);
  if (batchSize == 1) {
    br[1] = nActive;
  } else {
    long b = 0;
    for (Int i = 0; i < nActive; i++) {
      long B = nz[i * (dimension + 1)];
      for (; b < B;)
        br[++b] = i;
    }
    // Trailing samples with no active sites get empty ranges.
    for (; b < batchSize;)
      br[++b] = nActive;
  }
  Int b;
#pragma omp parallel for private(b)
  for (b = 0; b < batchSize; b++) {
    auto &sg = inputSGs->at(b);
    for (Int i = br[b]; i < br[b + 1]; i++) {
      Point<dimension> x;
      for (Int j = 0; j < dimension; j++) {
        x[j] = nz[i * (dimension + 1) + j + 1]; // 0-indexed
      }
      // Map each point to its global row index in the dense nz_ listing.
      sg.mp[x] = i;
    }
  }
}
// Produce a thinned copy of this metadata at scale `spatialSize` in mOut,
// keeping only sites whose byte in `filter` is nonzero.
// cuSum is the cumulative sum of filter; cuSum[n]-1 becomes the new global
// row index of surviving site n, and its last entry is the new nActive.
// NOTE(review): output grids keep ctr == 0 and store *global* row indices
// directly — presumably consumers of sparsified metadata expect this;
// confirm before changing.
template <Int dimension>
void Metadata<dimension>::sparsifyMetadata(Metadata<dimension> &mOut,
                                           /*long*/ at::Tensor spatialSize,
                                           /*byte*/ at::Tensor filter,
                                           /*long*/ at::Tensor cuSum) {
  // Create a new SparseGrids with fewer entries.
  mOut.clear();
  auto p = LongTensorToPoint<dimension>(spatialSize);
  auto &sgsIn = grids[p];
  auto &sgsOut = mOut.grids[p];
  sgsOut.resize(sgsIn.size());
  if (filter.ndimension() == 1) {
    auto f = filter.data<unsigned char>();
    auto cs = cuSum.data<long>();
    auto nActive = cs[cuSum.numel() - 1];
    mOut.nActive[p] = nActive;
    Int sample;
#pragma omp parallel for private(sample)
    for (sample = 0; sample < (Int)sgsIn.size(); ++sample) {
      auto &sgIn = sgsIn[sample];
      auto &sgOut = sgsOut[sample];
      for (auto const &iter : sgIn.mp) {
        // Global row index of this site in the un-sparsified ordering.
        auto n = iter.second + sgIn.ctr;
        if (f[n])
          sgOut.mp[iter.first] = cs[n] - 1;
      }
    }
  } else {
    // Non-1D filter: treat as "keep nothing".
    mOut.nActive[p] = 0;
  }
}
// tensor is size[0] x .. x size[dimension-1] x size[dimension]
// size[0] x .. x size[dimension-1] == spatial volume
// size[dimension] == #feature planes
// Append one sample to the input batch from a dense tensor: every location
// whose feature vector has at least one entry with |value| > threshold, and
// which falls inside [0, spatialSize) after applying `offset_`, becomes an
// active site; its feature row is appended to features_.
template <Int dimension>
void Metadata<dimension>::addSampleFromThresholdedTensor(
    /*float*/ at::Tensor features_,
    /*float*/ at::Tensor tensor_,
    /*long*/ at::Tensor offset_,
    /*long*/ at::Tensor spatialSize_, float threshold) {
  auto &nActive = *inputNActive;
  auto &SGs = *inputSGs;
  SGs.resize(SGs.size() + 1); // new sample becomes the last grid
  auto &sg = SGs.back();
  auto tensor = tensor_.data<float>();
  auto offset = offset_.data<long>();
  auto spatialSize = spatialSize_.data<long>();
  long size[dimension + 1]; // IntList?
  for (Int i = 0; i <= dimension; ++i)
    size[i] = tensor_.size(i); // std::vector<long> size = tensor_.size();
  auto nPlanes = size[dimension];
  long volume = 1;
  for (Int i = 0; i < dimension; ++i)
    volume *= size[i];
  // Pre-grow to the worst case (all sites active); trimmed at the end.
  features_.resize_({(int)(nActive + volume), nPlanes});
  // Increment pointers as we work through the data
  auto features = features_.data<float>() + nActive * nPlanes;
  // Active locations
  Point<dimension> point;
  for (Int i = 0; i < dimension; i++)
    point[i] = offset[i];
  for (Int ctr = 0; ctr < volume; ctr++) {
    // Active if any feature plane exceeds the threshold in magnitude ...
    bool active = false;
    for (Int i = 0; i < nPlanes; i++) {
      if (fabs(tensor[i]) > threshold) {
        active = true;
        break;
      }
    }
    // ... and the (offset) location lies inside the spatial extent.
    for (Int i = 0; i < dimension; i++) {
      if (point[i] < 0 or point[i] >= spatialSize[i]) {
        active = false;
        break;
      }
    }
    if (active) {
      sg.mp[point] = nActive++;
      std::memcpy(features, tensor, sizeof(float) * nPlanes);
      features += nPlanes;
    }
    tensor += nPlanes;
    // Walk the dense tensor in row-major order over the spatial box.
    incrementPointInCube<dimension>(point, size, offset);
  }
  // Trim features_ back down to the rows actually written.
  features_.resize_({(int)nActive, nPlanes});
}
// 3x3 submanifold convolutions, 3x3/2x2 pooling or strided convolutions
// Pre-build rule books for a pyramid of scales: at each level, a 3^d
// submanifold-convolution rule book, then a size-3/stride-2 downsampling
// rule book to the next (roughly halved) scale. Stops once any dimension
// is < 3 or not odd (so (inS-1)/2 would no longer be exact).
template <Int dimension> void Metadata<dimension>::generateRuleBooks3s2() {
  long sz[dimension], str[dimension], inS[dimension], outS[dimension];
  // p1 keys grids by scale; p2 keys submanifold rule books by
  // (scale, filterSize); p3 keys conv rule books by (scale, size, stride).
  Point<dimension> p1;
  Point<2 * dimension> p2;
  Point<3 * dimension> p3;
  for (Int i = 0; i < dimension; ++i) {
    p1[i] = p2[i] = p3[i] = inS[i] = inputSpatialSize[i];
    p2[i + dimension] = p3[i + dimension] = sz[i] = 3;
    p3[i + 2 * dimension] = str[i] = 2;
  }
  while (true) {
    auto &SGs = grids[p1];
    auto &rb = validRuleBooks[p2];
    if (rb.empty())
      SubmanifoldConvolution_SgsToRules(SGs, rb, sz);
    for (Int i = 0; i < dimension; ++i)
      if (p1[i] < 3 or p1[i] % 2 != 1)
        return; // cannot downsample exactly any further
      else
        p1[i] = outS[i] = (inS[i] - 1) / 2;
    auto &SGs2 = grids[p1];
    auto &rb2 = ruleBooks[p3];
    if (rb2.empty())
      nActive[p1] = Convolution_InputSgsToRulesAndOutputSgs(SGs, SGs2, rb2, sz,
                                                            str, inS, outS);
    for (Int i = 0; i < dimension; ++i)
      p2[i] = p3[i] = inS[i] = outS[i];
  }
}
// 3x3 submanifold convolutions, 2x2 pooling or strided convolutions
// As generateRuleBooks3s2, but downsampling uses size-2/stride-2 filters:
// each level gets a 3^d submanifold rule book and a 2/2 pooling/conv rule
// book. Stops once any dimension is < 2 or odd (inS/2 no longer exact).
template <Int dimension> void Metadata<dimension>::generateRuleBooks2s2() {
  long s2[dimension], s3[dimension], inS[dimension], outS[dimension];
  // p1: grid scale key; p2: (scale, 3) submanifold key;
  // p3: (scale, 2, 2) downsampling-conv key.
  Point<dimension> p1;
  Point<2 * dimension> p2;
  Point<3 * dimension> p3;
  for (Int i = 0; i < dimension; ++i) {
    p1[i] = p2[i] = p3[i] = inS[i] = inputSpatialSize[i];
    p2[i + dimension] = s3[i] = 3;
    p3[i + dimension] = p3[i + 2 * dimension] = s2[i] = 2;
  }
  while (true) {
    auto &SGs = grids[p1];
    auto &rb = validRuleBooks[p2];
    if (rb.empty())
      SubmanifoldConvolution_SgsToRules(SGs, rb, s3);
    for (Int i = 0; i < dimension; ++i)
      if (p1[i] < 2 or p1[i] % 2 != 0)
        return; // cannot halve exactly any further
      else
        p1[i] = outS[i] = inS[i] / 2;
    auto &SGs2 = grids[p1];
    auto &rb2 = ruleBooks[p3];
    if (rb2.empty())
      nActive[p1] = Convolution_InputSgsToRulesAndOutputSgs(SGs, SGs2, rb2, s2,
                                                            s2, inS, outS);
    for (Int i = 0; i < dimension; ++i)
      p2[i] = p3[i] = inS[i] = outS[i];
  }
}
// Build the input-layer rule book from a flat nPoints x {dim | dim+1}
// coordinate tensor (optional trailing column: batch index), populating the
// input-scale grids and active-site count as a side effect.
template <Int dimension>
void Metadata<dimension>::inputLayer(/*long*/ at::Tensor spatialSize,
                                     /*long*/ at::Tensor coords, Int batchSize,
                                     Int mode) {
  assert(spatialSize.ndimension() == 1);
  assert(spatialSize.size(0) == dimension);
  assert(coords.ndimension() == 2);
  assert(coords.size(1) >= dimension and coords.size(1) <= dimension + 1);
  setInputSpatialSize(spatialSize);
  inputLayerRules<dimension>(*inputSGs, inputLayerRuleBook, coords.data<long>(),
                             coords.size(0), coords.size(1), batchSize, mode,
                             *inputNActive);
}
// Build the batch-length-layer rule book from a batchSize x length x dim
// coordinate tensor (fixed number of points per sample), populating the
// input-scale grids and active-site count as a side effect.
template <Int dimension>
void Metadata<dimension>::blLayer(/*long*/ at::Tensor spatialSize,
                                  /*long*/ at::Tensor coords, Int mode) {
  assert(spatialSize.ndimension() == 1);
  assert(spatialSize.size(0) == dimension);
  assert(coords.ndimension() == 3);
  assert(coords.size(2) == dimension);
  setInputSpatialSize(spatialSize);
  blRules<dimension>(*inputSGs, blLayerRuleBook, coords.data<long>(),
                     coords.size(0), coords.size(1), mode, *inputNActive);
}
// Return (building and caching on first use) the submanifold-convolution
// rule book for (spatialSize, filterSize). With ENABLE_OPENMP the #if
// splices a ternary so `openMP` selects the parallel builder; without it
// the statement collapses to just the serial call.
template <Int dimension>
RuleBook &
Metadata<dimension>::getSubmanifoldRuleBook(/*long*/ at::Tensor spatialSize,
                                            /*long*/ at::Tensor size,
                                            bool openMP) {
  auto p = TwoLongTensorsToPoint<dimension>(spatialSize, size);
  auto &rb = validRuleBooks[p];
  if (rb.empty()) {
    auto &SGs = grids[LongTensorToPoint<dimension>(spatialSize)];
#if defined(ENABLE_OPENMP)
    openMP ? SubmanifoldConvolution_SgsToRules_OMP(SGs, rb, size.data<long>()) :
#endif
           SubmanifoldConvolution_SgsToRules(SGs, rb, size.data<long>());
  }
  return rb;
}
// Return (building and caching on first use) the active-pooling rule book
// for the given spatial size.
template <Int dimension>
RuleBook &
Metadata<dimension>::getActivePoolingRuleBook(/*long*/ at::Tensor spatialSize) {
  auto key = LongTensorToPoint<dimension>(spatialSize);
  auto &sparseGrids = grids[key];
  auto &ruleBook = activePoolingRuleBooks[key];
  if (ruleBook.empty())
    activePoolingRules(sparseGrids, ruleBook);
  return ruleBook;
}
// Return (building and caching on first use) the sparse-to-dense rule book
// for the given spatial size. Note the #if splice: with OpenMP enabled the
// `if (rb.empty())` guards a single ternary expression statement; without
// it, only the serial call remains under the if.
template <Int dimension>
RuleBook &
Metadata<dimension>::getSparseToDenseRuleBook(/*long*/ at::Tensor spatialSize,
                                              bool openMP) {
  auto ss = LongTensorToPoint<dimension>(spatialSize);
  auto &SGs = grids[ss];
  auto &rb = sparseToDenseRuleBooks[ss];
  if (rb.empty())
#if defined(ENABLE_OPENMP)
    openMP ? SparseToDense_InputSgsToRulesAndOutputSgs_OMP(
                 SGs, rb, spatialSize.data<long>())
           :
#endif
           SparseToDense_InputSgsToRulesAndOutputSgs(SGs, rb,
                                                     spatialSize.data<long>());
  return rb;
}
// Return (building and caching on first use) the strided-convolution rule
// book keyed by (inputSpatialSize, filterSize, stride). Building it also
// creates the output-scale grids and records the output active-site count.
template <Int dimension>
RuleBook &
Metadata<dimension>::getRuleBook(/*long*/ at::Tensor inputSpatialSize,
                                 /*long*/ at::Tensor outputSpatialSize,
                                 /*long*/ at::Tensor size,
                                 /*long*/ at::Tensor stride, bool openMP) {
  auto p = ThreeLongTensorsToPoint<dimension>(inputSpatialSize, size, stride);
  auto &rb = ruleBooks[p];
  if (rb.empty()) {
    auto iS = LongTensorToPoint<dimension>(inputSpatialSize);
    auto oS = LongTensorToPoint<dimension>(outputSpatialSize);
    auto &iSGs = grids[iS];
    auto &oSGs = grids[oS];
    // With OpenMP compiled in, `openMP` picks the parallel builder.
    nActive[oS] =
#if defined(ENABLE_OPENMP)
        openMP
            ? Convolution_InputSgsToRulesAndOutputSgs_OMP(
                  iSGs, oSGs, rb, size.data<long>(), stride.data<long>(),
                  inputSpatialSize.data<long>(), outputSpatialSize.data<long>())
            :
#endif
            Convolution_InputSgsToRulesAndOutputSgs(
                iSGs, oSGs, rb, size.data<long>(), stride.data<long>(),
                inputSpatialSize.data<long>(), outputSpatialSize.data<long>());
  }
  return rb;
}
// Return (building and caching on first use) the full-convolution rule book
// keyed by (inputSpatialSize, filterSize, stride). Building it resets newM
// and populates it with a copy of the input-scale grids plus the generated
// output-scale grids/active count. Always uses the OMP builder (no openMP
// flag on this method).
template <Int dimension>
RuleBook &Metadata<dimension>::getFullConvolutionRuleBook(
    /*long*/ at::Tensor inputSpatialSize,
    /*long*/ at::Tensor outputSpatialSize,
    /*long*/ at::Tensor size,
    /*long*/ at::Tensor stride, Metadata<dimension> &newM) {
  auto p = ThreeLongTensorsToPoint<dimension>(inputSpatialSize, size, stride);
  auto &rb = fullConvolutionRuleBooks[p];
  if (rb.empty()) {
    newM.clear();
    auto iS = LongTensorToPoint<dimension>(inputSpatialSize);
    auto oS = LongTensorToPoint<dimension>(outputSpatialSize);
    newM.grids[iS] = grids[iS]; // copy
    newM.nActive[iS] = nActive[iS];
    auto &iSGs = newM.grids[iS];
    auto &oSGs = newM.grids[oS];
    newM.nActive[oS] = FullConvolution_InputSgsToRulesAndOutputSgs_OMP(
        iSGs, oSGs, rb, size.data<long>(), stride.data<long>(),
        inputSpatialSize.data<long>(), outputSpatialSize.data<long>());
  }
  return rb;
}
// Return (building and caching on first use) a randomized-stride rule book.
// NOTE(review): results are cached in `ruleBooks`, the same map used by
// getRuleBook with the same (inputSize, size, stride) key — confirm a
// network never requests both variants for identical parameters.
template <Int dimension>
RuleBook &Metadata<dimension>::getRandomizedStrideRuleBook(
    /*long*/ at::Tensor inputSpatialSize,
    /*long*/ at::Tensor outputSpatialSize,
    /*long*/ at::Tensor size,
    /*long*/ at::Tensor stride, bool openMP) {
  auto p = ThreeLongTensorsToPoint<dimension>(inputSpatialSize, size, stride);
  auto &rb = ruleBooks[p];
  if (rb.empty()) {
    auto iS = LongTensorToPoint<dimension>(inputSpatialSize);
    auto oS = LongTensorToPoint<dimension>(outputSpatialSize);
    auto &iSGs = grids[iS];
    auto &oSGs = grids[oS];
    // `re` supplies the randomness for the stride-tick shuffle.
    nActive[oS] =
#if defined(ENABLE_OPENMP)
        openMP
            ? RSR_InputSgsToRulesAndOutputSgs_OMP(
                  iSGs, oSGs, rb, size.data<long>(), stride.data<long>(),
                  inputSpatialSize.data<long>(), outputSpatialSize.data<long>(),
                  re)
            :
#endif
            RSR_InputSgsToRulesAndOutputSgs(iSGs, oSGs, rb, size.data<long>(),
                                            stride.data<long>(),
                                            inputSpatialSize.data<long>(),
                                            outputSpatialSize.data<long>(), re);
  }
  return rb;
}
// Product of the first `dimension` entries of `point`: the number of sites
// in a box with those side lengths.
template <Int dimension> Int volume(long *point) {
  Int product = 1;
  for (Int d = dimension; d-- > 0;)
    product *= point[d];
  return product;
}
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef Metadata_H
#define Metadata_H
#include "32bits.h"
#include <array>
#include <chrono>
#include <cstdint>
#include <google/dense_hash_map>
#include <iostream>
#include <random>
#include <string>
#include <unordered_map>
#include <vector>
// Hash map: lattice point -> row index within one sample's grid.
// google::dense_hash_map is used for speed; it requires a reserved
// never-inserted "empty" key, set in the SparseGrid constructor.
template <Int dimension>
using SparseGridMap =
    google::dense_hash_map<Point<dimension>, Int, IntArrayHash<dimension>,
                           std::equal_to<Point<dimension>>>;
// Active sites of one sample at one spatial scale.
template <Int dimension> class SparseGrid {
public:
  // Offset of this sample's rows within the batch-wide feature matrix.
  Int ctr;
  // Active point -> per-sample row index (add ctr for the global row).
  SparseGridMap<dimension> mp;
  SparseGrid();
};
// One SparseGrid per sample in the minibatch.
template <Int dimension> using SparseGrids = std::vector<SparseGrid<dimension>>;
// Rule book: lists of row indices describing input->output connections.
using RuleBook = std::vector<std::vector<Int>>;
// Insert point p (with feature row `vec`) into a grid/feature matrix;
// defined in Metadata.cpp.
template <Int dimension>
void addPointToSparseGridMapAndFeatures(SparseGridMap<dimension> &mp,
                                        Point<dimension> p, Int &nActive,
                                        long nPlanes,
                                        /*float*/ at::Tensor features,
                                        float *vec, bool overwrite);
// All bookkeeping for one sparse network input: per-scale grids of active
// sites, per-scale active-site counts, and caches of the rule books that
// drive each layer type.
template <Int dimension> class Metadata {
public:
  // Count of active sites for each scale
  std::unordered_map<Point<dimension>, Int, IntArrayHash<dimension>> nActive;
  // Hash tables for each scale locating the active points
  std::unordered_map<Point<dimension>, SparseGrids<dimension>,
                     IntArrayHash<dimension>>
      grids;
  // Active (global) pooling rule books, one per spatial scale.
  std::unordered_map<Point<dimension>, RuleBook, IntArrayHash<dimension>>
      activePoolingRuleBooks;
  // Rule book built by inputLayer() from raw input coordinates.
  RuleBook inputLayerRuleBook;
  // Rule book built by blLayer() (batch x length x dimension coordinates).
  RuleBook blLayerRuleBook;
  // Submanifold-convolution rule books, keyed by (spatialSize, filterSize).
  std::unordered_map<Point<2 * dimension>, RuleBook,
                     IntArrayHash<2 * dimension>>
      validRuleBooks;
  // Convolution rule books, keyed by (inputSize, filterSize, stride).
  std::unordered_map<Point<3 * dimension>, RuleBook,
                     IntArrayHash<3 * dimension>>
      ruleBooks;
  // Full-convolution rule books, keyed like ruleBooks.
  std::unordered_map<Point<3 * dimension>, RuleBook,
                     IntArrayHash<3 * dimension>>
      fullConvolutionRuleBooks;
  // Sparse-to-dense rule books, one per spatial scale.
  std::unordered_map<Point<dimension>, RuleBook, IntArrayHash<dimension>>
      sparseToDenseRuleBooks;
  // Spatial size of the network input; set by setInputSpatialSize().
  Point<dimension> inputSpatialSize;
  // Non-owning pointers into grids/nActive at the input scale; valid only
  // after setInputSpatialSize() and invalidated by clear().
  SparseGrids<dimension> *inputSGs;
  SparseGrid<dimension> *inputSG;
  Int *inputNActive;
  // Randomness source for getRandomizedStrideRuleBook().
  std::default_random_engine re;
  Metadata();
  void clear();
  Int getNActive(/*long*/ at::Tensor spatialSize);
  SparseGrids<dimension> &getSparseGrid(/*long*/ at::Tensor spatialSize);
  void setInputSpatialSize(/*long*/ at::Tensor spatialSize);
  void batchAddSample();
  void setInputSpatialLocation(/*float*/ at::Tensor features,
                               /*long*/ at::Tensor location,
                               /*float*/ at::Tensor vec, bool overwrite);
  void setInputSpatialLocations(/*float*/ at::Tensor features,
                                /*long*/ at::Tensor locations,
                                /*float*/ at::Tensor vecs, bool overwrite);
  void getSpatialLocations(/*long*/ at::Tensor spatialSize,
                           /*long*/ at::Tensor locations);
  void createMetadataForDenseToSparse(/*long*/ at::Tensor spatialSize,
                                      /*long*/ at::Tensor nz_, long batchSize);
  void sparsifyMetadata(Metadata<dimension> &mOut,
                        /*long*/ at::Tensor spatialSize,
                        /*byte*/ at::Tensor filter,
                        /*long*/ at::Tensor cuSum);
  // tensor is size[0] x .. x size[dimension-1] x size[dimension]
  // size[0] x .. x size[dimension-1] == spatial volume
  // size[dimension] == #feature planes
  void addSampleFromThresholdedTensor(/*float*/ at::Tensor features_,
                                      /*float*/ at::Tensor tensor_,
                                      /*long*/ at::Tensor offset_,
                                      /*long*/ at::Tensor spatialSize_,
                                      float threshold);
  // 3x3 submanifold convolutions, 3x3/2x2 pooling or strided convolutions
  void generateRuleBooks3s2();
  // 3x3 submanifold convolutions, 2x2 pooling or strided convolutions
  void generateRuleBooks2s2();
  void inputLayer(/*long*/ at::Tensor spatialSize,
                  /*long*/ at::Tensor coords, Int batchSize, Int mode);
  void blLayer(/*long*/ at::Tensor spatialSize, /*long*/ at::Tensor coords,
               Int mode);
  RuleBook &getSubmanifoldRuleBook(/*long*/ at::Tensor spatialSize,
                                   /*long*/ at::Tensor size, bool openMP);
  RuleBook &getActivePoolingRuleBook(/*long*/ at::Tensor spatialSize);
  RuleBook &getSparseToDenseRuleBook(/*long*/ at::Tensor spatialSize,
                                     bool openMP);
  RuleBook &getRuleBook(/*long*/ at::Tensor inputSpatialSize,
                        /*long*/ at::Tensor outputSpatialSize,
                        /*long*/ at::Tensor size,
                        /*long*/ at::Tensor stride, bool openMP);
  RuleBook &getFullConvolutionRuleBook(/*long*/ at::Tensor inputSpatialSize,
                                       /*long*/ at::Tensor outputSpatialSize,
                                       /*long*/ at::Tensor size,
                                       /*long*/ at::Tensor stride,
                                       Metadata<dimension> &newM);
  RuleBook &getRandomizedStrideRuleBook(/*long*/ at::Tensor inputSpatialSize,
                                        /*long*/ at::Tensor outputSpatialSize,
                                        /*long*/ at::Tensor size,
                                        /*long*/ at::Tensor stride,
                                        bool openMP);
};
// Typed data pointer, or nullptr for an empty tensor (defined in Metadata.cpp).
template <typename T> T *OptionalTensorData(at::Tensor tensor);
// Product of the first `dimension` entries of `point`.
template <Int dimension> Int volume(long *point);
#endif
...@@ -11,27 +11,28 @@ ...@@ -11,27 +11,28 @@
class RSRTicks { class RSRTicks {
public: public:
std::vector<uInt> inputL; std::vector<Int> inputL;
std::vector<uInt> inputR; std::vector<Int> inputR;
std::vector<uInt> outputL; std::vector<Int> outputL;
std::vector<uInt> outputR; std::vector<Int> outputR;
RSRTicks(uInt input_spatialSize, uInt output_spatialSize, uInt size, uInt stride, std::default_random_engine re) { RSRTicks(Int input_spatialSize, Int output_spatialSize, Int size, Int stride,
std::vector<uInt> steps; std::default_random_engine re) {
//steps.resize(output_spatialSize/3,stride-1); std::vector<Int> steps;
//steps.resize(output_spatialSize/3*2,stride+1); // steps.resize(output_spatialSize/3,stride-1);
steps.resize(output_spatialSize-1,stride); // steps.resize(output_spatialSize/3*2,stride+1);
steps.resize(output_spatialSize - 1, stride);
std::shuffle(steps.begin(), steps.end(), re); std::shuffle(steps.begin(), steps.end(), re);
inputL.push_back(0); inputL.push_back(0);
inputR.push_back(size-1); inputR.push_back(size - 1);
for (auto step : steps) { for (auto step : steps) {
inputL.push_back(inputL.back()+step); inputL.push_back(inputL.back() + step);
inputR.push_back(inputR.back()+step); inputR.push_back(inputR.back() + step);
} }
assert(inputR.back() == input_spatialSize - 1); assert(inputR.back() == input_spatialSize - 1);
outputL.resize(input_spatialSize, output_spatialSize); outputL.resize(input_spatialSize, output_spatialSize);
outputR.resize(input_spatialSize, 0); outputR.resize(input_spatialSize, 0);
for (uInt i = 0; i < output_spatialSize; i++) { for (Int i = 0; i < output_spatialSize; i++) {
for (uInt j = inputL[i]; j <= inputR[i]; j++) { for (Int j = inputL[i]; j <= inputR[i]; j++) {
outputL[j] = std::min(outputL[j], i); outputL[j] = std::min(outputL[j], i);
outputR[j] = std::max(outputR[j], i); outputR[j] = std::max(outputR[j], i);
} }
...@@ -42,74 +43,76 @@ public: ...@@ -42,74 +43,76 @@ public:
typedef std::vector<RSRTicks> RSRTicksV; typedef std::vector<RSRTicks> RSRTicksV;
RSRTicksV RSRRegions(long *input_spatialSize, long *output_spatialSize, RSRTicksV RSRRegions(long *input_spatialSize, long *output_spatialSize,
uInt dimension, long *size, long *stride, std::default_random_engine re) { Int dimension, long *size, long *stride,
std::default_random_engine re) {
RSRTicksV t; RSRTicksV t;
for (uInt i = 0; i < dimension; i++) for (Int i = 0; i < dimension; i++)
t.emplace_back(RSRTicks(input_spatialSize[i], output_spatialSize[i], t.emplace_back(RSRTicks(input_spatialSize[i], output_spatialSize[i],
size[i], stride[i], re)); size[i], stride[i], re));
return t; return t;
} }
template <uInt dimension> template <Int dimension>
RectangularRegion<dimension> RectangularRegion<dimension>
RSRInputRegionCalculator(const Point<dimension> &output, RSRTicksV &t) { RSRInputRegionCalculator(const Point<dimension> &output, RSRTicksV &t) {
Point<dimension> lb, ub; Point<dimension> lb, ub;
for (uInt i = 0; i < dimension; i++) { for (Int i = 0; i < dimension; i++) {
lb[i] = t[i].inputL[output[i]]; lb[i] = t[i].inputL[output[i]];
ub[i] = t[i].inputR[output[i]]; ub[i] = t[i].inputR[output[i]];
} }
return RectangularRegion<dimension>(lb, ub); return RectangularRegion<dimension>(lb, ub);
} }
template <uInt dimension> template <Int dimension>
RectangularRegion<dimension> RectangularRegion<dimension>
RSROutputRegionCalculator(const Point<dimension> &input, RSRTicksV &t) { RSROutputRegionCalculator(const Point<dimension> &input, RSRTicksV &t) {
Point<dimension> lb, ub; Point<dimension> lb, ub;
for (uInt i = 0; i < dimension; i++) { for (Int i = 0; i < dimension; i++) {
lb[i] = t[i].outputL[input[i]]; lb[i] = t[i].outputL[input[i]];
ub[i] = t[i].outputR[input[i]]; ub[i] = t[i].outputR[input[i]];
} }
return RectangularRegion<dimension>(lb, ub); return RectangularRegion<dimension>(lb, ub);
} }
template <uInt dimension> template <Int dimension>
void RSR_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid, void RSR_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
SparseGrid<dimension> &outputGrid, SparseGrid<dimension> &outputGrid,
RuleBook &rules, RSRTicksV &t, long *size, long *stride) { RuleBook &rules, RSRTicksV &t, long *size,
long *stride) {
rules.resize(volume<dimension>(size)); rules.resize(volume<dimension>(size));
for (auto const &inIter : inputGrid.mp) { for (auto const &inIter : inputGrid.mp) {
for (auto j : RSROutputRegionCalculator<dimension>(inIter.first, t)) { for (auto j : RSROutputRegionCalculator<dimension>(inIter.first, t)) {
auto inRegion = RSRInputRegionCalculator<dimension>(j, t); auto inRegion = RSRInputRegionCalculator<dimension>(j, t);
uInt rulesOffset = inRegion.offset(inIter.first); Int rulesOffset = inRegion.offset(inIter.first);
auto outIter = outputGrid.mp.find(j); auto outIter = outputGrid.mp.find(j);
if (outIter == outputGrid.mp.end()) { if (outIter == outputGrid.mp.end()) {
outIter = outIter =
outputGrid.mp.insert(std::make_pair(j, outputGrid.ctr++)).first; outputGrid.mp.insert(std::make_pair(j, outputGrid.ctr++)).first;
} }
assert(inIter.second<1e6); assert(inIter.second < 1e6);
assert(outIter->second<1e6); assert(outIter->second < 1e6);
rules[rulesOffset].push_back(inIter.second + inputGrid.ctr); rules[rulesOffset].push_back(inIter.second + inputGrid.ctr);
rules[rulesOffset].push_back(outIter->second); rules[rulesOffset].push_back(outIter->second);
} }
} }
} }
template <uInt dimension> template <Int dimension>
uInt RSR_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs, Int RSR_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
SparseGrids<dimension> &output_SGs, SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *size,long *stride, RuleBook &rules, long *size, long *stride,
long *input_spatialSize, long *input_spatialSize,
long *output_spatialSize, long *output_spatialSize,
std::default_random_engine re) { std::default_random_engine re) {
auto t = RSRRegions(input_spatialSize, output_spatialSize, dimension, auto t = RSRRegions(input_spatialSize, output_spatialSize, dimension, size,
size, stride, re); stride, re);
rules.clear(); rules.clear();
output_SGs.clear(); output_SGs.clear();
uInt batchSize = input_SGs.size(); Int batchSize = input_SGs.size();
output_SGs.resize(batchSize); output_SGs.resize(batchSize);
uInt output_nActive = 0; Int output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) { for (Int i = 0; i < batchSize; i++) {
auto &iSG = input_SGs[i]; auto &iSG = input_SGs[i];
auto &oSG = output_SGs[i]; auto &oSG = output_SGs[i];
oSG.ctr = output_nActive; oSG.ctr = output_nActive;
...@@ -120,47 +123,46 @@ uInt RSR_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs, ...@@ -120,47 +123,46 @@ uInt RSR_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
return output_nActive; return output_nActive;
} }
template <uInt dimension> template <Int dimension>
uInt RSR_InputSgsToRulesAndOutputSgs_OMP(SparseGrids<dimension> &input_SGs, Int RSR_InputSgsToRulesAndOutputSgs_OMP(SparseGrids<dimension> &input_SGs,
SparseGrids<dimension> &output_SGs, SparseGrids<dimension> &output_SGs,
RuleBook &rules, RuleBook &rules, long *size,
long *size, long *stride, long *stride, long *input_spatialSize,
long *input_spatialSize, long *output_spatialSize,
long *output_spatialSize, std::default_random_engine re) {
std::default_random_engine re) { auto t = RSRRegions(input_spatialSize, output_spatialSize, dimension, size,
auto t = RSRRegions(input_spatialSize, output_spatialSize, dimension, stride, re);
size, stride, re);
rules.clear(); rules.clear();
rules.resize(volume<dimension>(size)); rules.resize(volume<dimension>(size));
output_SGs.clear(); output_SGs.clear();
uInt batchSize = input_SGs.size(); Int batchSize = input_SGs.size();
output_SGs.resize(batchSize); output_SGs.resize(batchSize);
std::vector<RuleBook> rbs(batchSize); std::vector<RuleBook> rbs(batchSize);
{ {
uInt i; Int i;
#pragma omp parallel for private(i) #pragma omp parallel for private(i)
for (i = 0; i < batchSize; i++) for (i = 0; i < batchSize; i++)
RSR_InputSgToRulesAndOutputSg<dimension>(input_SGs[i], output_SGs[i], RSR_InputSgToRulesAndOutputSg<dimension>(input_SGs[i], output_SGs[i],
rbs[i], t, size, stride); rbs[i], t, size, stride);
} }
uInt output_nActive = 0; Int output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) { for (Int i = 0; i < batchSize; i++) {
// Parallel assignment: // Parallel assignment:
// output_nActive <- output_nActive+output_SGs[i].ctr // output_nActive <- output_nActive+output_SGs[i].ctr
// output_SGs[i].ctr <- output_nActive // output_SGs[i].ctr <- output_nActive
uInt tmp = output_nActive; Int tmp = output_nActive;
output_nActive += output_SGs[i].ctr; output_nActive += output_SGs[i].ctr;
output_SGs[i].ctr = tmp; output_SGs[i].ctr = tmp;
} }
{ {
uInt i; Int i;
#pragma omp parallel for private(i) #pragma omp parallel for private(i)
for (i = 0; i < rules.size(); i++) { for (i = 0; i < (Int)rules.size(); i++) {
auto &R = rules[i]; auto &R = rules[i];
for (uInt j = 0; j < batchSize; j++) { for (Int j = 0; j < batchSize; j++) {
auto &r = rbs[j][i]; auto &r = rbs[j][i];
auto offset = output_SGs[j].ctr; auto offset = output_SGs[j].ctr;
for (uInt k = 0; k < r.size();) { for (Int k = 0; k < (Int)r.size();) {
R.push_back(r[k++]); R.push_back(r[k++]);
R.push_back(r[k++] + offset); R.push_back(r[k++] + offset);
} }
......
...@@ -6,14 +6,14 @@ ...@@ -6,14 +6,14 @@
#ifndef RECTANGULARREGIONS_H #ifndef RECTANGULARREGIONS_H
#define RECTANGULARREGIONS_H #define RECTANGULARREGIONS_H
#include "../SparseConvNet.h"
// For iterating over the rectangular region with corners lb and ub. // For iterating over the rectangular region with corners lb and ub.
// The .end() method and operator!= are designed to allow range based for // The .end() method and operator!= are designed to allow range based for
// loops of the region, but nothing else. // loops of the region, but nothing else.
template <uInt dimension> class RectangularRegionIterator; template <Int dimension> class RectangularRegionIterator;
template <uInt dimension> class RectangularRegion { template <Int dimension> class RectangularRegion {
public: public:
Point<dimension> lb; Point<dimension> lb;
Point<dimension> ub; Point<dimension> ub;
...@@ -27,9 +27,9 @@ public: ...@@ -27,9 +27,9 @@ public:
// Otherwise it would need to represent a point just outside the region // Otherwise it would need to represent a point just outside the region
return RectangularRegionIterator<dimension>(*this, ub); return RectangularRegionIterator<dimension>(*this, ub);
} }
uInt Int
offset(const Point<dimension> &p) { // Enumerate the points inside the region offset(const Point<dimension> &p) { // Enumerate the points inside the region
uInt of = 0, m = 1; Int of = 0, m = 1;
for (Int i = dimension - 1; i >= 0; i--) { for (Int i = dimension - 1; i >= 0; i--) {
of += m * (p[i] - lb[i]); of += m * (p[i] - lb[i]);
m *= ub[i] - lb[i] + 1; m *= ub[i] - lb[i] + 1;
...@@ -38,13 +38,13 @@ public: ...@@ -38,13 +38,13 @@ public:
} }
}; };
template <uInt dimension> class RectangularRegionIterator { template <Int dimension> class RectangularRegionIterator {
private: private:
RectangularRegion<dimension> &region; RectangularRegion<dimension> &region;
public: public:
bool stillLooping;
Point<dimension> point; Point<dimension> point;
bool stillLooping;
RectangularRegionIterator(RectangularRegion<dimension> &region, RectangularRegionIterator(RectangularRegion<dimension> &region,
Point<dimension> &point) Point<dimension> &point)
: region(region), point(point), stillLooping(true) { : region(region), point(point), stillLooping(true) {
...@@ -73,14 +73,14 @@ public: ...@@ -73,14 +73,14 @@ public:
}; };
// Only to be used for checking the end point of range based for loops. // Only to be used for checking the end point of range based for loops.
template <uInt dimension> template <Int dimension>
inline bool operator!=(const RectangularRegionIterator<dimension> &lhs, inline bool operator!=(const RectangularRegionIterator<dimension> &lhs,
const RectangularRegionIterator<dimension> &rhs) { const RectangularRegionIterator<dimension> &rhs) {
return lhs.stillLooping; return lhs.stillLooping;
} }
// Similar to above but for [ offset[0] ... offset[0]+size[0]-1 ] x ... x [..] // Similar to above but for [ offset[0] ... offset[0]+size[0]-1 ] x ... x [..]
template <uInt dimension> template <Int dimension>
void incrementPointInCube(Point<dimension> &point, long *size, long *offset) { void incrementPointInCube(Point<dimension> &point, long *size, long *offset) {
for (Int i = dimension - 1; i >= 0; i--) { for (Int i = dimension - 1; i >= 0; i--) {
point[i]++; point[i]++;
...@@ -92,12 +92,12 @@ void incrementPointInCube(Point<dimension> &point, long *size, long *offset) { ...@@ -92,12 +92,12 @@ void incrementPointInCube(Point<dimension> &point, long *size, long *offset) {
// For a convolutional layer with given filter *size* and *stride*, find the // For a convolutional layer with given filter *size* and *stride*, find the
// subset of the input field corresponding to a point in the output. // subset of the input field corresponding to a point in the output.
template <uInt dimension> template <Int dimension>
RectangularRegion<dimension> RectangularRegion<dimension>
InputRegionCalculator(const Point<dimension> &output, long *size, InputRegionCalculator(const Point<dimension> &output, long *size,
long *stride) { long *stride) {
Point<dimension> lb, ub; Point<dimension> lb, ub;
for (uInt i = 0; i < dimension; i++) { for (Int i = 0; i < dimension; i++) {
lb[i] = output[i] * stride[i]; lb[i] = output[i] * stride[i];
ub[i] = output[i] * stride[i] + size[i] - 1; ub[i] = output[i] * stride[i] + size[i] - 1;
} }
...@@ -106,12 +106,12 @@ InputRegionCalculator(const Point<dimension> &output, long *size, ...@@ -106,12 +106,12 @@ InputRegionCalculator(const Point<dimension> &output, long *size,
// For a convolutional layer with given filter *size* and *stride*, find the // For a convolutional layer with given filter *size* and *stride*, find the
// subset of the output field corresponding to a point in the input. // subset of the output field corresponding to a point in the input.
template <uInt dimension> template <Int dimension>
RectangularRegion<dimension> RectangularRegion<dimension>
OutputRegionCalculator(const Point<dimension> &input, long *size, long *stride, OutputRegionCalculator(const Point<dimension> &input, long *size, long *stride,
long *outputSpatialSize) { long *outputSpatialSize) {
Point<dimension> lb, ub; Point<dimension> lb, ub;
for (uInt i = 0; i < dimension; i++) { for (Int i = 0; i < dimension; i++) {
lb[i] = std::max(0L, (input[i] - size[i] + stride[i]) / stride[i]); lb[i] = std::max(0L, (input[i] - size[i] + stride[i]) / stride[i]);
ub[i] = std::min(outputSpatialSize[i] - 1, input[i] / stride[i]); ub[i] = std::min(outputSpatialSize[i] - 1, input[i] / stride[i]);
} }
......
...@@ -8,11 +8,11 @@ ...@@ -8,11 +8,11 @@
#define VALIDCONVOLUTIONRULES_H #define VALIDCONVOLUTIONRULES_H
// Full input region for an output point // Full input region for an output point
template <uInt dimension> template <Int dimension>
RectangularRegion<dimension> RectangularRegion<dimension>
InputRegionCalculator_Valid(const Point<dimension> &output, long *size) { InputRegionCalculator_Valid(const Point<dimension> &output, long *size) {
Point<dimension> lb, ub; Point<dimension> lb, ub;
for (uInt i = 0; i < dimension; i++) { for (Int i = 0; i < dimension; i++) {
Int pad = size[i] / 2; Int pad = size[i] / 2;
lb[i] = output[i] - pad; lb[i] = output[i] - pad;
ub[i] = output[i] + size[i] - 1 - pad; ub[i] = output[i] + size[i] - 1 - pad;
...@@ -23,15 +23,14 @@ InputRegionCalculator_Valid(const Point<dimension> &output, long *size) { ...@@ -23,15 +23,14 @@ InputRegionCalculator_Valid(const Point<dimension> &output, long *size) {
// Call for each convolutional / max-pooling layer, once for each batch item. // Call for each convolutional / max-pooling layer, once for each batch item.
// rules is used to carry out the "lowering" whilst carrying out the convolution // rules is used to carry out the "lowering" whilst carrying out the convolution
template <uInt dimension> template <Int dimension>
double SubmanifoldConvolution_SgToRules(SparseGrid<dimension> &grid, RuleBook &rules, double SubmanifoldConvolution_SgToRules(SparseGrid<dimension> &grid,
long *size) { RuleBook &rules, long *size) {
uInt sd = volume<dimension>(size);
double countActiveInputs = 0; double countActiveInputs = 0;
for (auto const &outputIter : grid.mp) { for (auto const &outputIter : grid.mp) {
auto inRegion = auto inRegion =
InputRegionCalculator_Valid<dimension>(outputIter.first, size); InputRegionCalculator_Valid<dimension>(outputIter.first, size);
uInt rulesOffset = 0; Int rulesOffset = 0;
for (auto inputPoint : inRegion) { for (auto inputPoint : inRegion) {
auto inputIter = grid.mp.find(inputPoint); auto inputIter = grid.mp.find(inputPoint);
if (inputIter != grid.mp.end()) { if (inputIter != grid.mp.end()) {
...@@ -45,43 +44,43 @@ double SubmanifoldConvolution_SgToRules(SparseGrid<dimension> &grid, RuleBook &r ...@@ -45,43 +44,43 @@ double SubmanifoldConvolution_SgToRules(SparseGrid<dimension> &grid, RuleBook &r
return countActiveInputs; return countActiveInputs;
} }
template <uInt dimension> template <Int dimension>
uInt SubmanifoldConvolution_SgsToRules(SparseGrids<dimension> &SGs, RuleBook &rules, Int SubmanifoldConvolution_SgsToRules(SparseGrids<dimension> &SGs,
long *size) { RuleBook &rules, long *size) {
uInt sd = volume<dimension>(size); Int sd = volume<dimension>(size);
uInt countActiveInputs = 0; Int countActiveInputs = 0;
rules.clear(); rules.clear();
rules.resize(sd); rules.resize(sd);
for (uInt i = 0; i < SGs.size(); i++) for (Int i = 0; i < (Int)SGs.size(); i++)
countActiveInputs += countActiveInputs +=
SubmanifoldConvolution_SgToRules<dimension>(SGs[i], rules, size); SubmanifoldConvolution_SgToRules<dimension>(SGs[i], rules, size);
return countActiveInputs; return countActiveInputs;
} }
template <uInt dimension> template <Int dimension>
uInt SubmanifoldConvolution_SgsToRules_OMP(SparseGrids<dimension> &SGs, Int SubmanifoldConvolution_SgsToRules_OMP(SparseGrids<dimension> &SGs,
RuleBook &rules, long *size) { RuleBook &rules, long *size) {
std::vector<RuleBook> rbs(SGs.size()); std::vector<RuleBook> rbs(SGs.size());
std::vector<double> countActiveInputs(SGs.size()); std::vector<double> countActiveInputs(SGs.size());
rules.clear(); rules.clear();
uInt sd = volume<dimension>(size); Int sd = volume<dimension>(size);
rules.resize(sd); rules.resize(sd);
{ {
uInt i; Int i;
#pragma omp parallel for private(i) #pragma omp parallel for private(i)
for (i = 0; i < SGs.size(); i++) { for (i = 0; i < (Int)SGs.size(); i++) {
rbs[i].resize(sd); rbs[i].resize(sd);
countActiveInputs[i] = countActiveInputs[i] =
SubmanifoldConvolution_SgToRules<dimension>(SGs[i], rbs[i], size); SubmanifoldConvolution_SgToRules<dimension>(SGs[i], rbs[i], size);
} }
} }
{ {
uInt i; Int i;
#pragma omp parallel for private(i) #pragma omp parallel for private(i)
for (i = 0; i < sd; i++) for (i = 0; i < sd; i++)
for (auto const &rb : rbs) for (auto const &rb : rbs)
rules[i].insert(rules[i].end(), rb[i].begin(), rb[i].end()); rules[i].insert(rules[i].end(), rb[i].begin(), rb[i].end());
} }
uInt countActiveInputs_ = 0; Int countActiveInputs_ = 0;
for (auto &i : countActiveInputs) for (auto &i : countActiveInputs)
countActiveInputs_ += i; countActiveInputs_ += i;
return countActiveInputs_; return countActiveInputs_;
......
from torch.utils.ffi import _wrap_function
from ._SCN import lib as _lib, ffi as _ffi

__all__ = []


def _import_symbols(namespace):
    """Re-export every symbol of the compiled _SCN extension into `namespace`.

    Callable symbols are wrapped with torch's FFI adapter so they accept
    torch tensors; everything else is copied through unchanged.  Each
    exported name is also appended to the module's __all__ list.
    """
    for name in dir(_lib):
        attr = getattr(_lib, name)
        namespace[name] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(name)


_import_symbols(locals())
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/ActivePooling.cpp"
#else
#include "ActivePooling.h"
// Forward pass of "active pooling": pools every active site of each batch
// item into a single feature row (sum, or mean when `average` is true).
// `m` holds the per-dimension Metadata object; SCN_INITIALIZE_AND_REFERENCE
// binds it to the local reference `_m`.
extern "C" void scn_DR_(ActivePooling_updateOutput)(
    THLongTensor *inputSize, void **m, THTensor *input_features,
    THTensor *output_features, bool average) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1];
  auto _rules = _m.getActivePoolingRuleBook(inputSize);
  // _rules[1] appears to carry bookkeeping {batchSize, maxActive} -- confirm
  // against Metadata::getActivePoolingRuleBook.
  uInt batchSize = _rules[1][0];
  uInt maxActive = _rules[1][1];
  // One pooled output row per batch item; zero first, the kernel accumulates.
  THTensor_(resize2d)(output_features, batchSize, nPlanes);
  THTensor_(zero)(output_features);
  ActivePooling_ForwardPass<real>(THTensor_(data)(input_features),
                                  THTensor_(data)(output_features), batchSize,
                                  maxActive, nPlanes, _rules, average);
}
// Backward pass of active pooling: scatters each batch item's output
// gradient back to all of its active input sites (scaled when `average`).
extern "C" void scn_DR_(ActivePooling_updateGradInput)(
    THLongTensor *inputSize, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *d_output_features,
    bool average) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1];
  auto _rules = _m.getActivePoolingRuleBook(inputSize);
  // Same bookkeeping layout as the forward pass: _rules[1] = {batchSize,
  // maxActive} (presumably; verify in Metadata).
  uInt batchSize = _rules[1][0];
  uInt maxActive = _rules[1][1];
  // Gradient tensor matches the input layout; zero before accumulation.
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  ActivePooling_BackwardPass<real>(
      THTensor_(data)(d_input_features), THTensor_(data)(d_output_features),
      batchSize, maxActive, nPlanes, _rules, average);
}
#endif
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/AffineReluTrivialConvolution.cpp"
#else
#include "AffineReluTrivialConvolution.h"
// Fused affine-transform + ReLU + 1x1 ("trivial") convolution, forward pass.
// The fusion itself lives in AffineReluTrivialConvolution_ForwardPass; this
// wrapper only shapes the output and unpacks tensor pointers/strides.
extern "C" void scn_R_(AffineReluTrivialConvolution_updateOutput)(
    THTensor *input_features, THTensor *output_features, THTensor *affineWeight,
    THTensor *affineBias, THTensor *convWeight) {
  // convWeight is (nIn, nOut): one output row per active input row.
  THTensor_(resize2d)(output_features, input_features->size[0],
                      convWeight->size[1]);
  AffineReluTrivialConvolution_ForwardPass(
      THTensor_(data)(input_features), convWeight->size[0],
      input_features->stride[0], THTensor_(data)(output_features),
      convWeight->size[1], output_features->stride[0],
      THTensor_(data)(affineWeight), THTensor_(data)(affineBias),
      THTensor_(data)(convWeight), input_features->size[0]);
}
// Backward pass of the fused affine+ReLU+1x1 convolution. Computes input
// gradients and the gradients of both the affine and convolution parameters.
// `additiveGrad` is forwarded to the kernel (looks like it selects
// accumulate-vs-overwrite for d_input -- confirm in the header).
extern "C" void scn_R_(AffineReluTrivialConvolution_backward)(
    THTensor *input_features, THTensor *d_input_features,
    THTensor *d_output_features, THTensor *affineWeight,
    THTensor *d_affineWeight, THTensor *affineBias, THTensor *d_affineBias,
    THTensor *convWeight, THTensor *d_convWeight, bool additiveGrad) {
  THTensor_(resizeAs)(d_input_features, input_features);
  AffineReluTrivialConvolution_BackwardPass(
      THTensor_(data)(input_features), THTensor_(data)(d_input_features),
      convWeight->size[0], input_features->stride[0],
      THTensor_(data)(d_output_features), convWeight->size[1],
      d_output_features->stride[0], THTensor_(data)(affineWeight),
      THTensor_(data)(d_affineWeight), THTensor_(data)(affineBias),
      THTensor_(data)(d_affineBias), THTensor_(data)(convWeight),
      THTensor_(data)(d_convWeight), input_features->size[0], additiveGrad);
}
#endif
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/AveragePooling.cpp"
#else
#include "AveragePooling.h"
// Forward pass of sparse average pooling.
// The first `nFeaturesToDrop` input planes are skipped (the data pointer is
// advanced past them) and the output carries only the remaining nPlanes.
// One rulebook entry per filter offset; the kernel accumulates into the
// zeroed output, dividing by the pool volume (_rules.size()).
extern "C" void scn_DR_(AveragePooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  // Reuse nPlanes instead of re-deriving input_features->size[1] -
  // nFeaturesToDrop a second time (same value, single source of truth).
  THTensor_(resize2d)(output_features, nActive, nPlanes);
  THTensor_(zero)(output_features);
  auto iF = THTensor_(data)(input_features) + nFeaturesToDrop;
  auto oF = THTensor_(data)(output_features);
  for (auto &r : _rules) {
    uInt nHot = r.size() / 2; // rules are (input,output) index pairs
    AveragePooling_ForwardPass<real>(iF, oF, nPlanes, input_features->stride[0],
                                     output_features->stride[0], &r[0], nHot,
                                     _rules.size());
  }
}
// Backward pass of sparse average pooling: distributes each output gradient
// back over its pooling window, skipping the first `nFeaturesToDrop` planes.
extern "C" void scn_DR_(AveragePooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *d_output_features,
    long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  // Advance past the dropped planes; gradients for them stay zero.
  auto diF = THTensor_(data)(d_input_features) + nFeaturesToDrop;
  auto doF = THTensor_(data)(d_output_features);
  for (auto &r : _rules) {
    uInt nHot = r.size() / 2; // rules are (input,output) index pairs
    AveragePooling_BackwardPass<real>(
        diF, doF, nPlanes, input_features->stride[0],
        d_output_features->stride[0], &r[0], nHot, _rules.size());
  }
}
#endif
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/BatchNormalization.cpp"
#else
#include "BatchNormalization.h"
// Batch normalization forward pass over the (nActive, nPlanes) feature
// matrix, with optional affine weight/bias (passed as optional data pointers)
// and a leaky-ReLU fused in via `leakiness`.
// NOTE: only 2-D inputs are processed; anything else is silently left as the
// resized-but-unwritten output.
extern "C" void scn_R_(BatchNormalization_updateOutput)(
    THTensor *input_features, THTensor *output_features, THTensor *saveMean,
    THTensor *saveInvStd, THTensor *runningMean, THTensor *runningVar,
    THTensor *weight, THTensor *bias, real eps, real momentum, bool train,
    real leakiness) {
  THTensor_(resizeAs)(output_features, input_features);
  if (input_features->nDimension == 2) {
    auto nActive = input_features->size[0];
    auto nPlanes = input_features->size[1];
    auto input_stride = input_features->stride[0];
    auto output_stride = output_features->stride[0];
    BatchNormalization_ForwardPass<real>(
        THTensor_(data)(input_features), THTensor_(data)(output_features),
        nPlanes, input_stride, output_stride, nActive,
        THTensor_(data)(saveMean), THTensor_(data)(saveInvStd),
        THTensor_(data)(runningMean), THTensor_(data)(runningVar),
        THOptionalTensorData(weight), THOptionalTensorData(bias), eps, momentum,
        train, leakiness);
  }
}
// Same as BatchNormalization_updateOutput, except the output tensor is NOT
// resized here -- presumably the caller has already allocated it (e.g. a view
// into a larger tensor); confirm against the Python-side caller.
extern "C" void scn_R_(BatchNormalizationInTensor_updateOutput)(
    THTensor *input_features, THTensor *output_features, THTensor *saveMean,
    THTensor *saveInvStd, THTensor *runningMean, THTensor *runningVar,
    THTensor *weight, THTensor *bias, real eps, real momentum, bool train,
    real leakiness) {
  if (input_features->nDimension == 2) {
    auto nActive = input_features->size[0];
    auto nPlanes = input_features->size[1];
    auto input_stride = input_features->stride[0];
    auto output_stride = output_features->stride[0];
    BatchNormalization_ForwardPass<real>(
        THTensor_(data)(input_features), THTensor_(data)(output_features),
        nPlanes, input_stride, output_stride, nActive,
        THTensor_(data)(saveMean), THTensor_(data)(saveInvStd),
        THTensor_(data)(runningMean), THTensor_(data)(runningVar),
        THOptionalTensorData(weight), THOptionalTensorData(bias), eps, momentum,
        train, leakiness);
  }
}
// Batch normalization backward pass. Uses the statistics saved by the
// forward pass (saveMean/saveInvStd) and writes d_input plus optional
// d_weight/d_bias. As in the forward pass, only 2-D inputs are processed.
extern "C" void scn_R_(BatchNormalization_backward)(
    THTensor *input_features, THTensor *d_input_features,
    THTensor *output_features, THTensor *d_output_features, THTensor *saveMean,
    THTensor *saveInvStd, THTensor *runningMean, THTensor *runningVar,
    THTensor *weight, THTensor *bias, THTensor *d_weight, THTensor *d_bias,
    real leakiness) {
  THTensor_(resizeAs)(d_input_features, input_features);
  if (input_features->nDimension == 2) {
    auto nActive = input_features->size[0];
    auto nPlanes = input_features->size[1];
    auto input_stride = input_features->stride[0];
    auto output_stride = output_features->stride[0];
    BatchNormalization_BackwardPass<real>(
        THTensor_(data)(input_features), THTensor_(data)(d_input_features),
        THTensor_(data)(output_features), THTensor_(data)(d_output_features),
        nPlanes, input_stride, output_stride, nActive,
        THTensor_(data)(saveMean), THTensor_(data)(saveInvStd),
        THTensor_(data)(runningMean), THTensor_(data)(runningVar),
        THOptionalTensorData(weight), THOptionalTensorData(bias),
        THOptionalTensorData(d_weight), THOptionalTensorData(d_bias),
        leakiness);
  }
}
#endif
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/BatchwiseMultiplicativeDropout.cpp"
#else
// Forward pass of batchwise multiplicative dropout fused with a leaky ReLU:
// each feature plane is scaled by its per-plane noise value, and negative
// activations are additionally scaled by `alpha`.
// Supports in-place operation (input_features == output_features).
extern "C" void scn_R_(BatchwiseMultiplicativeDropout_updateOutput)(
    THTensor *input_features, THTensor *output_features, THTensor *noise,
    float alpha) {
  if (input_features != output_features)
    THTensor_(resizeAs)(output_features, input_features);
  auto nActive = input_features->size[0];
  auto nPlanes = input_features->size[1];
  auto in = THTensor_(data)(input_features);
  auto out = THTensor_(data)(output_features);
  auto scale = THTensor_(data)(noise); // one multiplier per plane
  for (uInt row = 0; row < nActive; row++) {
    uInt base = row * nPlanes;
    for (uInt plane = 0; plane < nPlanes; plane++) {
      uInt idx = base + plane;
      real x = in[idx];
      // Negative side of the leaky ReLU gets the extra alpha factor.
      out[idx] = (x > 0) ? x * scale[plane] : x * scale[plane] * alpha;
    }
  }
}
// Backward pass of batchwise multiplicative dropout + leaky ReLU: the output
// gradient is scaled by the same per-plane noise, with the extra `alpha`
// factor on entries whose forward input was non-positive.
// Supports in-place operation (d_input_features == d_output_features).
extern "C" void scn_R_(BatchwiseMultiplicativeDropout_updateGradInput)(
    THTensor *input_features, THTensor *d_input_features,
    THTensor *d_output_features, THTensor *noise, float alpha) {
  if (d_input_features != d_output_features)
    THTensor_(resizeAs)(d_input_features, d_output_features);
  auto nActive = input_features->size[0];
  auto nPlanes = input_features->size[1];
  auto in = THTensor_(data)(input_features);
  auto gradIn = THTensor_(data)(d_input_features);
  auto gradOut = THTensor_(data)(d_output_features);
  auto scale = THTensor_(data)(noise); // one multiplier per plane
  for (uInt row = 0; row < nActive; row++) {
    uInt base = row * nPlanes;
    for (uInt plane = 0; plane < nPlanes; plane++) {
      uInt idx = base + plane;
      real g = gradOut[idx] * scale[plane];
      gradIn[idx] = (in[idx] > 0) ? g : g * alpha;
    }
  }
}
#endif
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/Convolution.cpp"
#else
#include "Convolution.h"
// Strided sparse convolution, forward pass.
// Builds (or fetches the cached) rulebook mapping input sites to output
// sites per filter offset, resizes output to (nActive, nOut), and runs the
// GEMM-based kernel. Returns the multiply-accumulate count for profiling.
extern "C" double scn_DR_(Convolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THTensor *input_features,
    THTensor *output_features, THTensor *weight, THTensor *bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resize2d)(output_features, nActive, weight->size[1]);
  // With a bias the kernel overwrites every output row with it, so zeroing
  // is only needed in the bias-free case.
  if (not bias)
    THTensor_(zero)(output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THTensor_(data)(input_features);
    auto oF = THTensor_(data)(output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THTensor_(data)(weight);
    auto b = THOptionalTensorData(bias);
    Convolution_ForwardPass(iF, ip, ip, oF, op, op, w, b, _rules, nActive,
                            THBlas_(gemm));
    // Each rule contributes (nHot = r.size()/2) input/output pairings.
    for (auto &r : _rules)
      flops += r.size() / 2 * ip * op;
  }
  return flops;
}
// Strided sparse convolution, backward pass. d_input is zeroed here;
// d_weight/d_bias are accumulated into by Convolution_BackwardPass (the
// caller is responsible for zeroing them).
extern "C" void scn_DR_(Convolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *d_output_features, THTensor *weight,
    THTensor *d_weight, THTensor *d_bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  if (nActive) {
    auto iF = THTensor_(data)(input_features);
    auto diF = THTensor_(data)(d_input_features);
    auto doF = THTensor_(data)(d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THTensor_(data)(weight);
    auto dw = THTensor_(data)(d_weight);
    auto db = THOptionalTensorData(d_bias);
    Convolution_BackwardPass(iF, diF, ip, ip, doF, op, op, w, dw, db, _rules,
                             nActive, THBlas_(gemm));
  }
}
// Submanifold sparse convolution, forward pass: the output has exactly the
// same active set as the input (note nActive comes from inputSize), so the
// sparsity pattern is preserved. Returns the multiply-accumulate count.
extern "C" double scn_DR_(SubmanifoldConvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *filterSize, void **m,
    THTensor *input_features, THTensor *output_features, THTensor *weight,
    THTensor *bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
  uInt nActive = _m.getNActive(inputSize);
  THTensor_(resize2d)(output_features, nActive, weight->size[1]);
  // Kernel writes bias into every row when present; otherwise zero manually.
  if (not bias)
    THTensor_(zero)(output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THTensor_(data)(input_features);
    auto oF = THTensor_(data)(output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THTensor_(data)(weight);
    auto b = THOptionalTensorData(bias);
    Convolution_ForwardPass(iF, ip, ip, oF, op, op, w, b, _rules, nActive,
                            THBlas_(gemm));
    for (auto &r : _rules)
      flops += r.size() / 2 * ip * op;
  }
  return flops;
}
// Submanifold sparse convolution, backward pass. Mirrors Convolution_backward
// but reuses the submanifold rulebook and the input's active-site count.
extern "C" void scn_DR_(SubmanifoldConvolution_backward)(
    THLongTensor *inputSize, THLongTensor *filterSize, void **m,
    THTensor *input_features, THTensor *d_input_features,
    THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
    THTensor *d_bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
  uInt nActive = _m.getNActive(inputSize);
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  if (nActive) {
    auto iF = THTensor_(data)(input_features);
    auto diF = THTensor_(data)(d_input_features);
    auto doF = THTensor_(data)(d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THTensor_(data)(weight);
    auto dw = THTensor_(data)(d_weight);
    auto db = THOptionalTensorData(d_bias);
    Convolution_BackwardPass(iF, diF, ip, ip, doF, op, op, w, dw, db, _rules,
                             nActive, THBlas_(gemm));
  }
}
// "Full" (transpose-style) sparse convolution, forward pass. Takes TWO
// metadata objects: the rulebook is built from the input metadata (_mIn) and
// populates the output metadata (_mOut), whose active-site count sizes the
// output. Returns the multiply-accumulate count.
extern "C" double scn_DR_(FullConvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **mIn, void **mOut,
    THTensor *input_features, THTensor *output_features, THTensor *weight,
    THTensor *bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mIn)
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mOut)
  auto _rules = _mIn.getFullConvolutionRuleBook(
      inputSize, outputSize, filterSize, filterStride, _mOut);
  uInt nActive = _mOut.getNActive(outputSize);
  THTensor_(resize2d)(output_features, nActive, weight->size[1]);
  // Kernel writes bias into every row when present; otherwise zero manually.
  if (not bias)
    THTensor_(zero)(output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THTensor_(data)(input_features);
    auto oF = THTensor_(data)(output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THTensor_(data)(weight);
    auto b = THOptionalTensorData(bias);
    Convolution_ForwardPass(iF, ip, ip, oF, op, op, w, b, _rules, nActive,
                            THBlas_(gemm));
    for (auto &r : _rules)
      flops += r.size() / 2 * ip * op;
  }
  return flops;
}
// Backward pass of the full convolution; rebuilds/fetches the same rulebook
// from the two metadata objects and runs the shared GEMM backward kernel.
extern "C" void scn_DR_(FullConvolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **mIn, void **mOut,
    THTensor *input_features, THTensor *d_input_features,
    THTensor *d_output_features, THTensor *weight, THTensor *d_weight,
    THTensor *d_bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mIn)
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mOut)
  auto _rules = _mIn.getFullConvolutionRuleBook(
      inputSize, outputSize, filterSize, filterStride, _mOut);
  uInt nActive = _mOut.getNActive(outputSize);
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  if (nActive) {
    auto iF = THTensor_(data)(input_features);
    auto diF = THTensor_(data)(d_input_features);
    auto doF = THTensor_(data)(d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THTensor_(data)(weight);
    auto dw = THTensor_(data)(d_weight);
    auto db = THOptionalTensorData(d_bias);
    Convolution_BackwardPass(iF, diF, ip, ip, doF, op, op, w, dw, db, _rules,
                             nActive, THBlas_(gemm));
  }
}
// Sparse convolution with randomized stride offsets, forward pass. Identical
// to Convolution_updateOutput except the rulebook comes from
// getRandomizedStrideRuleBook. Returns the multiply-accumulate count.
extern "C" double scn_DR_(RandomizedStrideConvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THTensor *input_features,
    THTensor *output_features, THTensor *weight, THTensor *bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules =
      _m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resize2d)(output_features, nActive, weight->size[1]);
  // Kernel writes bias into every row when present; otherwise zero manually.
  if (not bias)
    THTensor_(zero)(output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THTensor_(data)(input_features);
    auto oF = THTensor_(data)(output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THTensor_(data)(weight);
    auto b = THOptionalTensorData(bias);
    Convolution_ForwardPass(iF, ip, ip, oF, op, op, w, b, _rules, nActive,
                            THBlas_(gemm));
    for (auto &r : _rules)
      flops += r.size() / 2 * ip * op;
  }
  return flops;
}
// Backward pass for the randomized-stride convolution; relies on the cached
// rulebook (the `true` flag) matching the one used in the forward pass.
extern "C" void scn_DR_(RandomizedStrideConvolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *d_output_features, THTensor *weight,
    THTensor *d_weight, THTensor *d_bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules =
      _m.getRandomizedStrideRuleBook(inputSize, outputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  if (nActive) {
    auto iF = THTensor_(data)(input_features);
    auto diF = THTensor_(data)(d_input_features);
    auto doF = THTensor_(data)(d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THTensor_(data)(weight);
    auto dw = THTensor_(data)(d_weight);
    auto db = THOptionalTensorData(d_bias);
    Convolution_BackwardPass(iF, diF, ip, ip, doF, op, op, w, dw, db, _rules,
                             nActive, THBlas_(gemm));
  }
}
#endif
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_CONVOLUTION_H
#define CPU_CONVOLUTION_H
#include "../SparseConvNet.h"
#include <cstring>
// buffer must have size >= nHot * (nIn+nOut)
// GEMM-based sparse convolution forward pass.
// Parameter naming convention: *_nPlanes is the logical feature count,
// *_nPLANES is the physical row stride of the feature matrix (callers in
// this file pass the same value for both).
// For each filter offset, rule r holds nHot (input,output) index pairs at
// r[2k], r[2k+1]; rows are gathered into a dense buffer, multiplied by that
// offset's (input_nPlanes x output_nPlanes) weight slice, and scattered back
// with accumulation.
template <typename T>
void Convolution_ForwardPass(
    T *input_features, uInt input_nPlanes, uInt input_nPLANES,
    T *output_features, uInt output_nPlanes, uInt output_nPLANES, T *weight,
    T *bias, RuleBook &rules, uInt output_nActive,
    void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
                 T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
  if (bias != nullptr) // Set bias
    for (uInt row = 0; row < output_nActive; row++)
      for (uInt column = 0; column < output_nPlanes; column++)
        output_features[row * output_nPLANES + column] = bias[column];
  std::vector<T> input_buffer, output_buffer;
  for (auto &r : rules) {
    uInt nHot = r.size() / 2;
    input_buffer.resize(nHot * input_nPlanes);
    output_buffer.resize(nHot * output_nPlanes);
    // Gather the active input rows for this filter offset.
    for (uInt row = 0; row < nHot; row++) {
      std::memcpy(&input_buffer[row * input_nPlanes],
                  input_features + r[2 * row] * input_nPLANES,
                  sizeof(T) * input_nPlanes);
    }
    // Do GEMM (note: gemm assumes column-major matrices)
    // input_buffer is l*m (row-major)
    // weight is m*r (row-major)
    // output_buffer is l*r (row-major)
    // buffer * weights -> output_buffers
    (*gemm)('n', 'n',
            output_nPlanes, // r
            nHot,           // l
            input_nPlanes,  // m
            1,              // alpha
            weight, output_nPlanes,          // r
            &input_buffer[0], input_nPlanes, // m
            0,                               // beta
            &output_buffer[0], output_nPlanes // r
    );
    // Advance to the next filter offset's weight slice.
    weight += input_nPlanes * output_nPlanes;
    // Scatter-add the GEMM result into the output rows.
    for (uInt row = 0; row < nHot; row++) {
      T *b = &output_buffer[row * output_nPlanes];
      T *o = &output_features[r[2 * row + 1] * output_nPLANES];
      for (uInt k = 0; k < output_nPlanes; k++)
        o[k] += b[k];
    }
  }
}
// GEMM-based sparse convolution backward pass. Computes, per filter offset:
//   d_input  += d_output . weight^T   (scatter-added via input_buffer)
//   d_weight += input^T . d_output    (accumulated; gemm beta = 1)
//   d_bias   += column sums of d_output (when d_bias is non-null)
// d_weight and d_bias must be zeroed by the caller; d_input is assumed
// zeroed too (see the scn_*_backward wrappers).
template <typename T>
void Convolution_BackwardPass(
    T *input_features, T *d_input_features, uInt input_nPlanes,
    uInt input_nPLANES, T *d_output_features, uInt output_nPlanes,
    uInt output_nPLANES, T *weight, T *d_weight, T *d_bias, RuleBook &rules,
    uInt output_nActive,
    void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
                 T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
  if (d_bias)
    for (uInt row = 0; row < output_nActive; row++)
      for (uInt i = 0; i < output_nPlanes; i++)
        d_bias[i] += d_output_features[row * output_nPLANES + i];
  std::vector<T> input_buffer, output_buffer;
  for (auto &r : rules) {
    uInt nHot = r.size() / 2;
    input_buffer.resize(nHot * input_nPlanes);
    output_buffer.resize(nHot * output_nPlanes);
    // Gather the output gradients touched by this filter offset.
    for (uInt row = 0; row < nHot; row++)
      std::memcpy(&output_buffer[row * output_nPlanes],
                  &d_output_features[r[2 * row + 1] * output_nPLANES],
                  sizeof(T) * output_nPlanes);
    // Do GEMM (note: gemm assumes column-major matrices)
    // output_buffer is l*m (row-major)
    // weights is r*m (row-major)
    // input_buffer is l*r (row-major)
    // output_buffer * T(weight) -> input_buffer
    (*gemm)('t', 'n',
            input_nPlanes,  // r
            nHot,           // l
            output_nPlanes, // m
            1,              // alpha
            weight, output_nPlanes,            // m
            &output_buffer[0], output_nPlanes, // m
            0,                                 // beta
            &input_buffer[0], input_nPlanes // r
    );
    weight += input_nPlanes * output_nPlanes;
    // Scatter-add the input gradients back to their sparse rows.
    for (uInt row = 0; row < nHot; row++) {
      T *b = &input_buffer[row * input_nPlanes];
      T *i = &d_input_features[r[2 * row] * input_nPLANES];
      for (uInt k = 0; k < input_nPlanes; k++)
        i[k] += b[k];
    }
    // Re-gather the forward inputs (input_buffer is reused) for d_weight.
    for (uInt row = 0; row < nHot; row++)
      std::memcpy(&input_buffer[row * input_nPlanes],
                  input_features + r[2 * row] * input_nPLANES,
                  sizeof(T) * input_nPlanes);
    // Do GEMM (note: gemm assumes column-major matrices)
    // input_buffer is m*l (row-major)
    // output_buffer is m*r (row-major)
    // d_weights is l*r (row-major)
    // T(input_buffer) * output_buffer -> d_weight
    (*gemm)('n', 't',
            output_nPlanes, // r
            input_nPlanes,  // l
            nHot,           // m
            1,              // alpha
            &output_buffer[0], output_nPlanes, // r
            &input_buffer[0], input_nPlanes,   // l
            1,                                 // beta (accumulate)
            d_weight, output_nPlanes // r
    );
    d_weight += input_nPlanes * output_nPlanes;
  }
}
// template <typename T>
// void Convolution_ForwardPass(
// T *input_features, uInt input_nPlanes, uInt input_nPLANES,
// T *output_features, uInt output_nPlanes, uInt output_nPLANES, T *weight,
// T *bias, RuleBook &rules, uInt output_nActive,
// void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
// T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
// if (bias != nullptr) // Set bias
// for (uInt row = 0; row < output_nActive; row++)
// for (uInt column = 0; column < output_nPlanes; column++)
// output_features[row * output_nPLANES + column] = bias[column];
// for (auto &r : rules) {
// uInt nHot = r.size() / 2;
// for (uInt row = 0; row < nHot; row++) {
// T *inp = &input_features[r[2 * row] * input_nPLANES];
// T *out = &output_features[r[2 * row + 1] * output_nPLANES];
// for (uInt i = 0; i < input_nPlanes; i++)
// for (uInt j = 0; j < output_nPlanes; j++)
// out[j] += inp[i] * weight[i * input_nPlanes + j];
// }
// weight += input_nPlanes * output_nPlanes;
// }
// }
#endif /* CPU_CONVOLUTION_H */
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/Deconvolution.cpp"
#else
#include "Deconvolution.h"
// Forward pass of a sparse deconvolution layer.
// Resizes output_features to (nActive, out-planes), seeds it with the bias
// (inside the ForwardPass) or zeros, runs the gemm-based forward pass over
// the rulebook, and returns the multiply-accumulate count as a flop estimate.
extern "C" double scn_DR_(Deconvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THTensor *input_features,
    THTensor *output_features, THTensor *weight, THTensor *bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  // Build (or fetch the cached) rulebook; note output/input sizes are swapped
  // relative to convolution, since deconvolution transposes the mapping.
  auto rulebook =
      _m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resize2d)(output_features, nActive, weight->size[1]);
  // Without a bias the forward pass accumulates into the output buffer,
  // so it must start from zero.
  if (bias == nullptr)
    THTensor_(zero)(output_features);
  auto inPlanes = input_features->size[1];
  auto outPlanes = output_features->size[1];
  Deconvolution_ForwardPass(
      THTensor_(data)(input_features), inPlanes, inPlanes,
      THTensor_(data)(output_features), outPlanes, outPlanes,
      THTensor_(data)(weight), THOptionalTensorData(bias), rulebook, nActive,
      THBlas_(gemm));
  // Each rule entry pairs one input row with one output row through an
  // inPlanes x outPlanes weight matrix.
  double flops = 0;
  for (auto &rule : rulebook)
    flops += rule.size() / 2 * inPlanes * outPlanes;
  return flops;
}
// Backward pass of a sparse deconvolution layer: computes gradients w.r.t.
// the input features (d_input_features), the weights (d_weight) and, when
// present, the bias (d_bias), given the upstream gradient d_output_features.
extern "C" void scn_DR_(Deconvolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *d_output_features, THTensor *weight,
    THTensor *d_weight, THTensor *d_bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  // Reuse the same (cached) rulebook that the forward pass built.
  auto rulebook =
      _m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  // Input gradients are accumulated rule-by-rule, so start from zero.
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  auto inPlanes = input_features->size[1];
  auto outPlanes = d_output_features->size[1];
  Deconvolution_BackwardPass(
      THTensor_(data)(input_features), THTensor_(data)(d_input_features),
      inPlanes, inPlanes, THTensor_(data)(d_output_features), outPlanes,
      outPlanes, THTensor_(data)(weight), THTensor_(data)(d_weight),
      THOptionalTensorData(d_bias), rulebook, nActive, THBlas_(gemm));
}
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment