Commit 2c4ed608 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

Goodbye THNN. Hello ATen!

parent 6d4475db
......@@ -5,66 +5,58 @@
// LICENSE file in the root directory of this source tree.
#include <array>
#include <tuple>
// Using 32 bit integers for coordinates and memory calculations.
// They could be replaced with 64 bit integers.
// Advantages of 64 bit:
// - support for nFeatures * nActiveSites > 2^32 per hidden layer per batch
// Disadvantages:
// - larger, and therefore slower, data copies from CPU -> GPU
// - more device memory needed to store sparseconvnet 'rulebooks'
// - not really needed until GPUs have >> 32GB RAM
// 32-bit coordinate type; see the trade-off discussion in the comment above.
using Int = int32_t;
// Unsigned companion type; the value uInt_MAX is a sentinel for 'non-existent'.
using uInt = uint32_t;
const uInt uInt_MAX = 0xFFFFFFFFU; // 2^32-1
const uInt Int_MAX = 0x7FFFFFFFU;  // 2^31-1
// Point<dimension> is a point in the d-dimensional integer lattice
// (i.e. square-grid/cubic-grid, ...)
template <Int dimension> using Point = std::array<Int, dimension>;
template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) {
template <Int dimension>
Point<dimension> LongTensorToPoint(/*long*/ at::Tensor &t) {
Point<dimension> p;
long *td = THLongTensor_data(t);
for (int i = 0; i < dimension; i++)
long *td = t.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
return p;
}
template <uInt dimension>
Point<2 * dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) {
template <Int dimension>
Point<2 * dimension> TwoLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1) {
Point<2 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
return p;
}
template <uInt dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1,
THLongTensor *t2) {
template <Int dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1,
/*long*/ at::Tensor &t2) {
Point<3 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
td = THLongTensor_data(t2);
for (int i = 0; i < dimension; i++)
td = t2.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + 2 * dimension] = td[i];
return p;
}
// FNV Hash function for Point<dimension>
template <uInt dimension> struct IntArrayHash {
template <Int dimension> struct IntArrayHash {
std::size_t operator()(Point<dimension> const &p) const {
uInt hash = 16777619;
Int hash = 16777619;
for (auto x : p) {
hash *= 2166136261;
hash ^= x;
......@@ -73,5 +65,4 @@ template <uInt dimension> struct IntArrayHash {
}
};
// ATen scalar type matching `Int` (int32_t) for integer coordinate tensors.
#define at_kINT at::kInt
......@@ -5,66 +5,58 @@
// LICENSE file in the root directory of this source tree.
#include <array>
#include <tuple>
// Using 32 bit integers for coordinates and memory calculations.
// They could be replaced with 64 bit integers.
// Advantages of 64 bit:
// - support for nFeatures * nActiveSites > 2^32 per hidden layer per batch
// Disadvantages:
// - larger, and therefore slower, data copies from CPU -> GPU
// - more device memory needed to store sparseconvnet 'rulebooks'
// - not really needed until GPUs have >> 32GB RAM
// Using 64 bit integers for coordinates and memory calculations.
// 64-bit coordinate type (this is the 64-bit variant of the header).
using Int = int64_t;
// Unsigned companion type; the value uInt_MAX is a sentinel for 'non-existent'.
using uInt = uint64_t;
// ULL suffix is required: 2^64-1 does not fit in any signed integer type, so
// the unsuffixed decimal literal is ill-formed.
const uInt uInt_MAX = 18446744073709551615ULL; // 2^64-1
const uInt Int_MAX = 9223372036854775807ULL;   // 2^63-1
// Point<dimension> is a point in the d-dimensional integer lattice
// (i.e. square-grid/cubic-grid, ...)
template <Int dimension> using Point = std::array<Int, dimension>;
template <uInt dimension> Point<dimension> LongTensorToPoint(THLongTensor *t) {
template <Int dimension>
Point<dimension> LongTensorToPoint(/*long*/ at::Tensor &t) {
Point<dimension> p;
long *td = THLongTensor_data(t);
for (int i = 0; i < dimension; i++)
long *td = t.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
return p;
}
template <uInt dimension>
Point<2 * dimension> TwoLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1) {
template <Int dimension>
Point<2 * dimension> TwoLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1) {
Point<2 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
return p;
}
template <uInt dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(THLongTensor *t0, THLongTensor *t1,
THLongTensor *t2) {
template <Int dimension>
Point<3 * dimension> ThreeLongTensorsToPoint(/*long*/ at::Tensor &t0,
/*long*/ at::Tensor &t1,
/*long*/ at::Tensor &t2) {
Point<3 * dimension> p;
long *td;
td = THLongTensor_data(t0);
for (int i = 0; i < dimension; i++)
td = t0.data<long>();
for (Int i = 0; i < dimension; i++)
p[i] = td[i];
td = THLongTensor_data(t1);
for (int i = 0; i < dimension; i++)
td = t1.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + dimension] = td[i];
td = THLongTensor_data(t2);
for (int i = 0; i < dimension; i++)
td = t2.data<long>();
for (Int i = 0; i < dimension; i++)
p[i + 2 * dimension] = td[i];
return p;
}
// FNV Hash function for Point<dimension>
template <uInt dimension> struct IntArrayHash {
template <Int dimension> struct IntArrayHash {
std::size_t operator()(Point<dimension> const &p) const {
uInt hash = 14695981039346656037;
Int hash = -3750763034362895579; // 14695981039346656037;
for (auto x : p) {
hash *= 1099511628211;
hash ^= x;
......@@ -73,5 +65,4 @@ template <uInt dimension> struct IntArrayHash {
}
};
// ATen scalar type matching `Int` (int64_t) for integer coordinate tensors.
#define at_kINT at::kLong
......@@ -6,7 +6,6 @@
#ifndef ACTIVEPOOLING_H
#define ACTIVEPOOLING_H
#include "../SparseConvNet.h"
// Return the maximum number of active sites in the batch
// rules has size 1.
......@@ -14,14 +13,14 @@
// First column is number of active sites for that sample (<= maxActive)
// Remaining maxActive columns give the active sites, zero padded.
template <uInt dimension>
template <Int dimension>
void activePoolingRules(SparseGrids<dimension> &SGs, RuleBook &rules) {
rules.clear();
rules.resize(2);
auto &r = rules[0];
uInt maxActive = 0;
Int maxActive = 0;
for (auto &sg : SGs)
maxActive = std::max(maxActive, (uInt)sg.mp.size());
maxActive = std::max(maxActive, (Int)sg.mp.size());
for (auto &sg : SGs) {
r.push_back(sg.mp.size());
for (auto &iter : sg.mp)
......
......@@ -8,7 +8,7 @@
#define CONVOLUTIONRULES_H
#include "RectangularRegions.h"
template <uInt dimension>
template <Int dimension>
void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
SparseGrid<dimension> &outputGrid,
RuleBook &rules, long *size,
......@@ -17,10 +17,11 @@ void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
rules.resize(volume<dimension>(size));
for (auto const &inIter : inputGrid.mp) {
for (auto j : OutputRegionCalculator<dimension>(inIter.first, size, stride,
outputSpatialSize)) {
auto outRegion = OutputRegionCalculator<dimension>(
inIter.first, size, stride, outputSpatialSize);
for (auto j : outRegion) {
auto inRegion = InputRegionCalculator<dimension>(j, size, stride);
uInt rulesOffset = inRegion.offset(inIter.first);
Int rulesOffset = inRegion.offset(inIter.first);
auto outIter = outputGrid.mp.find(j);
if (outIter == outputGrid.mp.end()) {
outIter =
......@@ -32,8 +33,8 @@ void Convolution_InputSgToRulesAndOutputSg(SparseGrid<dimension> &inputGrid,
}
}
template <uInt dimension>
uInt Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
template <Int dimension>
Int Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize,
long *filterStride,
......@@ -41,10 +42,10 @@ uInt Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
long *output_spatialSize) {
rules.clear();
output_SGs.clear();
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
output_SGs.resize(batchSize);
uInt output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) {
Int output_nActive = 0;
for (Int i = 0; i < batchSize; i++) {
auto &iSG = input_SGs[i];
auto &oSG = output_SGs[i];
oSG.ctr = output_nActive;
......@@ -57,43 +58,43 @@ uInt Convolution_InputSgsToRulesAndOutputSgs(SparseGrids<dimension> &input_SGs,
return output_nActive;
}
template <uInt dimension>
uInt Convolution_InputSgsToRulesAndOutputSgs_OMP(
template <Int dimension>
Int Convolution_InputSgsToRulesAndOutputSgs_OMP(
SparseGrids<dimension> &input_SGs, SparseGrids<dimension> &output_SGs,
RuleBook &rules, long *filterSize, long *filterStride,
long *input_spatialSize, long *output_spatialSize) {
rules.clear();
rules.resize(volume<dimension>(filterSize));
output_SGs.clear();
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
output_SGs.resize(batchSize);
std::vector<RuleBook> rbs(batchSize);
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < batchSize; i++)
Convolution_InputSgToRulesAndOutputSg<dimension>(
input_SGs[i], output_SGs[i], rbs[i], filterSize, filterStride,
input_spatialSize, output_spatialSize);
}
uInt output_nActive = 0;
for (uInt i = 0; i < batchSize; i++) {
Int output_nActive = 0;
for (Int i = 0; i < batchSize; i++) {
// Parallel assignment:
// output_nActive <- output_nActive+output_SGs[i].ctr
// output_SGs[i].ctr <- output_nActive
uInt tmp = output_nActive;
Int tmp = output_nActive;
output_nActive += output_SGs[i].ctr;
output_SGs[i].ctr = tmp;
}
{
uInt i;
Int i;
#pragma omp parallel for private(i)
for (i = 0; i < rules.size(); i++) {
for (i = 0; i < (Int)rules.size(); i++) {
auto &R = rules[i];
for (uInt j = 0; j < batchSize; j++) {
for (Int j = 0; j < batchSize; j++) {
auto &r = rbs[j][i];
auto offset = output_SGs[j].ctr;
for (uInt k = 0; k < r.size();) {
for (Int k = 0; k < (Int)r.size();) {
R.push_back(r[k++]);
R.push_back(r[k++] + offset);
}
......@@ -105,19 +106,19 @@ uInt Convolution_InputSgsToRulesAndOutputSgs_OMP(
// for each active site, list of (inputFeatureNumber,batchIdx, spatialOffset)
// triples
template <uInt dimension>
template <Int dimension>
void SparseToDense_InputSgsToRulesAndOutputSgs(
SparseGrids<dimension> &input_SGs, RuleBook &rules, long *spatialSize) {
uInt batchSize = input_SGs.size();
Int batchSize = input_SGs.size();
rules.clear();
rules.resize(batchSize);
Point<dimension> lb, ub;