Commit 19aca522 authored by Benjamin Thomas Graham
Browse files

Fix CPU IOLayers

parent c9e5e6cd
...@@ -15,10 +15,10 @@ void InputLayer_ForwardPass(T *input_features, T *output_features, Int nRows, ...@@ -15,10 +15,10 @@ void InputLayer_ForwardPass(T *input_features, T *output_features, Int nRows,
Int row; Int row;
#pragma omp parallel for private(row) #pragma omp parallel for private(row)
for (row = 0; row < nRows; row++) { for (row = 0; row < nRows; row++) {
auto nActive = rules[0]; auto r = rules + row * (1 + maxActive);
auto nActive = r[0];
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
auto out_f = output_features + row * nPlanes; auto out_f = output_features + row * nPlanes;
auto r = rules + row * (1 + maxActive);
for (Int i = 1; i <= nActive; ++i) { for (Int i = 1; i <= nActive; ++i) {
auto in_f = input_features + r[i] * nPlanes; auto in_f = input_features + r[i] * nPlanes;
for (Int plane = 0; plane < nPlanes; plane++) { for (Int plane = 0; plane < nPlanes; plane++) {
...@@ -34,10 +34,10 @@ void InputLayer_BackwardPass(T *d_input_features, T *d_output_features, ...@@ -34,10 +34,10 @@ void InputLayer_BackwardPass(T *d_input_features, T *d_output_features,
Int row; Int row;
#pragma omp parallel for private(row) #pragma omp parallel for private(row)
for (row = 0; row < nRows; row++) { for (row = 0; row < nRows; row++) {
auto nActive = rules[0]; auto r = rules + row * (1 + maxActive);
auto nActive = r[0];
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
auto d_out_f = d_output_features + row * nPlanes; auto d_out_f = d_output_features + row * nPlanes;
auto r = rules + row * (1 + maxActive);
for (Int i = 1; i <= nActive; ++i) { for (Int i = 1; i <= nActive; ++i) {
auto d_in_f = d_input_features + r[i] * nPlanes; auto d_in_f = d_input_features + r[i] * nPlanes;
for (Int plane = 0; plane < nPlanes; plane++) for (Int plane = 0; plane < nPlanes; plane++)
......
...@@ -14,16 +14,16 @@ ...@@ -14,16 +14,16 @@
#define RULEBOOKITERATOR(X, Y) \ #define RULEBOOKITERATOR(X, Y) \
{ \ { \
Int rbMaxSize = 0; \ Int rbMaxSize = 0; \
for (auto &r : _rules) \ for (auto &r : _rules) \
rbMaxSize = std::max(rbMaxSize, (Int)r.size()); \ rbMaxSize = std::max(rbMaxSize, (Int)r.size()); \
at::Tensor rulesBuffer = at::CUDA(at_kINT).tensor({rbMaxSize}); \ at::Tensor rulesBuffer = at::CUDA(at_kINT).tensor({rbMaxSize}); \
Int *rbB = rulesBuffer.data<Int>(); \ Int *rbB = rulesBuffer.data<Int>(); \
for (int k = 0; k < _rules.size(); ++k) { \ for (int k = 0; k < _rules.size(); ++k) { \
auto &r = _rules[k]; \ auto &r = _rules[k]; \
Int nHotB = r.size() / 2; \ Int nHotB = r.size() / 2; \
if (nHotB) { \ if (nHotB) { \
cudaMemcpy(rbB, &r[0], sizeof(Int) * 2 * nHotB, \ cudaMemcpy(rbB, &r[0], sizeof(Int) * 2 * nHotB, \
cudaMemcpyHostToDevice); \ cudaMemcpyHostToDevice); \
X \ X \
} \ } \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment