Commit 66986767 authored by Benjamin Thomas Graham's avatar Benjamin Thomas Graham
Browse files

fixes

parent edf89af3
...@@ -17,14 +17,14 @@ void InputLayer_ForwardPass(T *input_features, T *output_features, Int nRows, ...@@ -17,14 +17,14 @@ void InputLayer_ForwardPass(T *input_features, T *output_features, Int nRows,
for (row = 0; row < nRows; row++) { for (row = 0; row < nRows; row++) {
auto nActive = rules[0]; auto nActive = rules[0];
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
auto out_f = output_features + row * nPlanes;
auto r = rules + row * (1 + maxActive);
for (Int i = 1; i <= nActive; ++i) { for (Int i = 1; i <= nActive; ++i) {
auto in_f = input_features + nPlanes * rules[i]; auto in_f = input_features + r[i] * nPlanes;
for (Int plane = 0; plane < nPlanes; plane++) { for (Int plane = 0; plane < nPlanes; plane++) {
output_features[plane] += multiplier * in_f[plane]; out_f[plane] += multiplier * in_f[plane];
} }
} }
output_features += nPlanes;
rules += 1 + maxActive;
} }
} }
template <typename T> template <typename T>
...@@ -36,13 +36,13 @@ void InputLayer_BackwardPass(T *d_input_features, T *d_output_features, ...@@ -36,13 +36,13 @@ void InputLayer_BackwardPass(T *d_input_features, T *d_output_features,
for (row = 0; row < nRows; row++) { for (row = 0; row < nRows; row++) {
auto nActive = rules[0]; auto nActive = rules[0];
T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
auto d_out_f = d_output_features + row * nPlanes;
auto r = rules + row * (1 + maxActive);
for (Int i = 1; i <= nActive; ++i) { for (Int i = 1; i <= nActive; ++i) {
auto d_in_f = d_input_features + nPlanes * rules[i]; auto d_in_f = d_input_features + r[i] * nPlanes;
for (Int plane = 0; plane < nPlanes; plane++) for (Int plane = 0; plane < nPlanes; plane++)
d_in_f[plane] += multiplier * d_output_features[plane]; d_in_f[plane] += multiplier * d_out_f[plane];
} }
d_output_features += nPlanes;
rules += 1 + maxActive;
} }
} }
......
...@@ -5,10 +5,9 @@ ...@@ -5,10 +5,9 @@
// LICENSE file in the root directory of this source tree. // LICENSE file in the root directory of this source tree.
template <typename T> template <typename T>
void Convolution_fp_bias(T *of, T *b, Int nPlanes, Int nActiveOut); void Convolution_fp_bias(T *oF, T *b, Int nPlanes, Int nActive);
template <typename T> template <typename T>
void Convolution_bp_bias(T *matrix, T *target, Int nRows, Int nColumns, void Convolution_bp_bias(T *d_oF, T *d_b, Int nPlanes, Int nActive);
Int nCOLUMNS);
template <typename T> template <typename T>
double dConvolution_forward2(T *inFeatures, T *outFeatures, T *w, double dConvolution_forward2(T *inFeatures, T *outFeatures, T *w,
RuleBook _rules, Int input_nPlanes, RuleBook _rules, Int input_nPlanes,
...@@ -84,7 +83,7 @@ void cuda_Convolution_backward( ...@@ -84,7 +83,7 @@ void cuda_Convolution_backward(
if (d_bias.numel()) { if (d_bias.numel()) {
auto db = d_bias.data<T>(); auto db = d_bias.data<T>();
Convolution_bp_bias(doF, db, op, op, nActiveOut); Convolution_bp_bias(doF, db, op, nActiveOut);
} }
} }
} }
...@@ -147,7 +146,7 @@ void cuda_SubmanifoldConvolution_backward( ...@@ -147,7 +146,7 @@ void cuda_SubmanifoldConvolution_backward(
if (d_bias.numel()) { if (d_bias.numel()) {
auto db = d_bias.data<T>(); auto db = d_bias.data<T>();
Convolution_bp_bias(doF, db, op, op, nActive); Convolution_bp_bias(doF, db, op, nActive);
} }
} }
} }
...@@ -216,7 +215,7 @@ void cuda_FullConvolution_backward( ...@@ -216,7 +215,7 @@ void cuda_FullConvolution_backward(
if (d_bias.numel()) { if (d_bias.numel()) {
auto db = d_bias.data<T>(); auto db = d_bias.data<T>();
Convolution_bp_bias(doF, db, op, op, nActiveOut); Convolution_bp_bias(doF, db, op, nActiveOut);
} }
} }
} }
...@@ -283,7 +282,7 @@ void cuda_RandomizedStrideConvolution_backward( ...@@ -283,7 +282,7 @@ void cuda_RandomizedStrideConvolution_backward(
if (d_bias.numel()) { if (d_bias.numel()) {
auto db = d_bias.data<T>(); auto db = d_bias.data<T>();
Convolution_bp_bias(doF, db, op, op, nActiveOut); Convolution_bp_bias(doF, db, op, nActiveOut);
} }
} }
} }
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
// LICENSE file in the root directory of this source tree. // LICENSE file in the root directory of this source tree.
#include "RuleBookIterator.h" #include "RuleBookIterator.h"
#define TACC double
template <typename T> template <typename T>
__global__ void Convolution_fp_bias_(T *output_features, T *bias, Int nPlanes, __global__ void Convolution_fp_bias_(T *output_features, T *bias, Int nPlanes,
...@@ -30,24 +31,21 @@ void Convolution_fp_bias(T *oF, T *b, Int nPlanes, Int nActive) { ...@@ -30,24 +31,21 @@ void Convolution_fp_bias(T *oF, T *b, Int nPlanes, Int nActive) {
} }
template <typename T>
__global__ void Convolution_bp_bias_(T *d_oF, T *d_b, Int nPlanes, Int nActive) {
  // Bias gradient: column-sum of the (nActive x nPlanes) gradient matrix d_oF.
  // One thread per plane (column); blockIdx.y partitions the rows so several
  // blocks accumulate partial sums for the same plane, merged via atomicAdd.
  // Launch contract: blockDim.x threads cover 32 consecutive planes per
  // blockIdx.x; the host wrapper guarantees the plane index stays in range.
  Int plane = blockIdx.x * 32 + threadIdx.x;
  d_oF += plane;
  // Accumulate in TACC (double) for precision; narrowed to T on the atomicAdd.
  TACC partial = 0;
  for (Int row = blockIdx.y; row < nActive; row += gridDim.y)
    partial += d_oF[row * nPlanes];
  // NOTE(review): atomicAdd on a double target requires SM60+ when T is
  // double — confirm the build's minimum compute capability.
  atomicAdd(&d_b[plane], partial);
}
template <typename T>
void Convolution_bp_bias(T *d_oF, T *d_b, Int nPlanes, Int nActive) {
  // Host launcher for the bias-gradient column sum. Planes are processed in
  // groups of 32 (one warp-wide block per group, 32 row-partitions in y);
  // a second launch mops up the nPlanes % 32 remainder with a narrower block.
  // NOTE(review): results are accumulated into d_b with atomicAdd — callers
  // presumably pass a zero-initialized d_b; verify at the call sites.
  Int fullGroups = nPlanes / 32;
  if (fullGroups > 0)
    Convolution_bp_bias_<<<dim3(fullGroups, 32), 32>>>(d_oF, d_b, nPlanes,
                                                       nActive);
  Int remainder = nPlanes % 32;
  if (remainder > 0) {
    Int offset = fullGroups * 32;
    Convolution_bp_bias_<<<dim3(1, 32), remainder>>>(d_oF + offset,
                                                     d_b + offset, nPlanes,
                                                     nActive);
  }
}
...@@ -70,7 +68,7 @@ dConvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules, ...@@ -70,7 +68,7 @@ dConvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules,
outFeatures += n * K; outFeatures += n * K;
w += n * K; w += n * K;
T O[V]; TACC O[V];
__shared__ T W[K][K]; __shared__ T W[K][K];
__shared__ T I[K][K]; __shared__ T I[K][K];
Int R0[V]; Int R0[V];
...@@ -138,7 +136,7 @@ dConvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules, ...@@ -138,7 +136,7 @@ dConvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
outFeatures += n * K; outFeatures += n * K;
w += n * K; w += n * K;
T O[V]; TACC O[V];
__shared__ T W[K][K]; __shared__ T W[K][K];
__shared__ T I[K][K]; __shared__ T I[K][K];
Int R0[V]; Int R0[V];
...@@ -253,8 +251,8 @@ dConvolution_KMxKN_backward_dW_A(T *inFeatures, T *dInFeatures, T *dOutFeatures, ...@@ -253,8 +251,8 @@ dConvolution_KMxKN_backward_dW_A(T *inFeatures, T *dInFeatures, T *dOutFeatures,
w += m * K * output_nPlanes; w += m * K * output_nPlanes;
dw += m * K * output_nPlanes; dw += m * K * output_nPlanes;
T dI[V]; TACC dI[V];
T dW[V]; TACC dW[V];
__shared__ T I[K][K]; __shared__ T I[K][K];
__shared__ T dO[K][K]; __shared__ T dO[K][K];
__shared__ T W[K][K]; __shared__ T W[K][K];
...@@ -330,8 +328,8 @@ dConvolution_KMxKN_backward_dW_B(T *inFeatures, T *dInFeatures, T *dOutFeatures, ...@@ -330,8 +328,8 @@ dConvolution_KMxKN_backward_dW_B(T *inFeatures, T *dInFeatures, T *dOutFeatures,
w += m * K * output_nPlanes; w += m * K * output_nPlanes;
dw += m * K * output_nPlanes; dw += m * K * output_nPlanes;
T dI[V]; TACC dI[V];
T dW[V]; TACC dW[V];
__shared__ T I[K][K]; __shared__ T I[K][K];
__shared__ T dO[K][K]; __shared__ T dO[K][K];
__shared__ T W[K][K]; __shared__ T W[K][K];
...@@ -449,7 +447,7 @@ dConvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules, ...@@ -449,7 +447,7 @@ dConvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules,
w += n * K; w += n * K;
Int KO = min(K, output_nPlanes - K * n); Int KO = min(K, output_nPlanes - K * n);
T O[V]; TACC O[V];
__shared__ T W[K][K]; __shared__ T W[K][K];
__shared__ T I[K][K]; __shared__ T I[K][K];
__shared__ Int R[K * 2]; __shared__ Int R[K * 2];
...@@ -525,8 +523,8 @@ dConvolution_KMxKN_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures, ...@@ -525,8 +523,8 @@ dConvolution_KMxKN_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
dw += m * K * output_nPlanes; dw += m * K * output_nPlanes;
Int KI = min(K, input_nPlanes - K * m); Int KI = min(K, input_nPlanes - K * m);
T dI[V]; TACC dI[V];
T dW[V]; TACC dW[V];
__shared__ T I[K][K]; __shared__ T I[K][K];
__shared__ T dO[K][K]; __shared__ T dO[K][K];
__shared__ T W[K][K]; __shared__ T W[K][K];
...@@ -650,3 +648,4 @@ void dConvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures, ...@@ -650,3 +648,4 @@ void dConvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
, w += c; dw += c;) , w += c; dw += c;)
} }
} }
#undef TACC
\ No newline at end of file
...@@ -78,7 +78,7 @@ void cuda_Deconvolution_backward( ...@@ -78,7 +78,7 @@ void cuda_Deconvolution_backward(
dDeconvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip, op, op); dDeconvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip, op, op);
if (d_bias.numel()) { if (d_bias.numel()) {
auto db = d_bias.data<T>(); auto db = d_bias.data<T>();
Convolution_bp_bias(doF, db, op, op, nActiveOut); Convolution_bp_bias(doF, db, op, nActiveOut);
} }
} }
} }
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
// This source code is licensed under the license found in the // This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree. // LICENSE file in the root directory of this source tree.
#define TACC double
template <typename T, Int K, Int V> template <typename T, Int K, Int V>
__global__ void __global__ void
dDeconvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules, dDeconvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules,
...@@ -23,7 +25,7 @@ dDeconvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules, ...@@ -23,7 +25,7 @@ dDeconvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules,
outFeatures += n * K; outFeatures += n * K;
w += n * K; w += n * K;
T O[V]; TACC O[V];
__shared__ T W[K][K]; __shared__ T W[K][K];
__shared__ T I[K][K]; __shared__ T I[K][K];
Int R0[V]; Int R0[V];
...@@ -91,7 +93,7 @@ dDeconvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules, ...@@ -91,7 +93,7 @@ dDeconvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
outFeatures += n * K; outFeatures += n * K;
w += n * K; w += n * K;
T O[V]; TACC O[V];
__shared__ T W[K][K]; __shared__ T W[K][K];
__shared__ T I[K][K]; __shared__ T I[K][K];
Int R0[V]; Int R0[V];
...@@ -205,8 +207,8 @@ __global__ void dDeconvolution_KMxKN_backward_dW_A( ...@@ -205,8 +207,8 @@ __global__ void dDeconvolution_KMxKN_backward_dW_A(
w += m * K * output_nPlanes; w += m * K * output_nPlanes;
dw += m * K * output_nPlanes; dw += m * K * output_nPlanes;
T dI[V]; TACC dI[V];
T dW[V]; TACC dW[V];
__shared__ T I[K][K]; __shared__ T I[K][K];
__shared__ T dO[K][K]; __shared__ T dO[K][K];
__shared__ T W[K][K]; __shared__ T W[K][K];
...@@ -281,8 +283,8 @@ __global__ void dDeconvolution_KMxKN_backward_dW_B( ...@@ -281,8 +283,8 @@ __global__ void dDeconvolution_KMxKN_backward_dW_B(
w += m * K * output_nPlanes; w += m * K * output_nPlanes;
dw += m * K * output_nPlanes; dw += m * K * output_nPlanes;
T dI[V]; TACC dI[V];
T dW[V]; TACC dW[V];
__shared__ T I[K][K]; __shared__ T I[K][K];
__shared__ T dO[K][K]; __shared__ T dO[K][K];
__shared__ T W[K][K]; __shared__ T W[K][K];
...@@ -400,7 +402,7 @@ dDeconvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules, ...@@ -400,7 +402,7 @@ dDeconvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules,
w += n * K; w += n * K;
Int KO = min(K, output_nPlanes - K * n); Int KO = min(K, output_nPlanes - K * n);
T O[V]; TACC O[V];
__shared__ T W[K][K]; __shared__ T W[K][K];
__shared__ T I[K][K]; __shared__ T I[K][K];
__shared__ Int R[K * 2]; __shared__ Int R[K * 2];
...@@ -476,8 +478,8 @@ dDeconvolution_KMxKN_backward_dW2(T *inFeatures, T *dInFeatures, ...@@ -476,8 +478,8 @@ dDeconvolution_KMxKN_backward_dW2(T *inFeatures, T *dInFeatures,
dw += m * K * output_nPlanes; dw += m * K * output_nPlanes;
Int KI = min(K, input_nPlanes - K * m); Int KI = min(K, input_nPlanes - K * m);
T dI[V]; TACC dI[V];
T dW[V]; TACC dW[V];
__shared__ T I[K][K]; __shared__ T I[K][K];
__shared__ T dO[K][K]; __shared__ T dO[K][K];
__shared__ T W[K][K]; __shared__ T W[K][K];
...@@ -601,3 +603,5 @@ void dDeconvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures, ...@@ -601,3 +603,5 @@ void dDeconvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
, w += c; dw += c;) , w += c; dw += c;)
} }
} }
#undef TACC
\ No newline at end of file
...@@ -43,10 +43,10 @@ template void cuda_AveragePooling_BackwardPass<float>( ...@@ -43,10 +43,10 @@ template void cuda_AveragePooling_BackwardPass<float>(
float *d_input_features, float *d_output_features, Int nPlanes, float *d_input_features, float *d_output_features, Int nPlanes,
Int input_stride, Int output_stride, RuleBook _rules, Int filterVolume); Int input_stride, Int output_stride, RuleBook _rules, Int filterVolume);
template void Convolution_fp_bias<float>(float *of, float *b, Int op, template void Convolution_fp_bias<float>(float *oF, float *b, Int nPlanes,
Int nActive); Int nActive);
template void Convolution_bp_bias<float>(float *matrix, float *target, template void Convolution_bp_bias<float>(float *d_oF, float *d_b,
Int nRows, Int nColumns, Int nCOLUMNS); Int nPlanes, Int nActive);
template double dConvolution_forward2<float>( template double dConvolution_forward2<float>(
float *inFeatures, float *outFeatures, float *w, RuleBook _rules, float *inFeatures, float *outFeatures, float *w, RuleBook _rules,
Int input_nPlanes, Int input_stride, Int output_nPlanes, Int output_stride); Int input_nPlanes, Int input_stride, Int output_nPlanes, Int output_stride);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
forward_pass_multiplyAdd_count = 0 forward_pass_multiplyAdd_count = 0
forward_pass_hidden_states = 0 forward_pass_hidden_states = 0
from .activations import Tanh, Sigmoid, ReLU, ELU, SELU, BatchNormELU from .activations import Tanh, Sigmoid, ReLU, LeakyReLU, ELU, SELU, BatchNormELU
from .averagePooling import AveragePooling from .averagePooling import AveragePooling
from .batchNormalization import BatchNormalization, BatchNormReLU, BatchNormLeakyReLU from .batchNormalization import BatchNormalization, BatchNormReLU, BatchNormLeakyReLU
from .classificationTrainValidate import ClassificationTrainValidate from .classificationTrainValidate import ClassificationTrainValidate
......
...@@ -22,6 +22,18 @@ class Sigmoid(Module): ...@@ -22,6 +22,18 @@ class Sigmoid(Module):
return output return output
class LeakyReLU(Module):
    """Pointwise leaky-ReLU activation for SparseConvNetTensor inputs.

    Applies ``F.leaky_relu`` to the tensor's ``features`` and passes the
    ``metadata`` and ``spatial_size`` through unchanged (the sparsity
    pattern is not affected by a pointwise activation).
    """

    def __init__(self, leak=1.0 / 3):
        # 1.0/3 rather than 1/3: under Python 2 integer division 1/3 == 0,
        # which would silently degrade this layer to a plain ReLU. Under
        # Python 3 the value is identical (0.333...).
        Module.__init__(self)
        self.leak = leak  # negative-slope coefficient passed to F.leaky_relu

    def forward(self, input):
        output = SparseConvNetTensor()
        output.features = F.leaky_relu(input.features, self.leak)
        # Sparsity structure is unchanged; share metadata with the input.
        output.metadata = input.metadata
        output.spatial_size = input.spatial_size
        return output
class Tanh(Module): class Tanh(Module):
def forward(self, input): def forward(self, input):
output = SparseConvNetTensor() output = SparseConvNetTensor()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment