Commit 66986767 authored by Benjamin Thomas Graham

fixes

parent edf89af3
@@ -17,14 +17,14 @@ void InputLayer_ForwardPass(T *input_features, T *output_features, Int nRows,
   for (row = 0; row < nRows; row++) {
-    auto nActive = rules[0];
+    auto r = rules + row * (1 + maxActive);
+    auto nActive = r[0];
     T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
+    auto out_f = output_features + row * nPlanes;
     for (Int i = 1; i <= nActive; ++i) {
-      auto in_f = input_features + nPlanes * rules[i];
+      auto in_f = input_features + r[i] * nPlanes;
       for (Int plane = 0; plane < nPlanes; plane++) {
-        output_features[plane] += multiplier * in_f[plane];
+        out_f[plane] += multiplier * in_f[plane];
      }
    }
-    output_features += nPlanes;
-    rules += 1 + maxActive;
  }
 }
 template <typename T>
@@ -36,13 +36,13 @@ void InputLayer_BackwardPass(T *d_input_features, T *d_output_features,
   for (row = 0; row < nRows; row++) {
-    auto nActive = rules[0];
+    auto r = rules + row * (1 + maxActive);
+    auto nActive = r[0];
     T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
+    auto d_out_f = d_output_features + row * nPlanes;
     for (Int i = 1; i <= nActive; ++i) {
-      auto d_in_f = d_input_features + nPlanes * rules[i];
+      auto d_in_f = d_input_features + r[i] * nPlanes;
       for (Int plane = 0; plane < nPlanes; plane++)
-        d_in_f[plane] += multiplier * d_output_features[plane];
+        d_in_f[plane] += multiplier * d_out_f[plane];
    }
  }
 }
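
Both hunks make the same fix: the pointer bumps at the bottom of the row loop (`output_features += nPlanes; rules += 1 + maxActive;`) are replaced by offsets derived from `row` alone, and `nActive` is read through the per-row rule pointer `r`. With no loop-carried state, the rows are independent and can be processed in any order, including in parallel. A minimal self-contained sketch of the two indexing styles (hypothetical function names, not the library's code):

```cpp
// Sketch: the same row-wise fill written two ways.
// fill_rows_carried: each iteration leaves state (the advanced pointer)
// for the next one, so rows must run strictly in order.
void fill_rows_carried(float *out, int nRows, int nPlanes) {
  for (int row = 0; row < nRows; row++) {
    for (int p = 0; p < nPlanes; p++)
      out[p] = 0.0f;
    out += nPlanes; // loop-carried pointer update
  }
}

// fill_rows_indexed: each iteration derives its pointer from `row`,
// so iterations are independent, which is the pattern this commit adopts.
void fill_rows_indexed(float *out, int nRows, int nPlanes) {
  for (int row = 0; row < nRows; row++) {
    float *out_f = out + row * nPlanes; // index-derived offset
    for (int p = 0; p < nPlanes; p++)
      out_f[p] = 0.0f;
  }
}
```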
@@ -5,10 +5,9 @@
 // LICENSE file in the root directory of this source tree.
 template <typename T>
-void Convolution_fp_bias(T *of, T *b, Int nPlanes, Int nActiveOut);
+void Convolution_fp_bias(T *oF, T *b, Int nPlanes, Int nActive);
 template <typename T>
-void Convolution_bp_bias(T *matrix, T *target, Int nRows, Int nColumns,
-                         Int nCOLUMNS);
+void Convolution_bp_bias(T *d_oF, T *d_b, Int nPlanes, Int nActive);
 template <typename T>
 double dConvolution_forward2(T *inFeatures, T *outFeatures, T *w,
                              RuleBook _rules, Int input_nPlanes,
@@ -84,7 +83,7 @@ void cuda_Convolution_backward(
     if (d_bias.numel()) {
       auto db = d_bias.data<T>();
-      Convolution_bp_bias(doF, db, op, op, nActiveOut);
+      Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
 }
@@ -147,7 +146,7 @@ void cuda_SubmanifoldConvolution_backward(
     if (d_bias.numel()) {
       auto db = d_bias.data<T>();
-      Convolution_bp_bias(doF, db, op, op, nActive);
+      Convolution_bp_bias(doF, db, op, nActive);
     }
   }
 }
@@ -216,7 +215,7 @@ void cuda_FullConvolution_backward(
     if (d_bias.numel()) {
       auto db = d_bias.data<T>();
-      Convolution_bp_bias(doF, db, op, op, nActiveOut);
+      Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
 }
@@ -283,7 +282,7 @@ void cuda_RandomizedStrideConvolution_backward(
     if (d_bias.numel()) {
       auto db = d_bias.data<T>();
-      Convolution_bp_bias(doF, db, op, op, nActiveOut);
+      Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
 }
@@ -5,6 +5,7 @@
 // LICENSE file in the root directory of this source tree.
 #include "RuleBookIterator.h"
+#define TACC double
 template <typename T>
 __global__ void Convolution_fp_bias_(T *output_features, T *bias, Int nPlanes,
@@ -30,24 +31,21 @@ void Convolution_fp_bias(T *oF, T *b, Int nPlanes, Int nActive) {
 }
 template <typename T>
-__global__ void dColumnSum(T *matrix, T *target, Int nRows, Int nColumns,
-                           Int nCOLUMNS) {
-  Int i = blockIdx.x * 32 + threadIdx.x;
-  T t = 0;
-  for (Int j = blockIdx.y; j < nRows; j += 32)
-    t += matrix[j * nCOLUMNS + i];
-  atomicAdd(&target[i], t);
+__global__ void Convolution_bp_bias_(T *d_oF, T *d_b, Int nPlanes,
+                                     Int nActive) {
+  Int n = blockIdx.x * 32 + threadIdx.x;
+  d_oF += n;
+  TACC t = 0;
+  for (Int row = blockIdx.y; row < nActive; row += gridDim.y)
+    t += d_oF[row * nPlanes];
+  atomicAdd(&d_b[n], t);
 }
 template <typename T>
-void Convolution_bp_bias(T *matrix, T *target, Int nRows, Int nColumns,
-                         Int nCOLUMNS) {
-  if (nColumns / 32 > 0)
-    dColumnSum<<<dim3(nColumns / 32, 32), 32>>>(matrix, target, nRows, nColumns,
-                                                nCOLUMNS);
-  if (nColumns % 32 > 0) {
-    Int o = nColumns / 32 * 32;
-    dColumnSum<<<dim3(1, 32), nColumns - o>>>(matrix + o, target + o, nRows,
-                                              nColumns, nCOLUMNS);
+void Convolution_bp_bias(T *d_oF, T *d_b, Int nPlanes, Int nActive) {
+  if (nPlanes / 32 > 0)
+    Convolution_bp_bias_<<<dim3(nPlanes / 32, 32), 32>>>(d_oF, d_b, nPlanes,
+                                                         nActive);
+  if (nPlanes % 32 > 0) {
+    Int o = nPlanes / 32 * 32;
+    Convolution_bp_bias_<<<dim3(1, 32), nPlanes - o>>>(d_oF + o, d_b + o,
+                                                       nPlanes, nActive);
  }
 }
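
The rewritten kernel computes the bias gradient as a column sum over the active output sites: thread `n` owns output plane `n`, each y-block accumulates a `gridDim.y`-strided partial sum in double precision (`TACC`), and the partials are merged with `atomicAdd`. A serial reference for the quantity being computed, a sketch for clarity rather than part of the commit:

```cpp
// Serial sketch: d_b[plane] += sum over rows of d_oF[row][plane],
// assuming d_oF is stored row-major with shape nActive x nPlanes.
template <typename T>
void convolution_bp_bias_reference(const T *d_oF, T *d_b, int nPlanes,
                                   int nActive) {
  for (int plane = 0; plane < nPlanes; plane++) {
    double t = 0.0; // accumulate in double, mirroring the kernel's TACC
    for (int row = 0; row < nActive; row++)
      t += d_oF[row * nPlanes + plane];
    d_b[plane] += static_cast<T>(t); // kernel merges partials via atomicAdd
  }
}
```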
@@ -70,7 +68,7 @@ dConvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules,
   outFeatures += n * K;
   w += n * K;
-  T O[V];
+  TACC O[V];
   __shared__ T W[K][K];
   __shared__ T I[K][K];
   Int R0[V];
@@ -138,7 +136,7 @@ dConvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
   outFeatures += n * K;
   w += n * K;
-  T O[V];
+  TACC O[V];
   __shared__ T W[K][K];
   __shared__ T I[K][K];
   Int R0[V];
@@ -253,8 +251,8 @@ dConvolution_KMxKN_backward_dW_A(T *inFeatures, T *dInFeatures, T *dOutFeatures,
   w += m * K * output_nPlanes;
   dw += m * K * output_nPlanes;
-  T dI[V];
-  T dW[V];
+  TACC dI[V];
+  TACC dW[V];
   __shared__ T I[K][K];
   __shared__ T dO[K][K];
   __shared__ T W[K][K];
@@ -330,8 +328,8 @@ dConvolution_KMxKN_backward_dW_B(T *inFeatures, T *dInFeatures, T *dOutFeatures,
   w += m * K * output_nPlanes;
   dw += m * K * output_nPlanes;
-  T dI[V];
-  T dW[V];
+  TACC dI[V];
+  TACC dW[V];
   __shared__ T I[K][K];
   __shared__ T dO[K][K];
   __shared__ T W[K][K];
@@ -449,7 +447,7 @@ dConvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules,
   w += n * K;
   Int KO = min(K, output_nPlanes - K * n);
-  T O[V];
+  TACC O[V];
   __shared__ T W[K][K];
   __shared__ T I[K][K];
   __shared__ Int R[K * 2];
@@ -525,8 +523,8 @@ dConvolution_KMxKN_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
   dw += m * K * output_nPlanes;
   Int KI = min(K, input_nPlanes - K * m);
-  T dI[V];
-  T dW[V];
+  TACC dI[V];
+  TACC dW[V];
   __shared__ T I[K][K];
   __shared__ T dO[K][K];
   __shared__ T W[K][K];
@@ -650,3 +648,4 @@ void dConvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
                 , w += c; dw += c;)
  }
 }
+#undef TACC
\ No newline at end of file
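
The recurring accumulator changes in these kernels (`T O[V];`, `T dI[V];`, `T dW[V];` becoming `TACC ...`) switch the per-thread accumulator registers to double while features and weights stay in `T`; with `T = float`, long dot products otherwise lose low-order bits once the running sum grows large. A standalone demonstration of that failure mode (illustrative only, unrelated to the library's code):

```cpp
#include <cstdio>

// Summing 20 million ones: a float accumulator stalls at 2^24 = 16777216,
// because 16777216.0f + 1.0f rounds back to 16777216.0f, while a double
// accumulator stays exact. This is why the kernels accumulate in TACC.
int main() {
  const int n = 20000000;
  float fsum = 0.0f;
  double dsum = 0.0;
  for (int i = 0; i < n; i++) {
    fsum += 1.0f;
    dsum += 1.0;
  }
  std::printf("float: %.1f  double: %.1f\n", fsum, dsum);
  return 0;
}
```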
@@ -78,7 +78,7 @@ void cuda_Deconvolution_backward(
     dDeconvolution_backward_dW2<T>(iF, diF, doF, w, dw, _rules, ip, ip, op, op);
     if (d_bias.numel()) {
       auto db = d_bias.data<T>();
-      Convolution_bp_bias(doF, db, op, op, nActiveOut);
+      Convolution_bp_bias(doF, db, op, nActiveOut);
     }
   }
 }
@@ -4,6 +4,8 @@
 // This source code is licensed under the license found in the
 // LICENSE file in the root directory of this source tree.
+#define TACC double
 template <typename T, Int K, Int V>
 __global__ void
 dDeconvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules,
@@ -23,7 +25,7 @@ dDeconvolution_KMxKN_forwardA(T *inFeatures, T *outFeatures, T *w, Int *rules,
   outFeatures += n * K;
   w += n * K;
-  T O[V];
+  TACC O[V];
   __shared__ T W[K][K];
   __shared__ T I[K][K];
   Int R0[V];
@@ -91,7 +93,7 @@ dDeconvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, Int *rules,
   outFeatures += n * K;
   w += n * K;
-  T O[V];
+  TACC O[V];
   __shared__ T W[K][K];
   __shared__ T I[K][K];
   Int R0[V];
@@ -205,8 +207,8 @@ __global__ void dDeconvolution_KMxKN_backward_dW_A(
   w += m * K * output_nPlanes;
   dw += m * K * output_nPlanes;
-  T dI[V];
-  T dW[V];
+  TACC dI[V];
+  TACC dW[V];
   __shared__ T I[K][K];
   __shared__ T dO[K][K];
   __shared__ T W[K][K];
@@ -281,8 +283,8 @@ __global__ void dDeconvolution_KMxKN_backward_dW_B(
   w += m * K * output_nPlanes;
   dw += m * K * output_nPlanes;
-  T dI[V];
-  T dW[V];
+  TACC dI[V];
+  TACC dW[V];
   __shared__ T I[K][K];
   __shared__ T dO[K][K];
   __shared__ T W[K][K];
@@ -400,7 +402,7 @@ dDeconvolution_KMxKN_forward2(T *inFeatures, T *outFeatures, T *w, Int *rules,
   w += n * K;
   Int KO = min(K, output_nPlanes - K * n);
-  T O[V];
+  TACC O[V];
   __shared__ T W[K][K];
   __shared__ T I[K][K];
   __shared__ Int R[K * 2];
@@ -476,8 +478,8 @@ dDeconvolution_KMxKN_backward_dW2(T *inFeatures, T *dInFeatures,
   dw += m * K * output_nPlanes;
   Int KI = min(K, input_nPlanes - K * m);
-  T dI[V];
-  T dW[V];
+  TACC dI[V];
+  TACC dW[V];
   __shared__ T I[K][K];
   __shared__ T dO[K][K];
   __shared__ T W[K][K];
@@ -601,3 +603,5 @@ void dDeconvolution_backward_dW2(T *inFeatures, T *dInFeatures, T *dOutFeatures,
                 , w += c; dw += c;)
  }
 }
+#undef TACC
\ No newline at end of file
@@ -43,10 +43,10 @@ template void cuda_AveragePooling_BackwardPass<float>(
     float *d_input_features, float *d_output_features, Int nPlanes,
     Int input_stride, Int output_stride, RuleBook _rules, Int filterVolume);
-template void Convolution_fp_bias<float>(float *of, float *b, Int op,
+template void Convolution_fp_bias<float>(float *oF, float *b, Int nPlanes,
                                          Int nActive);
-template void Convolution_bp_bias<float>(float *matrix, float *target,
-                                         Int nRows, Int nColumns, Int nCOLUMNS);
+template void Convolution_bp_bias<float>(float *d_oF, float *d_b,
+                                         Int nPlanes, Int nActive);
 template double dConvolution_forward2<float>(
     float *inFeatures, float *outFeatures, float *w, RuleBook _rules,
     Int input_nPlanes, Int input_stride, Int output_nPlanes, Int output_stride);
@@ -6,7 +6,7 @@
 forward_pass_multiplyAdd_count = 0
 forward_pass_hidden_states = 0
-from .activations import Tanh, Sigmoid, ReLU, ELU, SELU, BatchNormELU
+from .activations import Tanh, Sigmoid, ReLU, LeakyReLU, ELU, SELU, BatchNormELU
 from .averagePooling import AveragePooling
 from .batchNormalization import BatchNormalization, BatchNormReLU, BatchNormLeakyReLU
 from .classificationTrainValidate import ClassificationTrainValidate
@@ -22,6 +22,18 @@ class Sigmoid(Module):
         return output
+
+class LeakyReLU(Module):
+    def __init__(self, leak=1/3):
+        Module.__init__(self)
+        self.leak = leak
+
+    def forward(self, input):
+        output = SparseConvNetTensor()
+        output.features = F.leaky_relu(input.features, self.leak)
+        output.metadata = input.metadata
+        output.spatial_size = input.spatial_size
+        return output
 class Tanh(Module):
     def forward(self, input):
         output = SparseConvNetTensor()
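
The new `LeakyReLU` module applies `F.leaky_relu` with a default negative slope of 1/3 to the feature matrix only; `metadata` and `spatial_size` pass through unchanged, as with the other activations. Elementwise, the operation is simply the following (a sketch of the definition, not library code):

```cpp
// Leaky ReLU: identity on positive inputs, a small linear slope on
// negative ones (leak defaults to 1/3 in the new Python module).
template <typename T>
T leaky_relu(T x, T leak) {
  return x > 0 ? x : leak * x;
}
```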