Commit 03b78901 authored by TiagoMAntunes
Browse files

Fixed incorrect shared memory type in column reduce kernel

parent 3d1987d1
...@@ -45,7 +45,12 @@ template <typename scalar_t> ...@@ -45,7 +45,12 @@ template <typename scalar_t>
__global__ __global__
void column_reduce(const scalar_t * matrix, scalar_t * result, void column_reduce(const scalar_t * matrix, scalar_t * result,
int m /* lines */, int n /* columns*/) { int m /* lines */, int n /* columns*/) {
extern __shared__ float sdata[];
// https://stackoverflow.com/questions/27570552/templated-cuda-kernel-with-dynamic-shared-memory
extern __shared__ __align__(sizeof(scalar_t)) unsigned char my_smem[];
scalar_t *sdata = reinterpret_cast<scalar_t *>(my_smem);
unsigned int tid = threadIdx.x + threadIdx.y * blockDim.x; // line unsigned int tid = threadIdx.x + threadIdx.y * blockDim.x; // line
unsigned int i = threadIdx.x * n + threadIdx.y + blockIdx.y * blockDim.y; // get to idx th line unsigned int i = threadIdx.x * n + threadIdx.y + blockIdx.y * blockDim.y; // get to idx th line
unsigned int offset = 0; unsigned int offset = 0;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment