Commit 05971163 authored by Chao Liu
Browse files

refactor

parent df228b3c
......@@ -76,6 +76,7 @@ blockwise_4d_tensor_pointwise_operation_unary(DstDesc, Float* __restrict__ p_dst
}
}
// Function: p_dst[reorder[i0], reorder[i1], reorder[i2], reorder[i3]] = p_src[i0,i1,i2,i3]
// TODO: write specialized versions to optimize memory access for
// different memory types
template <unsigned BlockSize,
......
......@@ -11,3 +11,5 @@ struct is_same<T, T>
{
static const bool value = true;
};
// Returns the calling thread's index within its block along the x dimension
// (threadIdx.x). The y and z components of threadIdx are not consulted, so the
// value identifies a thread uniquely only when the block is launched as 1D.
// Declared inline so that this definition may be included from more than one
// translation unit without producing multiple-definition errors.
__device__ inline unsigned get_thread_local_id() { return threadIdx.x; }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment