Commit 9de63930 authored by Chao Liu
Browse files

refactor

parent 23f633cd
...@@ -237,11 +237,10 @@ struct GridwiseConvolutionImplicitGemm_v4_nchw_kcyx_nkhw_lds_double_buffer ...@@ -237,11 +237,10 @@ struct GridwiseConvolutionImplicitGemm_v4_nchw_kcyx_nkhw_lds_double_buffer
GemmDataPerReadB); GemmDataPerReadB);
constexpr index_t in_block_space = constexpr index_t in_block_space =
math::integer_divide_ceil(in_e_n1_b_n2_block_desc.GetElementSpace(), max_align) * math::integer_least_multiple(in_e_n1_b_n2_block_desc.GetElementSpace(), max_align);
max_align;
constexpr index_t wei_block_space = constexpr index_t wei_block_space =
math::integer_divide_ceil(wei_e_k_block_desc.GetElementSpace(), max_align) * max_align; math::integer_least_multiple(wei_e_k_block_desc.GetElementSpace(), max_align);
__shared__ Float p_in_block_double[2 * in_block_space]; __shared__ Float p_in_block_double[2 * in_block_space];
__shared__ Float p_wei_block_double[2 * wei_block_space]; __shared__ Float p_wei_block_double[2 * wei_block_space];
......
...@@ -54,6 +54,14 @@ __host__ __device__ constexpr T integer_divide_ceil(T a, T b) ...@@ -54,6 +54,14 @@ __host__ __device__ constexpr T integer_divide_ceil(T a, T b)
return (a + b - 1) / b; return (a + b - 1) / b;
} }
// Rounds `a` up to a multiple of `b`: the result is integer_divide_ceil(a, b)
// scaled back by `b`. For non-negative `a` and positive `b` this is the
// smallest multiple of `b` that is >= `a`.
// Only T = int or T = index_t is accepted; any other type fails to compile.
// `b` must be non-zero, since integer_divide_ceil divides by it.
template <class T>
__host__ __device__ constexpr T integer_least_multiple(T a, T b)
{
    static_assert(is_same<T, int>{} || is_same<T, index_t>{}, "wrong type");

    // count of b-sized chunks needed to cover a, converted back to elements
    return integer_divide_ceil(a, b) * b;
}
template <class T> template <class T>
__host__ __device__ constexpr T max(T x) __host__ __device__ constexpr T max(T x)
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment