"...composable_kernel_rocm.git" did not exist on "fb1ccfa9df534c8c9f351dd959a0ff692d6f9210"
Commit f9560180 authored by Jing Zhang's avatar Jing Zhang
Browse files

debugging maxpool

parent b5bc31bd
......@@ -633,13 +633,15 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
auto bias_global_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
p_bias_global, bias_k0_k1_grid_desc.GetElementSpaceSize());
// if(get_thread_local_1d_id() == 0 && get_block_1d_id() == 0)
// printf("a: %d b: %d c: %d d: %d bias: %d\n",
//(int)a_e0_e1_k0_k1_e2_grid_desc.GetElementSpaceSize(),
//(int)b_e0_e1_n_h0_h1_h2_w0_w1_w2_e2_grid_desc.GetElementSpaceSize(),
//(int)c_k0_k1_n_h0_h1_h2_w0_w1_w2_grid_desc.GetElementSpaceSize(),
//(int)d_k0_k1_n_h0_h1_hx_w0_w1_wx_grid_desc.GetElementSpaceSize(),
//(int)bias_k0_k1_grid_desc.GetElementSpaceSize());
#if 0
if(get_thread_local_1d_id() == 0 && get_block_1d_id() == 0)
printf("a: %d b: %d c: %d d: %d bias: %d\n",
(int)a_e0_e1_k0_k1_e2_grid_desc.GetElementSpaceSize(),
(int)b_e0_e1_n_h0_h1_h2_w0_w1_w2_e2_grid_desc.GetElementSpaceSize(),
(int)c_k0_k1_n_h0_h1_h2_w0_w1_w2_grid_desc.GetElementSpaceSize(),
(int)d_k0_k1_n_h0_h1_hx_w0_w1_wx_grid_desc.GetElementSpaceSize(),
(int)bias_k0_k1_grid_desc.GetElementSpaceSize());
#endif
constexpr auto HasMainE1BlockLoop = CalculateHasMainE1BlockLoop();
constexpr auto HasDoubleTailE1BlockLoop = CalculateHasDoubleTailE1BlockLoop();
......
......@@ -319,7 +319,6 @@ __device__ typename vector_type<T, N>::type amd_buffer_load_impl(int32x4_t src_w
}
else if constexpr(N == 8)
{
printf("half8_t buffer_load\n");
// use fp32 load to mimic fp16 load
float4_t tmp = llvm_amdgcn_raw_buffer_load_fp32x4(
src_wave_buffer_resource, src_thread_addr_offset, src_wave_addr_offset, 0);
......
......@@ -126,10 +126,12 @@ void device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1
using ABlockTransferThreadClusterLengths_E0_E1_K0_K1_E2 =
Sequence<1, E1PerBlock, 1, KPerBlock, 1>;
constexpr index_t ABlockTransferSrcScalarPerVector_E2 = E2;
constexpr index_t ABlockTransferDstScalarPerVector_E2 = E2;
constexpr index_t ABlockTransferSrcScalarPerVector_E2 = E2;
constexpr index_t ABlockTransferDstScalarPerVector_E2 = E2;
constexpr index_t BThreadTransferSrcScalarPerVector_E2 = E2;
constexpr index_t CThreadTransferDstScalarPerVector_K = K1;
constexpr index_t CThreadTransferDstScalarPerVector_K = K1;
#endif
const auto in_n_c0_hi_wi_c1_desc =
......
......@@ -111,7 +111,7 @@ int main(int argc, char* argv[])
constexpr auto Wi = Number<1920>{};
constexpr auto Y = Number<3>{};
constexpr auto X = Number<3>{};
constexpr auto C0 = Number<3>{};
constexpr auto C0 = Number<1>{};
constexpr auto C1 = Number<4>{};
constexpr auto K0 = Number<2>{};
constexpr auto K1 = Number<8>{};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment