"...composable_kernel_rocm.git" did not exist on "a49115b95edde18cacc8921c9a3ab9388dd907fa"
Commit aee5486c authored by ltqin's avatar ltqin
Browse files

Add a clear-data function that is called before launching the kernel

parent e711702d
......@@ -168,6 +168,10 @@ void device_convolution_backward_weight_implicit_gemm_v4r4r3_xdlops_nchw_kcyx_nk
for(index_t i = 0; i < 5; ++i)
{
std::function<void()> clear_weight = [&wei_k_c_y_x_device_buf, &wei_k_c_y_x]() {
wei_k_c_y_x_device_buf.ToDevice(wei_k_c_y_x.mData.data());
};
float ave_time = driver_gemm_xdlops_v2r4<
BlockSize,
TInWei,
......@@ -221,7 +225,8 @@ void device_convolution_backward_weight_implicit_gemm_v4r4r3_xdlops_nchw_kcyx_nk
wei_m0_n0_m1_n1_m2_m3_m4_n2_grid_step_hacks,
out_gemmk0_gemmm_gemmk1_grid_move_slice_window_step_hacks,
in_gemmk0_gemmn_gemmk1_grid_move_slice_window_step_hacks,
nrepeat);
nrepeat,
&clear_weight);
float perf = static_cast<float>(calculate_convolution_flops(
in_n_c_hi_wi_desc, wei_k_c_y_x_desc, out_n_k_ho_wo_desc)) /
......
......@@ -59,7 +59,8 @@ __host__ float driver_gemm_xdlops_v2r4(const FloatAB* p_a_grid,
CGridStepHacks,
AGridMoveSliceWindowStepHacks,
BGridMoveSliceWindowStepHacks,
ck::index_t nrepeat)
ck::index_t nrepeat,
const std::function<void()>* func)
{
using namespace ck;
......@@ -184,12 +185,13 @@ __host__ float driver_gemm_xdlops_v2r4(const FloatAB* p_a_grid,
c_m0_n0_m1_n1_m2_m3_m4_n2_grid_desc_dev_buf.ToDevice(&c_m0_n0_m1_n1_m2_m3_m4_n2_grid_desc);
c_block_cluster_adaptor_dev_buf.ToDevice(&c_block_cluster_adaptor);
float ave_time = launch_and_time_kernel(
float ave_time = launch_time_and_out_call_kernel(
kernel,
nrepeat,
dim3(grid_size),
dim3(BlockSize),
0,
func,
p_a_grid,
p_b_grid,
p_c_grid,
......
......@@ -2,6 +2,7 @@
#define DEVICE_HPP
#include <memory>
#include <functional>
#include "hip/hip_runtime.h"
#include "hip/hip_fp16.h"
......@@ -77,4 +78,50 @@ float launch_and_time_kernel(
return timer.GetElapsedTime() / nrepeat;
}
template <typename... Args, typename F>
float launch_time_and_out_call_kernel(F kernel,
int nrepeat,
dim3 grid_dim,
dim3 block_dim,
std::size_t lds_byte,
const std::function<void()>* func,
Args... args)
{
KernelTimer timer;
printf("%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d} \n",
__func__,
grid_dim.x,
grid_dim.y,
grid_dim.z,
block_dim.x,
block_dim.y,
block_dim.z);
printf("Warm up\n");
hipStream_t stream_id = nullptr;
// warm up
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
printf("Start running %d times...\n", nrepeat);
timer.Start();
for(int i = 0; i < nrepeat; ++i)
{
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
}
timer.End();
// call out function
if(func)
{
(*func)();
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
}
return timer.GetElapsedTime() / nrepeat;
}
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment