Commit ffa70551 authored by Jehandad Khan's avatar Jehandad Khan
Browse files

Fix formatting

parent 29e1829f
......@@ -72,8 +72,13 @@ using DeviceConvBwdWeightInstance = ck::tensor_operation::device::
8>; // CBlockTransferScalarPerVector_NWaveNPerXdl
// clang-format on
using ReferenceConvBwdWeightInstance = ck::tensor_operation::host::
ReferenceConvBwdWeight<InDataType, WeiDataType, OutDataType, InElementOp, WeiElementOp, OutElementOp>;
using ReferenceConvBwdWeightInstance =
ck::tensor_operation::host::ReferenceConvBwdWeight<InDataType,
WeiDataType,
OutDataType,
InElementOp,
WeiElementOp,
OutElementOp>;
int main(int argc, char* argv[])
{
......
......@@ -22,7 +22,10 @@ struct BaseInvoker
BaseInvoker(const BaseInvoker&) = default;
BaseInvoker& operator=(const BaseInvoker&) = default;
virtual float Run(const BaseArgument*, int = 1, hipStream_t = nullptr, bool = false){return -1;}
virtual float Run(const BaseArgument*, int = 1, hipStream_t = nullptr, bool = false)
{
return -1;
}
virtual ~BaseInvoker() {}
};
......@@ -33,8 +36,8 @@ struct BaseOperator
BaseOperator(const BaseOperator&) = default;
BaseOperator& operator=(const BaseOperator&) = default;
virtual bool IsSupportedArgument(const BaseArgument*){return false;}
virtual std::string GetTypeString() const {return "";}
virtual bool IsSupportedArgument(const BaseArgument*) { return false; }
virtual std::string GetTypeString() const { return ""; }
virtual ~BaseOperator() {}
};
......
......@@ -693,7 +693,10 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
#if 0
{
......@@ -752,29 +755,31 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
remove_reference_t<Block2CTileMap>,
true>;
elapsed_time = launch_and_time_kernel(kernel,nrepeat,
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
arg.p_d0_grid_,
arg.p_d1_grid_,
arg.BatchCount_,
arg.a_element_op_,
arg.b_element_op_,
arg.c_element_op_,
arg.d0_reduce_op_,
arg.d1_reduce_op_,
arg.a_grid_desc_ak0_m_ak1_,
arg.b_grid_desc_bk0_n_bk1_,
arg.c_grid_desc_mblock_mperblock_nblock_nperblock_,
arg.d_grid_desc_mblock_mperblock_,
arg.compute_base_ptr_of_batch_,
arg.block_2_ctile_map_);
elapsed_time =
launch_and_time_kernel(kernel,
nrepeat,
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
arg.p_d0_grid_,
arg.p_d1_grid_,
arg.BatchCount_,
arg.a_element_op_,
arg.b_element_op_,
arg.c_element_op_,
arg.d0_reduce_op_,
arg.d1_reduce_op_,
arg.a_grid_desc_ak0_m_ak1_,
arg.b_grid_desc_bk0_n_bk1_,
arg.c_grid_desc_mblock_mperblock_nblock_nperblock_,
arg.d_grid_desc_mblock_mperblock_,
arg.compute_base_ptr_of_batch_,
arg.block_2_ctile_map_);
}
else
{
......@@ -796,37 +801,41 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
remove_reference_t<Block2CTileMap>,
false>;
elapsed_time = launch_and_time_kernel(kernel,
nrepeat,
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
arg.p_d0_grid_,
arg.p_d1_grid_,
arg.BatchCount_,
arg.a_element_op_,
arg.b_element_op_,
arg.c_element_op_,
arg.d0_reduce_op_,
arg.d1_reduce_op_,
arg.a_grid_desc_ak0_m_ak1_,
arg.b_grid_desc_bk0_n_bk1_,
arg.c_grid_desc_mblock_mperblock_nblock_nperblock_,
arg.d_grid_desc_mblock_mperblock_,
arg.compute_base_ptr_of_batch_,
arg.block_2_ctile_map_);
elapsed_time =
launch_and_time_kernel(kernel,
nrepeat,
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
arg.p_d0_grid_,
arg.p_d1_grid_,
arg.BatchCount_,
arg.a_element_op_,
arg.b_element_op_,
arg.c_element_op_,
arg.d0_reduce_op_,
arg.d1_reduce_op_,
arg.a_grid_desc_ak0_m_ak1_,
arg.b_grid_desc_bk0_n_bk1_,
arg.c_grid_desc_mblock_mperblock_nblock_nperblock_,
arg.d_grid_desc_mblock_mperblock_,
arg.compute_base_ptr_of_batch_,
arg.block_2_ctile_map_);
}
return elapsed_time;
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -402,7 +402,10 @@ struct DeviceBatchedGemmXdl
{
using Argument = DeviceBatchedGemmXdl::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -513,7 +516,10 @@ struct DeviceBatchedGemmXdl
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -415,7 +415,10 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
<< arg.c_grid_desc_m_n_.GetLength(I1) << "}" << std::endl;
}
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
ShowInfo(arg);
if(!GridwiseGemm::CheckValidity(arg.a_grid_desc_kbatch_k0_m_k1_,
......@@ -446,7 +449,7 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
dim3(BlockSize),
0,
stream_id,
measure_time,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -471,7 +474,7 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -563,7 +566,10 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -531,7 +531,10 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
float ave_time = 0;
for(size_t i = 0; i < arg.a_grid_desc_k0_m_k1_container_.size(); i++)
......@@ -660,7 +663,10 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -642,7 +642,10 @@ struct
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
#if 0
{
......@@ -733,8 +736,8 @@ struct
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -779,8 +782,8 @@ struct
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -800,7 +803,10 @@ struct
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -607,7 +607,10 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
#if 0
{
......@@ -692,8 +695,8 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -733,8 +736,8 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -752,7 +755,10 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -568,7 +568,10 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
#if 0
{
......@@ -669,8 +672,8 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -705,8 +708,8 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -722,7 +725,10 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -450,7 +450,10 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
#if 0
{
......@@ -504,8 +507,8 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -537,8 +540,8 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -554,7 +557,10 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -92,7 +92,10 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
const auto naive_conv3d_fwd =
ref::naive_conv_fwd_ndhwc_kzyxc_ndhwk<InDataType,
......@@ -108,8 +111,8 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
dim3(256),
dim3(256),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_in_,
arg.p_wei_,
arg.p_out_,
......@@ -139,7 +142,10 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -435,7 +435,10 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
{
std::cout << "num_batches_of_GEMM = " << arg.num_subbatches_ << std::endl;
......@@ -489,8 +492,8 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -526,8 +529,8 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -548,7 +551,10 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -1241,7 +1241,10 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
float ave_time = 0;
for(size_t i = 0; i < arg.a_grid_desc_k0_m_k1_container_.size(); i++)
......@@ -1374,7 +1377,10 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -747,7 +747,10 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
#if 0
{
......@@ -801,8 +804,8 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -834,8 +837,8 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -851,7 +854,10 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -500,7 +500,10 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
#if 0
{
......@@ -533,7 +536,7 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
const auto K0 = arg.a_grid_desc_ak0_m_ak1_.GetLength(I0);
const bool has_main_k0_block_loop = GridwiseGemm::CalculateHasMainK0BlockLoop(K0);
float elapsed_time = 0.0f;
float elapsed_time = 0.0f;
if(has_main_k0_block_loop)
{
......@@ -554,28 +557,29 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
typename GridwiseGemm::DefaultBlock2CTileMap,
true>;
elapsed_time = launch_and_time_kernel(kernel,
nrepeat,
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
arg.p_d0_grid_,
arg.p_d1_grid_,
arg.a_element_op_,
arg.b_element_op_,
arg.c_element_op_,
arg.d0_reduce_op_,
arg.d1_reduce_op_,
arg.a_grid_desc_ak0_m_ak1_,
arg.b_grid_desc_bk0_n_bk1_,
arg.c_grid_desc_mblock_mperblock_nblock_nperblock_,
arg.d_grid_desc_mblock_mperblock_,
arg.block_2_ctile_map_);
elapsed_time =
launch_and_time_kernel(kernel,
nrepeat,
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
arg.p_d0_grid_,
arg.p_d1_grid_,
arg.a_element_op_,
arg.b_element_op_,
arg.c_element_op_,
arg.d0_reduce_op_,
arg.d1_reduce_op_,
arg.a_grid_desc_ak0_m_ak1_,
arg.b_grid_desc_bk0_n_bk1_,
arg.c_grid_desc_mblock_mperblock_nblock_nperblock_,
arg.d_grid_desc_mblock_mperblock_,
arg.block_2_ctile_map_);
}
else
{
......@@ -596,35 +600,39 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
typename GridwiseGemm::DefaultBlock2CTileMap,
false>;
elapsed_time = launch_and_time_kernel(kernel,
nrepeat,
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
arg.p_d0_grid_,
arg.p_d1_grid_,
arg.a_element_op_,
arg.b_element_op_,
arg.c_element_op_,
arg.d0_reduce_op_,
arg.d1_reduce_op_,
arg.a_grid_desc_ak0_m_ak1_,
arg.b_grid_desc_bk0_n_bk1_,
arg.c_grid_desc_mblock_mperblock_nblock_nperblock_,
arg.d_grid_desc_mblock_mperblock_,
arg.block_2_ctile_map_);
elapsed_time =
launch_and_time_kernel(kernel,
nrepeat,
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
arg.p_d0_grid_,
arg.p_d1_grid_,
arg.a_element_op_,
arg.b_element_op_,
arg.c_element_op_,
arg.d0_reduce_op_,
arg.d1_reduce_op_,
arg.a_grid_desc_ak0_m_ak1_,
arg.b_grid_desc_bk0_n_bk1_,
arg.c_grid_desc_mblock_mperblock_nblock_nperblock_,
arg.d_grid_desc_mblock_mperblock_,
arg.block_2_ctile_map_);
}
return elapsed_time;
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -290,7 +290,10 @@ struct DeviceGemmXdl
{
using Argument = DeviceGemmXdl::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -343,8 +346,8 @@ struct DeviceGemmXdl
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -376,8 +379,8 @@ struct DeviceGemmXdl
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -394,7 +397,10 @@ struct DeviceGemmXdl
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -249,7 +249,10 @@ struct DeviceGemmXdl_C_Shuffle
{
using Argument = DeviceGemmXdl_C_Shuffle::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -305,8 +308,8 @@ struct DeviceGemmXdl_C_Shuffle
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -341,8 +344,8 @@ struct DeviceGemmXdl_C_Shuffle
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -359,7 +362,10 @@ struct DeviceGemmXdl_C_Shuffle
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -266,7 +266,10 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
{
using Argument = DeviceGemmXdl_C_Shuffle_Bias_2d::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -328,8 +331,8 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -369,8 +372,8 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -389,7 +392,10 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -273,7 +273,10 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -335,8 +338,8 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -376,8 +379,8 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -396,7 +399,10 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
......@@ -312,7 +312,10 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
float Run(const Argument& arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -380,8 +383,8 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -426,8 +429,8 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
measure_time,
stream_id,
measure_time,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -448,7 +451,10 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
float Run(const BaseArgument* p_arg,
int nrepeat = 1,
hipStream_t stream_id = nullptr,
bool measure_time = false) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment