Commit 44757d6b authored by Jehandad Khan
Browse files

Make hip stream configurable

parent 7d38e6a0
......@@ -152,7 +152,7 @@ void profile_conv_fwd_impl(int do_verification,
{
std::string conv_name = conv_ptr.GetTypeString();
float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat);
float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat, nullptr);
std::size_t flop = std::size_t(2) * N * K * Ho * Wo * C * Y * X;
......
......@@ -22,7 +22,7 @@ struct BaseInvoker
BaseInvoker(const BaseInvoker&) = default;
BaseInvoker& operator=(const BaseInvoker&) = default;
virtual float Run(const BaseArgument*, int = 1){return -1;}
virtual float Run(const BaseArgument*, int = 1, hipStream_t = nullptr){return -1;}
virtual ~BaseInvoker() {}
};
......
......@@ -274,7 +274,7 @@ struct DeviceBatchedGemmXdl
{
using Argument = DeviceBatchedGemmXdl::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
{
std::cout << "arg.a_grid_desc_g_k0_m_k1_{"
......@@ -335,6 +335,7 @@ struct DeviceBatchedGemmXdl
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -367,6 +368,7 @@ struct DeviceBatchedGemmXdl
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -383,9 +385,9 @@ struct DeviceBatchedGemmXdl
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -414,7 +414,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
<< arg.c_grid_desc_m_n_.GetLength(I1) << "}" << std::endl;
}
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
ShowInfo(arg);
if(!GridwiseGemm::CheckValidity(arg.a_grid_desc_kbatch_k0_m_k1_,
......@@ -444,6 +444,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -468,6 +469,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -559,9 +561,9 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -521,7 +521,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
nrepeat = 1;
float ave_time = 0;
......@@ -599,6 +599,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -632,6 +633,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -647,9 +649,9 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -642,7 +642,7 @@ struct
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
#if 0
{
......@@ -733,6 +733,7 @@ struct
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -777,6 +778,7 @@ struct
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -796,9 +798,9 @@ struct
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -607,7 +607,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
#if 0
{
......@@ -692,6 +692,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -731,6 +732,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -748,9 +750,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -568,7 +568,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
#if 0
{
......@@ -669,6 +669,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -703,6 +704,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -718,9 +720,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -450,7 +450,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
#if 0
{
......@@ -505,6 +505,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -536,6 +537,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -551,9 +553,9 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -98,7 +98,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
const auto naive_conv3d_fwd =
ref::naive_conv_fwd_ndhwc_kzyxc_ndhwk<InDataType,
......@@ -114,6 +114,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
dim3(256),
dim3(256),
0,
stream_id,
arg.p_in_,
arg.p_wei_,
arg.p_out_,
......@@ -143,9 +144,9 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -430,7 +430,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
{
std::cout << "num_batches_of_GEMM = " << arg.num_subbatches_ << std::endl;
......@@ -484,6 +484,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -519,6 +520,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -539,9 +541,9 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -591,7 +591,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -644,6 +644,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -675,6 +676,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -690,9 +692,9 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return ave_time;
}
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -290,7 +290,7 @@ struct DeviceGemmXdl
{
using Argument = DeviceGemmXdl::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -343,6 +343,7 @@ struct DeviceGemmXdl
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -374,6 +375,7 @@ struct DeviceGemmXdl
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -390,9 +392,9 @@ struct DeviceGemmXdl
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -249,7 +249,7 @@ struct DeviceGemmXdl_C_Shuffle
{
using Argument = DeviceGemmXdl_C_Shuffle::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -305,6 +305,7 @@ struct DeviceGemmXdl_C_Shuffle
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -339,6 +340,7 @@ struct DeviceGemmXdl_C_Shuffle
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -355,9 +357,9 @@ struct DeviceGemmXdl_C_Shuffle
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -268,7 +268,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
{
using Argument = DeviceGemmXdl_C_Shuffle_Bias_2d::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -330,6 +330,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -369,6 +370,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -387,9 +389,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -273,7 +273,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -335,6 +335,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -374,6 +375,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -392,9 +394,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -312,7 +312,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
{
using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
{
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
......@@ -380,6 +380,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -424,6 +425,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -444,9 +446,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -385,7 +385,7 @@ struct DeviceGemmXdlSplitK
std::cout << "arg.c_grid_desc_m_n_{ " << arg.c_grid_desc_m_n_.GetLength(I0) << ", "
<< arg.c_grid_desc_m_n_.GetLength(I1) << "}" << std::endl;
}
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
const auto kbatch = arg.a_grid_desc_kbatch_k0_m_k1_.GetLength(I0);
......@@ -416,6 +416,7 @@ struct DeviceGemmXdlSplitK
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -440,6 +441,7 @@ struct DeviceGemmXdlSplitK
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -531,9 +533,9 @@ struct DeviceGemmXdlSplitK
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -391,7 +391,7 @@ struct DeviceGemmXdlSplitKCShuffle
std::cout << "arg.c_grid_desc_m_n_{ " << arg.c_grid_desc_m_n_.GetLength(I0) << ", "
<< arg.c_grid_desc_m_n_.GetLength(I1) << "}" << std::endl;
}
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
const auto kbatch = arg.a_grid_desc_kbatch_k0_m_k1_.GetLength(I0);
......@@ -423,6 +423,7 @@ struct DeviceGemmXdlSplitKCShuffle
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -447,6 +448,7 @@ struct DeviceGemmXdlSplitKCShuffle
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.p_a_grid_,
arg.p_b_grid_,
arg.p_c_grid_,
......@@ -542,9 +544,9 @@ struct DeviceGemmXdlSplitKCShuffle
}
// polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
......@@ -204,7 +204,7 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
struct Invoker : public BaseInvoker
{
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
using gridwise_reduce = GridwiseReduction_mk_to_m_threadwise<InDataType,
OutDataType,
......@@ -246,6 +246,7 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
dim3(grid_size),
dim3(BlockSize),
0,
stream_id,
arg.a_grid_desc_m_k_,
arg.b_grid_desc_m_,
arg.in_element_op_,
......@@ -257,9 +258,9 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
arg.p_out_indices_dev_);
}
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}
};
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment