"...resnet50_migraphx.git" did not exist on "0ffa7cb7da5290d038921dc70efcfac027de0a6d"
Commit a76eac28 authored by rocking

[What] Remove the tuple type from the base class

[Why] External APIs depend on the base class. If the base class carried the concrete tuple type, we would need a separate class for every type.
parent f9d22b02
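
In short, the base-class interface drops the `DPtrsGlobal` template parameter and takes the reduction-output pointers through a type-erased `void* p_dxs`; each derived device operation casts that pointer back to its concrete tuple type, so a single base class (and a single external API signature) serves every tuple instantiation. A minimal sketch of the pattern, using illustrative class names rather than the actual CK declarations:

```cpp
#include <memory>

// Hypothetical stand-in for the opaque argument object a device op returns.
struct BaseArgument
{
    virtual ~BaseArgument() = default;
};

// Type-erased base interface: no template dependency on the tuple of
// reduction-output pointers, so one base class covers all instantiations.
struct DeviceGemmReduceBase
{
    virtual ~DeviceGemmReduceBase() = default;

    // p_dxs points at a caller-owned tuple of reduction-output pointers.
    virtual std::unique_ptr<BaseArgument> MakeArgumentPointer(const void* p_a,
                                                              const void* p_b,
                                                              void* p_c,
                                                              void* p_dxs) = 0;
};

// The derived operation still knows the concrete tuple type and casts back,
// mirroring the `dxs_tuple = *(static_cast<DPtrsGlobal*>(p_dxs))` lines below.
template <typename DPtrsGlobal>
struct DeviceGemmReduceImpl : DeviceGemmReduceBase
{
    struct Argument : BaseArgument
    {
        Argument(const void*, const void*, void*, DPtrsGlobal) {}
    };

    std::unique_ptr<BaseArgument> MakeArgumentPointer(const void* p_a,
                                                      const void* p_b,
                                                      void* p_c,
                                                      void* p_dxs) override
    {
        // Recover the strongly typed tuple from the erased pointer.
        DPtrsGlobal dxs_tuple = *(static_cast<DPtrsGlobal*>(p_dxs));
        return std::make_unique<Argument>(p_a, p_b, p_c, dxs_tuple);
    }
};
```
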
@@ -163,8 +163,7 @@ template <typename ALayout,
index_t CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock,
LoopScheduler LoopSched = make_default_loop_scheduler()>
struct DeviceBatchedGemmReduce_Xdl_CShuffle
: public DeviceGemmReduce<DPtrsGlobal,
AElementwiseOperation,
: public DeviceGemmReduce<AElementwiseOperation,
BElementwiseOperation,
CElementwiseOperation,
DxsInElementwiseOperation,
@@ -861,7 +860,7 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle
MakeArgumentPointer(const void* p_a,
const void* p_b,
void* p_c,
DPtrsGlobal p_dxs,
void* p_dxs,
index_t MRaw,
index_t NRaw,
index_t KRaw,
@@ -875,10 +874,11 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle
DxsReduceAccElementwiseOperation dxs_out_element_op,
index_t BatchCount) override
{
DPtrsGlobal dxs_tuple = *(static_cast<DPtrsGlobal*>(p_dxs));
return std::make_unique<Argument>(static_cast<const ADataType*>(p_a),
static_cast<const BDataType*>(p_b),
static_cast<CDataType*>(p_c),
p_dxs,
dxs_tuple,
MRaw,
NRaw,
KRaw,
......
@@ -71,8 +71,7 @@ template <typename ALayout,
index_t CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock,
LoopScheduler LoopSched = make_default_loop_scheduler()>
struct DeviceGemmBiasAddReduce_Xdl_CShuffle
: public DeviceGemmBiasAddReduce<DPtrsGlobal,
AElementwiseOperation,
: public DeviceGemmBiasAddReduce<AElementwiseOperation,
BElementwiseOperation,
CElementwiseOperation,
C1ElementwiseOperation,
@@ -744,7 +743,7 @@ struct DeviceGemmBiasAddReduce_Xdl_CShuffle
void* p_c,
const void* p_c0,
const void* p_c1,
DPtrsGlobal p_dxs,
void* p_dxs,
index_t MRaw,
index_t NRaw,
index_t KRaw,
@@ -760,12 +759,13 @@ struct DeviceGemmBiasAddReduce_Xdl_CShuffle
DxsReduceAccElementwiseOperation dxs_out_element_op,
index_t /* KBatch */ = 1) override
{
DPtrsGlobal dxs_tuple = *(static_cast<DPtrsGlobal*>(p_dxs));
return std::make_unique<Argument>(static_cast<const ADataType*>(p_a),
static_cast<const BDataType*>(p_b),
static_cast<CDataType*>(p_c),
static_cast<const C0DataType*>(p_c0),
static_cast<const C1DataType*>(p_c1),
p_dxs,
dxs_tuple,
MRaw,
NRaw,
KRaw,
......
@@ -6,8 +6,7 @@ namespace ck {
namespace tensor_operation {
namespace device {
template <typename DPtrsGlobal,
typename AElementwiseOperation,
template <typename AElementwiseOperation,
typename BElementwiseOperation,
typename CElementwiseOperation,
typename DxsInElementwiseOperation,
@@ -18,7 +17,7 @@ struct DeviceGemmReduce : public BaseOperator
MakeArgumentPointer(const void* p_a,
const void* p_b,
void* p_c,
DPtrsGlobal p_dxs,
void* p_dxs,
ck::index_t M,
ck::index_t N,
ck::index_t K,
@@ -35,21 +34,18 @@ struct DeviceGemmReduce : public BaseOperator
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
};
template <typename DPtrsGlobal,
typename AElementwiseOperation,
template <typename AElementwiseOperation,
typename BElementwiseOperation,
typename CElementwiseOperation,
typename DxsInElementwiseOperation,
typename DxsReduceAccElementwiseOperation>
using DeviceGemmReducePtr = std::unique_ptr<DeviceGemmReduce<DPtrsGlobal,
AElementwiseOperation,
using DeviceGemmReducePtr = std::unique_ptr<DeviceGemmReduce<AElementwiseOperation,
BElementwiseOperation,
CElementwiseOperation,
DxsInElementwiseOperation,
DxsReduceAccElementwiseOperation>>;
template <typename DPtrsGlobal,
typename AElementwiseOperation,
template <typename AElementwiseOperation,
typename BElementwiseOperation,
typename CElementwiseOperation,
typename C1ElementwiseOperation,
@@ -63,7 +59,7 @@ struct DeviceGemmBiasAddReduce : public BaseOperator
void* p_c,
const void* p_c0,
const void* p_c1,
DPtrsGlobal p_dxs,
void* p_dxs,
ck::index_t M,
ck::index_t N,
ck::index_t K,
@@ -82,16 +78,14 @@ struct DeviceGemmBiasAddReduce : public BaseOperator
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
};
template <typename DPtrsGlobal,
typename AElementwiseOperation,
template <typename AElementwiseOperation,
typename BElementwiseOperation,
typename CElementwiseOperation,
typename C1ElementwiseOperation,
typename DxsInElementwiseOperation,
typename DxsReduceAccElementwiseOperation>
using DeviceGemmBiasAddReducePtr =
std::unique_ptr<DeviceGemmBiasAddReduce<DPtrsGlobal,
AElementwiseOperation,
std::unique_ptr<DeviceGemmBiasAddReduce<AElementwiseOperation,
BElementwiseOperation,
CElementwiseOperation,
C1ElementwiseOperation,
......
@@ -68,8 +68,7 @@ template <typename ALayout,
index_t CReduceThreadLds2VGprCopySrcDstScalarPerVector_NPerBlock,
index_t CReduceThreadVgpr2GlobalCopySrcDstScalarPerVector_MPerBlock,
LoopScheduler LoopSched = make_default_loop_scheduler()>
struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<DPtrsGlobal,
AElementwiseOperation,
struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOperation,
BElementwiseOperation,
CElementwiseOperation,
DxsInElementwiseOperation,
@@ -695,7 +694,7 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<DPtrsGlobal,
MakeArgumentPointer(const void* p_a,
const void* p_b,
void* p_c,
DPtrsGlobal p_dxs,
void* p_dxs,
index_t MRaw,
index_t NRaw,
index_t KRaw,
@@ -709,10 +708,11 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<DPtrsGlobal,
DxsReduceAccElementwiseOperation dxs_out_element_op,
index_t /* KBatch */ = 1) override
{
DPtrsGlobal dxs_tuple = *(static_cast<DPtrsGlobal*>(p_dxs));
return std::make_unique<Argument>(static_cast<const ADataType*>(p_a),
static_cast<const BDataType*>(p_b),
static_cast<CDataType*>(p_c),
p_dxs,
dxs_tuple,
MRaw,
NRaw,
KRaw,
......
@@ -62,12 +62,9 @@ using device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_in
>;
void add_device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instances(
std::vector<DeviceGemmReducePtr<DPtrsGlobal,
PassThrough,
PassThrough,
PassThrough,
DInElementOps,
DOutElementOps>>& instances)
std::vector<
DeviceGemmReducePtr<PassThrough, PassThrough, PassThrough, DInElementOps, DOutElementOps>>&
instances)
{
add_device_operation_instances(
instances,
......
@@ -62,12 +62,9 @@ using device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_in
>;
void add_device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instances(
std::vector<DeviceGemmReducePtr<DPtrsGlobal,
PassThrough,
PassThrough,
PassThrough,
DInElementOps,
DOutElementOps>>& instances)
std::vector<
DeviceGemmReducePtr<PassThrough, PassThrough, PassThrough, DInElementOps, DOutElementOps>>&
instances)
{
add_device_operation_instances(
instances,
......
@@ -62,12 +62,9 @@ using device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_in
>;
void add_device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instances(
std::vector<DeviceGemmReducePtr<DPtrsGlobal,
PassThrough,
PassThrough,
PassThrough,
DInElementOps,
DOutElementOps>>& instances)
std::vector<
DeviceGemmReducePtr<PassThrough, PassThrough, PassThrough, DInElementOps, DOutElementOps>>&
instances)
{
add_device_operation_instances(
instances,
......
@@ -59,12 +59,9 @@ using device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_in
>;
void add_device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instances(
std::vector<DeviceGemmReducePtr<DPtrsGlobal,
PassThrough,
PassThrough,
PassThrough,
DInElementOps,
DOutElementOps>>& instances)
std::vector<
DeviceGemmReducePtr<PassThrough, PassThrough, PassThrough, DInElementOps, DOutElementOps>>&
instances)
{
add_device_operation_instances(
instances,
......
@@ -63,8 +63,7 @@ using device_gemm_bias_add_reduce_xdl_cshuffle_f16_f16_f16_f16_f16_f32_f32_km_kn
>;
void add_device_gemm_bias_add_reduce_xdl_cshuffle_f16_f16_f16_f16_f16_f32_f32_km_kn_mn_instances(
std::vector<DeviceGemmBiasAddReducePtr<DPtrsGlobal,
PassThrough,
std::vector<DeviceGemmBiasAddReducePtr<PassThrough,
PassThrough,
PassThrough,
PassThrough,
......
@@ -63,8 +63,7 @@ using device_gemm_bias_add_reduce_xdl_cshuffle_f16_f16_f16_f16_f16_f32_f32_km_nk
>;
void add_device_gemm_bias_add_reduce_xdl_cshuffle_f16_f16_f16_f16_f16_f32_f32_km_nk_mn_instances(
std::vector<DeviceGemmBiasAddReducePtr<DPtrsGlobal,
PassThrough,
std::vector<DeviceGemmBiasAddReducePtr<PassThrough,
PassThrough,
PassThrough,
PassThrough,
......
@@ -63,8 +63,7 @@ using device_gemm_bias_add_reduce_xdl_cshuffle_f16_f16_f16_f16_f16_f32_f32_mk_kn
>;
void add_device_gemm_bias_add_reduce_xdl_cshuffle_f16_f16_f16_f16_f16_f32_f32_mk_kn_mn_instances(
std::vector<DeviceGemmBiasAddReducePtr<DPtrsGlobal,
PassThrough,
std::vector<DeviceGemmBiasAddReducePtr<PassThrough,
PassThrough,
PassThrough,
PassThrough,
......
@@ -60,8 +60,7 @@ using device_gemm_bias_add_reduce_xdl_cshuffle_f16_f16_f16_f16_f16_f32_f32_mk_nk
>;
void add_device_gemm_bias_add_reduce_xdl_cshuffle_f16_f16_f16_f16_f16_f32_f32_mk_nk_mn_instances(
std::vector<DeviceGemmBiasAddReducePtr<DPtrsGlobal,
PassThrough,
std::vector<DeviceGemmBiasAddReducePtr<PassThrough,
PassThrough,
PassThrough,
PassThrough,
......
@@ -62,12 +62,9 @@ using device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instances = s
>;
void add_device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instances(
std::vector<DeviceGemmReducePtr<DPtrsGlobal,
PassThrough,
PassThrough,
PassThrough,
DInElementOps,
DOutElementOps>>& instances)
std::vector<
DeviceGemmReducePtr<PassThrough, PassThrough, PassThrough, DInElementOps, DOutElementOps>>&
instances)
{
add_device_operation_instances(
instances, device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instances{});
......
@@ -62,12 +62,9 @@ using device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instances = s
>;
void add_device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instances(
std::vector<DeviceGemmReducePtr<DPtrsGlobal,
PassThrough,
PassThrough,
PassThrough,
DInElementOps,
DOutElementOps>>& instances)
std::vector<
DeviceGemmReducePtr<PassThrough, PassThrough, PassThrough, DInElementOps, DOutElementOps>>&
instances)
{
add_device_operation_instances(
instances, device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instances{});
......
@@ -62,12 +62,9 @@ using device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instances = s
>;
void add_device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instances(
std::vector<DeviceGemmReducePtr<DPtrsGlobal,
PassThrough,
PassThrough,
PassThrough,
DInElementOps,
DOutElementOps>>& instances)
std::vector<
DeviceGemmReducePtr<PassThrough, PassThrough, PassThrough, DInElementOps, DOutElementOps>>&
instances)
{
add_device_operation_instances(
instances, device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instances{});
......
@@ -59,12 +59,9 @@ using device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instances = s
>;
void add_device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instances(
std::vector<DeviceGemmReducePtr<DPtrsGlobal,
PassThrough,
PassThrough,
PassThrough,
DInElementOps,
DOutElementOps>>& instances)
std::vector<
DeviceGemmReducePtr<PassThrough, PassThrough, PassThrough, DInElementOps, DOutElementOps>>&
instances)
{
add_device_operation_instances(
instances, device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instances{});
......
@@ -26,7 +26,6 @@ using DInElementOps = ck::Tuple<Identity, Square>;
using DOutElementOps = ck::Tuple<Identity, Identity>;
using DeviceGemmReduceNoOpPtr = ck::tensor_operation::device::DeviceGemmReducePtr<
DPtrsGlobal,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
@@ -260,7 +259,7 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
gemm_ptr->MakeArgumentPointer(static_cast<ADataType*>(a_device_buf.GetDeviceBuffer()),
static_cast<BDataType*>(b_device_buf.GetDeviceBuffer()),
static_cast<CDataType*>(c_device_buf.GetDeviceBuffer()),
dxs_global,
&dxs_global,
M,
N,
K,
......
@@ -27,7 +26,6 @@ using DInElementOps = ck::Tuple<Identity, Square>;
using DOutElementOps = ck::Tuple<Div, Div>;
using DeviceGemmBiasAddReduceNoOpPtr = ck::tensor_operation::device::DeviceGemmBiasAddReducePtr<
DPtrsGlobal,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
@@ -296,7 +295,7 @@ void profile_gemm_bias_add_reduce_impl(int do_verification,
static_cast<CDataType*>(c_device_buf.GetDeviceBuffer()),
static_cast<C0DataType*>(bias_device_buf.GetDeviceBuffer()),
static_cast<C1DataType*>(c1_device_buf.GetDeviceBuffer()),
dxs_global,
&dxs_global,
M,
N,
K,
......
@@ -27,7 +26,6 @@ using DInElementOps = ck::Tuple<Identity, Square>;
using DOutElementOps = ck::Tuple<Div, Div>;
using DeviceGemmReduceNoOpPtr = ck::tensor_operation::device::DeviceGemmReducePtr<
DPtrsGlobal,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
@@ -261,7 +260,7 @@ bool profile_gemm_reduce_impl(int do_verification,
gemm_ptr->MakeArgumentPointer(static_cast<ADataType*>(a_device_buf.GetDeviceBuffer()),
static_cast<BDataType*>(b_device_buf.GetDeviceBuffer()),
static_cast<CDataType*>(c_device_buf.GetDeviceBuffer()),
dxs_global,
&dxs_global,
M,
N,
K,
......
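
On the caller side (the profiler hunks above), the tuple of reduction-output device pointers is built exactly as before, but its address is now passed through the `void*` parameter, hence the change from `dxs_global` to `&dxs_global`. A small usage sketch, reusing the illustrative `DeviceGemmReduceImpl` from the sketch after the commit message and standing in `std::tuple` for `ck::Tuple`:

```cpp
#include <tuple>

int main()
{
    // Illustrative tuple of two reduction-output pointers (e.g. buffers for
    // two reductions such as a sum and a sum of squares); device allocation
    // is omitted here.
    using DPtrsGlobal = std::tuple<float*, float*>;
    float* p_d0       = nullptr;
    float* p_d1       = nullptr;
    DPtrsGlobal dxs_global{p_d0, p_d1};

    DeviceGemmReduceImpl<DPtrsGlobal> gemm_reduce;

    // The type-erased interface takes void*, so the caller passes the
    // tuple's address; the derived op dereferences it back to DPtrsGlobal.
    auto argument = gemm_reduce.MakeArgumentPointer(/*p_a=*/nullptr,
                                                    /*p_b=*/nullptr,
                                                    /*p_c=*/nullptr,
                                                    /*p_dxs=*/&dxs_global);
    (void)argument;
    return 0;
}
```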