Commit c03045ce authored by Chao Liu

rename

parent b2589957
......@@ -2,7 +2,7 @@
#include "device.hpp"
#include "host_tensor.hpp"
#include "transform_forward_convolution_into_gemm_v4r4r2_nhwc_kyxc_nhwk.hpp"
#include "driver_dynamic_gemm_xdlops_v2r2.hpp"
#include "driver_gemm_xdlops_v2r2.hpp"
template <typename TInWei,
typename TAcc,
......@@ -14,7 +14,7 @@ template <typename TInWei,
typename ConvDilations,
typename InLeftPads,
typename InRightPads>
void device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nhwc_kyxc_nhwk(
void device_convolution_forward_implicit_gemm_v4r4r2_xdlops_nhwc_kyxc_nhwk(
const InLengths& in_n_hi_wi_c_lengths,
const WeiLengths& wei_k_y_x_c_lengths,
const OutLengths& out_n_ho_wo_k_lengths,
......@@ -44,12 +44,9 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nhwc_kyxc_nh
wei_k_y_x_c_device_buf.ToDevice(wei_k_y_x_c.mData.data());
out_n_ho_wo_k_device_buf.ToDevice(out_n_ho_wo_k.mData.data());
const auto in_n_hi_wi_c_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(in_n_hi_wi_c_lengths);
const auto wei_k_y_x_c_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(wei_k_y_x_c_lengths);
const auto out_n_ho_wo_k_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(out_n_ho_wo_k_lengths);
const auto in_n_hi_wi_c_desc = make_naive_tensor_descriptor_packed(in_n_hi_wi_c_lengths);
const auto wei_k_y_x_c_desc = make_naive_tensor_descriptor_packed(wei_k_y_x_c_lengths);
const auto out_n_ho_wo_k_desc = make_naive_tensor_descriptor_packed(out_n_ho_wo_k_lengths);
#if 1
// [M, N, K0, K1] = [256, 128, 4, 4] for fp32
......@@ -155,7 +152,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nhwc_kyxc_nh
for(index_t i = 0; i < 5; ++i)
{
float ave_time = driver_dynamic_gemm_xdlops_v2r2<
float ave_time = driver_gemm_xdlops_v2r2<
BlockSize,
TInWei,
TAcc,
......
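The renamed helper make_naive_tensor_descriptor_packed (formerly make_dynamic_naive_tensor_descriptor_packed_v2) takes only the lengths; for a packed descriptor the strides are implied. A minimal standalone sketch of that packed, innermost-contiguous stride convention, illustrative only and independent of the composable_kernel types:

```cpp
#include <array>
#include <cstddef>
#include <iostream>

// For a packed layout, stride[i] is the product of all lengths to the right of i,
// so the innermost dimension is contiguous (stride 1).
template <std::size_t N>
std::array<std::size_t, N> packed_strides(const std::array<std::size_t, N>& lengths)
{
    std::array<std::size_t, N> strides{};
    std::size_t running = 1;
    for(std::size_t i = N; i > 0; --i)
    {
        strides[i - 1] = running;
        running *= lengths[i - 1];
    }
    return strides;
}

int main()
{
    // NHWC input lengths, in the spirit of in_n_hi_wi_c_lengths above (values are illustrative).
    const std::array<std::size_t, 4> lengths = {128, 28, 28, 256};
    for(std::size_t s : packed_strides(lengths))
        std::cout << s << ' '; // prints: 200704 7168 256 1
    std::cout << '\n';
}
```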
......@@ -2,7 +2,7 @@
#include "device.hpp"
#include "host_tensor.hpp"
#include "transform_forward_convolution_into_gemm_v4r4r2_nhwc_kyxc_nhwk.hpp"
#include "driver_dynamic_gemm_xdlops_v2r3.hpp"
#include "driver_gemm_xdlops_v2r3.hpp"
template <typename TInWei,
typename TAcc,
......@@ -14,7 +14,7 @@ template <typename TInWei,
typename ConvDilations,
typename InLeftPads,
typename InRightPads>
void device_dynamic_convolution_forward_implicit_gemm_v4r4r3_xdlops_nhwc_kyxc_nhwk(
void device_convolution_forward_implicit_gemm_v4r4r3_xdlops_nhwc_kyxc_nhwk(
const InLengths& in_n_hi_wi_c_lengths,
const WeiLengths& wei_k_y_x_c_lengths,
const OutLengths& out_n_ho_wo_k_lengths,
......@@ -49,12 +49,9 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4r3_xdlops_nhwc_kyxc_nh
wei_k_y_x_c_device_buf.ToDevice(wei_k_y_x_c.mData.data());
out_n_ho_wo_k_device_buf.ToDevice(out_n_ho_wo_k.mData.data());
const auto in_n_hi_wi_c_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(in_n_hi_wi_c_lengths);
const auto wei_k_y_x_c_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(wei_k_y_x_c_lengths);
const auto out_n_ho_wo_k_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(out_n_ho_wo_k_lengths);
const auto in_n_hi_wi_c_desc = make_naive_tensor_descriptor_packed(in_n_hi_wi_c_lengths);
const auto wei_k_y_x_c_desc = make_naive_tensor_descriptor_packed(wei_k_y_x_c_lengths);
const auto out_n_ho_wo_k_desc = make_naive_tensor_descriptor_packed(out_n_ho_wo_k_lengths);
#if 1
// [M, N, K0, K1] = [256, 128, 4, 4] for fp32
......@@ -224,7 +221,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4r3_xdlops_nhwc_kyxc_nh
for(index_t i = 0; i < 5; ++i)
{
float ave_time = driver_dynamic_gemm_xdlops_v2r3<
float ave_time = driver_gemm_xdlops_v2r3<
BlockSize,
TInWei,
TAcc,
......
......@@ -2,7 +2,7 @@
#include "device.hpp"
#include "host_tensor.hpp"
#include "transform_forward_convolution_into_gemm_v4r4r4_nhwc_kyxc_nhwk.hpp"
#include "driver_dynamic_gemm_xdlops_v2r3.hpp"
#include "driver_gemm_xdlops_v2r3.hpp"
template <typename TInWei,
typename TAcc,
......@@ -14,7 +14,7 @@ template <typename TInWei,
typename ConvDilations,
typename InLeftPads,
typename InRightPads>
void device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk(
void device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk(
const InLengths& in_n_hi_wi_c_lengths,
const WeiLengths& wei_k_y_x_c_lengths,
const OutLengths& out_n_ho_wo_k_lengths,
......@@ -44,12 +44,9 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nh
wei_k_y_x_c_device_buf.ToDevice(wei_k_y_x_c.mData.data());
out_n_ho_wo_k_device_buf.ToDevice(out_n_ho_wo_k.mData.data());
const auto in_n_hi_wi_c_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(in_n_hi_wi_c_lengths);
const auto wei_k_y_x_c_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(wei_k_y_x_c_lengths);
const auto out_n_ho_wo_k_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(out_n_ho_wo_k_lengths);
const auto in_n_hi_wi_c_desc = make_naive_tensor_descriptor_packed(in_n_hi_wi_c_lengths);
const auto wei_k_y_x_c_desc = make_naive_tensor_descriptor_packed(wei_k_y_x_c_lengths);
const auto out_n_ho_wo_k_desc = make_naive_tensor_descriptor_packed(out_n_ho_wo_k_lengths);
#if 0
// [M, N, K0, K1] = [256, 128, 4, 4] for fp32
......@@ -278,7 +275,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nh
for(index_t i = 0; i < 5; ++i)
{
float ave_time = driver_dynamic_gemm_xdlops_v2r3<
float ave_time = driver_gemm_xdlops_v2r3<
BlockSize,
TInWei,
TAcc,
......
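These v4r4rX device functions lower the NHWC forward convolution to a GEMM before calling driver_gemm_xdlops_*. As background (not extracted from this diff), the conventional implicit-GEMM sizes for this layout are GemmM = K, GemmN = N*Ho*Wo and GemmK = C*Y*X; the sketch below only computes those sizes and does not reproduce the K0/K1 split used by the xdlops kernels.

```cpp
#include <cstdint>
#include <iostream>

// Conventional implicit-GEMM sizes for forward convolution with an NHWC input,
// KYXC weight and NHWK output (a common mapping; stated here as an assumption).
struct GemmShape { std::int64_t M, N, K; };

GemmShape conv_fwd_as_gemm(std::int64_t n, std::int64_t k, std::int64_t c,
                           std::int64_t y, std::int64_t x,
                           std::int64_t ho, std::int64_t wo)
{
    return GemmShape{k, n * ho * wo, c * y * x};
}

int main()
{
    // Illustrative sizes only.
    const auto g = conv_fwd_as_gemm(/*N=*/128, /*K=*/256, /*C=*/192,
                                    /*Y=*/3, /*X=*/3, /*Ho=*/28, /*Wo=*/28);
    std::cout << "GemmM=" << g.M << " GemmN=" << g.N << " GemmK=" << g.K << '\n';
}
```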
#include <unistd.h>
#include "device.hpp"
#include "host_tensor.hpp"
#include "driver_dynamic_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw.hpp"
#include "driver_dynamic_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw_outpad.hpp"
#include "driver_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw.hpp"
#include "driver_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw_outpad.hpp"
template <typename TInWei,
ck::index_t InWeiVectorSize,
......@@ -15,7 +15,7 @@ template <typename TInWei,
typename ConvDilations,
typename InLeftPads,
typename InRightPads>
void device_dynamic_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw(
void device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw(
const InLengths& in_n_c_hi_wi_lengths,
const WeiLengths& wei_k_c_y_x_lengths,
const OutLengths& out_n_k_ho_wo_lengths,
......@@ -85,12 +85,10 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw(
in_n_c0_hi_wi_c1_device_buf.ToDevice(in_n_c0_hi_wi_c1.mData.data());
wei_k_c0_y_x_c1_device_buf.ToDevice(wei_k_c0_y_x_c1.mData.data());
const auto in_n_c0_hi_wi_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(N, C0, Hi, Wi));
const auto wei_k_c0_y_x_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(K, C0, Y, X));
const auto in_n_c0_hi_wi_desc = make_naive_tensor_descriptor_packed(make_tuple(N, C0, Hi, Wi));
const auto wei_k_c0_y_x_desc = make_naive_tensor_descriptor_packed(make_tuple(K, C0, Y, X));
const auto out_n_k0_ho_wo_k1_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(N, K0, Ho, Wo, K1));
make_naive_tensor_descriptor_packed(make_tuple(N, K0, Ho, Wo, K1));
#if 1
// cdata = 64, BlockSize = 64, 16x8x32x4
......
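The v5r1 path reshapes the NCHW input into N, C0, Hi, Wi, C1 with C = C0*C1 so the innermost C1 factor can be read as a vector. A tiny sketch of that index split, assuming (not shown in this hunk) that C1 corresponds to InWeiVectorSize:

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>

// Split a channel index c in [0, C) into (c0, c1) with c = c0 * C1 + c1,
// where C1 is the innermost (vectorized) factor and C0 = C / C1.
struct SplitC { std::int64_t c0, c1; };

SplitC split_channel(std::int64_t c, std::int64_t C, std::int64_t C1)
{
    assert(C % C1 == 0 && c >= 0 && c < C);
    return SplitC{c / C1, c % C1};
}

int main()
{
    const std::int64_t C = 64, C1 = 16; // illustrative; C1 stands in for InWeiVectorSize
    const auto s = split_channel(/*c=*/37, C, C1);
    std::cout << "c0=" << s.c0 << " c1=" << s.c1 << '\n'; // prints: c0=2 c1=5
}
```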
......@@ -3,7 +3,7 @@
#include "device.hpp"
#include "host_tensor.hpp"
#include "transform_forward_convolution_into_gemm_v6r1_nchw_kcyx_nkhw.hpp"
#include "driver_dynamic_contraction_dlops_v1r2.hpp"
#include "driver_contraction_dlops_v1r2.hpp"
template <typename TInWei,
typename TAcc,
......@@ -15,7 +15,7 @@ template <typename TInWei,
typename ConvDilations,
typename InLeftPads,
typename InRightPads>
void device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw(
void device_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw(
const InLengths& in_n_c_hi_wi_lengths,
const WeiLengths& wei_k_c_y_x_lengths,
const OutLengths& out_n_k_ho_wo_lengths,
......@@ -44,12 +44,9 @@ void device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw(
wei_k_c_y_x_device_buf.ToDevice(wei_k_c_y_x.mData.data());
out_n_k_ho_wo_device_buf.ToDevice(out_n_k_ho_wo.mData.data());
const auto in_desc_n_c_hi_wi =
make_dynamic_naive_tensor_descriptor_packed_v2(in_n_c_hi_wi_lengths);
const auto wei_desc_k_c_y_x =
make_dynamic_naive_tensor_descriptor_packed_v2(wei_k_c_y_x_lengths);
const auto out_desc_n_k_ho_wo =
make_dynamic_naive_tensor_descriptor_packed_v2(out_n_k_ho_wo_lengths);
const auto in_desc_n_c_hi_wi = make_naive_tensor_descriptor_packed(in_n_c_hi_wi_lengths);
const auto wei_desc_k_c_y_x = make_naive_tensor_descriptor_packed(wei_k_c_y_x_lengths);
const auto out_desc_n_k_ho_wo = make_naive_tensor_descriptor_packed(out_n_k_ho_wo_lengths);
#if 0
// [8, 1, 128, 1] * [8, 4, 32, 1] = [1, 128, 4, 32] for fp32
......@@ -180,7 +177,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw(
for(index_t i = 0; i < 5; ++i)
{
float ave_time = driver_dynamic_contraction_dlops_v1r2<
float ave_time = driver_contraction_dlops_v1r2<
BlockSize,
TInWei,
TAcc,
......
#ifndef DRIVER_DYNAMIC_CONTRACTION_DLOPS_V1R2_HPP
#define DRIVER_DYNAMIC_CONTRACTION_DLOPS_V1R2_HPP
#ifndef DRIVER_CONTRACTION_DLOPS_V1R2_HPP
#define DRIVER_CONTRACTION_DLOPS_V1R2_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor_helper.hpp"
#include "gridwise_dynamic_contraction_dlops_v1r2.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_contraction_dlops_v1r2.hpp"
template <ck::index_t BlockSize,
typename FloatAB,
......@@ -45,7 +45,7 @@ template <ck::index_t BlockSize,
typename AGridMoveSliceWindowIteratorHacks,
typename BGridMoveSliceWindowIteratorHacks>
__host__ float
driver_dynamic_contraction_dlops_v1r2(const FloatAB* p_a_grid,
driver_contraction_dlops_v1r2(const FloatAB* p_a_grid,
const FloatAB* p_b_grid,
FloatC* p_c_grid,
const AGridDesc_GK0_GM0_GM1_GK1& a_grid_desc_gk0_gm0_gm1_gk1,
......@@ -70,7 +70,7 @@ driver_dynamic_contraction_dlops_v1r2(const FloatAB* p_a_grid,
// GEMM
using GridwiseContraction =
GridwiseDynamicContractionDlops_A_GK0_GM0_GM1_GK1_B_GK0_GN0_GN1_GK1_C_GM0_GM1_GN0_GN1<
GridwiseContractionDlops_A_GK0_GM0_GM1_GK1_B_GK0_GN0_GN1_GK1_C_GM0_GM1_GN0_GN1<
BlockSize,
FloatAB,
FloatAcc,
......@@ -116,7 +116,7 @@ driver_dynamic_contraction_dlops_v1r2(const FloatAB* p_a_grid,
a_grid_desc_gk0_gm0_gm1_gk1, b_grid_desc_gk0_gn0_gn1_gk1, c_grid_desc_gm0_gm1_gn0_gn1))
{
throw std::runtime_error("wrong! "
"GridwiseDynamicContraction_A_GK0_GM0_GM1_GK1_B_GK0_GN0_GN1_GK1_C_"
"GridwiseContraction_A_GK0_GM0_GM1_GK1_B_GK0_GN0_GN1_GK1_C_"
"GM0_GM1_GN0_GN1 has invalid setting");
}
......@@ -178,7 +178,7 @@ driver_dynamic_contraction_dlops_v1r2(const FloatAB* p_a_grid,
if(has_main_k_block_loop && has_double_tail_k_block_loop)
{
const auto kernel = kernel_dynamic_contraction_dlops_v1r2<
const auto kernel = kernel_contraction_dlops_v1r2<
GridwiseContraction,
FloatAB,
FloatC,
......@@ -204,7 +204,7 @@ driver_dynamic_contraction_dlops_v1r2(const FloatAB* p_a_grid,
}
else if(has_main_k_block_loop && !has_double_tail_k_block_loop)
{
const auto kernel = kernel_dynamic_contraction_dlops_v1r2<
const auto kernel = kernel_contraction_dlops_v1r2<
GridwiseContraction,
FloatAB,
FloatC,
......@@ -230,7 +230,7 @@ driver_dynamic_contraction_dlops_v1r2(const FloatAB* p_a_grid,
}
else if(!has_main_k_block_loop && has_double_tail_k_block_loop)
{
const auto kernel = kernel_dynamic_contraction_dlops_v1r2<
const auto kernel = kernel_contraction_dlops_v1r2<
GridwiseContraction,
FloatAB,
FloatC,
......@@ -256,7 +256,7 @@ driver_dynamic_contraction_dlops_v1r2(const FloatAB* p_a_grid,
}
else
{
const auto kernel = kernel_dynamic_contraction_dlops_v1r2<
const auto kernel = kernel_contraction_dlops_v1r2<
GridwiseContraction,
FloatAB,
FloatC,
......
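driver_contraction_dlops_v1r2, like the dlops GEMM drivers further down, selects one of four kernel instantiations from the runtime flags has_main_k_block_loop and has_double_tail_k_block_loop, turning them into compile-time template arguments of the launched kernel. A stripped-down sketch of that dispatch pattern, with a stand-in kernel instead of the real gridwise contraction:

```cpp
#include <iostream>

// Stand-in for the real gridwise kernel: the two loop properties are
// baked in as non-type template parameters.
template <bool HasMainKBlockLoop, bool HasDoubleTailKBlockLoop>
void kernel_stub()
{
    std::cout << "main_loop=" << HasMainKBlockLoop
              << " double_tail=" << HasDoubleTailKBlockLoop << '\n';
}

// Runtime flags pick one of the four instantiations, mirroring the
// if/else ladder in the driver.
void dispatch(bool has_main_k_block_loop, bool has_double_tail_k_block_loop)
{
    if(has_main_k_block_loop && has_double_tail_k_block_loop)
        kernel_stub<true, true>();
    else if(has_main_k_block_loop && !has_double_tail_k_block_loop)
        kernel_stub<true, false>();
    else if(!has_main_k_block_loop && has_double_tail_k_block_loop)
        kernel_stub<false, true>();
    else
        kernel_stub<false, false>();
}

int main() { dispatch(true, false); }
```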
#ifndef DRIVER_DYNAMIC_CONVOLUTION_FORWARD_IMPLICIT_GEMM_V5R1_NCHW_KCYX_NKHW_HPP
#define DRIVER_DYNAMIC_CONVOLUTION_FORWARD_IMPLICIT_GEMM_V5R1_NCHW_KCYX_NKHW_HPP
#ifndef DRIVER_CONVOLUTION_FORWARD_IMPLICIT_GEMM_V5R1_NCHW_KCYX_NKHW_HPP
#define DRIVER_CONVOLUTION_FORWARD_IMPLICIT_GEMM_V5R1_NCHW_KCYX_NKHW_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor_helper.hpp"
#include "gridwise_dynamic_gemm_dlops_v2.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_dlops_v2.hpp"
#include "gridwise_operation_wrapper.hpp"
template <ck::index_t BlockSize,
......@@ -34,9 +34,9 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_pad
typename ConvDilations,
typename InLeftPads,
typename InRightPads>
__host__ void Run(const ck::DynamicTensorDescriptor<Wei...>& wei_k_c_y_x_global_desc,
const ck::DynamicTensorDescriptor<In...>& in_n_c_hi_wi_global_desc,
const ck::DynamicTensorDescriptor<Out...>& out_n_k0_ho_wo_k1_global_desc,
__host__ void Run(const ck::TensorDescriptor<Wei...>& wei_k_c_y_x_global_desc,
const ck::TensorDescriptor<In...>& in_n_c_hi_wi_global_desc,
const ck::TensorDescriptor<Out...>& out_n_k0_ho_wo_k1_global_desc,
const ConvStrides& conv_strides,
const ConvDilations& conv_dilations,
const InLeftPads& in_left_pads,
......@@ -82,14 +82,14 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_pad
const auto InRightPadW = in_right_pads[I1];
// weight tensor
const auto wei_e_k_global_desc = transform_dynamic_tensor_descriptor(
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(K, C * Y * X)),
const auto wei_e_k_global_desc = transform_tensor_descriptor(
make_naive_tensor_descriptor_packed(make_tuple(K, C * Y * X)),
make_tuple(make_pass_through_transform(K), make_pass_through_transform(C * Y * X)),
make_tuple(Sequence<0>{}, Sequence<1>{}),
make_tuple(Sequence<1>{}, Sequence<0>{}));
// input tensor
const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
const auto in_n_c_hip_wip_global_desc = transform_tensor_descriptor(
in_n_c_hi_wi_global_desc,
make_tuple(make_pass_through_transform(N),
make_pass_through_transform(C),
......@@ -98,7 +98,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_pad
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor(
const auto in_n_c_y_ho_x_wo_global_desc = transform_tensor_descriptor(
in_n_c_hip_wip_global_desc,
make_tuple(
make_pass_through_transform(N),
......@@ -108,7 +108,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_pad
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));
const auto in_e_n_ho_wo_global_desc = transform_dynamic_tensor_descriptor(
const auto in_e_n_ho_wo_global_desc = transform_tensor_descriptor(
in_n_c_y_ho_x_wo_global_desc,
make_tuple(make_merge_transform(make_tuple(C, Y, X)),
make_pass_through_transform(N),
......@@ -118,8 +118,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_pad
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
// output tensor
const auto out_k_n_ho_wo_global_desc = transform_dynamic_tensor_descriptor(
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(N, K0, Ho, Wo, K1)),
const auto out_k_n_ho_wo_global_desc = transform_tensor_descriptor(
make_naive_tensor_descriptor_packed(make_tuple(N, K0, Ho, Wo, K1)),
make_tuple(make_merge_transform(make_tuple(K0, K1)),
make_pass_through_transform(N),
make_pass_through_transform(Ho),
......@@ -169,7 +169,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_pad
#if 1
// GEMM
using gridwise_gemm = GridwiseDynamicGemmDlops_km_kn_mn_v3<
using gridwise_gemm = GridwiseGemmDlops_km_kn_mn_v3<
BlockSize,
FloatAB,
FloatAcc,
......
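In this v5r1 driver the GEMM K dimension (E) comes from merging (C, Y, X) with make_merge_transform. A merge of this kind maps the multi-index (c, y, x) to a single offset in row-major order; the snippet below is an illustrative restatement of that mapping, not the CK transform machinery:

```cpp
#include <cstdint>
#include <iostream>

// Row-major merge of (c, y, x) with extents (C, Y, X) into one index
// e in [0, C*Y*X), in the spirit of make_merge_transform(make_tuple(C, Y, X)).
std::int64_t merge_cyx(std::int64_t c, std::int64_t y, std::int64_t x,
                       std::int64_t C, std::int64_t Y, std::int64_t X)
{
    (void)C; // C only bounds the index; it does not enter the offset
    return (c * Y + y) * X + x;
}

int main()
{
    // 3x3 filter over 8 channels: e runs over C*Y*X = 72 positions.
    std::cout << merge_cyx(/*c=*/5, /*y=*/1, /*x=*/2, /*C=*/8, /*Y=*/3, /*X=*/3) << '\n'; // 50
}
```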
#ifndef DRIVER_DYNAMIC_CONVOLUTION_FORWARD_IMPLICIT_GEMM_V5R1_DLOPS_NCHW_KCYX_NKHW_OUTPAD_HPP
#define DRIVER_DYNAMIC_CONVOLUTION_FORWARD_IMPLICIT_GEMM_V5R1_DLOPS_NCHW_KCYX_NKHW_OUTPAD_HPP
#ifndef DRIVER_CONVOLUTION_FORWARD_IMPLICIT_GEMM_V5R1_DLOPS_NCHW_KCYX_NKHW_OUTPAD_HPP
#define DRIVER_CONVOLUTION_FORWARD_IMPLICIT_GEMM_V5R1_DLOPS_NCHW_KCYX_NKHW_OUTPAD_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor_helper.hpp"
#include "gridwise_dynamic_gemm_dlops_v2.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_dlops_v2.hpp"
#include "gridwise_operation_wrapper.hpp"
template <ck::index_t BlockSize,
......@@ -34,9 +34,9 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
typename ConvDilations,
typename InLeftPads,
typename InRightPads>
__host__ void Run(const ck::DynamicTensorDescriptor<Wei...>& wei_k_c_y_x_global_desc,
const ck::DynamicTensorDescriptor<In...>& in_n_c_hi_wi_global_desc,
const ck::DynamicTensorDescriptor<Out...>& out_n_k0_ho_wo_k1_global_desc,
__host__ void Run(const ck::TensorDescriptor<Wei...>& wei_k_c_y_x_global_desc,
const ck::TensorDescriptor<In...>& in_n_c_hi_wi_global_desc,
const ck::TensorDescriptor<Out...>& out_n_k0_ho_wo_k1_global_desc,
const ConvStrides& conv_strides,
const ConvDilations& conv_dilations,
const InLeftPads& in_left_pads,
......@@ -93,14 +93,14 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
<< std::endl;
// weight tensor
const auto wei_e_k_global_desc = transform_dynamic_tensor_descriptor(
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(K, C * Y * X)),
const auto wei_e_k_global_desc = transform_tensor_descriptor(
make_naive_tensor_descriptor_packed(make_tuple(K, C * Y * X)),
make_tuple(make_pass_through_transform(K), make_pass_through_transform(C * Y * X)),
make_tuple(Sequence<0>{}, Sequence<1>{}),
make_tuple(Sequence<1>{}, Sequence<0>{}));
// input tensor
const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
const auto in_n_c_hip_wip_global_desc = transform_tensor_descriptor(
in_n_c_hi_wi_global_desc,
make_tuple(make_pass_through_transform(N),
make_pass_through_transform(C),
......@@ -109,7 +109,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor(
const auto in_n_c_y_ho_x_wo_global_desc = transform_tensor_descriptor(
in_n_c_hip_wip_global_desc,
make_tuple(
make_pass_through_transform(N),
......@@ -119,7 +119,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));
const auto in_e_n_ho_wo_global_desc = transform_dynamic_tensor_descriptor(
const auto in_e_n_ho_wo_global_desc = transform_tensor_descriptor(
in_n_c_y_ho_x_wo_global_desc,
make_tuple(make_merge_transform(make_tuple(C, Y, X)),
make_pass_through_transform(N),
......@@ -129,8 +129,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
// output tensor
const auto out_k_n_hop_wop_global_desc = transform_dynamic_tensor_descriptor(
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(N, K0, Ho, Wo, K1)),
const auto out_k_n_hop_wop_global_desc = transform_tensor_descriptor(
make_naive_tensor_descriptor_packed(make_tuple(N, K0, Ho, Wo, K1)),
make_tuple(make_merge_transform(make_tuple(K0, K1)),
make_pass_through_transform(N),
make_pad_transform(Ho, 0, OutRightPadH),
......@@ -181,7 +181,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
Sequence<0, 0, 0, 0, 0>{}));
// GEMM
using gridwise_gemm = GridwiseDynamicGemmDlops_km_kn_mn_v3<
using gridwise_gemm = GridwiseGemmDlops_km_kn_mn_v3<
BlockSize,
FloatAB,
FloatAcc,
......
#ifndef DRIVER_DYNAMIC_GEMM_DLOPS_V1R2
#define DRIVER_DYNAMIC_GEMM_DLOPS_V1R2
#ifndef DRIVER_GEMM_DLOPS_V1R2
#define DRIVER_GEMM_DLOPS_V1R2
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor_helper.hpp"
#include "gridwise_dynamic_gemm_dlops_v1r2.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_dlops_v1r2.hpp"
template <ck::index_t BlockSize,
typename FloatAB,
......@@ -48,7 +48,7 @@ template <ck::index_t BlockSize,
typename CGridIteratorHacks,
typename AGridMoveSliceWindowIteratorHacks,
typename BGridMoveSliceWindowIteratorHacks>
__host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
__host__ float driver_gemm_dlops_v1r2(const FloatAB* p_a_grid,
const FloatAB* p_b_grid,
FloatC* p_c_grid,
const AKMGridDesc& a_k_m_grid_desc,
......@@ -72,8 +72,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
constexpr auto I5 = Number<5>{};
// GEMM
using GridwiseGemm =
GridwiseDynamicGemmDlops_km_kn_mn_v1r2<BlockSize,
using GridwiseGemm = GridwiseGemmDlops_km_kn_mn_v1r2<BlockSize,
FloatAB,
FloatAcc,
FloatC,
......@@ -122,8 +121,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
if(!GridwiseGemm::CheckValidity(a_k_m_grid_desc, b_k_n_grid_desc, c_m_n_grid_desc))
{
throw std::runtime_error(
"wrong! GridwiseDynamicGemmDlops_km_kn_mn_v1r2 has invalid setting");
throw std::runtime_error("wrong! GridwiseGemmDlops_km_kn_mn_v1r2 has invalid setting");
}
const auto a_k_m0_m1_grid_desc = GridwiseGemm::MakeAKM0M1GridDescriptor(a_k_m_grid_desc);
......@@ -174,7 +172,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
if(has_main_k_block_loop && has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r2<GridwiseGemm,
kernel_gemm_dlops_v1r2<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AKM0M1GridDesc>,
......@@ -200,7 +198,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
else if(has_main_k_block_loop && !has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r2<GridwiseGemm,
kernel_gemm_dlops_v1r2<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AKM0M1GridDesc>,
......@@ -226,7 +224,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
else if(!has_main_k_block_loop && has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r2<GridwiseGemm,
kernel_gemm_dlops_v1r2<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AKM0M1GridDesc>,
......@@ -252,7 +250,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
else
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r2<GridwiseGemm,
kernel_gemm_dlops_v1r2<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AKM0M1GridDesc>,
......@@ -295,7 +293,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
if(has_main_k_block_loop && has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r2<GridwiseGemm,
kernel_gemm_dlops_v1r2<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AKM0M1GridDesc>,
......@@ -324,7 +322,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
else if(has_main_k_block_loop && !has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r2<GridwiseGemm,
kernel_gemm_dlops_v1r2<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AKM0M1GridDesc>,
......@@ -353,7 +351,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
else if(!has_main_k_block_loop && has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r2<GridwiseGemm,
kernel_gemm_dlops_v1r2<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AKM0M1GridDesc>,
......@@ -382,7 +380,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
else
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r2<GridwiseGemm,
kernel_gemm_dlops_v1r2<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AKM0M1GridDesc>,
......
#ifndef DRIVER_DYNAMIC_GEMM_DLOPS_V1R3
#define DRIVER_DYNAMIC_GEMM_DLOPS_V1R3
#ifndef DRIVER_GEMM_DLOPS_V1R3
#define DRIVER_GEMM_DLOPS_V1R3
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor_helper.hpp"
#include "gridwise_dynamic_gemm_dlops_v1r3.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_dlops_v1r3.hpp"
template <ck::index_t BlockSize,
typename FloatAB,
......@@ -44,7 +44,7 @@ template <ck::index_t BlockSize,
typename CGridIteratorHacks,
typename AGridMoveSliceWindowIteratorHacks,
typename BGridMoveSliceWindowIteratorHacks>
__host__ float driver_dynamic_gemm_dlops_v1r3(const FloatAB* p_a_grid,
__host__ float driver_gemm_dlops_v1r3(const FloatAB* p_a_grid,
const FloatAB* p_b_grid,
FloatC* p_c_grid,
const AK0MK1GridDesc& a_k0_m_k1_grid_desc,
......@@ -69,7 +69,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r3(const FloatAB* p_a_grid,
// GEMM
using GridwiseGemm =
GridwiseDynamicGemmDlops_km_kn_mn_v1r3<BlockSize,
GridwiseGemmDlops_km_kn_mn_v1r3<BlockSize,
FloatAB,
FloatAcc,
FloatC,
......@@ -114,8 +114,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r3(const FloatAB* p_a_grid,
if(!GridwiseGemm::CheckValidity(a_k0_m_k1_grid_desc, b_k0_n_k1_grid_desc, c_m_n_grid_desc))
{
throw std::runtime_error(
"wrong! GridwiseDynamicGemmDlops_km_kn_mn_v1r3 has invalid setting");
throw std::runtime_error("wrong! GridwiseGemmDlops_km_kn_mn_v1r3 has invalid setting");
}
const auto a_k0_m0_m1_k1_grid_desc =
......@@ -170,7 +169,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r3(const FloatAB* p_a_grid,
if(has_main_k_block_loop && has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r3<GridwiseGemm,
kernel_gemm_dlops_v1r3<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AK0M0M1K1GridDesc>,
......@@ -196,7 +195,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r3(const FloatAB* p_a_grid,
else if(has_main_k_block_loop && !has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r3<GridwiseGemm,
kernel_gemm_dlops_v1r3<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AK0M0M1K1GridDesc>,
......@@ -222,7 +221,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r3(const FloatAB* p_a_grid,
else if(!has_main_k_block_loop && has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r3<GridwiseGemm,
kernel_gemm_dlops_v1r3<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AK0M0M1K1GridDesc>,
......@@ -248,7 +247,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r3(const FloatAB* p_a_grid,
else
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r3<GridwiseGemm,
kernel_gemm_dlops_v1r3<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AK0M0M1K1GridDesc>,
......@@ -291,7 +290,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r3(const FloatAB* p_a_grid,
if(has_main_k_block_loop && has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r3<GridwiseGemm,
kernel_gemm_dlops_v1r3<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AK0M0M1K1GridDesc>,
......@@ -322,7 +321,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r3(const FloatAB* p_a_grid,
else if(has_main_k_block_loop && !has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r3<GridwiseGemm,
kernel_gemm_dlops_v1r3<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AK0M0M1K1GridDesc>,
......@@ -353,7 +352,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r3(const FloatAB* p_a_grid,
else if(!has_main_k_block_loop && has_double_tail_k_block_loop)
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r3<GridwiseGemm,
kernel_gemm_dlops_v1r3<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AK0M0M1K1GridDesc>,
......@@ -384,7 +383,7 @@ __host__ float driver_dynamic_gemm_dlops_v1r3(const FloatAB* p_a_grid,
else
{
const auto kernel =
kernel_dynamic_gemm_dlops_v1r3<GridwiseGemm,
kernel_gemm_dlops_v1r3<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AK0M0M1K1GridDesc>,
......
#ifndef DRIVER_DYNAMIC_GEMM_XDLOPS_V2R3
#define DRIVER_DYNAMIC_GEMM_XDLOPS_V2R3
#ifndef DRIVER_GEMM_XDLOPS_V2R3
#define DRIVER_GEMM_XDLOPS_V2R3
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor_helper.hpp"
#include "gridwise_dynamic_gemm_xdlops_v2r3.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_xdlops_v2r3.hpp"
template <ck::index_t BlockSize,
typename FloatAB,
......@@ -47,7 +47,7 @@ template <ck::index_t BlockSize,
typename AGridMoveSliceWindowIteratorHacks,
typename BGridMoveSliceWindowIteratorHacks,
bool CAccessOrderMRepeatNRepeat>
__host__ float driver_dynamic_gemm_xdlops_v2r3(const FloatAB* p_a_grid,
__host__ float driver_gemm_xdlops_v2r3(const FloatAB* p_a_grid,
const FloatAB* p_b_grid,
FloatC* p_c_grid,
const AK0MK1GridDesc& a_k0_m_k1_grid_desc,
......@@ -68,7 +68,7 @@ __host__ float driver_dynamic_gemm_xdlops_v2r3(const FloatAB* p_a_grid,
constexpr auto I2 = Number<2>{};
using GridwiseGemm =
GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3<BlockSize,
GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3<BlockSize,
FloatAB,
FloatAcc,
FloatC,
......@@ -126,7 +126,7 @@ __host__ float driver_dynamic_gemm_xdlops_v2r3(const FloatAB* p_a_grid,
if(!GridwiseGemm::CheckValidity(a_k0_m_k1_grid_desc, b_k0_n_k1_grid_desc, c_m_n_grid_desc))
{
throw std::runtime_error(
"wrong! GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v2r3 has invalid setting");
"wrong! GridwiseGemm_km_kn_m0m1n0n1_xdlops_v2r3 has invalid setting");
}
const auto c_m0_m1_m2_n_grid_desc = GridwiseGemm::MakeCM0M1M2NGridDescriptor(c_m_n_grid_desc);
......@@ -139,7 +139,7 @@ __host__ float driver_dynamic_gemm_xdlops_v2r3(const FloatAB* p_a_grid,
const index_t grid_size = GridwiseGemm::CalculateGridSize(c_m_n_grid_desc);
const auto kernel = kernel_dynamic_gemm_xdlops_v2r3<GridwiseGemm,
const auto kernel = kernel_gemm_xdlops_v2r3<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AK0MK1GridDesc>,
......
......@@ -12,10 +12,10 @@
#include "conv_common.hpp"
#include "host_conv_bwd_data.hpp"
#include "device_tensor.hpp"
#include "device_dynamic_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyxc_nhwk.hpp"
#include "device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk.hpp"
#include "device_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyxc_nhwk.hpp"
#include "device_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk.hpp"
#define USE_DYNAMIC_MODE 1
#define USE_MODE 1
#define USE_CONV_BWD_V4R1_XDL_NHWC 1
#define USE_CONV_BWD_V4R1R2_XDL_NHWC 1
......@@ -37,7 +37,7 @@ int main(int argc, char* argv[])
constexpr auto I5 = Number<5>{};
constexpr auto I6 = Number<6>{};
#if USE_DYNAMIC_MODE
#if USE_MODE
// dynamic mode
if(argc != 22)
{
......@@ -212,7 +212,7 @@ int main(int argc, char* argv[])
}
auto f_make_for_device_nhwc = [&]() {
#if USE_DYNAMIC_MODE
#if USE_MODE
const auto in_lengths_dev = make_tuple(N, Hi, Wi, C);
const auto wei_lengths_dev = make_tuple(K, Y, X, C);
const auto out_lengths_dev = make_tuple(N, Ho, Wo, K);
......@@ -253,10 +253,10 @@ int main(int argc, char* argv[])
const auto tmp = f_make_for_device_nhwc();
device_dynamic_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyxc_nhwk<
in_data_t,
device_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyxc_nhwk<in_data_t,
acc_data_t,
out_data_t>(tmp[I0],
out_data_t>(
tmp[I0],
tmp[I1],
tmp[I2],
tmp[I3],
......@@ -280,10 +280,10 @@ int main(int argc, char* argv[])
const auto tmp = f_make_for_device_nhwc();
device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk<
in_data_t,
device_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk<in_data_t,
acc_data_t,
out_data_t>(tmp[I0],
out_data_t>(
tmp[I0],
tmp[I1],
tmp[I2],
tmp[I3],
......
......@@ -12,14 +12,14 @@
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_dlops_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r2_dlops_nhwc_kyxc_nhwk.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp"
#define USE_DYNAMIC_MODE 1
#include "device_convolution_forward_implicit_gemm_v4r4_dlops_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4r2_dlops_nhwc_kyxc_nhwk.hpp"
#include "device_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4r2_xdlops_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp"
#define USE_MODE 1
#define USE_CONV_FWD_V4R4_NCHW 0
#define USE_CONV_FWD_V4R4R2_NHWC 1
#define USE_CONV_FWD_V6R1_NCHW 1
......@@ -49,7 +49,7 @@ int main(int argc, char* argv[])
constexpr auto I5 = Number<5>{};
constexpr auto I6 = Number<6>{};
#if USE_DYNAMIC_MODE
#if USE_MODE
// dynamic mode
if(argc != 22)
{
......@@ -228,7 +228,7 @@ int main(int argc, char* argv[])
}
auto f_make_for_device_nchw = [&]() {
#if USE_DYNAMIC_MODE
#if USE_MODE
const auto in_lengths_dev = make_tuple(N, C, Hi, Wi);
const auto wei_lengths_dev = make_tuple(K, C, Y, X);
const auto out_lengths_dev = make_tuple(N, K, Ho, Wo);
......@@ -260,7 +260,7 @@ int main(int argc, char* argv[])
};
auto f_make_for_device_nhwc = [&]() {
#if USE_DYNAMIC_MODE
#if USE_MODE
const auto in_lengths_dev = make_tuple(N, Hi, Wi, C);
const auto wei_lengths_dev = make_tuple(K, Y, X, C);
const auto out_lengths_dev = make_tuple(N, Ho, Wo, K);
......@@ -301,10 +301,9 @@ int main(int argc, char* argv[])
const auto tmp = f_make_for_device_nchw();
device_dynamic_convolution_forward_implicit_gemm_v4r4_dlops_nchw_kcyx_nkhw<in_data_t,
device_convolution_forward_implicit_gemm_v4r4_dlops_nchw_kcyx_nkhw<in_data_t,
acc_data_t,
out_data_t>(
tmp[I0],
out_data_t>(tmp[I0],
tmp[I1],
tmp[I2],
tmp[I3],
......@@ -328,10 +327,9 @@ int main(int argc, char* argv[])
const auto tmp = f_make_for_device_nhwc();
device_dynamic_convolution_forward_implicit_gemm_v4r4r2_dlops_nhwc_kyxc_nhwk<in_data_t,
device_convolution_forward_implicit_gemm_v4r4r2_dlops_nhwc_kyxc_nhwk<in_data_t,
acc_data_t,
out_data_t>(
tmp[I0],
out_data_t>(tmp[I0],
tmp[I1],
tmp[I2],
tmp[I3],
......@@ -355,10 +353,9 @@ int main(int argc, char* argv[])
const auto tmp = f_make_for_device_nchw();
device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw<in_data_t,
device_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw<in_data_t,
acc_data_t,
out_data_t>(
tmp[I0],
out_data_t>(tmp[I0],
tmp[I1],
tmp[I2],
tmp[I3],
......@@ -382,11 +379,10 @@ int main(int argc, char* argv[])
const auto tmp = f_make_for_device_nchw();
device_dynamic_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw<in_data_t,
device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw<in_data_t,
16,
acc_data_t,
out_data_t>(
tmp[I0],
out_data_t>(tmp[I0],
tmp[I1],
tmp[I2],
tmp[I3],
......@@ -410,7 +406,7 @@ int main(int argc, char* argv[])
const auto tmp = f_make_for_device_nchw();
device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nchw_kcyx_nkhw<in_data_t,
device_convolution_forward_implicit_gemm_v4r4r2_xdlops_nchw_kcyx_nkhw<in_data_t,
acc_data_t,
out_data_t>(
tmp[I0],
......@@ -437,7 +433,7 @@ int main(int argc, char* argv[])
const auto tmp = f_make_for_device_nhwc();
device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk<in_data_t,
device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk<in_data_t,
acc_data_t,
out_data_t>(
tmp[I0],
......
#ifndef CONV_COMMON_HPP
#define CONV_COMMON_HPP
#include "dynamic_tensor_descriptor.hpp"
#include "tensor_descriptor.hpp"
enum ConvTensorLayout
{
......@@ -19,8 +19,8 @@ template <typename... InDesc,
typename LeftPads,
typename RightPads>
constexpr auto get_convolution_output_default_4d_tensor_descriptor(
const ck::DynamicTensorDescriptor<InDesc...>& in_desc,
const ck::DynamicTensorDescriptor<WeiDesc...>& wei_desc,
const ck::TensorDescriptor<InDesc...>& in_desc,
const ck::TensorDescriptor<WeiDesc...>& wei_desc,
const ConvStrides& conv_strides,
const ConvDilations conv_dilations,
const LeftPads& left_pads,
......@@ -57,7 +57,7 @@ constexpr auto get_convolution_output_default_4d_tensor_descriptor(
const auto Ho = (Hi + LeftPadH + RightPadH - YEff) / conv_strides[I0] + I1;
const auto Wo = (Wi + LeftPadW + RightPadW - XEff) / conv_strides[I1] + I1;
return make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(N, K, Ho, Wo));
return make_naive_tensor_descriptor_packed(make_tuple(N, K, Ho, Wo));
}
template <class InDesc, class WeiDesc, class OutDesc>
......
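get_convolution_output_default_4d_tensor_descriptor derives Ho and Wo before building the packed (N, K, Ho, Wo) descriptor. The same arithmetic written as plain host code, assuming the usual effective filter size YEff = (Y - 1) * DilationH + 1 (the YEff definition sits outside this hunk):

```cpp
#include <cstdint>
#include <iostream>

// Output height/width of a convolution, as in the Ho/Wo lines above:
//   Ho = (Hi + LeftPadH + RightPadH - YEff) / StrideH + 1
// with the effective (dilated) filter size YEff = (Y - 1) * DilationH + 1.
std::int64_t conv_out_size(std::int64_t in_size, std::int64_t left_pad, std::int64_t right_pad,
                           std::int64_t filter, std::int64_t dilation, std::int64_t stride)
{
    const std::int64_t eff = (filter - 1) * dilation + 1;
    return (in_size + left_pad + right_pad - eff) / stride + 1;
}

int main()
{
    // 3x3 filter, pad 1, stride 1, dilation 1 keeps the spatial size.
    std::cout << conv_out_size(28, 1, 1, 3, 1, 1) << '\n'; // prints: 28
}
```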