Commit f289221b authored by root's avatar root
Browse files

merge

parents 961e6810 5a88b0b0
......@@ -3,7 +3,10 @@
#include "host_tensor.hpp"
#include "driver_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
template <class T,
template <class TInWei,
ck::index_t InWeiVectorSize,
class TAcc,
class TOut,
class InDesc,
class WeiDesc,
class OutDesc,
......@@ -11,35 +14,33 @@ template <class T,
class ConvDilations,
class InLeftPads,
class InRightPads>
void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc,
const Tensor<T>& in_nchw,
void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(
InDesc,
const Tensor<TInWei>& in_n_c_hi_wi,
WeiDesc,
const Tensor<T>& wei_kcyx,
const Tensor<TInWei>& wei_k_c_y_x,
OutDesc,
Tensor<T>& out_nkhw,
Tensor<TOut>& out_n_k_ho_wo,
ConvStrides,
ConvDilations,
InLeftPads,
InRightPads,
ck::index_t nrepeat)
{
std::cout << "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw"
<< std::endl;
using namespace ck;
using TDevice = typename conditional<is_same<half_float::half, T>::value, half_t, T>::type;
std::cout << "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw"
<< std::endl;
std::size_t data_sz = sizeof(T);
DeviceMem in_nchw_device_buf(data_sz * in_nchw.mDesc.GetElementSpace());
DeviceMem wei_kcyx_device_buf(data_sz * wei_kcyx.mDesc.GetElementSpace());
DeviceMem out_nkhw_device_buf(data_sz * out_nkhw.mDesc.GetElementSpace());
DeviceMem in_n_c_hi_wi_device_buf(sizeof(TInWei) * in_n_c_hi_wi.mDesc.GetElementSpace());
DeviceMem wei_k_c_y_x_device_buf(sizeof(TInWei) * wei_k_c_y_x.mDesc.GetElementSpace());
DeviceMem out_n_k_ho_wo_device_buf(sizeof(TOut) * out_n_k_ho_wo.mDesc.GetElementSpace());
in_nchw_device_buf.ToDevice(in_nchw.mData.data());
wei_kcyx_device_buf.ToDevice(wei_kcyx.mData.data());
out_nkhw_device_buf.ToDevice(out_nkhw.mData.data());
in_n_c_hi_wi_device_buf.ToDevice(in_n_c_hi_wi.mData.data());
wei_k_c_y_x_device_buf.ToDevice(wei_k_c_y_x.mData.data());
out_n_k_ho_wo_device_buf.ToDevice(out_n_k_ho_wo.mData.data());
#if 1
#if 0
// run-time variables
const auto in_n_c_hi_wi_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(to_multi_index(InDesc::GetLengths()));
......@@ -67,7 +68,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
const auto in_right_pads = sequence_to_tuple_of_number(InRightPads{});
#endif
#if 0
#if 1
// cdata = 16, BlockSize = 64, 16x64x4
constexpr index_t BlockSize = 64;
......@@ -368,9 +369,9 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
#endif
<BlockSize,
TDevice,
TDevice,
TDevice,
typename vector_type<TInWei, InWeiVectorSize>::type,
TAcc,
TOut,
GemmMPerBlock,
GemmNPerBlock,
GemmKPerBlock,
......@@ -398,9 +399,11 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
conv_dilations,
in_left_pads,
in_right_pads,
static_cast<TDevice*>(wei_kcyx_device_buf.GetDeviceBuffer()),
static_cast<TDevice*>(in_nchw_device_buf.GetDeviceBuffer()),
static_cast<TDevice*>(out_nkhw_device_buf.GetDeviceBuffer()));
static_cast<typename vector_type<TInWei, InWeiVectorSize>::type*>(
wei_k_c_y_x_device_buf.GetDeviceBuffer()),
static_cast<typename vector_type<TInWei, InWeiVectorSize>::type*>(
in_n_c_hi_wi_device_buf.GetDeviceBuffer()),
static_cast<TOut*>(out_n_k_ho_wo_device_buf.GetDeviceBuffer()));
out_nkhw_device_buf.FromDevice(out_nkhw.mData.data());
out_n_k_ho_wo_device_buf.FromDevice(out_n_k_ho_wo.mData.data());
}
......@@ -13,26 +13,25 @@ template <class TInWei,
class ConvStrides,
class ConvDilations,
class InLeftPads,
class InRightPads,
class T>
class InRightPads>
void device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(
InDesc,
const Tensor<T>& in_n_c_hi_wi,
const Tensor<TInWei>& in_n_c_hi_wi,
WeiDesc,
const Tensor<T>& wei_k_c_y_x,
const Tensor<TInWei>& wei_k_c_y_x,
OutDesc,
Tensor<T>& out_n_k_ho_wo,
Tensor<TOut>& out_n_k_ho_wo,
ConvStrides,
ConvDilations,
InLeftPads,
InRightPads,
ck::index_t nrepeat)
{
using namespace ck;
std::cout << "device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk"
<< std::endl;
using namespace ck;
constexpr auto I0 = Number<0>{};
constexpr auto I1 = Number<1>{};
constexpr auto I2 = Number<2>{};
......
......@@ -24,20 +24,6 @@ int main(int argc, char* argv[])
using namespace ck;
#if 0
constexpr index_t N = 1;
constexpr index_t C = 16;
constexpr index_t HI = 1;
constexpr index_t WI = 64;
constexpr index_t K = 16;
constexpr index_t Y = 3;
constexpr index_t X = 3;
using ConvStrides = Sequence<1, 1>;
using ConvDilations = Sequence<1, 1>;
using LeftPads = Sequence<1, 1>;
using RightPads = Sequence<1, 1>;
#elif 0
constexpr index_t N = 1;
constexpr index_t C = 16;
constexpr index_t HI = 1080;
......@@ -151,7 +137,7 @@ int main(int argc, char* argv[])
using LeftPads = Sequence<0, 0>;
using RightPads = Sequence<0, 0>;
#elif 1
#elif 0
// 3x3, 71x71
constexpr index_t N = 128;
constexpr index_t C = 192;
......@@ -634,9 +620,9 @@ int main(int argc, char* argv[])
#if 1
using in_data_t = float;
constexpr index_t in_vector_size = 1;
using out_data_t = float;
using acc_data_t = float;
#else
using out_data_t = float;
#elif 1
using in_data_t = int8_t;
constexpr index_t in_vector_size = 4;
using acc_data_t = int32_t;
......@@ -720,20 +706,13 @@ int main(int argc, char* argv[])
LeftPads{},
RightPads{},
nrepeat);
#elif 0
device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
in_nchw,
wei_kcyx_desc,
wei_kcyx,
out_nkhw_desc,
out_nkhw_device,
ConvStrides{},
ConvDilations{},
LeftPads{},
RightPads{},
nrepeat);
#elif 0
device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(in_nchw_desc,
#elif 1
device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw<in_data_t,
in_vector_size,
acc_data_t,
out_data_t>
(in_nchw_desc,
in_nchw,
wei_kcyx_desc,
wei_kcyx,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment