Commit 07a673c6 authored by carlushuang

Merge remote-tracking branch 'origin/develop' into cpu_avx2

parents c0f698d5 ac0d8066
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>

#include "profile_conv_bwd_weight_impl.hpp"

enum struct ConvDataType
{
    F32_F32_F32,    // 0
    F16_F16_F16,    // 1
    BF16_BF16_BF16, // 2
    INT8_INT8_INT8, // 3
};

enum struct ConvInputLayout
{
    NCHW, // 0
    NHWC, // 1
};

enum struct ConvWeightLayout
{
    KCYX, // 0
    KYXC, // 1
};

enum struct ConvOutputLayout
{
    NKHW, // 0
    NHWK, // 1
};

int profile_conv_bwd_weight(int argc, char* argv[])
{
    if(argc != 26)
    {
        printf("arg1: tensor operation (conv2d_bwd_weight: Backward Weight Convolution 2d)\n");
        printf("arg2: data type (0: fp32; 1: fp16)\n");
        printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n");
        printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n");
        printf("arg5: output tensor layout (0: NKHW; 1: NHWK)\n");
        printf("arg6: verification (0: no; 1: yes)\n");
        printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
        printf("arg8: print tensor value (0: no; 1: yes)\n");
        printf("arg9: run kernel # of times (>1)\n");
        printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
               "RightPx\n");
        printf("arg25: split k (>=1)\n");
        exit(1);
    }
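
    // Illustrative invocation, assuming the profiler binary is named "ckProfiler"
    // (the op name "conv2d_bwd_weight" matches the dispatcher added in main() further below):
    //   ckProfiler conv2d_bwd_weight 1 1 1 1 1 1 0 5 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1 1
    //   i.e. fp16 data, NHWC/KYXC/NHWK layouts, verify on, integer init, no tensor print,
    //   5 repeats, N=128 K=256 C=192 Y=X=3 Hi=Wi=71, stride 2, dilation 1, pads 1, split_k=1.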
    const auto data_type  = static_cast<ConvDataType>(std::stoi(argv[2]));
    const auto in_layout  = static_cast<ConvInputLayout>(std::stoi(argv[3]));
    const auto wei_layout = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
    const auto out_layout = static_cast<ConvOutputLayout>(std::stoi(argv[5]));

    const bool do_verification = std::stoi(argv[6]);
    const int init_method      = std::stoi(argv[7]);
    const bool do_log          = std::stoi(argv[8]);
    const int nrepeat          = std::stoi(argv[9]);

    const ck::index_t N  = std::stoi(argv[10]);
    const ck::index_t K  = std::stoi(argv[11]);
    const ck::index_t C  = std::stoi(argv[12]);
    const ck::index_t Y  = std::stoi(argv[13]);
    const ck::index_t X  = std::stoi(argv[14]);
    const ck::index_t Hi = std::stoi(argv[15]);
    const ck::index_t Wi = std::stoi(argv[16]);

    const ck::index_t conv_stride_h   = std::stoi(argv[17]);
    const ck::index_t conv_stride_w   = std::stoi(argv[18]);
    const ck::index_t conv_dilation_h = std::stoi(argv[19]);
    const ck::index_t conv_dilation_w = std::stoi(argv[20]);
    const ck::index_t in_left_pad_h   = std::stoi(argv[21]);
    const ck::index_t in_left_pad_w   = std::stoi(argv[22]);
    const ck::index_t in_right_pad_h  = std::stoi(argv[23]);
    const ck::index_t in_right_pad_w  = std::stoi(argv[24]);

    ck::index_t split_k = std::stoi(argv[25]);
    split_k             = std::max(1, split_k); // split-K must be >= 1 per the usage above
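
    // Output spatial size for a padded, strided, dilated convolution (standard formula,
    // matching the computation below):
    //   effective filter = (Y - 1) * dilation + 1
    //   Ho = (Hi + pad_left + pad_right - effective_filter) / stride + 1
    // Worked example with the defaults of the test further below (Hi=71, Y=3, dilation=1,
    // pads=1, stride=2): YEff = 3, Ho = (71 + 1 + 1 - 3) / 2 + 1 = 36.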
    const ck::index_t YEff = (Y - 1) * conv_dilation_h + 1;
    const ck::index_t XEff = (X - 1) * conv_dilation_w + 1;

    const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
    const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;

    if(data_type == ConvDataType::F32_F32_F32 && in_layout == ConvInputLayout::NHWC &&
       wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
    {
        ck::profiler::profile_conv_bwd_weight_impl<2,
                                                   float,
                                                   float,
                                                   float,
                                                   ck::tensor_layout::convolution::NHWC,
                                                   ck::tensor_layout::convolution::KYXC,
                                                   ck::tensor_layout::convolution::NHWK>(
            do_verification,
            init_method,
            do_log,
            nrepeat,
            N,
            K,
            C,
            std::vector<ck::index_t>{Hi, Wi},
            std::vector<ck::index_t>{Y, X},
            std::vector<ck::index_t>{Ho, Wo},
            std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
            std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
            std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
            std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w},
            split_k);
    }
    else if(data_type == ConvDataType::F16_F16_F16 && in_layout == ConvInputLayout::NHWC &&
            wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
    {
        ck::profiler::profile_conv_bwd_weight_impl<2,
                                                   ck::half_t,
                                                   ck::half_t,
                                                   ck::half_t,
                                                   ck::tensor_layout::convolution::NHWC,
                                                   ck::tensor_layout::convolution::KYXC,
                                                   ck::tensor_layout::convolution::NHWK>(
            do_verification,
            init_method,
            do_log,
            nrepeat,
            N,
            K,
            C,
            std::vector<ck::index_t>{Hi, Wi},
            std::vector<ck::index_t>{Y, X},
            std::vector<ck::index_t>{Ho, Wo},
            std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
            std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
            std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
            std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w},
            split_k);
    }
    else
    {
        throw std::runtime_error("wrong! this Conv data_type & layout is not implemented");
    }

    return 1;
}
@@ -7,7 +7,7 @@
 #include "profile_convnd_bwd_data_impl.hpp"
-enum ConvDataType
+enum struct ConvDataType
 {
     F32_F32_F32, // 0
     F16_F16_F16, // 1
@@ -15,27 +15,27 @@ enum ConvDataType
     INT8_INT8_INT8, // 3
 };
-enum ConvInputLayout
+enum struct ConvInputLayout
 {
     NCHW, // 0
     NHWC, // 1
 };
-enum ConvWeightLayout
+enum struct ConvWeightLayout
 {
     KCYX, // 0
     KYXC, // 1
 };
-enum ConvOutputLayout
+enum struct ConvOutputLayout
 {
     NKHW, // 0
     NHWK, // 1
 };
-ck::conv_util::ConvParams parse_conv_params(int num_dim_spatial, char* argv[], int arg_idx)
+ck::utils::conv::ConvParams parse_conv_params(int num_dim_spatial, char* argv[], int arg_idx)
 {
     // (N, K, C) + num_dim_spatial * 6 (filter, input, strides, dilations, pad left, pad right)
-    ck::conv_util::ConvParams params;
+    ck::utils::conv::ConvParams params;
     params.num_dim_spatial = num_dim_spatial;
     params.N = std::stoi(argv[arg_idx++]);
@@ -97,16 +97,16 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
         return 1;
     }
-    const int data_type  = static_cast<ConvDataType>(std::stoi(argv[2]));
-    const int in_layout  = static_cast<ConvInputLayout>(std::stoi(argv[3]));
-    const int wei_layout = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
-    const int out_layout = static_cast<ConvOutputLayout>(std::stoi(argv[5]));
+    const auto data_type  = static_cast<ConvDataType>(std::stoi(argv[2]));
+    const auto in_layout  = static_cast<ConvInputLayout>(std::stoi(argv[3]));
+    const auto wei_layout = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
+    const auto out_layout = static_cast<ConvOutputLayout>(std::stoi(argv[5]));
     const bool do_verification = std::stoi(argv[6]);
     const int init_method      = std::stoi(argv[7]);
     const bool do_log          = std::stoi(argv[8]);
     const int nrepeat          = std::stoi(argv[9]);
-    ck::conv_util::ConvParams params = parse_conv_params(num_dim_spatial, argv, preParams);
+    ck::utils::conv::ConvParams params = parse_conv_params(num_dim_spatial, argv, preParams);
     auto Run = [&](auto input_type, auto wei_type, auto out_type, auto acc_type) {
         using InDataType = decltype(input_type);
...
@@ -8,7 +8,7 @@
 int profile_gemm_reduce(int argc, char* argv[])
 {
-    enum struct GemmMatrixLayout_t
+    enum struct GemmMatrixLayout
     {
         MK_KN_MN, // 0
         MK_NK_MN, // 1
@@ -16,7 +16,7 @@ int profile_gemm_reduce(int argc, char* argv[])
         KM_NK_MN, // 3
     };
-    enum struct GemmReduceDataType_t
+    enum struct GemmReduceDataType
    {
         F32_F32_F32_F32_F32, // 0
         F16_F16_F16_F32_F32, // 1
@@ -39,8 +39,8 @@ int profile_gemm_reduce(int argc, char* argv[])
         exit(1);
     }
-    const auto data_type = static_cast<GemmReduceDataType_t>(std::stoi(argv[2]));
-    const auto layout    = static_cast<GemmMatrixLayout_t>(std::stoi(argv[3]));
+    const auto data_type = static_cast<GemmReduceDataType>(std::stoi(argv[2]));
+    const auto layout    = static_cast<GemmMatrixLayout>(std::stoi(argv[3]));
     const bool do_verification = std::stoi(argv[4]);
     const int init_method      = std::stoi(argv[5]);
     const bool do_log          = std::stoi(argv[6]);
@@ -54,8 +54,7 @@ int profile_gemm_reduce(int argc, char* argv[])
     const int StrideB = std::stoi(argv[12]);
     const int StrideC = std::stoi(argv[13]);
-    if(data_type == GemmReduceDataType_t::F16_F16_F16_F32_F32 &&
-       layout == GemmMatrixLayout_t::MK_KN_MN)
+    if(data_type == GemmReduceDataType::F16_F16_F16_F32_F32 && layout == GemmMatrixLayout::MK_KN_MN)
     {
         ck::profiler::profile_gemm_reduce_impl<ck::half_t,
                                                ck::half_t,
@@ -75,8 +74,8 @@ int profile_gemm_reduce(int argc, char* argv[])
             (StrideB < 0) ? N : StrideB,
             (StrideC < 0) ? N : StrideC);
     }
-    else if(data_type == GemmReduceDataType_t::F16_F16_F16_F32_F32 &&
-            layout == GemmMatrixLayout_t::MK_NK_MN)
+    else if(data_type == GemmReduceDataType::F16_F16_F16_F32_F32 &&
+            layout == GemmMatrixLayout::MK_NK_MN)
     {
         ck::profiler::profile_gemm_reduce_impl<ck::half_t,
                                                ck::half_t,
@@ -96,8 +95,8 @@ int profile_gemm_reduce(int argc, char* argv[])
             (StrideB < 0) ? K : StrideB,
             (StrideC < 0) ? N : StrideC);
     }
-    else if(data_type == GemmReduceDataType_t::F16_F16_F16_F32_F32 &&
-            layout == GemmMatrixLayout_t::KM_KN_MN)
+    else if(data_type == GemmReduceDataType::F16_F16_F16_F32_F32 &&
+            layout == GemmMatrixLayout::KM_KN_MN)
     {
         ck::profiler::profile_gemm_reduce_impl<ck::half_t,
                                                ck::half_t,
@@ -117,8 +116,8 @@ int profile_gemm_reduce(int argc, char* argv[])
             (StrideB < 0) ? N : StrideB,
             (StrideC < 0) ? N : StrideC);
     }
-    else if(data_type == GemmReduceDataType_t::F16_F16_F16_F32_F32 &&
-            layout == GemmMatrixLayout_t::KM_NK_MN)
+    else if(data_type == GemmReduceDataType::F16_F16_F16_F32_F32 &&
+            layout == GemmMatrixLayout::KM_NK_MN)
     {
         ck::profiler::profile_gemm_reduce_impl<ck::half_t,
                                                ck::half_t,
...
@@ -6,7 +6,7 @@
 #include <half.hpp>
 #include "profile_grouped_gemm_impl.hpp"
-enum GemmMatrixLayout
+enum struct GemmMatrixLayout
 {
     MK_KN_MN, // 0
     MK_NK_MN, // 1
@@ -18,7 +18,7 @@ enum GemmMatrixLayout
     KM_NK_NM, // 7
 };
-enum GemmDataType
+enum struct GemmDataType
 {
     F32_F32_F32, // 0
     F16_F16_F16, // 1
@@ -61,8 +61,8 @@ int profile_grouped_gemm(int argc, char* argv[])
         exit(1);
     }
-    const int data_type  = static_cast<GemmDataType>(std::stoi(argv[2]));
-    const int layout     = static_cast<GemmMatrixLayout>(std::stoi(argv[3]));
+    const auto data_type = static_cast<GemmDataType>(std::stoi(argv[2]));
+    const auto layout    = static_cast<GemmMatrixLayout>(std::stoi(argv[3]));
     const bool do_verification = std::stoi(argv[4]);
     const int init_method      = std::stoi(argv[5]);
     const bool do_log          = std::stoi(argv[6]);
...
@@ -20,9 +20,9 @@
 using namespace std;
-using ck::NanPropagation_t;
-using ck::ReduceTensorIndices_t;
-using ck::ReduceTensorOp_t;
+using ck::NanPropagation;
+using ck::ReduceTensorIndices;
+using ck::ReduceTensorOp;
 static struct option long_options[] = {{"inLengths", required_argument, nullptr, 'D'},
                                        {"reduceDims", required_argument, nullptr, 'R'},
@@ -84,7 +84,7 @@ static std::vector<T> getTypeValuesFromString(const char* cstr_values)
     return (values);
 }
-enum struct appDataType_t
+enum struct AppDataType
 {
     appHalf = 0,
     appFloat = 1,
@@ -130,18 +130,18 @@ class AppArgs
     std::vector<float> scales;
-    ReduceTensorOp_t reduceOp = ReduceTensorOp_t::ADD;
-    appDataType_t compTypeId  = appDataType_t::appFloat;
-    appDataType_t outTypeId   = appDataType_t::appFloat;
+    ReduceTensorOp reduceOp = ReduceTensorOp::ADD;
+    AppDataType compTypeId  = AppDataType::appFloat;
+    AppDataType outTypeId   = AppDataType::appFloat;
     bool compType_assigned = false;
     bool outType_assigned  = false;
-    NanPropagation_t nanOpt          = NanPropagation_t::NOT_PROPAGATE_NAN;
-    ReduceTensorIndices_t indicesOpt = ReduceTensorIndices_t::NO_INDICES;
+    NanPropagation nanOpt          = NanPropagation::NOT_PROPAGATE_NAN;
+    ReduceTensorIndices indicesOpt = ReduceTensorIndices::NO_INDICES;
     bool do_log          = false;
     bool do_verification = false;
     bool do_dumpout      = false;
     int init_method;
     int nrepeat;
@@ -213,33 +213,33 @@ class AppArgs
            if(!optarg)
                throw std::runtime_error("Invalid option format!");
-            reduceOp = static_cast<ReduceTensorOp_t>(std::atoi(optarg));
+            reduceOp = static_cast<ReduceTensorOp>(std::atoi(optarg));
            break;
        case 'C':
            if(!optarg)
                throw std::runtime_error("Invalid option format!");
-            compTypeId = static_cast<appDataType_t>(std::atoi(optarg));
+            compTypeId        = static_cast<AppDataType>(std::atoi(optarg));
            compType_assigned = true;
            break;
        case 'W':
            if(!optarg)
                throw std::runtime_error("Invalid option format!");
-            outTypeId = static_cast<appDataType_t>(std::atoi(optarg));
+            outTypeId        = static_cast<AppDataType>(std::atoi(optarg));
            outType_assigned = true;
            break;
        case 'N':
            if(!optarg)
                throw std::runtime_error("Invalid option format!");
-            nanOpt = static_cast<NanPropagation_t>(std::atoi(optarg));
+            nanOpt = static_cast<NanPropagation>(std::atoi(optarg));
            break;
        case 'I':
            if(!optarg)
                throw std::runtime_error("Invalid option format!");
-            indicesOpt = static_cast<ReduceTensorIndices_t>(std::atoi(optarg));
+            indicesOpt = static_cast<ReduceTensorIndices>(std::atoi(optarg));
            break;
        case 'S':
            if(!optarg)
@@ -303,10 +303,10 @@ class AppArgs
            scales.push_back(0.0f);
        };
-        if(reduceOp == ReduceTensorOp_t::MIN || reduceOp == ReduceTensorOp_t::MAX ||
-           reduceOp == ReduceTensorOp_t::AMAX)
+        if(reduceOp == ReduceTensorOp::MIN || reduceOp == ReduceTensorOp::MAX ||
+           reduceOp == ReduceTensorOp::AMAX)
        {
-            if(indicesOpt != ReduceTensorIndices_t::NO_INDICES)
+            if(indicesOpt != ReduceTensorIndices::NO_INDICES)
                need_indices = true;
        // for indexable operations, no need to assign compType and outType, just let them be
@@ -333,22 +333,22 @@ int profile_reduce(int argc, char* argv[])
     check_reduce_dims(rank, args.reduceDims);
-    if(args.reduceOp == ReduceTensorOp_t::MUL || args.reduceOp == ReduceTensorOp_t::NORM1)
+    if(args.reduceOp == ReduceTensorOp::MUL || args.reduceOp == ReduceTensorOp::NORM1)
         throw std::runtime_error("MUL and NORM1 are not supported by composable kernel!");
     if(args.use_half)
     {
         if(!args.compType_assigned)
-            args.compTypeId = appDataType_t::appHalf;
+            args.compTypeId = AppDataType::appHalf;
         if(args.outType_assigned &&
-           (args.outTypeId != appDataType_t::appHalf && args.outTypeId != appDataType_t::appFloat))
-            args.outTypeId = appDataType_t::appFloat;
+           (args.outTypeId != AppDataType::appHalf && args.outTypeId != AppDataType::appFloat))
+            args.outTypeId = AppDataType::appFloat;
         if(!args.outType_assigned)
-            args.outTypeId = appDataType_t::appHalf;
+            args.outTypeId = AppDataType::appHalf;
-        if(args.compTypeId == appDataType_t::appHalf)
+        if(args.compTypeId == AppDataType::appHalf)
         {
             profile_reduce_impl<ck::half_t, ck::half_t, ck::half_t>(args.do_verification,
                                                                     args.init_method,
@@ -363,7 +363,7 @@ int profile_reduce(int argc, char* argv[])
                                                                     args.scales[0],
                                                                     args.scales[1]);
         }
-        else if(args.compTypeId == appDataType_t::appFloat)
+        else if(args.compTypeId == AppDataType::appFloat)
         {
             profile_reduce_impl<ck::half_t, float, ck::half_t>(args.do_verification,
                                                                args.init_method,
@@ -399,16 +399,16 @@ int profile_reduce(int argc, char* argv[])
     else if(args.use_int8)
     {
         if(!args.compType_assigned)
-            args.compTypeId = appDataType_t::appInt8;
+            args.compTypeId = AppDataType::appInt8;
         if(args.outType_assigned &&
-           (args.outTypeId != appDataType_t::appInt8 && args.outTypeId != appDataType_t::appInt32))
-            args.outTypeId = appDataType_t::appInt32;
+           (args.outTypeId != AppDataType::appInt8 && args.outTypeId != AppDataType::appInt32))
+            args.outTypeId = AppDataType::appInt32;
         if(!args.outType_assigned)
-            args.outTypeId = appDataType_t::appInt8;
+            args.outTypeId = AppDataType::appInt8;
-        if(args.compTypeId == appDataType_t::appInt8)
+        if(args.compTypeId == AppDataType::appInt8)
         {
             profile_reduce_impl<int8_t, int8_t, int8_t>(args.do_verification,
                                                         args.init_method,
@@ -423,7 +423,7 @@ int profile_reduce(int argc, char* argv[])
                                                         args.scales[0],
                                                         args.scales[1]);
         }
-        else if(args.compTypeId == appDataType_t::appInt32)
+        else if(args.compTypeId == AppDataType::appInt32)
         {
             profile_reduce_impl<int8_t, int32_t, int8_t>(args.do_verification,
                                                          args.init_method,
@@ -443,12 +443,12 @@ int profile_reduce(int argc, char* argv[])
     }
     else if(args.use_bf16)
     {
-        if(args.outType_assigned && (args.outTypeId != appDataType_t::appBFloat16 &&
-                                     args.outTypeId != appDataType_t::appFloat))
-            args.outTypeId = appDataType_t::appFloat;
+        if(args.outType_assigned &&
+           (args.outTypeId != AppDataType::appBFloat16 && args.outTypeId != AppDataType::appFloat))
+            args.outTypeId = AppDataType::appFloat;
         if(!args.outType_assigned)
-            args.outTypeId = appDataType_t::appBFloat16;
+            args.outTypeId = AppDataType::appBFloat16;
         profile_reduce_impl<ck::bhalf_t, float, ck::bhalf_t>(args.do_verification,
                                                              args.init_method,
@@ -465,7 +465,7 @@ int profile_reduce(int argc, char* argv[])
     }
     else
     {
-        if(args.compTypeId == appDataType_t::appFloat)
+        if(args.compTypeId == AppDataType::appFloat)
         {
             profile_reduce_impl<float, float, float>(args.do_verification,
                                                      args.init_method,
@@ -480,7 +480,7 @@ int profile_reduce(int argc, char* argv[])
                                                      args.scales[0],
                                                      args.scales[1]);
         }
-        else if(args.compTypeId == appDataType_t::appDouble)
+        else if(args.compTypeId == AppDataType::appDouble)
         {
             profile_reduce_impl<float, double, float>(args.do_verification,
                                                       args.init_method,
...
@@ -17,6 +17,7 @@ int profile_conv_fwd_bias_relu_add(int, char*[]);
 int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
 int profile_convnd_bwd_data(int, char*[], int);
 int profile_reduce(int, char*[]);
+int profile_conv_bwd_weight(int, char*[]);
 int profile_batched_gemm_reduce(int, char*[]);
 int main(int argc, char* argv[])
@@ -89,6 +90,10 @@ int main(int argc, char* argv[])
     {
         return profile_reduce(argc, argv);
     }
+    else if(strcmp(argv[1], "conv2d_bwd_weight") == 0)
+    {
+        return profile_conv_bwd_weight(argc, argv);
+    }
     else
     {
         // clang-format off
@@ -97,7 +102,7 @@ int main(int argc, char* argv[])
                " gemm_bias_relu: GEMM+Bias+ReLU\n"
                " gemm_bias_relu_add: GEMM+Bias+ReLU+Add\n"
                " gemm_reduce: GEMM+Reduce\n"
-               " grouped_gemm: Grouped Gemm\n"
+               " grouped_gemm: Grouped GEMM\n"
                " conv_fwd: ForwardConvolution\n"
                " conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU\n"
                " conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add\n"
@@ -106,10 +111,9 @@ int main(int argc, char* argv[])
                " conv1d_bwd_data: BackwardConvolution data 1 dim\n"
                " conv2d_bwd_data: BackwardConvolution data 2 dim\n"
                " conv3d_bwd_data: BackwardConvolution data 3 dim\n"
-               " grouped_gemm: Grouped Gemm\n"
-               " reduce: REDUCE\n");
+               " reduce: REDUCE\n"
+               " conv2d_bwd_weight: Backward Weight Convolution 2d\n");
         // clang-format on
-        return 0;
     }
+    return 0;
 }
@@ -10,9 +10,11 @@ cmake
 -D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
 -D BUILD_DEV=OFF \
 -D CMAKE_BUILD_TYPE=Release \
--D CMAKE_CXX_FLAGS="-DCK_AMD_GPU_GFX908 --amdgpu-target=gfx908 -O3 -ftemplate-backtrace-limit=0 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only " \
+-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -gline-tables-only -save-temps=$PWD" \
 -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
 -D CMAKE_PREFIX_PATH=/opt/rocm \
 -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
 ${MY_PROJECT_SOURCE}
+#-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only -save-temps=$PWD" \
+#-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -gline-tables-only -save-temps=$PWD" \
@@ -20,6 +20,7 @@ include_directories(BEFORE
     ${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance/gpu/reduce
     ${PROJECT_SOURCE_DIR}/library/include/ck/library/reference_tensor_operation/cpu
     ${PROJECT_SOURCE_DIR}/library/include/ck/library/reference_tensor_operation/gpu
+    ${PROJECT_SOURCE_DIR}/library/include/ck/library/utility
     ${PROJECT_SOURCE_DIR}/test/include
     ${PROJECT_SOURCE_DIR}/profiler/include
     ${PROJECT_SOURCE_DIR}/external/include/half
@@ -49,6 +50,8 @@ add_subdirectory(batched_gemm_reduce)
 add_subdirectory(grouped_gemm)
 add_subdirectory(convnd_fwd)
 add_subdirectory(reduce)
+add_subdirectory(conv2d_bwd_weight)
 add_subdirectory(cpu_ukernel)
 add_subdirectory(cpu_threadwise_transfer)
 add_subdirectory(convnd_fwd_cpu)
#include "profile_batched_gemm_impl.hpp"
#include <iostream> #include <iostream>
#include "profile_batched_gemm_impl.hpp"
namespace { namespace {
using ADataType = ck::half_t; using ADataType = ck::half_t;
using BDataType = ck::half_t; using BDataType = ck::half_t;
......
include_directories(BEFORE
    ${PROJECT_SOURCE_DIR}/profiler/include
    ${PROJECT_SOURCE_DIR}/external/include/half
)

add_test_executable(test_conv2d_bwd_weight conv2d_bwd_weight.cpp)
target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor)
target_link_libraries(test_conv2d_bwd_weight PRIVATE device_conv2d_bwd_weight_instance)
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include <vector>

#include "conv_fwd_util.hpp"
#include "profile_conv_bwd_weight_impl.hpp"

int test_self()
{
    bool pass = true;

    std::vector<ck::utils::conv::ConvParams> params;
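    // ConvParams aggregate order (inferred from the positional construction in main() below):
    // {num_dim_spatial, N, K, C, filter_spatial, input_spatial, strides, dilations,
    //  left_pads, right_pads}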
    params.push_back({2, 128, 256, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
    params.push_back({2, 128, 256, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    params.push_back({2, 128, 256, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});

    for(auto& param : params)
    {
        // fp32
        pass &= ck::profiler::profile_conv_bwd_weight_impl<2,
                                                           float,
                                                           float,
                                                           float,
                                                           ck::tensor_layout::convolution::NHWC,
                                                           ck::tensor_layout::convolution::KYXC,
                                                           ck::tensor_layout::convolution::NHWK>(
            1, // do_verification
            1, // init_method
            0, // do_log
            1, // nrepeat
            param.N,
            param.K,
            param.C,
            param.input_spatial_lengths,
            param.filter_spatial_lengths,
            param.GetOutputSpatialLengths(),
            param.conv_filter_strides,
            param.conv_filter_dilations,
            param.input_left_pads,
            param.input_right_pads,
            2);

        // fp16
        pass &= ck::profiler::profile_conv_bwd_weight_impl<2,
                                                           ck::half_t,
                                                           ck::half_t,
                                                           ck::half_t,
                                                           ck::tensor_layout::convolution::NHWC,
                                                           ck::tensor_layout::convolution::KYXC,
                                                           ck::tensor_layout::convolution::NHWK>(
            1, // do_verification
            1, // init_method
            0, // do_log
            1, // nrepeat
            param.N,
            param.K,
            param.C,
            param.input_spatial_lengths,
            param.filter_spatial_lengths,
            param.GetOutputSpatialLengths(),
            param.conv_filter_strides,
            param.conv_filter_dilations,
            param.input_left_pads,
            param.input_right_pads,
            2);
    }
    return pass;
}

int main(int argc, char* argv[])
{
    int data_type   = 0;
    int init_method = 0;

    // Conv shape
    ck::index_t N               = 128;
    ck::index_t K               = 256;
    ck::index_t C               = 192;
    ck::index_t Y               = 3;
    ck::index_t X               = 3;
    ck::index_t Hi              = 71;
    ck::index_t Wi              = 71;
    ck::index_t conv_stride_h   = 2;
    ck::index_t conv_stride_w   = 2;
    ck::index_t conv_dilation_h = 1;
    ck::index_t conv_dilation_w = 1;
    ck::index_t in_left_pad_h   = 1;
    ck::index_t in_left_pad_w   = 1;
    ck::index_t in_right_pad_h  = 1;
    ck::index_t in_right_pad_w  = 1;
    ck::index_t split_k         = 1;

    bool pass = true;

    if(argc == 1)
    {
        pass = test_self();
    }
    else
    {
        if(argc == 3)
        {
            data_type   = std::stoi(argv[1]);
            init_method = std::stoi(argv[2]);
        }
        else if(argc == 19)
        {
            data_type       = std::stoi(argv[1]);
            init_method     = std::stoi(argv[2]);
            N               = std::stoi(argv[3]);
            K               = std::stoi(argv[4]);
            C               = std::stoi(argv[5]);
            Y               = std::stoi(argv[6]);
            X               = std::stoi(argv[7]);
            Hi              = std::stoi(argv[8]);
            Wi              = std::stoi(argv[9]);
            conv_stride_h   = std::stoi(argv[10]);
            conv_stride_w   = std::stoi(argv[11]);
            conv_dilation_h = std::stoi(argv[12]);
            conv_dilation_w = std::stoi(argv[13]);
            in_left_pad_h   = std::stoi(argv[14]);
            in_left_pad_w   = std::stoi(argv[15]);
            in_right_pad_h  = std::stoi(argv[16]);
            in_right_pad_w  = std::stoi(argv[17]);
            split_k         = std::stoi(argv[18]);
        }
        else
        {
            printf("arg1: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n");
            printf("arg2: initialization (0: no init; 1: integer value; 2: decimal value)\n");
            printf("arg3 to 17: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
                   "RightPx\n");
            printf("arg18: split k (>=1)\n");
            exit(1);
        }
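
        // Illustrative invocation with explicit shapes (binary name per the CMakeLists above):
        //   test_conv2d_bwd_weight 1 1 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1 1
        //   i.e. fp16, integer init, then N K C Y X Hi Wi Sy Sx Dy Dx LeftPy LeftPx
        //   RightPy RightPx, split_k — matching the defaults above.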
        ck::utils::conv::ConvParams param{2,
                                          N,
                                          K,
                                          C,
                                          {Y, X},
                                          {Hi, Wi},
                                          {conv_stride_h, conv_stride_w},
                                          {conv_dilation_h, conv_dilation_w},
                                          {in_left_pad_h, in_left_pad_w},
                                          {in_right_pad_h, in_right_pad_w}};

        if(data_type == 0)
        {
            pass = ck::profiler::profile_conv_bwd_weight_impl<
                2,
                float,
                float,
                float,
                ck::tensor_layout::convolution::NHWC,
                ck::tensor_layout::convolution::KYXC,
                ck::tensor_layout::convolution::NHWK>(1,
                                                      init_method,
                                                      0,
                                                      1,
                                                      param.N,
                                                      param.K,
                                                      param.C,
                                                      param.input_spatial_lengths,
                                                      param.filter_spatial_lengths,
                                                      param.GetOutputSpatialLengths(),
                                                      param.conv_filter_strides,
                                                      param.conv_filter_dilations,
                                                      param.input_left_pads,
                                                      param.input_right_pads,
                                                      split_k);
        }
        else if(data_type == 1)
        {
            pass = ck::profiler::profile_conv_bwd_weight_impl<
                2,
                ck::half_t,
                ck::half_t,
                ck::half_t,
                ck::tensor_layout::convolution::NHWC,
                ck::tensor_layout::convolution::KYXC,
                ck::tensor_layout::convolution::NHWK>(1,
                                                      init_method,
                                                      0,
                                                      1,
                                                      param.N,
                                                      param.K,
                                                      param.C,
                                                      param.input_spatial_lengths,
                                                      param.filter_spatial_lengths,
                                                      param.GetOutputSpatialLengths(),
                                                      param.conv_filter_strides,
                                                      param.conv_filter_dilations,
                                                      param.input_left_pads,
                                                      param.input_right_pads,
                                                      split_k);
        }
        else
        {
            std::cout << "Unsupported data type" << std::endl;
            return 1;
        }
    }

    if(pass)
    {
        std::cout << "test conv2d bwd weight: Pass" << std::endl;
        return 0;
    }
    else
    {
        std::cout << "test conv2d bwd weight: Fail" << std::endl;
        return -1;
    }
}
@@ -3,13 +3,13 @@
 #include <vector>
 #include "config.hpp"
-#include "conv_utils.hpp"
+#include "conv_fwd_util.hpp"
 #include "tensor_layout.hpp"
-#include "test_util.hpp"
+#include "check_err.hpp"
 namespace {
-bool TestConvParams_GetOutputSpatialLengths()
+bool test_conv_params_get_output_spatial_lengths()
 {
     bool res{true};
     // -------------------------- default 2D ------------------------------------
@@ -18,28 +18,28 @@ bool TestConvParams_GetOutputSpatialLengths()
     // stride {2,2},
     // dilations {1,1},
     // padding {{1,1}, {1,1}}
-    ck::conv_util::ConvParams conv_params;
+    ck::utils::conv::ConvParams conv_params;
     std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(out_spatial_len,
-                          std::vector<ck::index_t>{36, 36},
-                          "Error: ConvParams 2D default constructor.");
+    res = ck::utils::check_err(out_spatial_len,
+                               std::vector<ck::index_t>{36, 36},
+                               "Error: ConvParams 2D default constructor.");
     conv_params.conv_filter_strides = std::vector<ck::index_t>{1, 1};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(
+    res = ck::utils::check_err(
         out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}.");
     conv_params.conv_filter_strides = std::vector<ck::index_t>{2, 2};
     conv_params.input_left_pads     = std::vector<ck::index_t>{2, 2};
     conv_params.input_right_pads    = std::vector<ck::index_t>{2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(out_spatial_len,
-                          std::vector<ck::index_t>{37, 37},
-                          "Error: ConvParams 2D padding left/right {2,2}.");
+    res = ck::utils::check_err(out_spatial_len,
+                               std::vector<ck::index_t>{37, 37},
+                               "Error: ConvParams 2D padding left/right {2,2}.");
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(
+    res = ck::utils::check_err(
         out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}.");
     conv_params.conv_filter_strides = std::vector<ck::index_t>{3, 3};
@@ -47,9 +47,10 @@ bool TestConvParams_GetOutputSpatialLengths()
     conv_params.input_right_pads      = std::vector<ck::index_t>{1, 1};
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(out_spatial_len,
-                          std::vector<ck::index_t>{23, 23},
-                          "Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}.");
+    res =
+        ck::utils::check_err(out_spatial_len,
+                             std::vector<ck::index_t>{23, 23},
+                             "Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}.");
     // -------------------------- 1D ------------------------------------
     conv_params.num_dim_spatial = 1;
@@ -61,24 +62,25 @@ bool TestConvParams_GetOutputSpatialLengths()
     conv_params.input_right_pads = std::vector<ck::index_t>{1};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D.");
+    res = ck::utils::check_err(
+        out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D.");
-    conv_params.conv_filter_strides = std::vector<ck::index_t>{1, 1};
+    conv_params.conv_filter_strides = std::vector<ck::index_t>{1};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(
+    res = ck::utils::check_err(
         out_spatial_len, std::vector<ck::index_t>{71}, "Error: ConvParams 1D stride {1}.");
     conv_params.conv_filter_strides = std::vector<ck::index_t>{2};
     conv_params.input_left_pads     = std::vector<ck::index_t>{2};
     conv_params.input_right_pads    = std::vector<ck::index_t>{2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(out_spatial_len,
-                          std::vector<ck::index_t>{37},
-                          "Error: ConvParams 1D padding left/right {2}.");
+    res = ck::utils::check_err(out_spatial_len,
+                               std::vector<ck::index_t>{37},
+                               "Error: ConvParams 1D padding left/right {2}.");
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(
+    res = ck::utils::check_err(
         out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D dilation {2}.");
     conv_params.conv_filter_strides = std::vector<ck::index_t>{3};
@@ -86,9 +88,9 @@ bool TestConvParams_GetOutputSpatialLengths()
     conv_params.input_right_pads      = std::vector<ck::index_t>{1};
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(out_spatial_len,
-                          std::vector<ck::index_t>{23},
-                          "Error: ConvParams 1D strides{3}, padding {1}, dilations {2}.");
+    res = ck::utils::check_err(out_spatial_len,
+                               std::vector<ck::index_t>{23},
+                               "Error: ConvParams 1D strides{3}, padding {1}, dilations {2}.");
     // -------------------------- 3D ------------------------------------
     conv_params.num_dim_spatial = 3;
@@ -100,35 +102,35 @@ bool TestConvParams_GetOutputSpatialLengths()
     conv_params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(
+    res = ck::utils::check_err(
         out_spatial_len, std::vector<ck::index_t>{36, 36, 36}, "Error: ConvParams 3D.");
     conv_params.conv_filter_strides = std::vector<ck::index_t>{1, 1, 1};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(out_spatial_len,
-                          std::vector<ck::index_t>{71, 71, 71},
-                          "Error: ConvParams 3D stride {1, 1, 1}.");
+    res = ck::utils::check_err(out_spatial_len,
+                               std::vector<ck::index_t>{71, 71, 71},
+                               "Error: ConvParams 3D stride {1, 1, 1}.");
     conv_params.conv_filter_strides = std::vector<ck::index_t>{2, 2, 2};
     conv_params.input_left_pads     = std::vector<ck::index_t>{2, 2, 2};
     conv_params.input_right_pads    = std::vector<ck::index_t>{2, 2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(out_spatial_len,
-                          std::vector<ck::index_t>{37, 37, 37},
-                          "Error: ConvParams 3D padding left/right {2, 2, 2}.");
+    res = ck::utils::check_err(out_spatial_len,
+                               std::vector<ck::index_t>{37, 37, 37},
+                               "Error: ConvParams 3D padding left/right {2, 2, 2}.");
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(out_spatial_len,
-                          std::vector<ck::index_t>{36, 36, 36},
-                          "Error: ConvParams 3D dilation {2, 2, 2}.");
+    res = ck::utils::check_err(out_spatial_len,
+                               std::vector<ck::index_t>{36, 36, 36},
+                               "Error: ConvParams 3D dilation {2, 2, 2}.");
     conv_params.conv_filter_strides   = std::vector<ck::index_t>{3, 3, 3};
     conv_params.input_left_pads       = std::vector<ck::index_t>{1, 1, 1};
     conv_params.input_right_pads      = std::vector<ck::index_t>{1, 1, 1};
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = test::check_err(
+    res = ck::utils::check_err(
         out_spatial_len,
         std::vector<ck::index_t>{23, 23, 23},
         "Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}.");
@@ -136,50 +138,54 @@ bool TestConvParams_GetOutputSpatialLengths()
     return res;
 }
-bool TestGetHostTensorDescriptor()
+bool test_get_host_tensor_descriptor()
 {
     bool res{true};
     namespace tl = ck::tensor_layout::convolution;
     std::vector<std::size_t> dims{2, 3, 4, 5};
-    HostTensorDescriptor h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NHWC{});
-    res = test::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!");
-    res = test::check_err(
+    HostTensorDescriptor h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NHWC{});
+    res =
+        ck::utils::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!");
+    res = ck::utils::check_err(
         h.GetStrides(), {3 * 4 * 5, 1, 3 * 5, 3}, "Error: wrong NHWC dimensions strides!");
-    h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NCHW{});
-    res = test::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!");
-    res = test::check_err(
+    h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCHW{});
+    res =
+        ck::utils::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!");
+    res = ck::utils::check_err(
         h.GetStrides(), {3 * 4 * 5, 4 * 5, 5, 1}, "Error: wrong NCHW dimensions strides!");
     dims = std::vector<std::size_t>{2, 3, 4};
-    h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NWC{});
-    res = test::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!");
-    res = test::check_err(h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!");
+    h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NWC{});
+    res = ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!");
+    res =
+        ck::utils::check_err(h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!");
-    h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NCW{});
-    res = test::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!");
-    res = test::check_err(h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!");
+    h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCW{});
+    res = ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!");
+    res =
+        ck::utils::check_err(h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!");
     dims = std::vector<std::size_t>{2, 3, 4, 5, 6};
-    h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NDHWC{});
-    res = test::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!");
-    res = test::check_err(h.GetStrides(),
-                          {3 * 4 * 5 * 6, // N
-                           1,             // C
-                           3 * 5 * 6,     // D
-                           3 * 6,         // H
-                           3},            // W
-                          "Error: wrong NDHWC dimensions strides!");
+    h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NDHWC{});
+    res = ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!");
+    res = ck::utils::check_err(h.GetStrides(),
+                               {3 * 4 * 5 * 6, // N
+                                1,             // C
+                                3 * 5 * 6,     // D
+                                3 * 6,         // H
+                                3},            // W
+                               "Error: wrong NDHWC dimensions strides!");
-    h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NCDHW{});
-    res = test::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!");
-    res = test::check_err(h.GetStrides(),
-                          {3 * 4 * 5 * 6, // N
-                           4 * 5 * 6,     // C
-                           5 * 6,         // D
-                           6,             // H
-                           1},            // W
-                          "Error: wrong NCDHW dimensions strides!");
+    h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCDHW{});
+    res = ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!");
+    res = ck::utils::check_err(h.GetStrides(),
+                               {3 * 4 * 5 * 6, // N
+                                4 * 5 * 6,     // C
+                                5 * 6,         // D
+                                6,             // H
+                                1},            // W
+                               "Error: wrong NCDHW dimensions strides!");
     return res;
 }
@@ -188,10 +194,11 @@ bool TestGetHostTensorDescriptor()
 int main(void)
 {
-    bool res = TestConvParams_GetOutputSpatialLengths();
-    std::cout << "TestConvParams_GetOutputSpatialLengths ..... " << (res ? "SUCCESS" : "FAILURE")
+    bool res = test_conv_params_get_output_spatial_lengths();
+    std::cout << "test_conv_params_get_output_spatial_lengths ..... "
+              << (res ? "SUCCESS" : "FAILURE") << std::endl;
+    res = test_get_host_tensor_descriptor();
+    std::cout << "test_get_host_tensor_descriptor ..... " << (res ? "SUCCESS" : "FAILURE")
               << std::endl;
-    res = TestGetHostTensorDescriptor();
-    std::cout << "TestGetHostTensorDescriptor ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
     return res ? 0 : 1;
 }
@@ -12,7 +12,7 @@ int main()
 {
     bool pass = true;
     // check 1d
-    std::vector<ck::conv_util::ConvParams> params;
+    std::vector<ck::utils::conv::ConvParams> params;
     params.push_back({1, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
     params.push_back({1, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
     params.push_back({1, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
...
@@ -5,10 +5,11 @@
 #include "data_type.hpp"
 #include "element_wise_operation.hpp"
-#include "conv_test_util.hpp"
+#include "conv_fwd_util.hpp"
+#include "conv_util.hpp"
 #include "host_tensor.hpp"
 #include "tensor_layout.hpp"
-#include "test_util.hpp"
+#include "check_err.hpp"
 // Forward declarations for conv instances.
@@ -34,10 +35,10 @@ void add_device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instances(std::vector<DeviceConv
 namespace {
-bool TestConv1DNWC()
+bool test_conv1D_nwc()
 {
     bool res{true};
-    ck::conv_util::ConvParams params;
+    ck::utils::conv::ConvParams params;
     params.num_dim_spatial = 1;
     params.N = 2;
     params.K = 16;
@@ -49,30 +50,31 @@ bool TestConv1DNWC()
     params.input_left_pads  = std::vector<ck::index_t>{1};
     params.input_right_pads = std::vector<ck::index_t>{1};
-    auto host_tensors = test::conv::GetHostTensors<float,
-                                                   float,
-                                                   float,
-                                                   ck::tensor_layout::convolution::NWC,
-                                                   ck::tensor_layout::convolution::KXC,
-                                                   ck::tensor_layout::convolution::NWK>(params);
+    auto host_tensors =
+        ck::utils::conv::get_host_tensors<float,
+                                          float,
+                                          float,
+                                          ck::tensor_layout::convolution::NWC,
+                                          ck::tensor_layout::convolution::KXC,
+                                          ck::tensor_layout::convolution::NWK>(params);
     const Tensor<float>& input   = std::get<0>(host_tensors);
     const Tensor<float>& weights = std::get<1>(host_tensors);
     Tensor<float>& host_output   = std::get<2>(host_tensors);
     Tensor<float>& device_output = std::get<3>(host_tensors);
-    test::conv::RunReferenceConv<1>(params, input, weights, host_output);
+    ck::utils::conv::run_reference_convolution_forward<1>(params, input, weights, host_output);
     test::conv::RunConv<1>(params, input, weights, device_output);
     res = res &&
-          test::check_err(
+          ck::utils::check_err(
               device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f);
     return res;
 }
 template <typename T>
-bool TestConv1DNWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
+bool test_conv1d_nwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
 {
-    ck::conv_util::ConvParams params;
+    ck::utils::conv::ConvParams params;
     params.num_dim_spatial = 1;
     params.filter_spatial_lengths = std::vector<ck::index_t>{3};
     params.input_spatial_lengths  = std::vector<ck::index_t>{71};
@@ -81,51 +83,52 @@ bool TestConv1DNWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
     params.input_left_pads  = std::vector<ck::index_t>{1};
     params.input_right_pads = std::vector<ck::index_t>{1};
-    auto host_tensors = test::conv::GetHostTensors<T,
-                                                   T,
-                                                   T,
-                                                   ck::tensor_layout::convolution::NWC,
-                                                   ck::tensor_layout::convolution::KXC,
-                                                   ck::tensor_layout::convolution::NWK>(params);
+    auto host_tensors =
+        ck::utils::conv::get_host_tensors<T,
+                                          T,
+                                          T,
+                                          ck::tensor_layout::convolution::NWC,
+                                          ck::tensor_layout::convolution::KXC,
+                                          ck::tensor_layout::convolution::NWK>(params);
     const Tensor<T>& input   = std::get<0>(host_tensors);
     const Tensor<T>& weights = std::get<1>(host_tensors);
     Tensor<T>& host_output   = std::get<2>(host_tensors);
     Tensor<T>& device_output = std::get<3>(host_tensors);
-    test::conv::RunReferenceConv<1>(params, input, weights, host_output);
-    return test::conv::RunConvInstances<1>(
+    ck::utils::conv::run_reference_convolution_forward<1>(params, input, weights, host_output);
+    return ck::utils::conv::run_convolution_forward_instances<1>(
         params, conv_ptrs, input, weights, device_output, host_output);
 }
-bool TestConv1DNWCBF16Instances()
+bool test_conv1d_nwc_bf16_instances()
 {
     std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
     ck::tensor_operation::device::device_conv1d_fwd_instance::
         add_device_conv1d_fwd_xdl_nwc_kxc_nwk_bf16_instances(conv_ptrs);
-    return TestConv1DNWCInstances<ck::bhalf_t>(conv_ptrs);
+    return test_conv1d_nwc_instances<ck::bhalf_t>(conv_ptrs);
 }
-bool TestConv1DNWCF16Instances()
+bool test_conv1d_nwc_f16_instances()
 {
     std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
     ck::tensor_operation::device::device_conv1d_fwd_instance::
         add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f16_instances(conv_ptrs);
-    return TestConv1DNWCInstances<ck::half_t>(conv_ptrs);
+    return test_conv1d_nwc_instances<ck::half_t>(conv_ptrs);
 }
-bool TestConv1DNWCF32Instances()
+bool test_conv1d_nwc_f32_instances()
 {
     std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
     ck::tensor_operation::device::device_conv1d_fwd_instance::
         add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instances(conv_ptrs);
-    return TestConv1DNWCInstances<float>(conv_ptrs);
+    return test_conv1d_nwc_instances<float>(conv_ptrs);
 }
-bool TestConv1DNWCInt8Instances()
+bool test_conv1d_nwc_int8_instances()
 {
     std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
     ck::tensor_operation::device::device_conv1d_fwd_instance::
         add_device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instances(conv_ptrs);
-    return TestConv1DNWCInstances<int8_t>(conv_ptrs);
+    return test_conv1d_nwc_instances<int8_t>(conv_ptrs);
 }
 } // anonymous namespace
@@ -133,18 +136,20 @@ bool TestConv1DNWCInt8Instances()
 int main()
 {
     bool res{true};
-    res = TestConv1DNWC();
-    std::cout << "TestConv1DNWC ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
-    res = TestConv1DNWCBF16Instances();
+    res = test_conv1D_nwc();
+    std::cout << "test_conv1D_nwc ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
+    res = test_conv1d_nwc_bf16_instances();
     std::cout << "\nTestConv1DNWCBF16Instances ..... " << (res ? "SUCCESS" : "FAILURE")
               << std::endl;
-    res = TestConv1DNWCF16Instances();
-    std::cout << "\nTestConv1DNWCF16Instances ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
-    res = TestConv1DNWCF32Instances();
-    std::cout << "\nTestConv1DNWCF32Instances ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
-    res = TestConv1DNWCInt8Instances();
-    std::cout << "\nTestConv1DNWCInt8Instances ..... " << (res ? "SUCCESS" : "FAILURE")
+    res = test_conv1d_nwc_f16_instances();
+    std::cout << "\ntest_conv1d_nwc_f16_instances ..... " << (res ? "SUCCESS" : "FAILURE")
+              << std::endl;
+    res = test_conv1d_nwc_f32_instances();
+    std::cout << "\ntest_conv1d_nwc_f32_instances ..... " << (res ? "SUCCESS" : "FAILURE")
+              << std::endl;
+    res = test_conv1d_nwc_int8_instances();
+    std::cout << "\ntest_conv1d_nwc_int8_instances ..... " << (res ? "SUCCESS" : "FAILURE")
               << std::endl;
     return res ? 0 : 1;
...
@@ -6,10 +6,11 @@
 #include "data_type.hpp"
 #include "element_wise_operation.hpp"
-#include "conv_test_util.hpp"
+#include "conv_fwd_util.hpp"
+#include "conv_util.hpp"
 #include "host_tensor.hpp"
 #include "tensor_layout.hpp"
-#include "test_util.hpp"
+#include "check_err.hpp"
 // Forward declarations for conv instances.
 using DeviceConvFwdNoOpPtr =
@@ -36,35 +37,35 @@ void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(std::vector<DeviceC
 namespace {
-bool TestConv2DNHWC()
+bool test_conv2d_nhwc()
 {
     bool res{true};
-    ck::conv_util::ConvParams params;
+    ck::utils::conv::ConvParams params;
     params.N = 2;
     params.K = 16;
     params.C = 4;
     params.input_spatial_lengths = std::vector<ck::index_t>{16, 16};
     params.conv_filter_strides   = std::vector<ck::index_t>{1, 1};
-    auto host_tensors = test::conv::GetHostTensors(params);
+    auto host_tensors = ck::utils::conv::get_host_tensors(params);
     const Tensor<float>& input   = std::get<0>(host_tensors);
     const Tensor<float>& weights = std::get<1>(host_tensors);
     Tensor<float>& host_output   = std::get<2>(host_tensors);
     Tensor<float>& device_output = std::get<3>(host_tensors);
-    test::conv::RunReferenceConv<2>(params, input, weights, host_output);
+    ck::utils::conv::run_reference_convolution_forward<2>(params, input, weights, host_output);
     test::conv::RunConv<2>(params, input, weights, device_output);
     res = res &&
-          test::check_err(
+          ck::utils::check_err(
              device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f);
     return res;
 }
 template <typename T>
-bool TestConv2DNHWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
+bool test_conv2d_nhwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
 {
-    ck::conv_util::ConvParams params;
+    ck::utils::conv::ConvParams params;
     params.num_dim_spatial = 2;
     params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3};
     params.input_spatial_lengths  = std::vector<ck::index_t>{71, 71};
@@ -73,54 +74,55 @@ bool TestConv2DNHWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
     params.input_left_pads  = std::vector<ck::index_t>{1, 1};
     params.input_right_pads = std::vector<ck::index_t>{1, 1};
-    auto host_tensors = test::conv::GetHostTensors<T,
-                                                   T,
-                                                   T,
-                                                   ck::tensor_layout::convolution::NHWC,
-                                                   ck::tensor_layout::convolution::KYXC,
-                                                   ck::tensor_layout::convolution::NHWK>(params);
+    auto host_tensors =
+        ck::utils::conv::get_host_tensors<T,
+                                          T,
+                                          T,
+                                          ck::tensor_layout::convolution::NHWC,
+                                          ck::tensor_layout::convolution::KYXC,
+                                          ck::tensor_layout::convolution::NHWK>(params);
     const Tensor<T>& input = std::get<0>(host_tensors);
const Tensor<T>& weights = std::get<1>(host_tensors); const Tensor<T>& weights = std::get<1>(host_tensors);
Tensor<T>& host_output = std::get<2>(host_tensors); Tensor<T>& host_output = std::get<2>(host_tensors);
Tensor<T>& device_output = std::get<3>(host_tensors); Tensor<T>& device_output = std::get<3>(host_tensors);
test::conv::RunReferenceConv<2>(params, input, weights, host_output); ck::utils::conv::run_reference_convolution_forward<2>(params, input, weights, host_output);
return test::conv::RunConvInstances<2>( return ck::utils::conv::run_convolution_forward_instances<2>(
params, conv_ptrs, input, weights, device_output, host_output); params, conv_ptrs, input, weights, device_output, host_output);
} }
bool TestConv2DNHWCBF16Instances() bool test_conv2d_nhwc_bf16_instances()
{ {
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs; std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance:: ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(conv_ptrs); add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(conv_ptrs);
return TestConv2DNHWCInstances<ck::bhalf_t>(conv_ptrs); return test_conv2d_nhwc_instances<ck::bhalf_t>(conv_ptrs);
} }
bool TestConv2DNHWCF16Instances() bool test_conv2d_nhwc_f16_instances()
{ {
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs; std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance:: ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(conv_ptrs); add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(conv_ptrs);
ck::tensor_operation::device::device_conv2d_fwd_instance:: ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances(conv_ptrs); add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances(conv_ptrs);
return TestConv2DNHWCInstances<ck::half_t>(conv_ptrs); return test_conv2d_nhwc_instances<ck::half_t>(conv_ptrs);
} }
bool TestConv2DNHWCF32Instances() bool test_conv2d_nhwc_f32_instances()
{ {
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs; std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance:: ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(conv_ptrs); add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(conv_ptrs);
return TestConv2DNHWCInstances<float>(conv_ptrs); return test_conv2d_nhwc_instances<float>(conv_ptrs);
} }
bool TestConv2DNHWCInt8Instances() bool test_conv2d_nhwc_int8_instances()
{ {
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs; std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance:: ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(conv_ptrs); add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(conv_ptrs);
return TestConv2DNHWCInstances<int8_t>(conv_ptrs); return test_conv2d_nhwc_instances<int8_t>(conv_ptrs);
} }
} // anonymous namespace } // anonymous namespace
...@@ -128,19 +130,20 @@ bool TestConv2DNHWCInt8Instances() ...@@ -128,19 +130,20 @@ bool TestConv2DNHWCInt8Instances()
int main() int main()
{ {
bool res{true}; bool res{true};
res = TestConv2DNHWC(); res = test_conv2d_nhwc();
std::cout << "TestConv2DNHWC ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; std::cout << "test_conv2d_nhwc ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = TestConv2DNHWCBF16Instances(); res = test_conv2d_nhwc_bf16_instances();
std::cout << "\nTestConv2DNHWCBF16Instances ..... " << (res ? "SUCCESS" : "FAILURE") std::cout << "\ntest_conv2d_nhwc_bf16_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl; << std::endl;
res = TestConv2DNHWCF16Instances(); res = test_conv2d_nhwc_f16_instances();
std::cout << "\nTestConv2DNHWCF16Instances ....." << (res ? "SUCCESS" : "FAILURE") << std::endl; std::cout << "\ntest_conv2d_nhwc_f16_instances ....." << (res ? "SUCCESS" : "FAILURE")
res = TestConv2DNHWCF32Instances();
std::cout << "\nTestConv2DNHWCF32Instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl; << std::endl;
res = TestConv2DNHWCInt8Instances(); res = test_conv2d_nhwc_f32_instances();
std::cout << "\nTestConv2DNHWCInt8Instances ..... " << (res ? "SUCCESS" : "FAILURE") std::cout << "\ntest_conv2d_nhwc_f32_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = test_conv2d_nhwc_int8_instances();
std::cout << "\ntest_conv2d_nhwc_int8_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl; << std::endl;
return res ? 0 : 1; return res ? 0 : 1;
......
...@@ -6,10 +6,11 @@ ...@@ -6,10 +6,11 @@
#include "data_type.hpp" #include "data_type.hpp"
#include "element_wise_operation.hpp" #include "element_wise_operation.hpp"
#include "conv_test_util.hpp" #include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "host_tensor.hpp" #include "host_tensor.hpp"
#include "tensor_layout.hpp" #include "tensor_layout.hpp"
#include "test_util.hpp" #include "check_err.hpp"
// Forward declarations for conv instances. // Forward declarations for conv instances.
using DeviceConvFwdNoOpPtr = using DeviceConvFwdNoOpPtr =
...@@ -34,10 +35,10 @@ void add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instances(std::vector<Devi ...@@ -34,10 +35,10 @@ void add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instances(std::vector<Devi
namespace { namespace {
bool TestConv3DNDHWC() bool test_conv3d_ndhwc()
{ {
bool res{true}; bool res{true};
ck::conv_util::ConvParams params; ck::utils::conv::ConvParams params;
params.num_dim_spatial = 3; params.num_dim_spatial = 3;
params.N = 2; params.N = 2;
params.K = 16; params.K = 16;
...@@ -49,30 +50,31 @@ bool TestConv3DNDHWC() ...@@ -49,30 +50,31 @@ bool TestConv3DNDHWC()
params.input_left_pads = std::vector<ck::index_t>{1, 1, 1}; params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1}; params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors = test::conv::GetHostTensors<float, auto host_tensors =
float, ck::utils::conv::get_host_tensors<float,
float, float,
ck::tensor_layout::convolution::NDHWC, float,
ck::tensor_layout::convolution::KZYXC, ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::NDHWK>(params); ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params);
const Tensor<float>& input = std::get<0>(host_tensors); const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors); const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& host_output = std::get<2>(host_tensors); Tensor<float>& host_output = std::get<2>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors); Tensor<float>& device_output = std::get<3>(host_tensors);
test::conv::RunReferenceConv<3>(params, input, weights, host_output); ck::utils::conv::run_reference_convolution_forward<3>(params, input, weights, host_output);
test::conv::RunConv<3>(params, input, weights, device_output); test::conv::RunConv<3>(params, input, weights, device_output);
res = res && res = res &&
test::check_err( ck::utils::check_err(
device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f); device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f);
return res; return res;
} }
bool TestConv3DNDHWC2GBInput() bool test_conv3d_ndhwc_2gb_input()
{ {
// >2GB Input // >2GB Input
ck::conv_util::ConvParams params; ck::utils::conv::ConvParams params;
params.num_dim_spatial = 3; params.num_dim_spatial = 3;
params.N = 2; params.N = 2;
params.K = 16; params.K = 16;
...@@ -85,12 +87,12 @@ bool TestConv3DNDHWC2GBInput() ...@@ -85,12 +87,12 @@ bool TestConv3DNDHWC2GBInput()
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1}; params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors = auto host_tensors =
test::conv::GetHostTensors<float, ck::utils::conv::get_host_tensors<float,
float, float,
float, float,
ck::tensor_layout::convolution::NDHWC, ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC, ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false); ck::tensor_layout::convolution::NDHWK>(params, false);
const Tensor<float>& input = std::get<0>(host_tensors); const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors); const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors); Tensor<float>& device_output = std::get<3>(host_tensors);
...@@ -113,10 +115,10 @@ bool TestConv3DNDHWC2GBInput() ...@@ -113,10 +115,10 @@ bool TestConv3DNDHWC2GBInput()
return false; return false;
} }
bool TestConv3DNDHWC2GBFilters() bool test_conv3d_ndhwc_2gb_filters()
{ {
// >2GB Filters // >2GB Filters
ck::conv_util::ConvParams params; ck::utils::conv::ConvParams params;
params.num_dim_spatial = 3; params.num_dim_spatial = 3;
params.N = 2; params.N = 2;
params.K = 16; params.K = 16;
...@@ -129,12 +131,12 @@ bool TestConv3DNDHWC2GBFilters() ...@@ -129,12 +131,12 @@ bool TestConv3DNDHWC2GBFilters()
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1}; params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors = auto host_tensors =
test::conv::GetHostTensors<float, ck::utils::conv::get_host_tensors<float,
float, float,
float, float,
ck::tensor_layout::convolution::NDHWC, ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC, ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false); ck::tensor_layout::convolution::NDHWK>(params, false);
const Tensor<float>& input = std::get<0>(host_tensors); const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors); const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors); Tensor<float>& device_output = std::get<3>(host_tensors);
...@@ -157,10 +159,10 @@ bool TestConv3DNDHWC2GBFilters() ...@@ -157,10 +159,10 @@ bool TestConv3DNDHWC2GBFilters()
return false; return false;
} }
bool TestConv3DNDHWC2GBOutput() bool test_conv3d_ndhwc_2gb_output()
{ {
// >2GB Output // >2GB Output
ck::conv_util::ConvParams params; ck::utils::conv::ConvParams params;
params.num_dim_spatial = 3; params.num_dim_spatial = 3;
params.N = 2; params.N = 2;
params.K = 16; params.K = 16;
...@@ -173,12 +175,12 @@ bool TestConv3DNDHWC2GBOutput() ...@@ -173,12 +175,12 @@ bool TestConv3DNDHWC2GBOutput()
params.input_right_pads = std::vector<ck::index_t>{2, 2, 2}; params.input_right_pads = std::vector<ck::index_t>{2, 2, 2};
auto host_tensors = auto host_tensors =
test::conv::GetHostTensors<float, ck::utils::conv::get_host_tensors<float,
float, float,
float, float,
ck::tensor_layout::convolution::NDHWC, ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC, ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false); ck::tensor_layout::convolution::NDHWK>(params, false);
const Tensor<float>& input = std::get<0>(host_tensors); const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors); const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors); Tensor<float>& device_output = std::get<3>(host_tensors);
...@@ -202,9 +204,9 @@ bool TestConv3DNDHWC2GBOutput() ...@@ -202,9 +204,9 @@ bool TestConv3DNDHWC2GBOutput()
} }
template <typename T> template <typename T>
bool TestConv3DNDHWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs) bool test_conv3d_ndhwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
{ {
ck::conv_util::ConvParams params; ck::utils::conv::ConvParams params;
params.N = 64; params.N = 64;
params.num_dim_spatial = 3; params.num_dim_spatial = 3;
params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3, 2}; params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3, 2};
...@@ -214,52 +216,53 @@ bool TestConv3DNDHWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs ...@@ -214,52 +216,53 @@ bool TestConv3DNDHWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs
params.input_left_pads = std::vector<ck::index_t>{1, 1, 1}; params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1}; params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors = test::conv::GetHostTensors<T, auto host_tensors =
T, ck::utils::conv::get_host_tensors<T,
T, T,
ck::tensor_layout::convolution::NDHWC, T,
ck::tensor_layout::convolution::KZYXC, ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::NDHWK>(params); ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params);
const Tensor<T>& input = std::get<0>(host_tensors); const Tensor<T>& input = std::get<0>(host_tensors);
const Tensor<T>& weights = std::get<1>(host_tensors); const Tensor<T>& weights = std::get<1>(host_tensors);
Tensor<T>& host_output = std::get<2>(host_tensors); Tensor<T>& host_output = std::get<2>(host_tensors);
Tensor<T>& device_output = std::get<3>(host_tensors); Tensor<T>& device_output = std::get<3>(host_tensors);
test::conv::RunReferenceConv<3>(params, input, weights, host_output); ck::utils::conv::run_reference_convolution_forward<3>(params, input, weights, host_output);
return test::conv::RunConvInstances<3>( return ck::utils::conv::run_convolution_forward_instances<3>(
params, conv_ptrs, input, weights, device_output, host_output); params, conv_ptrs, input, weights, device_output, host_output);
} }
bool TestConv3DNDHWCBF16Instances() bool test_conv3d_ndhwc_bf16_instances()
{ {
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs; std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance:: ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_bf16_instances(conv_ptrs); add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_bf16_instances(conv_ptrs);
return TestConv3DNDHWCInstances<ck::bhalf_t>(conv_ptrs); return test_conv3d_ndhwc_instances<ck::bhalf_t>(conv_ptrs);
} }
bool TestConv3DNDHWCF16Instances() bool test_conv3d_ndhwc_f16_instances()
{ {
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs; std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance:: ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f16_instances(conv_ptrs); add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f16_instances(conv_ptrs);
return TestConv3DNDHWCInstances<ck::half_t>(conv_ptrs); return test_conv3d_ndhwc_instances<ck::half_t>(conv_ptrs);
} }
bool TestConv3DNDHWCF32Instances() bool test_conv3d_ndhwc_f32_instances()
{ {
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs; std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance:: ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f32_instances(conv_ptrs); add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f32_instances(conv_ptrs);
return TestConv3DNDHWCInstances<float>(conv_ptrs); return test_conv3d_ndhwc_instances<float>(conv_ptrs);
} }
bool TestConv3DNDHWCInt8Instances() bool test_conv3d_ndhwc_int8_instances()
{ {
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs; std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance:: ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instances(conv_ptrs); add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instances(conv_ptrs);
return TestConv3DNDHWCInstances<int8_t>(conv_ptrs); return test_conv3d_ndhwc_instances<int8_t>(conv_ptrs);
} }
} // anonymous namespace } // anonymous namespace
...@@ -267,27 +270,30 @@ bool TestConv3DNDHWCInt8Instances() ...@@ -267,27 +270,30 @@ bool TestConv3DNDHWCInt8Instances()
int main() int main()
{ {
bool res{true}; bool res{true};
res = TestConv3DNDHWC(); res = test_conv3d_ndhwc();
std::cout << "TestConv3DNDHWC ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; std::cout << "test_conv3d_ndhwc ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = TestConv3DNDHWC2GBInput(); res = test_conv3d_ndhwc_2gb_input();
std::cout << "\nTestConv3DNDHWC2GBInput ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; std::cout << "\ntest_conv3d_ndhwc_2gb_input ..... " << (res ? "SUCCESS" : "FAILURE")
res = TestConv3DNDHWC2GBFilters(); << std::endl;
std::cout << "\nTestConv3DNDHWC2GBFilters ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; res = test_conv3d_ndhwc_2gb_filters();
res = TestConv3DNDHWC2GBOutput(); std::cout << "\ntest_conv3d_ndhwc_2gb_filters ..... " << (res ? "SUCCESS" : "FAILURE")
std::cout << "\nTestConv3DNDHWC2GBOutput ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; << std::endl;
res = test_conv3d_ndhwc_2gb_output();
std::cout << "\ntest_conv3d_ndhwc_2gb_output ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = TestConv3DNDHWCBF16Instances(); res = test_conv3d_ndhwc_bf16_instances();
std::cout << "\nTestConv3DNDHWCBF16Instances ..... " << (res ? "SUCCESS" : "FAILURE") std::cout << "\ntest_conv3d_ndhwc_bf16_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl; << std::endl;
res = TestConv3DNDHWCF16Instances(); res = test_conv3d_ndhwc_f16_instances();
std::cout << "\nTestConv3DNDHWCF16Instances ..... " << (res ? "SUCCESS" : "FAILURE") std::cout << "\ntest_conv3d_ndhwc_f16_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl; << std::endl;
res = TestConv3DNDHWCF32Instances(); res = test_conv3d_ndhwc_f32_instances();
std::cout << "\nTestConv3DNDHWCF32Instances ..... " << (res ? "SUCCESS" : "FAILURE") std::cout << "\ntest_conv3d_ndhwc_f32_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl; << std::endl;
res = TestConv3DNDHWCInt8Instances(); res = test_conv3d_ndhwc_int8_instances();
std::cout << "\nTestConv3DNDHWCInt8Instances ..... " << (res ? "SUCCESS" : "FAILURE") std::cout << "\ntest_conv3d_ndhw_cint_8instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl; << std::endl;
return res ? 0 : 1; return res ? 0 : 1;
......
#ifndef TEST_CONV_UTIL_HPP
#define TEST_CONV_UTIL_HPP
#include <tuple>
#include "config.hpp"
#include "conv_fwd_util.hpp"
#include "device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp"
#include "element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "sequence.hpp"
namespace {
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
static constexpr auto ConvFwdDefault =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
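// NOTE: a deliberately tiny XDL instance (64-thread blocks, 16x16 tiles,
// scalar transfers) so this helper compiles and runs quickly; it is meant
// for correctness checks, not for performance.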
template <ck::index_t SpatialDims, typename InDataType, typename WeiDataType, typename OutDataType>
using DeviceConvNDFwdInstance = ck::tensor_operation::device::
DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K<
// clang-format off
InDataType, // InDataType
WeiDataType, // WeiDataType
OutDataType, // OutDataType
InDataType, // AccDataType
InElementOp, // Input Elementwise Operation
WeiElementOp, // Weights Elementwise Operation
OutElementOp, // Output Elementwise Operation
ConvFwdDefault, // ConvForwardSpecialization
SpatialDims, // SpatialDims
64, // BlockSize
16, // MPerBlock
16, // NPerBlock
4, // K0PerBlock
1, // K1
16, // MPerXDL
16, // NPerXDL
1, // MXdlPerWave
1, // NXdlPerWave
S<1, 16, 1>, // ABlockTransferThreadClusterLengths_K0_M_K1
S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // ABlockTransferSrcAccessOrder
2, // ABlockTransferSrcVectorDim
1, // ABlockTransferSrcScalarPerVector
1, // ABlockTransferDstScalarPerVector_K1
true, // ABlockLdsAddExtraM
S<1, 16, 1>, // BBlockTransferThreadClusterLengths_K0_N_K1
S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // BBlockTransferSrcAccessOrder
2, // BBlockTransferSrcVectorDim
1, // BBlockTransferSrcScalarPerVector
1, // BBlockTransferDstScalarPerVector_K1
true, // BBlockLdsAddExtraN
7, // CThreadTransferSrcDstVectorDim
1>; // CThreadTransferDstScalarPerVector
// clang-format on
} // namespace
namespace test {
namespace conv {
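// Thin wrapper that runs a forward convolution on the device through the
// small DeviceConvNDFwdInstance defined above, writing the result into
// `output` for comparison against the host reference.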
template <ck::index_t NDim,
typename InDataType = float,
typename WeiDataType = float,
typename OutDataType = float>
void RunConv(const ck::utils::conv::ConvParams& params,
const Tensor<InDataType>& input,
const Tensor<WeiDataType>& weights,
Tensor<OutDataType>& output)
{
ck::utils::conv::run_convolution_forward<NDim,
InDataType,
WeiDataType,
OutDataType,
DeviceConvNDFwdInstance>(
params, input, weights, output);
}
} // namespace conv
} // namespace test
#endif
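For orientation, here is a minimal sketch of how the renamed helpers in this diff compose into a smoke test. The include paths and the ConvParams defaults are assumptions; the call sequence mirrors test_conv2d_nhwc from this diff.
// Sketch only -- include paths are assumed; the sequence follows
// test_conv2d_nhwc above: build params, create tensors, run the host
// reference and the device instance, then compare the two outputs.
#include "conv_fwd_util.hpp" // ck::utils::conv helpers (assumed path)
#include "check_err.hpp" // ck::utils::check_err (assumed path)
bool sketch_conv2d_smoke_test()
{
    ck::utils::conv::ConvParams params;
    params.num_dim_spatial = 2;
    params.N = 2;
    params.K = 16;
    params.C = 4;
    params.input_spatial_lengths = std::vector<ck::index_t>{16, 16};
    // get_host_tensors returns (input, weights, host_output, device_output).
    auto tensors = ck::utils::conv::get_host_tensors(params);
    const Tensor<float>& input = std::get<0>(tensors);
    const Tensor<float>& weights = std::get<1>(tensors);
    Tensor<float>& host_output = std::get<2>(tensors);
    Tensor<float>& device_output = std::get<3>(tensors);
    // CPU reference vs. the device instance declared in this header.
    ck::utils::conv::run_reference_convolution_forward<2>(params, input, weights, host_output);
    test::conv::RunConv<2>(params, input, weights, device_output);
    return ck::utils::check_err(
        device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f);
}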
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#include "element_wise_operation.hpp" #include "element_wise_operation.hpp"
#include "reference_gemm.hpp" #include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "gemm_specialization.hpp"
#include "test_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#include "element_wise_operation.hpp" #include "element_wise_operation.hpp"
#include "reference_gemm.hpp" #include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "gemm_specialization.hpp"
#include "test_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#include "element_wise_operation.hpp" #include "element_wise_operation.hpp"
#include "reference_gemm.hpp" #include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "gemm_specialization.hpp"
#include "test_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......