Commit 07a673c6 authored by carlushuang

Merge remote-tracking branch 'origin/develop' into cpu_avx2

parents c0f698d5 ac0d8066
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_conv_bwd_weight_impl.hpp"
enum struct ConvDataType
{
F32_F32_F32, // 0
F16_F16_F16, // 1
BF16_BF16_BF16, // 2
INT8_INT8_INT8, // 3
};
enum struct ConvInputLayout
{
NCHW, // 0
NHWC, // 1
};
enum struct ConvWeightLayout
{
KCYX, // 0
KYXC, // 1
};
enum struct ConvOutputLayout
{
NKHW, // 0
NHWK, // 1
};
int profile_conv_bwd_weight(int argc, char* argv[])
{
if(argc != 26)
{
printf("arg1: tensor operation (conv_fwd: ForwardConvolution)\n");
printf("arg2: data type (0: fp32; 1: fp16)\n");
printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n");
printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n");
printf("arg5: output tensor layout (0: NKHW; 1: NHWK)\n");
printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
printf("arg25: split k (>=1)\n");
exit(1);
}
const auto data_type = static_cast<ConvDataType>(std::stoi(argv[2]));
const auto in_layout = static_cast<ConvInputLayout>(std::stoi(argv[3]));
const auto wei_layout = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
const auto out_layout = static_cast<ConvOutputLayout>(std::stoi(argv[5]));
const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]);
const ck::index_t C = std::stoi(argv[12]);
const ck::index_t Y = std::stoi(argv[13]);
const ck::index_t X = std::stoi(argv[14]);
const ck::index_t Hi = std::stoi(argv[15]);
const ck::index_t Wi = std::stoi(argv[16]);
const ck::index_t conv_stride_h = std::stoi(argv[17]);
const ck::index_t conv_stride_w = std::stoi(argv[18]);
const ck::index_t conv_dilation_h = std::stoi(argv[19]);
const ck::index_t conv_dilation_w = std::stoi(argv[20]);
const ck::index_t in_left_pad_h = std::stoi(argv[21]);
const ck::index_t in_left_pad_w = std::stoi(argv[22]);
const ck::index_t in_right_pad_h = std::stoi(argv[23]);
const ck::index_t in_right_pad_w = std::stoi(argv[24]);
ck::index_t split_k = std::stoi(argv[25]);
split_k = std::max(1, split_k);
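// Effective filter size is (Y - 1) * dilation + 1; output length per spatial dim is
// (input + left_pad + right_pad - effective_filter) / stride + 1.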
const ck::index_t YEff = (Y - 1) * conv_dilation_h + 1;
const ck::index_t XEff = (X - 1) * conv_dilation_w + 1;
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
if(data_type == ConvDataType::F32_F32_F32 && in_layout == ConvInputLayout::NHWC &&
wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
{
ck::profiler::profile_conv_bwd_weight_impl<2,
float,
float,
float,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
do_verification,
init_method,
do_log,
nrepeat,
N,
K,
C,
std::vector<ck::index_t>{Hi, Wi},
std::vector<ck::index_t>{Y, X},
std::vector<ck::index_t>{Ho, Wo},
std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w},
split_k);
}
else if(data_type == ConvDataType::F16_F16_F16 && in_layout == ConvInputLayout::NHWC &&
wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
{
ck::profiler::profile_conv_bwd_weight_impl<2,
ck::half_t,
ck::half_t,
ck::half_t,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
do_verification,
init_method,
do_log,
nrepeat,
N,
K,
C,
std::vector<ck::index_t>{Hi, Wi},
std::vector<ck::index_t>{Y, X},
std::vector<ck::index_t>{Ho, Wo},
std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w},
split_k);
}
else
{
throw std::runtime_error("wrong! this Conv data_type & layout is not implemented");
}
return 1;
}
......@@ -7,7 +7,7 @@
#include "profile_convnd_bwd_data_impl.hpp"
enum ConvDataType
enum struct ConvDataType
{
F32_F32_F32, // 0
F16_F16_F16, // 1
......@@ -15,27 +15,27 @@ enum ConvDataType
INT8_INT8_INT8, // 3
};
enum ConvInputLayout
enum struct ConvInputLayout
{
NCHW, // 0
NHWC, // 1
};
enum ConvWeightLayout
enum struct ConvWeightLayout
{
KCYX, // 0
KYXC, // 1
};
enum ConvOutputLayout
enum struct ConvOutputLayout
{
NKHW, // 0
NHWK, // 1
};
ck::conv_util::ConvParams parse_conv_params(int num_dim_spatial, char* argv[], int arg_idx)
ck::utils::conv::ConvParams parse_conv_params(int num_dim_spatial, char* argv[], int arg_idx)
{
// (N, K, C) + num_dim_spatial * 6 (filter, input, strides, dilations, pad left, pad right)
ck::conv_util::ConvParams params;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = num_dim_spatial;
params.N = std::stoi(argv[arg_idx++]);
......@@ -97,16 +97,16 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
return 1;
}
const int data_type = static_cast<ConvDataType>(std::stoi(argv[2]));
const int in_layout = static_cast<ConvInputLayout>(std::stoi(argv[3]));
const int wei_layout = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
const int out_layout = static_cast<ConvOutputLayout>(std::stoi(argv[5]));
const auto data_type = static_cast<ConvDataType>(std::stoi(argv[2]));
const auto in_layout = static_cast<ConvInputLayout>(std::stoi(argv[3]));
const auto wei_layout = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
const auto out_layout = static_cast<ConvOutputLayout>(std::stoi(argv[5]));
const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]);
ck::conv_util::ConvParams params = parse_conv_params(num_dim_spatial, argv, preParams);
ck::utils::conv::ConvParams params = parse_conv_params(num_dim_spatial, argv, preParams);
auto Run = [&](auto input_type, auto wei_type, auto out_type, auto acc_type) {
using InDataType = decltype(input_type);
......
......@@ -8,7 +8,7 @@
int profile_gemm_reduce(int argc, char* argv[])
{
enum struct GemmMatrixLayout_t
enum struct GemmMatrixLayout
{
MK_KN_MN, // 0
MK_NK_MN, // 1
......@@ -16,7 +16,7 @@ int profile_gemm_reduce(int argc, char* argv[])
KM_NK_MN, // 3
};
enum struct GemmReduceDataType_t
enum struct GemmReduceDataType
{
F32_F32_F32_F32_F32, // 0
F16_F16_F16_F32_F32, // 1
......@@ -39,8 +39,8 @@ int profile_gemm_reduce(int argc, char* argv[])
exit(1);
}
const auto data_type = static_cast<GemmReduceDataType_t>(std::stoi(argv[2]));
const auto layout = static_cast<GemmMatrixLayout_t>(std::stoi(argv[3]));
const auto data_type = static_cast<GemmReduceDataType>(std::stoi(argv[2]));
const auto layout = static_cast<GemmMatrixLayout>(std::stoi(argv[3]));
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
......@@ -54,8 +54,7 @@ int profile_gemm_reduce(int argc, char* argv[])
const int StrideB = std::stoi(argv[12]);
const int StrideC = std::stoi(argv[13]);
if(data_type == GemmReduceDataType_t::F16_F16_F16_F32_F32 &&
layout == GemmMatrixLayout_t::MK_KN_MN)
if(data_type == GemmReduceDataType::F16_F16_F16_F32_F32 && layout == GemmMatrixLayout::MK_KN_MN)
{
ck::profiler::profile_gemm_reduce_impl<ck::half_t,
ck::half_t,
......@@ -75,8 +74,8 @@ int profile_gemm_reduce(int argc, char* argv[])
(StrideB < 0) ? N : StrideB,
(StrideC < 0) ? N : StrideC);
}
else if(data_type == GemmReduceDataType_t::F16_F16_F16_F32_F32 &&
layout == GemmMatrixLayout_t::MK_NK_MN)
else if(data_type == GemmReduceDataType::F16_F16_F16_F32_F32 &&
layout == GemmMatrixLayout::MK_NK_MN)
{
ck::profiler::profile_gemm_reduce_impl<ck::half_t,
ck::half_t,
......@@ -96,8 +95,8 @@ int profile_gemm_reduce(int argc, char* argv[])
(StrideB < 0) ? K : StrideB,
(StrideC < 0) ? N : StrideC);
}
else if(data_type == GemmReduceDataType_t::F16_F16_F16_F32_F32 &&
layout == GemmMatrixLayout_t::KM_KN_MN)
else if(data_type == GemmReduceDataType::F16_F16_F16_F32_F32 &&
layout == GemmMatrixLayout::KM_KN_MN)
{
ck::profiler::profile_gemm_reduce_impl<ck::half_t,
ck::half_t,
......@@ -117,8 +116,8 @@ int profile_gemm_reduce(int argc, char* argv[])
(StrideB < 0) ? N : StrideB,
(StrideC < 0) ? N : StrideC);
}
else if(data_type == GemmReduceDataType_t::F16_F16_F16_F32_F32 &&
layout == GemmMatrixLayout_t::KM_NK_MN)
else if(data_type == GemmReduceDataType::F16_F16_F16_F32_F32 &&
layout == GemmMatrixLayout::KM_NK_MN)
{
ck::profiler::profile_gemm_reduce_impl<ck::half_t,
ck::half_t,
......
......@@ -6,7 +6,7 @@
#include <half.hpp>
#include "profile_grouped_gemm_impl.hpp"
enum GemmMatrixLayout
enum struct GemmMatrixLayout
{
MK_KN_MN, // 0
MK_NK_MN, // 1
......@@ -18,7 +18,7 @@ enum GemmMatrixLayout
KM_NK_NM, // 7
};
enum GemmDataType
enum struct GemmDataType
{
F32_F32_F32, // 0
F16_F16_F16, // 1
......@@ -61,8 +61,8 @@ int profile_grouped_gemm(int argc, char* argv[])
exit(1);
}
const int data_type = static_cast<GemmDataType>(std::stoi(argv[2]));
const int layout = static_cast<GemmMatrixLayout>(std::stoi(argv[3]));
const auto data_type = static_cast<GemmDataType>(std::stoi(argv[2]));
const auto layout = static_cast<GemmMatrixLayout>(std::stoi(argv[3]));
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
......
......@@ -20,9 +20,9 @@
using namespace std;
using ck::NanPropagation_t;
using ck::ReduceTensorIndices_t;
using ck::ReduceTensorOp_t;
using ck::NanPropagation;
using ck::ReduceTensorIndices;
using ck::ReduceTensorOp;
static struct option long_options[] = {{"inLengths", required_argument, nullptr, 'D'},
{"reduceDims", required_argument, nullptr, 'R'},
......@@ -84,7 +84,7 @@ static std::vector<T> getTypeValuesFromString(const char* cstr_values)
return (values);
}
enum struct appDataType_t
enum struct AppDataType
{
appHalf = 0,
appFloat = 1,
......@@ -130,18 +130,18 @@ class AppArgs
std::vector<float> scales;
ReduceTensorOp_t reduceOp = ReduceTensorOp_t::ADD;
appDataType_t compTypeId = appDataType_t::appFloat;
appDataType_t outTypeId = appDataType_t::appFloat;
ReduceTensorOp reduceOp = ReduceTensorOp::ADD;
AppDataType compTypeId = AppDataType::appFloat;
AppDataType outTypeId = AppDataType::appFloat;
bool compType_assigned = false;
bool outType_assigned = false;
NanPropagation_t nanOpt = NanPropagation_t::NOT_PROPAGATE_NAN;
ReduceTensorIndices_t indicesOpt = ReduceTensorIndices_t::NO_INDICES;
bool do_log = false;
bool do_verification = false;
bool do_dumpout = false;
NanPropagation nanOpt = NanPropagation::NOT_PROPAGATE_NAN;
ReduceTensorIndices indicesOpt = ReduceTensorIndices::NO_INDICES;
bool do_log = false;
bool do_verification = false;
bool do_dumpout = false;
int init_method;
int nrepeat;
......@@ -213,33 +213,33 @@ class AppArgs
if(!optarg)
throw std::runtime_error("Invalid option format!");
reduceOp = static_cast<ReduceTensorOp_t>(std::atoi(optarg));
reduceOp = static_cast<ReduceTensorOp>(std::atoi(optarg));
break;
case 'C':
if(!optarg)
throw std::runtime_error("Invalid option format!");
compTypeId = static_cast<appDataType_t>(std::atoi(optarg));
compTypeId = static_cast<AppDataType>(std::atoi(optarg));
compType_assigned = true;
break;
case 'W':
if(!optarg)
throw std::runtime_error("Invalid option format!");
outTypeId = static_cast<appDataType_t>(std::atoi(optarg));
outTypeId = static_cast<AppDataType>(std::atoi(optarg));
outType_assigned = true;
break;
case 'N':
if(!optarg)
throw std::runtime_error("Invalid option format!");
nanOpt = static_cast<NanPropagation_t>(std::atoi(optarg));
nanOpt = static_cast<NanPropagation>(std::atoi(optarg));
break;
case 'I':
if(!optarg)
throw std::runtime_error("Invalid option format!");
indicesOpt = static_cast<ReduceTensorIndices_t>(std::atoi(optarg));
indicesOpt = static_cast<ReduceTensorIndices>(std::atoi(optarg));
break;
case 'S':
if(!optarg)
......@@ -303,10 +303,10 @@ class AppArgs
scales.push_back(0.0f);
};
if(reduceOp == ReduceTensorOp_t::MIN || reduceOp == ReduceTensorOp_t::MAX ||
reduceOp == ReduceTensorOp_t::AMAX)
if(reduceOp == ReduceTensorOp::MIN || reduceOp == ReduceTensorOp::MAX ||
reduceOp == ReduceTensorOp::AMAX)
{
if(indicesOpt != ReduceTensorIndices_t::NO_INDICES)
if(indicesOpt != ReduceTensorIndices::NO_INDICES)
need_indices = true;
// for indexable operations, no need to assign compType and outType, just let them be
......@@ -333,22 +333,22 @@ int profile_reduce(int argc, char* argv[])
check_reduce_dims(rank, args.reduceDims);
if(args.reduceOp == ReduceTensorOp_t::MUL || args.reduceOp == ReduceTensorOp_t::NORM1)
if(args.reduceOp == ReduceTensorOp::MUL || args.reduceOp == ReduceTensorOp::NORM1)
throw std::runtime_error("MUL and NORM1 are not supported by composable kernel!");
if(args.use_half)
{
if(!args.compType_assigned)
args.compTypeId = appDataType_t::appHalf;
args.compTypeId = AppDataType::appHalf;
if(args.outType_assigned &&
(args.outTypeId != appDataType_t::appHalf && args.outTypeId != appDataType_t::appFloat))
args.outTypeId = appDataType_t::appFloat;
(args.outTypeId != AppDataType::appHalf && args.outTypeId != AppDataType::appFloat))
args.outTypeId = AppDataType::appFloat;
if(!args.outType_assigned)
args.outTypeId = appDataType_t::appHalf;
args.outTypeId = AppDataType::appHalf;
if(args.compTypeId == appDataType_t::appHalf)
if(args.compTypeId == AppDataType::appHalf)
{
profile_reduce_impl<ck::half_t, ck::half_t, ck::half_t>(args.do_verification,
args.init_method,
......@@ -363,7 +363,7 @@ int profile_reduce(int argc, char* argv[])
args.scales[0],
args.scales[1]);
}
else if(args.compTypeId == appDataType_t::appFloat)
else if(args.compTypeId == AppDataType::appFloat)
{
profile_reduce_impl<ck::half_t, float, ck::half_t>(args.do_verification,
args.init_method,
......@@ -399,16 +399,16 @@ int profile_reduce(int argc, char* argv[])
else if(args.use_int8)
{
if(!args.compType_assigned)
args.compTypeId = appDataType_t::appInt8;
args.compTypeId = AppDataType::appInt8;
if(args.outType_assigned &&
(args.outTypeId != appDataType_t::appInt8 && args.outTypeId != appDataType_t::appInt32))
args.outTypeId = appDataType_t::appInt32;
(args.outTypeId != AppDataType::appInt8 && args.outTypeId != AppDataType::appInt32))
args.outTypeId = AppDataType::appInt32;
if(!args.outType_assigned)
args.outTypeId = appDataType_t::appInt8;
args.outTypeId = AppDataType::appInt8;
if(args.compTypeId == appDataType_t::appInt8)
if(args.compTypeId == AppDataType::appInt8)
{
profile_reduce_impl<int8_t, int8_t, int8_t>(args.do_verification,
args.init_method,
......@@ -423,7 +423,7 @@ int profile_reduce(int argc, char* argv[])
args.scales[0],
args.scales[1]);
}
else if(args.compTypeId == appDataType_t::appInt32)
else if(args.compTypeId == AppDataType::appInt32)
{
profile_reduce_impl<int8_t, int32_t, int8_t>(args.do_verification,
args.init_method,
......@@ -443,12 +443,12 @@ int profile_reduce(int argc, char* argv[])
}
else if(args.use_bf16)
{
if(args.outType_assigned && (args.outTypeId != appDataType_t::appBFloat16 &&
args.outTypeId != appDataType_t::appFloat))
args.outTypeId = appDataType_t::appFloat;
if(args.outType_assigned &&
(args.outTypeId != AppDataType::appBFloat16 && args.outTypeId != AppDataType::appFloat))
args.outTypeId = AppDataType::appFloat;
if(!args.outType_assigned)
args.outTypeId = appDataType_t::appBFloat16;
args.outTypeId = AppDataType::appBFloat16;
profile_reduce_impl<ck::bhalf_t, float, ck::bhalf_t>(args.do_verification,
args.init_method,
......@@ -465,7 +465,7 @@ int profile_reduce(int argc, char* argv[])
}
else
{
if(args.compTypeId == appDataType_t::appFloat)
if(args.compTypeId == AppDataType::appFloat)
{
profile_reduce_impl<float, float, float>(args.do_verification,
args.init_method,
......@@ -480,7 +480,7 @@ int profile_reduce(int argc, char* argv[])
args.scales[0],
args.scales[1]);
}
else if(args.compTypeId == appDataType_t::appDouble)
else if(args.compTypeId == AppDataType::appDouble)
{
profile_reduce_impl<float, double, float>(args.do_verification,
args.init_method,
......
......@@ -17,6 +17,7 @@ int profile_conv_fwd_bias_relu_add(int, char*[]);
int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
int profile_convnd_bwd_data(int, char*[], int);
int profile_reduce(int, char*[]);
int profile_conv_bwd_weight(int, char*[]);
int profile_batched_gemm_reduce(int, char*[]);
int main(int argc, char* argv[])
......@@ -89,6 +90,10 @@ int main(int argc, char* argv[])
{
return profile_reduce(argc, argv);
}
else if(strcmp(argv[1], "conv2d_bwd_weight") == 0)
{
return profile_conv_bwd_weight(argc, argv);
}
else
{
// clang-format off
......@@ -97,7 +102,7 @@ int main(int argc, char* argv[])
" gemm_bias_relu: GEMM+Bias+ReLU\n"
" gemm_bias_relu_add: GEMM+Bias+ReLU+Add\n"
" gemm_reduce: GEMM+Reduce\n"
" grouped_gemm: Grouped Gemm\n"
" grouped_gemm: Grouped GEMM\n"
" conv_fwd: ForwardConvolution\n"
" conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU\n"
" conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add\n"
......@@ -106,10 +111,9 @@ int main(int argc, char* argv[])
" conv1d_bwd_data: BackwardConvolution data 1 dim\n"
" conv2d_bwd_data: BackwardConvolution data 2 dim\n"
" conv3d_bwd_data: BackwardConvolution data 3 dim\n"
" grouped_gemm: Grouped Gemm\n"
" reduce: REDUCE\n");
" reduce: REDUCE\n"
" conv2d_bwd_weight: Backward Weight Convolution 2d\n");
// clang-format on
return 0;
}
return 0;
}
......@@ -10,9 +10,11 @@ cmake
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
-D BUILD_DEV=OFF \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_CXX_FLAGS="-DCK_AMD_GPU_GFX908 --amdgpu-target=gfx908 -O3 -ftemplate-backtrace-limit=0 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only " \
-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -gline-tables-only -save-temps=$PWD" \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_PREFIX_PATH=/opt/rocm \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
${MY_PROJECT_SOURCE}
#-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only -save-temps=$PWD" \
#-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -gline-tables-only -save-temps=$PWD" \
......@@ -20,6 +20,7 @@ include_directories(BEFORE
${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance/gpu/reduce
${PROJECT_SOURCE_DIR}/library/include/ck/library/reference_tensor_operation/cpu
${PROJECT_SOURCE_DIR}/library/include/ck/library/reference_tensor_operation/gpu
${PROJECT_SOURCE_DIR}/library/include/ck/library/utility
${PROJECT_SOURCE_DIR}/test/include
${PROJECT_SOURCE_DIR}/profiler/include
${PROJECT_SOURCE_DIR}/external/include/half
......@@ -49,6 +50,8 @@ add_subdirectory(batched_gemm_reduce)
add_subdirectory(grouped_gemm)
add_subdirectory(convnd_fwd)
add_subdirectory(reduce)
add_subdirectory(conv2d_bwd_weight)
add_subdirectory(cpu_ukernel)
add_subdirectory(cpu_threadwise_transfer)
add_subdirectory(convnd_fwd_cpu)
#include "profile_batched_gemm_impl.hpp"
#include <iostream>
#include "profile_batched_gemm_impl.hpp"
namespace {
using ADataType = ck::half_t;
using BDataType = ck::half_t;
......
include_directories(BEFORE
${PROJECT_SOURCE_DIR}/profiler/include
${PROJECT_SOURCE_DIR}/external/include/half
)
add_test_executable(test_conv2d_bwd_weight conv2d_bwd_weight.cpp)
target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor)
target_link_libraries(test_conv2d_bwd_weight PRIVATE device_conv2d_bwd_weight_instance)
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include <vector>
#include "conv_fwd_util.hpp"
#include "profile_conv_bwd_weight_impl.hpp"
int test_self()
{
bool pass = true;
std::vector<ck::utils::conv::ConvParams> params;
params.push_back({2, 128, 256, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
params.push_back({2, 128, 256, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
params.push_back({2, 128, 256, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
for(auto& param : params)
{
// f32
pass &= ck::profiler::profile_conv_bwd_weight_impl<2,
float,
float,
float,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads,
2);
// fp16
pass &= ck::profiler::profile_conv_bwd_weight_impl<2,
ck::half_t,
ck::half_t,
ck::half_t,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads,
2);
}
return pass;
}
int main(int argc, char* argv[])
{
int data_type = 0;
int init_method = 0;
// Conv shape
ck::index_t N = 128;
ck::index_t K = 256;
ck::index_t C = 192;
ck::index_t Y = 3;
ck::index_t X = 3;
ck::index_t Hi = 71;
ck::index_t Wi = 71;
ck::index_t conv_stride_h = 2;
ck::index_t conv_stride_w = 2;
ck::index_t conv_dilation_h = 1;
ck::index_t conv_dilation_w = 1;
ck::index_t in_left_pad_h = 1;
ck::index_t in_left_pad_w = 1;
ck::index_t in_right_pad_h = 1;
ck::index_t in_right_pad_w = 1;
ck::index_t split_k = 1;
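// With these defaults the expected output spatial size is (71 + 1 + 1 - 3) / 2 + 1 = 36 x 36.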
bool pass = true;
if(argc == 1)
{
pass = test_self();
}
else
{
if(argc == 3)
{
data_type = std::stoi(argv[1]);
init_method = std::stoi(argv[2]);
}
else if(argc == 19)
{
data_type = std::stoi(argv[1]);
init_method = std::stoi(argv[2]);
N = std::stoi(argv[3]);
K = std::stoi(argv[4]);
C = std::stoi(argv[5]);
Y = std::stoi(argv[6]);
X = std::stoi(argv[7]);
Hi = std::stoi(argv[8]);
Wi = std::stoi(argv[9]);
conv_stride_h = std::stoi(argv[10]);
conv_stride_w = std::stoi(argv[11]);
conv_dilation_h = std::stoi(argv[12]);
conv_dilation_w = std::stoi(argv[13]);
in_left_pad_h = std::stoi(argv[14]);
in_left_pad_w = std::stoi(argv[15]);
in_right_pad_h = std::stoi(argv[16]);
in_right_pad_w = std::stoi(argv[17]);
split_k = std::stoi(argv[18]);
}
else
{
printf("arg1: data type (0=fp32, 1=fp16, 2= bfp16, 3= int8_t )\n");
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
printf("arg3 to 17: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
exit(1);
}
ck::utils::conv::ConvParams param{2,
N,
K,
C,
{Y, X},
{Hi, Wi},
{conv_stride_h, conv_stride_w},
{conv_dilation_h, conv_dilation_w},
{in_left_pad_h, in_left_pad_w},
{in_right_pad_h, in_right_pad_w}};
if(data_type == 0)
{
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
float,
float,
float,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1,
init_method,
0,
1,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads,
split_k);
}
else if(data_type == 1)
{
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
ck::half_t,
ck::half_t,
ck::half_t,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1,
init_method,
0,
1,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads,
split_k);
}
else
{
std::cout << "Not support data type" << std::endl;
return 1;
}
}
if(pass)
{
std::cout << "test conv2d bwd weight : Pass" << std::endl;
return 0;
}
else
{
std::cout << "test conv2d bwd weight: Fail " << std::endl;
return -1;
}
}
......@@ -3,13 +3,13 @@
#include <vector>
#include "config.hpp"
#include "conv_utils.hpp"
#include "conv_fwd_util.hpp"
#include "tensor_layout.hpp"
#include "test_util.hpp"
#include "check_err.hpp"
namespace {
bool TestConvParams_GetOutputSpatialLengths()
bool test_conv_params_get_output_spatial_lengths()
{
bool res{true};
// -------------------------- default 2D ------------------------------------
......@@ -18,28 +18,28 @@ bool TestConvParams_GetOutputSpatialLengths()
// stride {2,2},
// dilations {1,1},
// padding {{1,1}, {1,1}}
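// Expected output: with stride 2, dilation 1, padding 1/1 (and, presumably, the default
// 71x71 input with a 3x3 filter), each spatial length is (71 + 1 + 1 - 3) / 2 + 1 = 36.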
ck::conv_util::ConvParams conv_params;
ck::utils::conv::ConvParams conv_params;
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36},
"Error: ConvParams 2D default constructor.");
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36},
"Error: ConvParams 2D default constructor.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{1, 1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{2, 2};
conv_params.input_left_pads = std::vector<ck::index_t>{2, 2};
conv_params.input_right_pads = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37},
"Error: ConvParams 2D padding left/right {2,2}.");
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37},
"Error: ConvParams 2D padding left/right {2,2}.");
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{3, 3};
......@@ -47,9 +47,10 @@ bool TestConvParams_GetOutputSpatialLengths()
conv_params.input_right_pads = std::vector<ck::index_t>{1, 1};
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(out_spatial_len,
std::vector<ck::index_t>{23, 23},
"Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}.");
res =
ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{23, 23},
"Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}.");
// -------------------------- 1D ------------------------------------
conv_params.num_dim_spatial = 1;
......@@ -61,24 +62,25 @@ bool TestConvParams_GetOutputSpatialLengths()
conv_params.input_right_pads = std::vector<ck::index_t>{1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D.");
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{1, 1};
conv_params.conv_filter_strides = std::vector<ck::index_t>{1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{71}, "Error: ConvParams 1D stride {1}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{2};
conv_params.input_left_pads = std::vector<ck::index_t>{2};
conv_params.input_right_pads = std::vector<ck::index_t>{2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(out_spatial_len,
std::vector<ck::index_t>{37},
"Error: ConvParams 1D padding left/right {2}.");
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37},
"Error: ConvParams 1D padding left/right {2}.");
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D dilation {2}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{3};
......@@ -86,9 +88,9 @@ bool TestConvParams_GetOutputSpatialLengths()
conv_params.input_right_pads = std::vector<ck::index_t>{1};
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(out_spatial_len,
std::vector<ck::index_t>{23},
"Error: ConvParams 1D strides{3}, padding {1}, dilations {2}.");
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{23},
"Error: ConvParams 1D strides{3}, padding {1}, dilations {2}.");
// -------------------------- 3D ------------------------------------
conv_params.num_dim_spatial = 3;
......@@ -100,35 +102,35 @@ bool TestConvParams_GetOutputSpatialLengths()
conv_params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36, 36, 36}, "Error: ConvParams 3D.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{1, 1, 1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(out_spatial_len,
std::vector<ck::index_t>{71, 71, 71},
"Error: ConvParams 3D stride {1, 1, 1}.");
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{71, 71, 71},
"Error: ConvParams 3D stride {1, 1, 1}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{2, 2, 2};
conv_params.input_left_pads = std::vector<ck::index_t>{2, 2, 2};
conv_params.input_right_pads = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37, 37},
"Error: ConvParams 3D padding left/right {2, 2, 2}.");
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37, 37},
"Error: ConvParams 3D padding left/right {2, 2, 2}.");
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36, 36},
"Error: ConvParams 3D dilation {2, 2, 2}.");
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36, 36},
"Error: ConvParams 3D dilation {2, 2, 2}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{3, 3, 3};
conv_params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
conv_params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = test::check_err(
res = ck::utils::check_err(
out_spatial_len,
std::vector<ck::index_t>{23, 23, 23},
"Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}.");
......@@ -136,50 +138,54 @@ bool TestConvParams_GetOutputSpatialLengths()
return res;
}
bool TestGetHostTensorDescriptor()
bool test_get_host_tensor_descriptor()
{
bool res{true};
namespace tl = ck::tensor_layout::convolution;
std::vector<std::size_t> dims{2, 3, 4, 5};
HostTensorDescriptor h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NHWC{});
res = test::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!");
res = test::check_err(
HostTensorDescriptor h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NHWC{});
res =
ck::utils::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!");
res = ck::utils::check_err(
h.GetStrides(), {3 * 4 * 5, 1, 3 * 5, 3}, "Error: wrong NHWC dimensions strides!");
h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NCHW{});
res = test::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!");
res = test::check_err(
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCHW{});
res =
ck::utils::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!");
res = ck::utils::check_err(
h.GetStrides(), {3 * 4 * 5, 4 * 5, 5, 1}, "Error: wrong NCHW dimensions strides!");
dims = std::vector<std::size_t>{2, 3, 4};
h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NWC{});
res = test::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!");
res = test::check_err(h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!");
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NWC{});
res = ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!");
res =
ck::utils::check_err(h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!");
h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NCW{});
res = test::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!");
res = test::check_err(h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!");
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCW{});
res = ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!");
res =
ck::utils::check_err(h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!");
dims = std::vector<std::size_t>{2, 3, 4, 5, 6};
h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NDHWC{});
res = test::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!");
res = test::check_err(h.GetStrides(),
{3 * 4 * 5 * 6, // N
1, // C
3 * 5 * 6, // D
3 * 6, // H
3}, // W
"Error: wrong NDHWC dimensions strides!");
h = ck::conv_util::GetHostTensorDescriptor(dims, tl::NCDHW{});
res = test::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!");
res = test::check_err(h.GetStrides(),
{3 * 4 * 5 * 6, // N
4 * 5 * 6, // C
5 * 6, // D
6, // H
1}, // W
"Error: wrong NCDHW dimensions strides!");
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NDHWC{});
res = ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!");
res = ck::utils::check_err(h.GetStrides(),
{3 * 4 * 5 * 6, // N
1, // C
3 * 5 * 6, // D
3 * 6, // H
3}, // W
"Error: wrong NDHWC dimensions strides!");
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCDHW{});
res = ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!");
res = ck::utils::check_err(h.GetStrides(),
{3 * 4 * 5 * 6, // N
4 * 5 * 6, // C
5 * 6, // D
6, // H
1}, // W
"Error: wrong NCDHW dimensions strides!");
return res;
}
......@@ -188,10 +194,11 @@ bool TestGetHostTensorDescriptor()
int main(void)
{
bool res = TestConvParams_GetOutputSpatialLengths();
std::cout << "TestConvParams_GetOutputSpatialLengths ..... " << (res ? "SUCCESS" : "FAILURE")
bool res = test_conv_params_get_output_spatial_lengths();
std::cout << "test_conv_params_get_output_spatial_lengths ..... "
<< (res ? "SUCCESS" : "FAILURE") << std::endl;
res = test_get_host_tensor_descriptor();
std::cout << "test_get_host_tensor_descriptor ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = TestGetHostTensorDescriptor();
std::cout << "TestGetHostTensorDescriptor ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
}
......@@ -12,7 +12,7 @@ int main()
{
bool pass = true;
// check 1d
std::vector<ck::conv_util::ConvParams> params;
std::vector<ck::utils::conv::ConvParams> params;
params.push_back({1, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
params.push_back({1, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
params.push_back({1, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
......
......@@ -5,10 +5,11 @@
#include "data_type.hpp"
#include "element_wise_operation.hpp"
#include "conv_test_util.hpp"
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "host_tensor.hpp"
#include "tensor_layout.hpp"
#include "test_util.hpp"
#include "check_err.hpp"
// Forward declarations for conv instances.
......@@ -34,10 +35,10 @@ void add_device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instances(std::vector<DeviceConv
namespace {
bool TestConv1DNWC()
bool test_conv1D_nwc()
{
bool res{true};
ck::conv_util::ConvParams params;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = 1;
params.N = 2;
params.K = 16;
......@@ -49,30 +50,31 @@ bool TestConv1DNWC()
params.input_left_pads = std::vector<ck::index_t>{1};
params.input_right_pads = std::vector<ck::index_t>{1};
auto host_tensors = test::conv::GetHostTensors<float,
float,
float,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(params);
auto host_tensors =
ck::utils::conv::get_host_tensors<float,
float,
float,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(params);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& host_output = std::get<2>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
test::conv::RunReferenceConv<1>(params, input, weights, host_output);
ck::utils::conv::run_reference_convolution_forward<1>(params, input, weights, host_output);
test::conv::RunConv<1>(params, input, weights, device_output);
res = res &&
test::check_err(
ck::utils::check_err(
device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f);
return res;
}
template <typename T>
bool TestConv1DNWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
bool test_conv1d_nwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
{
ck::conv_util::ConvParams params;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = 1;
params.filter_spatial_lengths = std::vector<ck::index_t>{3};
params.input_spatial_lengths = std::vector<ck::index_t>{71};
......@@ -81,51 +83,52 @@ bool TestConv1DNWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
params.input_left_pads = std::vector<ck::index_t>{1};
params.input_right_pads = std::vector<ck::index_t>{1};
auto host_tensors = test::conv::GetHostTensors<T,
T,
T,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(params);
auto host_tensors =
ck::utils::conv::get_host_tensors<T,
T,
T,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(params);
const Tensor<T>& input = std::get<0>(host_tensors);
const Tensor<T>& weights = std::get<1>(host_tensors);
Tensor<T>& host_output = std::get<2>(host_tensors);
Tensor<T>& device_output = std::get<3>(host_tensors);
test::conv::RunReferenceConv<1>(params, input, weights, host_output);
return test::conv::RunConvInstances<1>(
ck::utils::conv::run_reference_convolution_forward<1>(params, input, weights, host_output);
return ck::utils::conv::run_convolution_forward_instances<1>(
params, conv_ptrs, input, weights, device_output, host_output);
}
bool TestConv1DNWCBF16Instances()
bool test_conv1d_nwc_bf16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv1d_fwd_instance::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_bf16_instances(conv_ptrs);
return TestConv1DNWCInstances<ck::bhalf_t>(conv_ptrs);
return test_conv1d_nwc_instances<ck::bhalf_t>(conv_ptrs);
}
bool TestConv1DNWCF16Instances()
bool test_conv1d_nwc_f16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv1d_fwd_instance::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f16_instances(conv_ptrs);
return TestConv1DNWCInstances<ck::half_t>(conv_ptrs);
return test_conv1d_nwc_instances<ck::half_t>(conv_ptrs);
}
bool TestConv1DNWCF32Instances()
bool test_conv1d_nwc_f32_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv1d_fwd_instance::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instances(conv_ptrs);
return TestConv1DNWCInstances<float>(conv_ptrs);
return test_conv1d_nwc_instances<float>(conv_ptrs);
}
bool TestConv1DNWCInt8Instances()
bool test_conv1d_nwc_int8_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv1d_fwd_instance::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instances(conv_ptrs);
return TestConv1DNWCInstances<int8_t>(conv_ptrs);
return test_conv1d_nwc_instances<int8_t>(conv_ptrs);
}
} // anonymous namespace
......@@ -133,18 +136,20 @@ bool TestConv1DNWCInt8Instances()
int main()
{
bool res{true};
res = TestConv1DNWC();
std::cout << "TestConv1DNWC ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = test_conv1D_nwc();
std::cout << "test_conv1D_nwc ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = TestConv1DNWCBF16Instances();
res = test_conv1d_nwc_bf16_instances();
std::cout << "\nTestConv1DNWCBF16Instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = TestConv1DNWCF16Instances();
std::cout << "\nTestConv1DNWCF16Instances ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = TestConv1DNWCF32Instances();
std::cout << "\nTestConv1DNWCF32Instances ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = TestConv1DNWCInt8Instances();
std::cout << "\nTestConv1DNWCInt8Instances ..... " << (res ? "SUCCESS" : "FAILURE")
res = test_conv1d_nwc_f16_instances();
std::cout << "\ntest_conv1d_nwc_f16_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = test_conv1d_nwc_f32_instances();
std::cout << "\ntest_conv1d_nwc_f32_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = test_conv1d_nwc_int8_instances();
std::cout << "\ntes_tconv1_dnw_cint_8instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
return res ? 0 : 1;
......
......@@ -6,10 +6,11 @@
#include "data_type.hpp"
#include "element_wise_operation.hpp"
#include "conv_test_util.hpp"
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "host_tensor.hpp"
#include "tensor_layout.hpp"
#include "test_util.hpp"
#include "check_err.hpp"
// Forward declarations for conv instances.
using DeviceConvFwdNoOpPtr =
......@@ -36,35 +37,35 @@ void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(std::vector<DeviceC
namespace {
bool TestConv2DNHWC()
bool test_conv2d_nhwc()
{
bool res{true};
ck::conv_util::ConvParams params;
ck::utils::conv::ConvParams params;
params.N = 2;
params.K = 16;
params.C = 4;
params.input_spatial_lengths = std::vector<ck::index_t>{16, 16};
params.conv_filter_strides = std::vector<ck::index_t>{1, 1};
auto host_tensors = test::conv::GetHostTensors(params);
auto host_tensors = ck::utils::conv::get_host_tensors(params);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& host_output = std::get<2>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
test::conv::RunReferenceConv<2>(params, input, weights, host_output);
ck::utils::conv::run_reference_convolution_forward<2>(params, input, weights, host_output);
test::conv::RunConv<2>(params, input, weights, device_output);
res = res &&
test::check_err(
ck::utils::check_err(
device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f);
return res;
}
template <typename T>
bool TestConv2DNHWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
bool test_conv2d_nhwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
{
ck::conv_util::ConvParams params;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = 2;
params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3};
params.input_spatial_lengths = std::vector<ck::index_t>{71, 71};
......@@ -73,54 +74,55 @@ bool TestConv2DNHWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
params.input_left_pads = std::vector<ck::index_t>{1, 1};
params.input_right_pads = std::vector<ck::index_t>{1, 1};
auto host_tensors = test::conv::GetHostTensors<T,
T,
T,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(params);
auto host_tensors =
ck::utils::conv::get_host_tensors<T,
T,
T,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(params);
const Tensor<T>& input = std::get<0>(host_tensors);
const Tensor<T>& weights = std::get<1>(host_tensors);
Tensor<T>& host_output = std::get<2>(host_tensors);
Tensor<T>& device_output = std::get<3>(host_tensors);
test::conv::RunReferenceConv<2>(params, input, weights, host_output);
return test::conv::RunConvInstances<2>(
ck::utils::conv::run_reference_convolution_forward<2>(params, input, weights, host_output);
return ck::utils::conv::run_convolution_forward_instances<2>(
params, conv_ptrs, input, weights, device_output, host_output);
}
bool TestConv2DNHWCBF16Instances()
bool test_conv2d_nhwc_bf16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(conv_ptrs);
return TestConv2DNHWCInstances<ck::bhalf_t>(conv_ptrs);
return test_conv2d_nhwc_instances<ck::bhalf_t>(conv_ptrs);
}
bool TestConv2DNHWCF16Instances()
bool test_conv2d_nhwc_f16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(conv_ptrs);
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances(conv_ptrs);
return TestConv2DNHWCInstances<ck::half_t>(conv_ptrs);
return test_conv2d_nhwc_instances<ck::half_t>(conv_ptrs);
}
bool TestConv2DNHWCF32Instances()
bool test_conv2d_nhwc_f32_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(conv_ptrs);
return TestConv2DNHWCInstances<float>(conv_ptrs);
return test_conv2d_nhwc_instances<float>(conv_ptrs);
}
bool TestConv2DNHWCInt8Instances()
bool test_conv2d_nhwc_int8_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(conv_ptrs);
return TestConv2DNHWCInstances<int8_t>(conv_ptrs);
return test_conv2d_nhwc_instances<int8_t>(conv_ptrs);
}
} // anonymous namespace
......@@ -128,19 +130,20 @@ bool TestConv2DNHWCInt8Instances()
int main()
{
bool res{true};
res = TestConv2DNHWC();
std::cout << "TestConv2DNHWC ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = test_conv2d_nhwc();
std::cout << "test_conv2d_nhwc ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = TestConv2DNHWCBF16Instances();
std::cout << "\nTestConv2DNHWCBF16Instances ..... " << (res ? "SUCCESS" : "FAILURE")
res = test_conv2d_nhwc_bf16_instances();
std::cout << "\ntest_conv2d_nhwc_bf16_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = TestConv2DNHWCF16Instances();
std::cout << "\nTestConv2DNHWCF16Instances ....." << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = TestConv2DNHWCF32Instances();
std::cout << "\nTestConv2DNHWCF32Instances ..... " << (res ? "SUCCESS" : "FAILURE")
res = test_conv2d_nhwc_f16_instances();
std::cout << "\ntest_conv2d_nhwc_f16_instances ....." << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = TestConv2DNHWCInt8Instances();
std::cout << "\nTestConv2DNHWCInt8Instances ..... " << (res ? "SUCCESS" : "FAILURE")
res = test_conv2d_nhwc_f32_instances();
std::cout << "\ntest_conv2d_nhwc_f32_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = test_conv2d_nhwc_int8_instances();
std::cout << "\ntest_conv2d_nhwc_int8_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
return res ? 0 : 1;
......
......@@ -6,10 +6,11 @@
#include "data_type.hpp"
#include "element_wise_operation.hpp"
#include "conv_test_util.hpp"
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "host_tensor.hpp"
#include "tensor_layout.hpp"
#include "test_util.hpp"
#include "check_err.hpp"
// Forward declarations for conv instances.
using DeviceConvFwdNoOpPtr =
......@@ -34,10 +35,10 @@ void add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instances(std::vector<Devi
namespace {
bool TestConv3DNDHWC()
bool test_conv3d_ndhwc()
{
bool res{true};
ck::conv_util::ConvParams params;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = 3;
params.N = 2;
params.K = 16;
......@@ -49,30 +50,31 @@ bool TestConv3DNDHWC()
params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors = test::conv::GetHostTensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params);
auto host_tensors =
ck::utils::conv::get_host_tensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& host_output = std::get<2>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
test::conv::RunReferenceConv<3>(params, input, weights, host_output);
ck::utils::conv::run_reference_convolution_forward<3>(params, input, weights, host_output);
test::conv::RunConv<3>(params, input, weights, device_output);
res = res &&
test::check_err(
ck::utils::check_err(
device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f);
return res;
}
bool TestConv3DNDHWC2GBInput()
bool test_conv3d_ndhwc_2gb_input()
{
// >2GB Input
ck::conv_util::ConvParams params;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = 3;
params.N = 2;
params.K = 16;
......@@ -85,12 +87,12 @@ bool TestConv3DNDHWC2GBInput()
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors =
test::conv::GetHostTensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false);
ck::utils::conv::get_host_tensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
......@@ -113,10 +115,10 @@ bool TestConv3DNDHWC2GBInput()
return false;
}
bool TestConv3DNDHWC2GBFilters()
bool test_conv3d_ndhwc_2gb_filters()
{
// >2GB Filters
ck::conv_util::ConvParams params;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = 3;
params.N = 2;
params.K = 16;
......@@ -129,12 +131,12 @@ bool TestConv3DNDHWC2GBFilters()
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors =
test::conv::GetHostTensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false);
ck::utils::conv::get_host_tensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
......@@ -157,10 +159,10 @@ bool TestConv3DNDHWC2GBFilters()
return false;
}
bool TestConv3DNDHWC2GBOutput()
bool test_conv3d_ndhwc_2gb_output()
{
// >2GB Output
ck::conv_util::ConvParams params;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = 3;
params.N = 2;
params.K = 16;
......@@ -173,12 +175,12 @@ bool TestConv3DNDHWC2GBOutput()
params.input_right_pads = std::vector<ck::index_t>{2, 2, 2};
auto host_tensors =
test::conv::GetHostTensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false);
ck::utils::conv::get_host_tensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
......@@ -202,9 +204,9 @@ bool TestConv3DNDHWC2GBOutput()
}
template <typename T>
bool TestConv3DNDHWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
bool test_conv3d_ndhwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
{
ck::conv_util::ConvParams params;
ck::utils::conv::ConvParams params;
params.N = 64;
params.num_dim_spatial = 3;
params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3, 2};
......@@ -214,52 +216,53 @@ bool TestConv3DNDHWCInstances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs
params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors = test::conv::GetHostTensors<T,
T,
T,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params);
auto host_tensors =
ck::utils::conv::get_host_tensors<T,
T,
T,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params);
const Tensor<T>& input = std::get<0>(host_tensors);
const Tensor<T>& weights = std::get<1>(host_tensors);
Tensor<T>& host_output = std::get<2>(host_tensors);
Tensor<T>& device_output = std::get<3>(host_tensors);
test::conv::RunReferenceConv<3>(params, input, weights, host_output);
return test::conv::RunConvInstances<3>(
ck::utils::conv::run_reference_convolution_forward<3>(params, input, weights, host_output);
return ck::utils::conv::run_convolution_forward_instances<3>(
params, conv_ptrs, input, weights, device_output, host_output);
}
bool TestConv3DNDHWCBF16Instances()
bool test_conv3d_ndhwc_bf16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_bf16_instances(conv_ptrs);
return TestConv3DNDHWCInstances<ck::bhalf_t>(conv_ptrs);
return test_conv3d_ndhwc_instances<ck::bhalf_t>(conv_ptrs);
}
bool TestConv3DNDHWCF16Instances()
bool test_conv3d_ndhwc_f16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f16_instances(conv_ptrs);
return TestConv3DNDHWCInstances<ck::half_t>(conv_ptrs);
return test_conv3d_ndhwc_instances<ck::half_t>(conv_ptrs);
}
bool TestConv3DNDHWCF32Instances()
bool test_conv3d_ndhwc_f32_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f32_instances(conv_ptrs);
return TestConv3DNDHWCInstances<float>(conv_ptrs);
return test_conv3d_ndhwc_instances<float>(conv_ptrs);
}
bool TestConv3DNDHWCInt8Instances()
bool test_conv3d_ndhwc_int8_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instances(conv_ptrs);
return TestConv3DNDHWCInstances<int8_t>(conv_ptrs);
return test_conv3d_ndhwc_instances<int8_t>(conv_ptrs);
}
} // anonymous namespace
......@@ -267,27 +270,30 @@ bool TestConv3DNDHWCInt8Instances()
int main()
{
bool res{true};
res = TestConv3DNDHWC();
std::cout << "TestConv3DNDHWC ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = test_conv3d_ndhwc();
std::cout << "test_conv3d_ndhwc ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = TestConv3DNDHWC2GBInput();
std::cout << "\nTestConv3DNDHWC2GBInput ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = TestConv3DNDHWC2GBFilters();
std::cout << "\nTestConv3DNDHWC2GBFilters ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = TestConv3DNDHWC2GBOutput();
std::cout << "\nTestConv3DNDHWC2GBOutput ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = test_conv3d_ndhwc_2gb_input();
std::cout << "\ntest_conv3d_ndhwc_2gb_input ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = test_conv3d_ndhwc_2gb_filters();
std::cout << "\ntest_conv3d_ndhwc_2gb_filters ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = test_conv3d_ndhwc_2gb_output();
std::cout << "\ntest_conv3d_ndhwc_2gb_output ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = TestConv3DNDHWCBF16Instances();
std::cout << "\nTestConv3DNDHWCBF16Instances ..... " << (res ? "SUCCESS" : "FAILURE")
res = test_conv3d_ndhwc_bf16_instances();
std::cout << "\ntest_conv3d_ndhwc_bf16_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = TestConv3DNDHWCF16Instances();
std::cout << "\nTestConv3DNDHWCF16Instances ..... " << (res ? "SUCCESS" : "FAILURE")
res = test_conv3d_ndhwc_f16_instances();
std::cout << "\ntest_conv3d_ndhwc_f16_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = TestConv3DNDHWCF32Instances();
std::cout << "\nTestConv3DNDHWCF32Instances ..... " << (res ? "SUCCESS" : "FAILURE")
res = test_conv3d_ndhwc_f32_instances();
std::cout << "\ntest_conv3d_ndhwc_f32_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = TestConv3DNDHWCInt8Instances();
std::cout << "\nTestConv3DNDHWCInt8Instances ..... " << (res ? "SUCCESS" : "FAILURE")
res = test_conv3d_ndhwc_int8_instances();
std::cout << "\ntest_conv3d_ndhw_cint_8instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
return res ? 0 : 1;
......
#ifndef TEST_CONV_UTIL_HPP
#define TEST_CONV_UTIL_HPP
#include <tuple>
#include "config.hpp"
#include "conv_fwd_util.hpp"
#include "device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp"
#include "element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "sequence.hpp"
namespace {
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
static constexpr auto ConvFwdDefault =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
template <ck::index_t SpatialDims, typename InDataType, typename WeiDataType, typename OutDataType>
using DeviceConvNDFwdInstance = ck::tensor_operation::device::
DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K<
// clang-format off
InDataType, //
WeiDataType, //
OutDataType, //
InDataType, //
InElementOp, // Input Elementwise Operation
WeiElementOp, // Weights Elementwise Operation
OutElementOp, // Output Elementwise Operation
ConvFwdDefault, // ConvForwardSpecialization
SpatialDims, // SpatialDims
64, // BlockSize
16, // MPerBlock
16, // NPerBlock
4, // K0PerBlock
1, // K1
16, // MPerXDL
16, // NPerXDL
1, // MXdlPerWave
1, // NXdlPerWave
S<1, 16, 1>, // ABlockTransferThreadClusterLengths_K0_M_K1
S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // ABlockTransferSrcAccessOrder
2, // ABlockTransferSrcVectorDim
1, // ABlockTransferSrcScalarPerVector
1, // ABlockTransferDstScalarPerVector_K1
true, // ABlockLdsAddExtraM
S<1, 16, 1>, // BBlockTransferThreadClusterLengths_K0_N_K1
S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // BBlockTransferSrcAccessOrder
2, // BBlockTransferSrcVectorDim
1, // BBlockTransferSrcScalarPerVector
1, // BBlockTransferDstScalarPerVector_K1
true, // BBlockTransferAddExtraN
7, // CThreadTransferSrcDstVectorDim
1>; // CThreadTransferDstScalarPerVector
// clang-format on
} // namespace
namespace test {
namespace conv {
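// Thin helper for the forward-convolution tests: runs the DeviceConvNDFwdInstance defined
// above via ck::utils::conv::run_convolution_forward on the given host tensors.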
template <ck::index_t NDim,
typename InDataType = float,
typename WeiDataType = float,
typename OutDataType = float>
void RunConv(const ck::utils::conv::ConvParams& params,
const Tensor<InDataType>& input,
const Tensor<WeiDataType>& weights,
Tensor<OutDataType>& output)
{
ck::utils::conv::run_convolution_forward<NDim,
InDataType,
WeiDataType,
OutDataType,
DeviceConvNDFwdInstance>(
params, input, weights, output);
}
} // namespace conv
} // namespace test
#endif
......@@ -19,7 +19,6 @@
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
#include "test_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
......@@ -19,7 +19,6 @@
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
#include "test_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
......@@ -19,7 +19,6 @@
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
#include "test_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......