"...resnet50_tensorflow.git" did not exist on "d872cee2a854db6aa6da0cf0a7e4e5c13b736c84"
Commit d92fb7e8 authored by rocking

Merge commit 'a3c910ac' into gemm_softmax

parents bfc80764 a3c910ac
#pragma once
namespace ck {
namespace profiler {
int profile_convnd_fwd(int argc, char* argv[]);
} // namespace profiler
} // namespace ck
@@ -7,7 +7,7 @@
 #include "tensor_layout.hpp"
 #include "device_tensor.hpp"
 #include "element_wise_operation.hpp"
-#include "element_wise_reduce_operation.hpp"
+#include "reduction_operator.hpp"
 #include "device_gemm_reduce.hpp"
 #include "reference_gemm.hpp"
@@ -20,8 +20,7 @@ using DeviceGemmReduceNoOpPtr = ck::tensor_operation::device::DeviceGemmReducePt
     ck::tensor_operation::element_wise::PassThrough,
     ck::tensor_operation::element_wise::PassThrough,
     ck::tensor_operation::element_wise::PassThrough,
-    ck::tensor_operation::element_wise::ReduceSum,
-    ck::tensor_operation::element_wise::ReduceSquareSum>;
+    ck::tensor_operation::element_wise::UnarySquare<float, float, false>>;
 void add_device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instances(
     std::vector<DeviceGemmReduceNoOpPtr>&);
@@ -113,17 +112,19 @@ bool profile_gemm_reduce_impl(int do_verification,
         b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5}, num_thread);
     }
     using AElementOp = ck::tensor_operation::element_wise::PassThrough;
     using BElementOp = ck::tensor_operation::element_wise::PassThrough;
     using CElementOp = ck::tensor_operation::element_wise::PassThrough;
-    using D0ReduceOp = ck::tensor_operation::element_wise::ReduceSum;
-    using D1ReduceOp = ck::tensor_operation::element_wise::ReduceSquareSum;
+    using D0ReduceOp = ck::reduce::Add<float>;
+    using D1ReduceOp = ck::reduce::Add<float>;
+    using D1ElementOp = ck::tensor_operation::element_wise::UnarySquare<float, float, false>;
     const auto a_element_op = AElementOp{};
     const auto b_element_op = BElementOp{};
     const auto c_element_op = CElementOp{};
     const auto d0_reduce_op = D0ReduceOp{};
     const auto d1_reduce_op = D1ReduceOp{};
+    const auto d1_element_op = D1ElementOp{};
     if(do_verification)
     {
@@ -140,17 +141,21 @@ bool profile_gemm_reduce_impl(int do_verification,
         for(int m = 0; m < M; ++m)
         {
-            float d0_acc = d0_reduce_op.GetReduceZeroValue();
-            float d1_acc = d1_reduce_op.GetReduceZeroValue();
+            float d0_acc = d0_reduce_op.GetReductionZeroVal();
+            float d1_acc = d1_reduce_op.GetReductionZeroVal();
             for(int n = 0; n < N; ++n)
             {
-                d0_reduce_op.Reduce(d0_acc, c_m_n_host_result(m, n));
-                d1_reduce_op.Reduce(d1_acc, c_m_n_host_result(m, n));
+                float d0_val = ck::type_convert<float>(c_m_n_host_result(m, n));
+                float d1_val;
+                d1_element_op(d1_val, d0_val);
+                d0_reduce_op(d0_acc, d0_val);
+                d1_reduce_op(d1_acc, d1_val);
             }
-            d0_m_host_result(m) = d0_acc;
-            d1_m_host_result(m) = d1_acc;
+            d0_m_host_result(m) = ck::type_convert<DDataType>(d0_acc);
+            d1_m_host_result(m) = ck::type_convert<DDataType>(d1_acc);
         }
     }
@@ -232,8 +237,7 @@ bool profile_gemm_reduce_impl(int do_verification,
         a_element_op,
         b_element_op,
         c_element_op,
-        d0_reduce_op,
-        d1_reduce_op);
+        d1_element_op);
     auto invoker_ptr = gemm_ptr->MakeInvokerPointer();
...
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_conv_fwd_impl.hpp"
enum struct ConvDataType
{
F32_F32_F32, // 0
F16_F16_F16, // 1
BF16_BF16_BF16, // 2
INT8_INT8_INT8, // 3
};
enum struct ConvInputLayout
{
NCHW, // 0
NHWC, // 1
};
enum struct ConvWeightLayout
{
KCYX, // 0
KYXC, // 1
};
enum struct ConvOutputLayout
{
NKHW, // 0
NHWK, // 1
};
int profile_conv_fwd(int argc, char* argv[])
{
if(argc != 25)
{
printf("arg1: tensor operation (conv_fwd: ForwardConvolution)\n");
printf("arg2: data type (0: fp32; 1: fp16)\n");
printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n");
printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n");
printf("arg5: output tensor layout (0: NKHW; 1: NHWK)\n");
printf("arg6: verification (0: no; 1: yes)\n");
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg8: print tensor value (0: no; 1: yes)\n");
printf("arg9: run kernel # of times (>1)\n");
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
exit(1);
}
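    // Hypothetical invocation (argument values chosen for illustration only):
    //   ckProfiler conv_fwd 1 1 1 1 1 1 0 5 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1
    // i.e. fp16 data, NHWC/KYXC/NHWK layouts, verification on, integer init,
    // no tensor printing, 5 timed runs, N=128, K=256, C=192, 3x3 filter,
    // 71x71 input, stride 2, dilation 1, and 1-pixel padding on each side.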
const auto data_type = static_cast<ConvDataType>(std::stoi(argv[2]));
const auto in_layout = static_cast<ConvInputLayout>(std::stoi(argv[3]));
const auto wei_layout = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
const auto out_layout = static_cast<ConvOutputLayout>(std::stoi(argv[5]));
const bool do_verification = std::stoi(argv[6]);
const int init_method = std::stoi(argv[7]);
const bool do_log = std::stoi(argv[8]);
const int nrepeat = std::stoi(argv[9]);
const ck::index_t N = std::stoi(argv[10]);
const ck::index_t K = std::stoi(argv[11]);
const ck::index_t C = std::stoi(argv[12]);
const ck::index_t Y = std::stoi(argv[13]);
const ck::index_t X = std::stoi(argv[14]);
const ck::index_t Hi = std::stoi(argv[15]);
const ck::index_t Wi = std::stoi(argv[16]);
const ck::index_t conv_stride_h = std::stoi(argv[17]);
const ck::index_t conv_stride_w = std::stoi(argv[18]);
const ck::index_t conv_dilation_h = std::stoi(argv[19]);
const ck::index_t conv_dilation_w = std::stoi(argv[20]);
const ck::index_t in_left_pad_h = std::stoi(argv[21]);
const ck::index_t in_left_pad_w = std::stoi(argv[22]);
const ck::index_t in_right_pad_h = std::stoi(argv[23]);
const ck::index_t in_right_pad_w = std::stoi(argv[24]);
const ck::index_t YEff = (Y - 1) * conv_dilation_h + 1;
const ck::index_t XEff = (X - 1) * conv_dilation_w + 1;
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
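    // Worked example (hypothetical values, not from the original source):
    // Hi = 71, Y = 3, dilation 1 => YEff = 3; with stride 2 and pads 1/1,
    // Ho = (71 + 1 + 1 - 3) / 2 + 1 = 36.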
if(data_type == ConvDataType::F32_F32_F32 && in_layout == ConvInputLayout::NHWC &&
wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
{
ck::profiler::profile_conv_fwd_impl<2,
float,
float,
float,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
do_verification,
init_method,
do_log,
nrepeat,
N,
K,
C,
std::vector<ck::index_t>{Hi, Wi},
std::vector<ck::index_t>{Y, X},
std::vector<ck::index_t>{Ho, Wo},
std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w});
}
else if(data_type == ConvDataType::F16_F16_F16 && in_layout == ConvInputLayout::NHWC &&
wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
{
ck::profiler::profile_conv_fwd_impl<2,
ck::half_t,
ck::half_t,
ck::half_t,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
do_verification,
init_method,
do_log,
nrepeat,
N,
K,
C,
std::vector<ck::index_t>{Hi, Wi},
std::vector<ck::index_t>{Y, X},
std::vector<ck::index_t>{Ho, Wo},
std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w});
}
else if(data_type == ConvDataType::BF16_BF16_BF16 && in_layout == ConvInputLayout::NHWC &&
wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
{
ck::profiler::profile_conv_fwd_impl<2,
uint16_t,
uint16_t,
uint16_t,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
do_verification,
init_method,
do_log,
nrepeat,
N,
K,
C,
std::vector<ck::index_t>{Hi, Wi},
std::vector<ck::index_t>{Y, X},
std::vector<ck::index_t>{Ho, Wo},
std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w});
}
else if(data_type == ConvDataType::INT8_INT8_INT8 && in_layout == ConvInputLayout::NHWC &&
wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
{
ck::profiler::profile_conv_fwd_impl<2,
int8_t,
int8_t,
int8_t,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
do_verification,
init_method,
do_log,
nrepeat,
N,
K,
C,
std::vector<ck::index_t>{Hi, Wi},
std::vector<ck::index_t>{Y, X},
std::vector<ck::index_t>{Ho, Wo},
std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w});
}
else
{
throw std::runtime_error("wrong! this Conv data_type & layout is not implemented");
}
return 1;
}
@@ -7,6 +7,8 @@
 #include "profile_convnd_bwd_data_impl.hpp"
+namespace {
+
 enum struct ConvDataType
 {
     F32_F32_F32, // 0
@@ -76,6 +78,8 @@ ck::utils::conv::ConvParams parse_conv_params(int num_dim_spatial, char* argv[],
     return params;
 }
+} // namespace
+
 int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
 {
     const int preParams = 10;
...
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <half.hpp>
#include "conv_fwd_util.hpp"
#include "element_wise_operation.hpp"
#include "fill.hpp"
#include "profile_convnd_fwd.hpp"
#include "tensor_layout.hpp"
namespace {
enum struct ConvDataType
{
F32_F32_F32, // 0
F16_F16_F16, // 1
BF16_BF16_BF16, // 2
INT8_INT8_INT8, // 3
};
enum struct ConvDataLayout
{
NCHW, // 0
NHWC, // 1
};
namespace ctl = ck::tensor_layout::convolution;
template <int NDim, ConvDataLayout DataLayout>
struct ConvolutionLayouts;
template <>
struct ConvolutionLayouts<1, ConvDataLayout::NHWC>
{
typedef ctl::NWC Input;
typedef ctl::KXC Weight;
typedef ctl::NWK Output;
};
template <>
struct ConvolutionLayouts<2, ConvDataLayout::NHWC>
{
typedef ctl::NHWC Input;
typedef ctl::KYXC Weight;
typedef ctl::NHWK Output;
};
template <>
struct ConvolutionLayouts<3, ConvDataLayout::NHWC>
{
typedef ctl::NDHWC Input;
typedef ctl::KZYXC Weight;
typedef ctl::NDHWK Output;
};
template <>
struct ConvolutionLayouts<1, ConvDataLayout::NCHW>
{
typedef ctl::NCW Input;
typedef ctl::KCX Weight;
typedef ctl::NKW Output;
};
template <>
struct ConvolutionLayouts<2, ConvDataLayout::NCHW>
{
typedef ctl::NCHW Input;
typedef ctl::KCYX Weight;
typedef ctl::NKHW Output;
};
template <>
struct ConvolutionLayouts<3, ConvDataLayout::NCHW>
{
typedef ctl::NCDHW Input;
typedef ctl::KCZYX Weight;
typedef ctl::NKDHW Output;
};
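// Example: ConvolutionLayouts<2, ConvDataLayout::NHWC> resolves Input to ctl::NHWC,
// Weight to ctl::KYXC, and Output to ctl::NHWK (see the specializations above).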
void print_use_msg()
{
std::cout << "arg1: tensor operation (conv_fwd: ForwardConvolution)\n"
<< "arg2: data type (0: fp32; 1: fp16, 2: bf16, 3: int8)\n"
<< "arg3: data layout (0: NCHW; 1: NHWC)\n"
<< "arg4: verification (0=no, 1=yes)\n"
<< "arg5: initialization (0=no init, 1=integer value, 2=decimal value)\n"
<< "arg6: print tensor value (0: no; 1: yes)\n"
<< "arg7: run kernel # of times (>1)\n"
<< "arg8: N spatial dimensions (default 2)\n"
<< "Following arguments (depending on number of spatial dims):\n"
<< " N, K, C, \n"
<< " <filter spatial dimensions>, (ie Y, X for 2D)\n"
<< " <input image spatial dimensions>, (ie Hi, Wi for 2D)\n"
<< " <strides>, (ie Sy, Sx for 2D)\n"
<< " <dilations>, (ie Dy, Dx for 2D)\n"
<< " <left padding>, (ie LeftPy, LeftPx for 2D)\n"
<< " <right padding>, (ie RightPy, RightPx for 2D)\n"
<< std::endl;
}
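// Hypothetical 2D invocation matching the usage text above (values for illustration only):
//   ckProfiler conv_fwd 1 1 1 1 0 100 2 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1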
ck::utils::conv::ConvParams parse_params(int num_dim_spatial, int argc, char* argv[])
{
// (N, K, C) + num_dim_spatial * 6 (filter, input, strides, dilations, pad left, pad right)
int conv_args = 3 + num_dim_spatial * 6;
int cmdline_nargs = conv_args + 9;
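    // e.g. for num_dim_spatial == 2: conv_args = 3 + 2 * 6 = 15,
    // so cmdline_nargs = 15 + 9 = 24.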
if(cmdline_nargs != argc)
{
print_use_msg();
exit(1);
}
int arg_idx = 9;
return ck::utils::conv::parse_conv_params(num_dim_spatial, arg_idx, argv);
}
template <int NDim,
typename InDataType,
typename WeiDataType,
typename OutDataType,
typename ConvLayouts>
void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
bool do_verification,
bool do_log,
int nrepeat,
int init_method,
ConvLayouts)
{
using namespace std::placeholders;
using namespace ck::utils;
std::unique_ptr<OpInstance<OutDataType, InDataType, WeiDataType>> conv_instance;
switch(init_method)
{
case 0:
conv_instance =
std::make_unique<conv::ConvFwdOpInstance<InDataType,
WeiDataType,
OutDataType,
typename ConvLayouts::Input,
typename ConvLayouts::Weight,
typename ConvLayouts::Output>>(params, false);
break;
case 1:
conv_instance = std::make_unique<
conv::ConvFwdOpInstance<InDataType,
WeiDataType,
OutDataType,
typename ConvLayouts::Input,
typename ConvLayouts::Weight,
typename ConvLayouts::Output,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::utils::FillUniform<int>,
ck::utils::FillUniform<int>>>(
params, true, ck::utils::FillUniform<int>{}, ck::utils::FillUniform<int>{});
break;
case 2:
conv_instance = std::make_unique<
conv::ConvFwdOpInstance<InDataType,
WeiDataType,
OutDataType,
typename ConvLayouts::Input,
typename ConvLayouts::Weight,
typename ConvLayouts::Output,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::utils::FillUniform<InDataType>,
ck::utils::FillUniform<WeiDataType>>>(
params,
true,
ck::utils::FillUniform<InDataType>{},
ck::utils::FillUniform<WeiDataType>{});
break;
default: throw std::runtime_error("Unsupported init method!");
}
auto reference_conv_fwd_fun = std::bind(
conv::run_reference_convolution_forward<NDim, InDataType, WeiDataType, OutDataType>,
params,
_1,
_2,
_3);
OpInstanceRunEngine<InDataType, WeiDataType, OutDataType> run_engine(*conv_instance,
reference_conv_fwd_fun);
auto best_conf = run_engine.Profile(
conv::ConvolutionFwdInstances<InDataType, WeiDataType, OutDataType>::template Get<NDim>(),
nrepeat,
do_verification,
do_log);
std::cout << "Best configuration parameters:"
<< "\nname: " << best_conf.best_op_name << "\navg_time: " << best_conf.best_avg_time
<< "\ntflops: " << best_conf.best_tflops << "\nGB/s: " << best_conf.best_gb_per_sec
<< std::endl;
}
template <int NDim>
void profile_convnd_instances(ConvDataType data_type,
ConvDataLayout data_layout,
const ck::utils::conv::ConvParams& params,
bool do_verification,
bool do_log,
int nrepeat,
int init_method)
{
switch(data_layout)
{
case ConvDataLayout::NHWC: {
switch(data_type)
{
case ConvDataType::F32_F32_F32:
profile_convnd_instances_impl<NDim, float, float, float>(
params,
do_verification,
do_log,
nrepeat,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break;
case ConvDataType::F16_F16_F16:
profile_convnd_instances_impl<NDim, ck::half_t, ck::half_t, ck::half_t>(
params,
do_verification,
do_log,
nrepeat,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break;
case ConvDataType::BF16_BF16_BF16:
profile_convnd_instances_impl<NDim, ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>(
params,
do_verification,
do_log,
nrepeat,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break;
case ConvDataType::INT8_INT8_INT8:
profile_convnd_instances_impl<NDim, int8_t, int8_t, int8_t>(
params,
do_verification,
do_log,
nrepeat,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
break;
}
break;
}
case ConvDataLayout::NCHW: {
switch(data_type)
{
case ConvDataType::F32_F32_F32:
profile_convnd_instances_impl<NDim, float, float, float>(
params,
do_verification,
do_log,
nrepeat,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break;
case ConvDataType::F16_F16_F16:
profile_convnd_instances_impl<NDim, ck::half_t, ck::half_t, ck::half_t>(
params,
do_verification,
do_log,
nrepeat,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break;
case ConvDataType::BF16_BF16_BF16:
profile_convnd_instances_impl<NDim, ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>(
params,
do_verification,
do_log,
nrepeat,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break;
case ConvDataType::INT8_INT8_INT8:
profile_convnd_instances_impl<NDim, int8_t, int8_t, int8_t>(
params,
do_verification,
do_log,
nrepeat,
init_method,
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
break;
}
break;
}
}
}
} // namespace
int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
{
using namespace ck::utils::conv;
ConvDataType data_type{ConvDataType::F32_F32_F32};
ConvDataLayout data_layout{ConvDataLayout::NHWC};
bool do_verification{true};
int init_method{2};
bool do_log{false};
int nrepeat{100};
int num_dim_spatial{2};
ConvParams params;
if(argc >= 4)
{
data_type = static_cast<ConvDataType>(std::stoi(argv[2]));
data_layout = static_cast<ConvDataLayout>(std::stoi(argv[3]));
}
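    // With fewer arguments the defaults above apply; e.g. a (hypothetical)
    // "ckProfiler conv_fwd 1 1" profiles fp16/NHWC with the default problem size.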
if(argc >= 9)
{
do_verification = std::stoi(argv[4]);
init_method = std::stoi(argv[5]);
do_log = std::stoi(argv[6]);
nrepeat = std::stoi(argv[7]);
num_dim_spatial = std::stoi(argv[8]);
}
if(argc >= 10)
{
params = parse_params(num_dim_spatial, argc, argv);
}
    // TODO: print a nice message describing what is being profiled.
switch(num_dim_spatial)
{
case 1:
profile_convnd_instances<1>(
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
break;
case 2:
profile_convnd_instances<2>(
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
break;
case 3:
profile_convnd_instances<3>(
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
break;
default:
throw std::runtime_error("profile_conv_fwd: unsupported num_dim_spatial value: " +
std::to_string(num_dim_spatial));
}
return 1;
}
@@ -4,6 +4,8 @@
 #include <cstdlib>
 #include <cstring>
+#include "profile_convnd_fwd.hpp"
+
 int profile_gemm(int, char*[]);
 int profile_gemm_bias_2d(int, char*[]);
 int profile_gemm_bias_relu(int, char*[]);
@@ -11,7 +13,6 @@ int profile_gemm_bias_relu_add(int, char*[]);
 int profile_gemm_reduce(int, char*[]);
 int profile_batched_gemm(int, char*[]);
 int profile_grouped_gemm(int, char*[]);
-int profile_conv_fwd(int, char*[]);
 int profile_conv_fwd_bias_relu(int, char*[]);
 int profile_conv_fwd_bias_relu_add(int, char*[]);
 int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
@@ -56,7 +57,7 @@ int main(int argc, char* argv[])
     }
     else if(strcmp(argv[1], "conv_fwd") == 0)
     {
-        return profile_conv_fwd(argc, argv);
+        return ck::profiler::profile_convnd_fwd(argc, argv);
     }
     else if(strcmp(argv[1], "conv_fwd_bias_relu") == 0)
     {
...
-find . -name deps -prune -o -name build -prune -o -iname '*.h' -o -iname '*.hpp' -o -iname '*.cpp' -o -iname '*.h.in' -o -iname '*.hpp.in' -o -iname '*.cpp.in' -o -iname '*.cl' -o -iname '*.cuh' -o -iname '*.cu' | xargs -n 1 -P 16 -I{} -t sh -c 'clang-format-10 -i -style=file {}'
+#find . -name deps -prune -o -name build -prune -o -iname '*.h' -o -iname '*.hpp' -o -iname '*.cpp' -o -iname '*.h.in' -o -iname '*.hpp.in' -o -iname '*.cpp.in' -o -iname '*.cl' -o -iname '*.cuh' -o -iname '*.cu' | xargs -n 1 -P 16 -I{} -t sh -c 'clang-format-10 -i -style=file {}'
+git status --porcelain | awk '$1 != "D" && (match($2, "\\.cpp|hpp")) {print $2}' | xargs -n 1 -P 16 -I{} -t sh -c 'clang-format-10 -i -style=file {}'
#!/usr/bin/env python3
import os, io
import argparse
def print_to_string(*args, **kwargs):
output = io.StringIO()
print(*args, file=output, **kwargs)
contents = output.getvalue()
output.close()
return contents
def parse_args():
parser = argparse.ArgumentParser(description='Parse results from tf benchmark runs')
    parser.add_argument('filename', type=str, help='Log file to parse or directory containing log files')
args = parser.parse_args()
files = []
if os.path.isdir(args.filename):
all_files = os.listdir(args.filename)
for name in all_files:
            if 'log' not in name:
continue
files.append(os.path.join(args.filename, name))
else:
files = [args.filename]
args.files = files
return args
def main():
args = parse_args()
results = []
#parse results
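    # Assumed log format (hypothetical example; adjust indices if it differs):
    #   ... Best Perf <...> <time> <...> <...> <...> <config fields ...>
    # lst[4] is read as the measured time and lst[8:] as the kernel config.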
glue=""
for filename in args.files:
for line in open(filename):
if 'Best Perf' in line:
lst=line.split()
results.append(print_to_string(glue.join(lst[8:]),lst[4]))
#sort results
#read baseline results for the latest develop branch
#write new results to the db
#compare the results to the baseline
#return 0 if performance criteria met, otherwise return 1
print(results)
return 0
if __name__ == '__main__':
main()
\ No newline at end of file
 #!/bin/bash
 ## GPU visibility
 export HIP_VISIBLE_DEVICES=0
-make -j ckProfiler
-DRIVER="./profiler/ckProfiler"
+#make -j ckProfiler
+DRIVER="../build/bin/ckProfiler"
+echo $DRIVER
 OP=$1
 DATATYPE=$2
 LAYOUT=$3
@@ -43,3 +41,13 @@ $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1
 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112
 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160
 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 6656 8192 8192 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3328 4096 4096 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1664 2048 2048 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 832 1024 1024 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7040 8192 8192 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 5120 5632 4096 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2560 2816 2048 -1 -1 -1
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1280 1408 1024 -1 -1 -1
@@ -24,6 +24,7 @@ include_directories(BEFORE
 add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
 add_custom_target(tests)
 function(add_test_executable TEST_NAME)
     message("adding test ${TEST_NAME}")
     add_executable(${TEST_NAME} ${ARGN})
@@ -32,6 +33,20 @@ function(add_test_executable TEST_NAME)
     add_dependencies(check ${TEST_NAME})
 endfunction(add_test_executable TEST_NAME)
+include(GoogleTest)
+
+function(add_gtest_executable TEST_NAME)
+    message("adding gtest ${TEST_NAME}")
+    add_executable(${TEST_NAME} ${ARGN})
+    add_dependencies(tests ${TEST_NAME})
+    add_dependencies(check ${TEST_NAME})
+    # suppress gtest warnings
+    target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors)
+    target_link_libraries(${TEST_NAME} PRIVATE gtest_main)
+    gtest_discover_tests(${TEST_NAME})
+endfunction(add_gtest_executable TEST_NAME)
+
 add_subdirectory(magic_number_division)
 add_subdirectory(space_filling_curve)
 add_subdirectory(conv_util)
...
@@ -4,5 +4,4 @@ include_directories(BEFORE
 )
 add_test_executable(test_conv2d_bwd_weight conv2d_bwd_weight.cpp)
-target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor)
-target_link_libraries(test_conv2d_bwd_weight PRIVATE device_conv2d_bwd_weight_instance)
+target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor device_conv2d_bwd_weight_instance conv_fwd_util)
-add_test_executable(test_conv_util conv_util.cpp)
-target_link_libraries(test_conv_util PRIVATE host_tensor)
+add_gtest_executable(test_conv_util conv_util.cpp)
+target_link_libraries(test_conv_util PRIVATE host_tensor conv_fwd_util)
 #include <iostream>
 #include <string>
 #include <vector>
+#include "gtest/gtest.h"
 #include "config.hpp"
 #include "conv_fwd_util.hpp"
@@ -9,196 +10,194 @@
 namespace {
-bool test_conv_params_get_output_spatial_lengths()
+class TestConvUtil : public ::testing::Test
 {
-    bool res{true};
-    // -------------------------- default 2D ------------------------------------
+    public:
+    void SetNDParams(std::size_t ndims)
+    {
+        conv_params.num_dim_spatial = ndims;
+        conv_params.filter_spatial_lengths = std::vector<ck::index_t>(ndims, 3);
+        conv_params.input_spatial_lengths = std::vector<ck::index_t>(ndims, 71);
+        conv_params.conv_filter_strides = std::vector<ck::index_t>(ndims, 2);
+        conv_params.conv_filter_dilations = std::vector<ck::index_t>(ndims, 1);
+        conv_params.input_left_pads = std::vector<ck::index_t>(ndims, 1);
+        conv_params.input_right_pads = std::vector<ck::index_t>(ndims, 1);
+    }
+
+    protected:
+    // ------- default 2D -------
     // input NCHW {128,192,71,71},
     // weights KCYX {256,192,3,3},
     // stride {2,2},
     // dilations {1,1},
     // padding {{1,1}, {1,1}}
     ck::utils::conv::ConvParams conv_params;
+};
+
+} // namespace
+
+TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D)
+{
+    ck::utils::conv::ConvParams conv_params;
     std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(out_spatial_len,
-                               std::vector<ck::index_t>{36, 36},
-                               "Error: ConvParams 2D default constructor.");
+    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
+                                     std::vector<ck::index_t>{36, 36},
+                                     "Error: ConvParams 2D default constructor."));
     conv_params.conv_filter_strides = std::vector<ck::index_t>{1, 1};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(
-        out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}.");
+    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}."));
     conv_params.conv_filter_strides = std::vector<ck::index_t>{2, 2};
     conv_params.input_left_pads = std::vector<ck::index_t>{2, 2};
     conv_params.input_right_pads = std::vector<ck::index_t>{2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(out_spatial_len,
-                               std::vector<ck::index_t>{37, 37},
-                               "Error: ConvParams 2D padding left/right {2,2}.");
+    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::index_t>{37, 37},
                                     "Error: ConvParams 2D padding left/right {2,2}."));
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(
-        out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}.");
+    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}."));
     conv_params.conv_filter_strides = std::vector<ck::index_t>{3, 3};
     conv_params.input_left_pads = std::vector<ck::index_t>{1, 1};
     conv_params.input_right_pads = std::vector<ck::index_t>{1, 1};
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res =
-        ck::utils::check_err(out_spatial_len,
-                             std::vector<ck::index_t>{23, 23},
-                             "Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}.");
+    EXPECT_TRUE(
        ck::utils::check_err(out_spatial_len,
                             std::vector<ck::index_t>{23, 23},
                             "Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}."));
+}

-    // -------------------------- 1D ------------------------------------
-    conv_params.num_dim_spatial = 1;
-    conv_params.filter_spatial_lengths = std::vector<ck::index_t>{3};
-    conv_params.input_spatial_lengths = std::vector<ck::index_t>{71};
-    conv_params.conv_filter_strides = std::vector<ck::index_t>{2};
-    conv_params.conv_filter_dilations = std::vector<ck::index_t>{1};
-    conv_params.input_left_pads = std::vector<ck::index_t>{1};
-    conv_params.input_right_pads = std::vector<ck::index_t>{1};
+TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths1D)
+{
+    SetNDParams(1);

-    out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(
-        out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D.");
+    std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
+    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D."));
     conv_params.conv_filter_strides = std::vector<ck::index_t>{1};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(
-        out_spatial_len, std::vector<ck::index_t>{71}, "Error: ConvParams 1D stride {1}.");
+    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len, std::vector<ck::index_t>{71}, "Error: ConvParams 1D stride {1}."));
     conv_params.conv_filter_strides = std::vector<ck::index_t>{2};
     conv_params.input_left_pads = std::vector<ck::index_t>{2};
     conv_params.input_right_pads = std::vector<ck::index_t>{2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(out_spatial_len,
-                               std::vector<ck::index_t>{37},
-                               "Error: ConvParams 1D padding left/right {2}.");
+    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::index_t>{37},
                                     "Error: ConvParams 1D padding left/right {2}."));
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(
-        out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D dilation {2}.");
+    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D dilation {2}."));
     conv_params.conv_filter_strides = std::vector<ck::index_t>{3};
     conv_params.input_left_pads = std::vector<ck::index_t>{1};
     conv_params.input_right_pads = std::vector<ck::index_t>{1};
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(out_spatial_len,
-                               std::vector<ck::index_t>{23},
-                               "Error: ConvParams 1D strides{3}, padding {1}, dilations {2}.");
+    EXPECT_TRUE(
        ck::utils::check_err(out_spatial_len,
                             std::vector<ck::index_t>{23},
                             "Error: ConvParams 1D strides{3}, padding {1}, dilations {2}."));
+}

-    // -------------------------- 3D ------------------------------------
-    conv_params.num_dim_spatial = 3;
-    conv_params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3, 3};
-    conv_params.input_spatial_lengths = std::vector<ck::index_t>{71, 71, 71};
-    conv_params.conv_filter_strides = std::vector<ck::index_t>{2, 2, 2};
-    conv_params.conv_filter_dilations = std::vector<ck::index_t>{1, 1, 1};
-    conv_params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
-    conv_params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
-
-    out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(
-        out_spatial_len, std::vector<ck::index_t>{36, 36, 36}, "Error: ConvParams 3D.");
+TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D)
+{
+    SetNDParams(3);
+
+    std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
+    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len, std::vector<ck::index_t>{36, 36, 36}, "Error: ConvParams 3D."));
     conv_params.conv_filter_strides = std::vector<ck::index_t>{1, 1, 1};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(out_spatial_len,
-                               std::vector<ck::index_t>{71, 71, 71},
-                               "Error: ConvParams 3D stride {1, 1, 1}.");
+    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::index_t>{71, 71, 71},
                                     "Error: ConvParams 3D stride {1, 1, 1}."));
     conv_params.conv_filter_strides = std::vector<ck::index_t>{2, 2, 2};
     conv_params.input_left_pads = std::vector<ck::index_t>{2, 2, 2};
     conv_params.input_right_pads = std::vector<ck::index_t>{2, 2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(out_spatial_len,
-                               std::vector<ck::index_t>{37, 37, 37},
-                               "Error: ConvParams 3D padding left/right {2, 2, 2}.");
+    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::index_t>{37, 37, 37},
                                     "Error: ConvParams 3D padding left/right {2, 2, 2}."));
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(out_spatial_len,
-                               std::vector<ck::index_t>{36, 36, 36},
-                               "Error: ConvParams 3D dilation {2, 2, 2}.");
+    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::index_t>{36, 36, 36},
                                     "Error: ConvParams 3D dilation {2, 2, 2}."));
     conv_params.conv_filter_strides = std::vector<ck::index_t>{3, 3, 3};
     conv_params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
     conv_params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
     conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2, 2};
     out_spatial_len = conv_params.GetOutputSpatialLengths();
-    res = ck::utils::check_err(
-        out_spatial_len,
-        std::vector<ck::index_t>{23, 23, 23},
-        "Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}.");
-    return res;
+    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len,
        std::vector<ck::index_t>{23, 23, 23},
        "Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}."));
 }
-bool test_get_host_tensor_descriptor()
+TEST(ConvUtil, GetHostTensorDescriptor)
 {
-    bool res{true};
     namespace tl = ck::tensor_layout::convolution;
     std::vector<std::size_t> dims{2, 3, 4, 5};
     HostTensorDescriptor h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NHWC{});
-    res =
-        ck::utils::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!");
-    res = ck::utils::check_err(
-        h.GetStrides(), {3 * 4 * 5, 1, 3 * 5, 3}, "Error: wrong NHWC dimensions strides!");
+    EXPECT_TRUE(ck::utils::check_err(
        h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!"));
+    EXPECT_TRUE(ck::utils::check_err(
        h.GetStrides(), {3 * 4 * 5, 1, 3 * 5, 3}, "Error: wrong NHWC dimensions strides!"));
     h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCHW{});
-    res =
-        ck::utils::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!");
-    res = ck::utils::check_err(
-        h.GetStrides(), {3 * 4 * 5, 4 * 5, 5, 1}, "Error: wrong NCHW dimensions strides!");
+    EXPECT_TRUE(ck::utils::check_err(
        h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!"));
+    EXPECT_TRUE(ck::utils::check_err(
        h.GetStrides(), {3 * 4 * 5, 4 * 5, 5, 1}, "Error: wrong NCHW dimensions strides!"));
     dims = std::vector<std::size_t>{2, 3, 4};
     h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NWC{});
-    res = ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!");
-    res =
-        ck::utils::check_err(h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!");
+    EXPECT_TRUE(
        ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!"));
+    EXPECT_TRUE(ck::utils::check_err(
        h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!"));
     h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCW{});
-    res = ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!");
-    res =
-        ck::utils::check_err(h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!");
+    EXPECT_TRUE(
        ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!"));
+    EXPECT_TRUE(ck::utils::check_err(
        h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!"));
     dims = std::vector<std::size_t>{2, 3, 4, 5, 6};
     h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NDHWC{});
-    res = ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!");
-    res = ck::utils::check_err(h.GetStrides(),
-                               {3 * 4 * 5 * 6, // N
-                                1,             // C
-                                3 * 5 * 6,     // D
-                                3 * 6,         // H
-                                3},            // W
-                               "Error: wrong NDHWC dimensions strides!");
+    EXPECT_TRUE(
        ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!"));
+    EXPECT_TRUE(ck::utils::check_err(h.GetStrides(),
                                     {3 * 4 * 5 * 6, // N
                                      1,             // C
                                      3 * 5 * 6,     // D
                                      3 * 6,         // H
                                      3},            // W
                                     "Error: wrong NDHWC dimensions strides!"));
     h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCDHW{});
-    res = ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!");
-    res = ck::utils::check_err(h.GetStrides(),
-                               {3 * 4 * 5 * 6, // N
-                                4 * 5 * 6,     // C
-                                5 * 6,         // D
-                                6,             // H
-                                1},            // W
-                               "Error: wrong NCDHW dimensions strides!");
-    return res;
-}
-} // namespace
-int main(void)
-{
-    bool res = test_conv_params_get_output_spatial_lengths();
-    std::cout << "test_conv_params_get_output_spatial_lengths ..... "
-              << (res ? "SUCCESS" : "FAILURE") << std::endl;
-    res = test_get_host_tensor_descriptor();
-    std::cout << "test_get_host_tensor_descriptor ..... " << (res ? "SUCCESS" : "FAILURE")
-              << std::endl;
-    return res ? 0 : 1;
+    EXPECT_TRUE(
        ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!"));
+    EXPECT_TRUE(ck::utils::check_err(h.GetStrides(),
                                     {3 * 4 * 5 * 6, // N
                                      4 * 5 * 6,     // C
                                      5 * 6,         // D
                                      6,             // H
                                      1},            // W
                                     "Error: wrong NCDHW dimensions strides!"));
 }
@@ -4,5 +4,4 @@ include_directories(BEFORE
 )
 add_test_executable(test_convnd_bwd_data convnd_bwd_data.cpp)
-target_link_libraries(test_convnd_bwd_data PRIVATE host_tensor)
-target_link_libraries(test_convnd_bwd_data PRIVATE device_convnd_bwd_data_instance)
+target_link_libraries(test_convnd_bwd_data PRIVATE host_tensor device_convnd_bwd_data_instance conv_fwd_util)
 add_custom_target(test_convnd_fwd)
-add_test_executable(test_conv1d_fwd conv1d_fwd.cpp)
-target_link_libraries(test_conv1d_fwd PRIVATE host_tensor)
-target_link_libraries(test_conv1d_fwd PRIVATE device_conv1d_fwd_instance)
+add_gtest_executable(test_conv1d_fwd conv1d_fwd.cpp)
+target_link_libraries(test_conv1d_fwd PRIVATE host_tensor device_conv1d_fwd_instance conv_fwd_util)
 add_dependencies(test_convnd_fwd test_conv1d_fwd)
-add_test_executable(test_conv2d_fwd conv2d_fwd.cpp)
-target_link_libraries(test_conv2d_fwd PRIVATE host_tensor)
-target_link_libraries(test_conv2d_fwd PRIVATE device_conv2d_fwd_instance)
+add_gtest_executable(test_conv2d_fwd conv2d_fwd.cpp)
+target_link_libraries(test_conv2d_fwd PRIVATE host_tensor device_conv2d_fwd_instance conv_fwd_util)
 add_dependencies(test_convnd_fwd test_conv2d_fwd)
-add_test_executable(test_conv3d_fwd conv3d_fwd.cpp)
-target_link_libraries(test_conv3d_fwd PRIVATE host_tensor)
-target_link_libraries(test_conv3d_fwd PRIVATE device_conv3d_fwd_instance)
+add_gtest_executable(test_conv3d_fwd conv3d_fwd.cpp)
+target_link_libraries(test_conv3d_fwd PRIVATE host_tensor device_conv3d_fwd_instance conv_fwd_util)
 add_dependencies(test_convnd_fwd test_conv3d_fwd)
@@ -2,155 +2,92 @@
 #include <stdexcept>
 #include <tuple>
 #include <vector>
+#include "gtest/gtest.h"
 #include "data_type.hpp"
 #include "element_wise_operation.hpp"
 #include "conv_fwd_util.hpp"
 #include "conv_util.hpp"
-#include "host_tensor.hpp"
-#include "tensor_layout.hpp"
-#include "check_err.hpp"
-
-// Forward declarations for conv instances.
-using DeviceConvFwdNoOpPtr =
-    ck::tensor_operation::device::DeviceConvFwdPtr<ck::tensor_operation::element_wise::PassThrough,
                                                   ck::tensor_operation::element_wise::PassThrough,
                                                   ck::tensor_operation::element_wise::PassThrough>;
-
-namespace ck {
-namespace tensor_operation {
-namespace device {
-namespace device_conv1d_fwd_instance {
-
-void add_device_conv1d_fwd_xdl_nwc_kxc_nwk_bf16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
-void add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
-void add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instances(std::vector<DeviceConvFwdNoOpPtr>&);
-void add_device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instances(std::vector<DeviceConvFwdNoOpPtr>&);
-
-} // namespace device_conv1d_fwd_instance
-} // namespace device
-} // namespace tensor_operation
-} // namespace ck
-
 namespace {
-bool test_conv1D_nwc()
+template <typename T>
+bool test_conv1d_nwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs)
 {
-    bool res{true};
+    using namespace std::placeholders;
+    using namespace ck::utils;
+    namespace ctl = ck::tensor_layout::convolution;
     ck::utils::conv::ConvParams params;
     params.num_dim_spatial = 1;
-    params.N = 2;
-    params.K = 16;
-    params.C = 4;
     params.filter_spatial_lengths = std::vector<ck::index_t>{3};
-    params.input_spatial_lengths = std::vector<ck::index_t>{16};
-    params.conv_filter_strides = std::vector<ck::index_t>{1};
+    params.input_spatial_lengths = std::vector<ck::index_t>{71};
+    params.conv_filter_strides = std::vector<ck::index_t>{2};
     params.conv_filter_dilations = std::vector<ck::index_t>{1};
     params.input_left_pads = std::vector<ck::index_t>{1};
     params.input_right_pads = std::vector<ck::index_t>{1};
-    auto host_tensors =
-        ck::utils::conv::get_host_tensors<float,
                                          float,
                                          float,
                                          ck::tensor_layout::convolution::NWC,
                                          ck::tensor_layout::convolution::KXC,
                                          ck::tensor_layout::convolution::NWK>(params);
-    const Tensor<float>& input = std::get<0>(host_tensors);
-    const Tensor<float>& weights = std::get<1>(host_tensors);
-    Tensor<float>& host_output = std::get<2>(host_tensors);
-    Tensor<float>& device_output = std::get<3>(host_tensors);
-    ck::utils::conv::run_reference_convolution_forward<1>(params, input, weights, host_output);
-    test::conv::RunConv<1>(params, input, weights, device_output);
-    res = res &&
-          ck::utils::check_err(
              device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f);
-    return res;
-}
+    conv::ConvFwdOpInstance<T, T, T, ctl::NWC, ctl::KCX, ctl::NWK> conv_instance(params);

-template <typename T>
-bool test_conv1d_nwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
-{
-    ck::utils::conv::ConvParams params;
-    params.num_dim_spatial = 1;
-    params.N = 2;
-    params.K = 16;
-    params.C = 4;
-    params.filter_spatial_lengths = std::vector<ck::index_t>{3};
-    params.input_spatial_lengths = std::vector<ck::index_t>{71};
-    params.conv_filter_strides = std::vector<ck::index_t>{2};
-    params.conv_filter_dilations = std::vector<ck::index_t>{1};
-    params.input_left_pads = std::vector<ck::index_t>{1};
-    params.input_right_pads = std::vector<ck::index_t>{1};
-    auto host_tensors =
-        ck::utils::conv::get_host_tensors<T,
                                          T,
                                          T,
                                          ck::tensor_layout::convolution::NWC,
                                          ck::tensor_layout::convolution::KXC,
                                          ck::tensor_layout::convolution::NWK>(params);
-    const Tensor<T>& input = std::get<0>(host_tensors);
-    const Tensor<T>& weights = std::get<1>(host_tensors);
-    Tensor<T>& host_output = std::get<2>(host_tensors);
-    Tensor<T>& device_output = std::get<3>(host_tensors);
-    ck::utils::conv::run_reference_convolution_forward<1>(params, input, weights, host_output);
-    return ck::utils::conv::run_convolution_forward_instances<1>(
        params, conv_ptrs, input, weights, device_output, host_output);
-}
+    auto reference_conv_fwd_fun =
        std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
+    OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
+    return run_engine.Test(conv_ptrs);
+}
+} // anonymous namespace

-bool test_conv1d_nwc_bf16_instances()
+TEST(Conv1DFwdNWC, TestConv1D)
 {
-    std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
-    ck::tensor_operation::device::device_conv1d_fwd_instance::
        add_device_conv1d_fwd_xdl_nwc_kxc_nwk_bf16_instances(conv_ptrs);
-    return test_conv1d_nwc_instances<ck::bhalf_t>(conv_ptrs);
-}
+    using namespace std::placeholders;
+    using namespace ck::utils;
+    namespace ctl = ck::tensor_layout::convolution;
+    ck::utils::conv::ConvParams params;
+    params.num_dim_spatial = 1;
+    params.filter_spatial_lengths = std::vector<ck::index_t>{3};
+    params.input_spatial_lengths = std::vector<ck::index_t>{16};
+    params.conv_filter_strides = std::vector<ck::index_t>{1};
+    params.conv_filter_dilations = std::vector<ck::index_t>{1};
+    params.input_left_pads = std::vector<ck::index_t>{1};
+    params.input_right_pads = std::vector<ck::index_t>{1};
+    std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
+    test::conv::get_test_convolution_fwd_instance<1>(conv_ptrs);
+    conv::ConvFwdOpInstance<float, float, float, ctl::NWC, ctl::KCX, ctl::NWK> conv_instance(
        params);
+
+    auto reference_conv_fwd_fun = std::bind(
        conv::run_reference_convolution_forward<1, float, float, float>, params, _1, _2, _3);
+    OpInstanceRunEngine<float, float, float> run_engine(conv_instance, reference_conv_fwd_fun);
+    run_engine.SetAtol(1e-5);
+    run_engine.SetRtol(1e-4);
+    EXPECT_TRUE(run_engine.Test(conv_ptrs));
+}

-bool test_conv1d_nwc_f16_instances()
+TEST(Conv1DFwdNWC, Bf16Iinstances)
 {
-    std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
-    ck::tensor_operation::device::device_conv1d_fwd_instance::
        add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f16_instances(conv_ptrs);
-    return test_conv1d_nwc_instances<ck::half_t>(conv_ptrs);
-}
+    EXPECT_TRUE(test_conv1d_nwc_instances<ck::bhalf_t>(
        ck::utils::conv::ConvolutionFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>::Get<1>()));
+}

-bool test_conv1d_nwc_f32_instances()
+TEST(Conv1DFwdNWC, F16Instances)
 {
-    std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
-    ck::tensor_operation::device::device_conv1d_fwd_instance::
        add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instances(conv_ptrs);
-    return test_conv1d_nwc_instances<float>(conv_ptrs);
-}
+    EXPECT_TRUE(test_conv1d_nwc_instances<ck::half_t>(
        ck::utils::conv::ConvolutionFwdInstances<ck::half_t, ck::half_t, ck::half_t>::Get<1>()));
+}

-bool test_conv1d_nwc_int8_instances()
+TEST(Conv1DFwdNWC, F32Instances)
 {
-    std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
-    ck::tensor_operation::device::device_conv1d_fwd_instance::
        add_device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instances(conv_ptrs);
-    return test_conv1d_nwc_instances<int8_t>(conv_ptrs);
-}
-} // anonymous namespace
+    EXPECT_TRUE(test_conv1d_nwc_instances<float>(
        ck::utils::conv::ConvolutionFwdInstances<float, float, float>::Get<1>()));
+}

-int main()
+TEST(Conv1DFwdNWC, Int8Instances)
 {
-    bool res{true};
-    res = test_conv1D_nwc();
-    std::cout << "test_conv1D_nwc ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
-    res = test_conv1d_nwc_bf16_instances();
-    std::cout << "\nTestConv1DNWCBF16Instances ..... " << (res ? "SUCCESS" : "FAILURE")
              << std::endl;
-    res = test_conv1d_nwc_f16_instances();
-    std::cout << "\ntest_conv1d_nwc_f16_instances ..... " << (res ? "SUCCESS" : "FAILURE")
              << std::endl;
-    res = test_conv1d_nwc_f32_instances();
-    std::cout << "\ntest_conv1d_nwc_f32_instances ..... " << (res ? "SUCCESS" : "FAILURE")
              << std::endl;
-    res = test_conv1d_nwc_int8_instances();
-    std::cout << "\ntes_tconv1_dnw_cint_8instances ..... " << (res ? "SUCCESS" : "FAILURE")
              << std::endl;
-    return res ? 0 : 1;
+    EXPECT_TRUE(test_conv1d_nwc_instances<int8_t>(
        ck::utils::conv::ConvolutionFwdInstances<int8_t, int8_t, int8_t>::Get<1>()));
 }
...
@@ -10,7 +10,8 @@
 #include "host_tensor.hpp"
 #include "sequence.hpp"
-namespace {
+namespace test {
+namespace conv {
 template <ck::index_t... Is>
 using S = ck::Sequence<Is...>;
@@ -19,6 +20,9 @@ using InElementOp = ck::tensor_operation::element_wise::PassThrough;
 using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
 using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
+using DeviceConvFwdNoOpPtr =
+    ck::tensor_operation::device::DeviceConvFwdPtr<InElementOp, WeiElementOp, OutElementOp>;
+
 static constexpr auto ConvFwdDefault =
     ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
@@ -62,26 +66,14 @@ using DeviceConvNDFwdInstance = ck::tensor_operation::device::
                                 1>; // CThreadTransferDstScalarPerVector
 // clang-format on
-} // namespace
-
-namespace test {
-namespace conv {
-
 template <ck::index_t NDim,
           typename InDataType = float,
           typename WeiDataType = float,
           typename OutDataType = float>
-void RunConv(const ck::utils::conv::ConvParams& params,
-             const Tensor<InDataType>& input,
-             const Tensor<WeiDataType>& weights,
-             Tensor<OutDataType>& output)
+void get_test_convolution_fwd_instance(std::vector<DeviceConvFwdNoOpPtr>& instances)
 {
-    ck::utils::conv::run_convolution_forward<NDim,
                                             InDataType,
                                             WeiDataType,
                                             OutDataType,
                                             DeviceConvNDFwdInstance>(
        params, input, weights, output);
+    using ConvInstanceT = DeviceConvNDFwdInstance<NDim, InDataType, WeiDataType, OutDataType>;
+    instances.emplace_back(std::make_unique<ConvInstanceT>());
 }
 } // namespace conv
...
...