Commit 76ee0baf authored by Chao Liu's avatar Chao Liu
Browse files

Merge remote-tracking branch 'origin/develop' into improve_pipeline

parents 4816890d 1a0cd5d1
......@@ -4,6 +4,8 @@
#include <cstdlib>
#include <cstring>
#include "profile_convnd_fwd.hpp"
int profile_gemm(int, char*[]);
int profile_gemm_bias_2d(int, char*[]);
int profile_gemm_bias_relu(int, char*[]);
......@@ -11,7 +13,6 @@ int profile_gemm_bias_relu_add(int, char*[]);
int profile_gemm_reduce(int, char*[]);
int profile_batched_gemm(int, char*[]);
int profile_grouped_gemm(int, char*[]);
int profile_conv_fwd(int, char*[]);
int profile_conv_fwd_bias_relu(int, char*[]);
int profile_conv_fwd_bias_relu_add(int, char*[]);
int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
......@@ -56,7 +57,7 @@ int main(int argc, char* argv[])
}
else if(strcmp(argv[1], "conv_fwd") == 0)
{
return profile_conv_fwd(argc, argv);
return ck::profiler::profile_convnd_fwd(argc, argv);
}
else if(strcmp(argv[1], "conv_fwd_bias_relu") == 0)
{
......
......@@ -4,5 +4,4 @@ include_directories(BEFORE
)
add_test_executable(test_conv2d_bwd_weight conv2d_bwd_weight.cpp)
target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor)
target_link_libraries(test_conv2d_bwd_weight PRIVATE device_conv2d_bwd_weight_instance)
target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor device_conv2d_bwd_weight_instance conv_fwd_util)
add_test_executable(test_conv_util conv_util.cpp)
target_link_libraries(test_conv_util PRIVATE host_tensor)
target_link_libraries(test_conv_util PRIVATE host_tensor conv_fwd_util)
......@@ -4,5 +4,4 @@ include_directories(BEFORE
)
add_test_executable(test_convnd_bwd_data convnd_bwd_data.cpp)
target_link_libraries(test_convnd_bwd_data PRIVATE host_tensor)
target_link_libraries(test_convnd_bwd_data PRIVATE device_convnd_bwd_data_instance)
target_link_libraries(test_convnd_bwd_data PRIVATE host_tensor device_convnd_bwd_data_instance conv_fwd_util)
add_custom_target(test_convnd_fwd)
add_test_executable(test_conv1d_fwd conv1d_fwd.cpp)
target_link_libraries(test_conv1d_fwd PRIVATE host_tensor)
target_link_libraries(test_conv1d_fwd PRIVATE device_conv1d_fwd_instance)
target_link_libraries(test_conv1d_fwd PRIVATE host_tensor device_conv1d_fwd_instance conv_fwd_util)
target_link_libraries(test_conv1d_fwd PRIVATE )
add_dependencies(test_convnd_fwd test_conv1d_fwd)
add_test_executable(test_conv2d_fwd conv2d_fwd.cpp)
target_link_libraries(test_conv2d_fwd PRIVATE host_tensor)
target_link_libraries(test_conv2d_fwd PRIVATE device_conv2d_fwd_instance)
target_link_libraries(test_conv2d_fwd PRIVATE host_tensor device_conv2d_fwd_instance conv_fwd_util)
add_dependencies(test_convnd_fwd test_conv2d_fwd)
add_test_executable(test_conv3d_fwd conv3d_fwd.cpp)
target_link_libraries(test_conv3d_fwd PRIVATE host_tensor)
target_link_libraries(test_conv3d_fwd PRIVATE device_conv3d_fwd_instance)
target_link_libraries(test_conv3d_fwd PRIVATE host_tensor device_conv3d_fwd_instance conv_fwd_util)
add_dependencies(test_convnd_fwd test_conv3d_fwd)
......@@ -7,37 +7,15 @@
#include "element_wise_operation.hpp"
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "host_tensor.hpp"
#include "tensor_layout.hpp"
#include "check_err.hpp"
// Forward declarations for conv instances.
using DeviceConvFwdNoOpPtr =
ck::tensor_operation::device::DeviceConvFwdPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>;
namespace ck {
namespace tensor_operation {
namespace device {
namespace device_conv1d_fwd_instance {
void add_device_conv1d_fwd_xdl_nwc_kxc_nwk_bf16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instances(std::vector<DeviceConvFwdNoOpPtr>&);
} // namespace device_conv1d_fwd_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
namespace {
bool test_conv1D_nwc()
{
bool res{true};
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = 1;
params.N = 2;
......@@ -50,30 +28,26 @@ bool test_conv1D_nwc()
params.input_left_pads = std::vector<ck::index_t>{1};
params.input_right_pads = std::vector<ck::index_t>{1};
auto host_tensors =
ck::utils::conv::get_host_tensors<float,
float,
float,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(params);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& host_output = std::get<2>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
ck::utils::conv::run_reference_convolution_forward<1>(params, input, weights, host_output);
test::conv::RunConv<1>(params, input, weights, device_output);
res = res &&
ck::utils::check_err(
device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f);
return res;
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<1>(conv_ptrs);
conv::ConvFwdOpInstance<float, float, float, ctl::NWC, ctl::KCX, ctl::NWK> conv_instance(
params);
auto reference_conv_fwd_fun = std::bind(
conv::run_reference_convolution_forward<1, float, float, float>, params, _1, _2, _3);
OpInstanceRunEngine<float, float, float> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-5);
run_engine.SetRtol(1e-4);
return run_engine.Test(conv_ptrs);
}
template <typename T>
bool test_conv1d_nwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
bool test_conv1d_nwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs)
{
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = 1;
params.filter_spatial_lengths = std::vector<ck::index_t>{3};
......@@ -83,52 +57,36 @@ bool test_conv1d_nwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptr
params.input_left_pads = std::vector<ck::index_t>{1};
params.input_right_pads = std::vector<ck::index_t>{1};
auto host_tensors =
ck::utils::conv::get_host_tensors<T,
T,
T,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(params);
const Tensor<T>& input = std::get<0>(host_tensors);
const Tensor<T>& weights = std::get<1>(host_tensors);
Tensor<T>& host_output = std::get<2>(host_tensors);
Tensor<T>& device_output = std::get<3>(host_tensors);
ck::utils::conv::run_reference_convolution_forward<1>(params, input, weights, host_output);
return ck::utils::conv::run_convolution_forward_instances<1>(
params, conv_ptrs, input, weights, device_output, host_output);
conv::ConvFwdOpInstance<T, T, T, ctl::NWC, ctl::KCX, ctl::NWK> conv_instance(params);
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
return run_engine.Test(conv_ptrs);
}
bool test_conv1d_nwc_bf16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv1d_fwd_instance::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_bf16_instances(conv_ptrs);
return test_conv1d_nwc_instances<ck::bhalf_t>(conv_ptrs);
return test_conv1d_nwc_instances<ck::bhalf_t>(
ck::utils::conv::ConvolutionFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>::Get<1>());
}
bool test_conv1d_nwc_f16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv1d_fwd_instance::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f16_instances(conv_ptrs);
return test_conv1d_nwc_instances<ck::half_t>(conv_ptrs);
return test_conv1d_nwc_instances<ck::half_t>(
ck::utils::conv::ConvolutionFwdInstances<ck::half_t, ck::half_t, ck::half_t>::Get<1>());
}
bool test_conv1d_nwc_f32_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv1d_fwd_instance::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instances(conv_ptrs);
return test_conv1d_nwc_instances<float>(conv_ptrs);
return test_conv1d_nwc_instances<float>(
ck::utils::conv::ConvolutionFwdInstances<float, float, float>::Get<1>());
}
bool test_conv1d_nwc_int8_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv1d_fwd_instance::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instances(conv_ptrs);
return test_conv1d_nwc_instances<int8_t>(conv_ptrs);
return test_conv1d_nwc_instances<int8_t>(
ck::utils::conv::ConvolutionFwdInstances<int8_t, int8_t, int8_t>::Get<1>());
}
} // anonymous namespace
......@@ -149,7 +107,7 @@ int main()
std::cout << "\ntest_conv1d_nwc_f32_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = test_conv1d_nwc_int8_instances();
std::cout << "\ntes_tconv1_dnw_cint_8instances ..... " << (res ? "SUCCESS" : "FAILURE")
std::cout << "\ntest_conv1d_nwc_int8_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
return res ? 0 : 1;
......
#include <half.hpp>
#include <iostream>
#include <stdexcept>
#include <tuple>
#include <vector>
......@@ -8,38 +7,14 @@
#include "element_wise_operation.hpp"
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "host_tensor.hpp"
#include "tensor_layout.hpp"
#include "check_err.hpp"
// Forward declarations for conv instances.
using DeviceConvFwdNoOpPtr =
ck::tensor_operation::device::DeviceConvFwdPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>;
namespace ck {
namespace tensor_operation {
namespace device {
namespace device_conv2d_fwd_instance {
void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances(
std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(std::vector<DeviceConvFwdNoOpPtr>&);
} // namespace device_conv2d_fwd_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
namespace {
bool test_conv2d_nhwc()
{
bool res{true};
using namespace std::placeholders;
using namespace ck::utils;
ck::utils::conv::ConvParams params;
params.N = 2;
params.K = 16;
......@@ -47,25 +22,25 @@ bool test_conv2d_nhwc()
params.input_spatial_lengths = std::vector<ck::index_t>{16, 16};
params.conv_filter_strides = std::vector<ck::index_t>{1, 1};
auto host_tensors = ck::utils::conv::get_host_tensors(params);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& host_output = std::get<2>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
ck::utils::conv::run_reference_convolution_forward<2>(params, input, weights, host_output);
test::conv::RunConv<2>(params, input, weights, device_output);
res = res &&
ck::utils::check_err(
device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f);
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<2>(conv_ptrs);
conv::ConvFwdOpInstance<float, float, float> conv_instance(params);
return res;
auto reference_conv_fwd_fun = std::bind(
conv::run_reference_convolution_forward<2, float, float, float>, params, _1, _2, _3);
OpInstanceRunEngine<float, float, float> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-5);
run_engine.SetRtol(1e-4);
return run_engine.Test(conv_ptrs);
}
template <typename T>
bool test_conv2d_nhwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
bool test_conv2d_nhwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs)
{
ck::utils::conv::ConvParams params;
using namespace std::placeholders;
using namespace ck::utils;
conv::ConvParams params;
params.num_dim_spatial = 2;
params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3};
params.input_spatial_lengths = std::vector<ck::index_t>{71, 71};
......@@ -74,55 +49,36 @@ bool test_conv2d_nhwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_pt
params.input_left_pads = std::vector<ck::index_t>{1, 1};
params.input_right_pads = std::vector<ck::index_t>{1, 1};
auto host_tensors =
ck::utils::conv::get_host_tensors<T,
T,
T,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(params);
const Tensor<T>& input = std::get<0>(host_tensors);
const Tensor<T>& weights = std::get<1>(host_tensors);
Tensor<T>& host_output = std::get<2>(host_tensors);
Tensor<T>& device_output = std::get<3>(host_tensors);
ck::utils::conv::run_reference_convolution_forward<2>(params, input, weights, host_output);
return ck::utils::conv::run_convolution_forward_instances<2>(
params, conv_ptrs, input, weights, device_output, host_output);
conv::ConvFwdOpInstance<T, T, T> conv_instance(params);
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<2, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
return run_engine.Test(conv_ptrs);
}
bool test_conv2d_nhwc_bf16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(conv_ptrs);
return test_conv2d_nhwc_instances<ck::bhalf_t>(conv_ptrs);
return test_conv2d_nhwc_instances<ck::bhalf_t>(
ck::utils::conv::ConvolutionFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>::Get<2>());
}
bool test_conv2d_nhwc_f16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(conv_ptrs);
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances(conv_ptrs);
return test_conv2d_nhwc_instances<ck::half_t>(conv_ptrs);
return test_conv2d_nhwc_instances<ck::half_t>(
ck::utils::conv::ConvolutionFwdInstances<ck::half_t, ck::half_t, ck::half_t>::Get<2>());
}
bool test_conv2d_nhwc_f32_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(conv_ptrs);
return test_conv2d_nhwc_instances<float>(conv_ptrs);
return test_conv2d_nhwc_instances<float>(
ck::utils::conv::ConvolutionFwdInstances<float, float, float>::Get<2>());
}
bool test_conv2d_nhwc_int8_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(conv_ptrs);
return test_conv2d_nhwc_instances<int8_t>(conv_ptrs);
return test_conv2d_nhwc_instances<int8_t>(
ck::utils::conv::ConvolutionFwdInstances<int8_t, int8_t, int8_t>::Get<2>());
}
} // anonymous namespace
......
......@@ -8,37 +8,16 @@
#include "element_wise_operation.hpp"
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "host_tensor.hpp"
#include "tensor_layout.hpp"
#include "check_err.hpp"
// Forward declarations for conv instances.
using DeviceConvFwdNoOpPtr =
ck::tensor_operation::device::DeviceConvFwdPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>;
namespace ck {
namespace tensor_operation {
namespace device {
namespace device_conv3d_fwd_instance {
void add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_bf16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f32_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instances(std::vector<DeviceConvFwdNoOpPtr>&);
} // namespace device_conv3d_fwd_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
namespace {
bool test_conv3d_ndhwc()
{
bool res{true};
ck::utils::conv::ConvParams params;
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
conv::ConvParams params;
params.num_dim_spatial = 3;
params.N = 2;
params.K = 16;
......@@ -50,31 +29,26 @@ bool test_conv3d_ndhwc()
params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors =
ck::utils::conv::get_host_tensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& host_output = std::get<2>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
ck::utils::conv::run_reference_convolution_forward<3>(params, input, weights, host_output);
test::conv::RunConv<3>(params, input, weights, device_output);
res = res &&
ck::utils::check_err(
device_output.mData, host_output.mData, "Error: incorrect results!", 1e-5f, 1e-4f);
return res;
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<3>(conv_ptrs);
conv::ConvFwdOpInstance<float, float, float, ctl::NDHWC, ctl::KZYXC, ctl::NDHWK> conv_instance(
params);
auto reference_conv_fwd_fun = std::bind(
conv::run_reference_convolution_forward<3, float, float, float>, params, _1, _2, _3);
OpInstanceRunEngine<float, float, float> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-5);
run_engine.SetRtol(1e-4);
return run_engine.Test(conv_ptrs);
}
bool test_conv3d_ndhwc_2gb_input()
{
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using namespace ck::utils;
// >2GB Input
ck::utils::conv::ConvParams params;
conv::ConvParams params;
params.num_dim_spatial = 3;
params.N = 2;
params.K = 16;
......@@ -86,39 +60,35 @@ bool test_conv3d_ndhwc_2gb_input()
params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors =
ck::utils::conv::get_host_tensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
try
{
test::conv::RunConv<3>(params, input, weights, device_output);
}
catch(const std::runtime_error& err)
{
std::string err_msg{"Error! device_conv with the specified compilation parameters does "
"not support this Conv problem"};
if(err.what() != err_msg)
{
return false;
}
return true;
}
std::cout << "Error: Failure checking oversized tensor!" << std::endl;
return false;
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<3>(conv_ptrs);
auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
nullptr,
nullptr,
params.N,
params.K,
params.C,
params.input_spatial_lengths,
params.filter_spatial_lengths,
params.GetOutputSpatialLengths(),
params.conv_filter_strides,
params.conv_filter_dilations,
params.input_left_pads,
params.input_right_pads,
PassThrough{},
PassThrough{},
PassThrough{});
return !(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
bool test_conv3d_ndhwc_2gb_filters()
{
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using namespace ck::utils;
// >2GB Filters
ck::utils::conv::ConvParams params;
conv::ConvParams params;
params.num_dim_spatial = 3;
params.N = 2;
params.K = 16;
......@@ -130,39 +100,35 @@ bool test_conv3d_ndhwc_2gb_filters()
params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors =
ck::utils::conv::get_host_tensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
try
{
test::conv::RunConv<3>(params, input, weights, device_output);
}
catch(const std::runtime_error& err)
{
std::string err_msg{"Error! device_conv with the specified compilation parameters does "
"not support this Conv problem"};
if(err.what() != err_msg)
{
return false;
}
return true;
}
std::cout << "Error: Failure checking oversized tensor!" << std::endl;
return false;
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<3>(conv_ptrs);
auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
nullptr,
nullptr,
params.N,
params.K,
params.C,
params.input_spatial_lengths,
params.filter_spatial_lengths,
params.GetOutputSpatialLengths(),
params.conv_filter_strides,
params.conv_filter_dilations,
params.input_left_pads,
params.input_right_pads,
PassThrough{},
PassThrough{},
PassThrough{});
return !(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
bool test_conv3d_ndhwc_2gb_output()
{
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using namespace ck::utils;
// >2GB Output
ck::utils::conv::ConvParams params;
conv::ConvParams params;
params.num_dim_spatial = 3;
params.N = 2;
params.K = 16;
......@@ -174,39 +140,35 @@ bool test_conv3d_ndhwc_2gb_output()
params.input_left_pads = std::vector<ck::index_t>{2, 2, 2};
params.input_right_pads = std::vector<ck::index_t>{2, 2, 2};
auto host_tensors =
ck::utils::conv::get_host_tensors<float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params, false);
const Tensor<float>& input = std::get<0>(host_tensors);
const Tensor<float>& weights = std::get<1>(host_tensors);
Tensor<float>& device_output = std::get<3>(host_tensors);
try
{
test::conv::RunConv<3>(params, input, weights, device_output);
}
catch(const std::runtime_error& err)
{
std::string err_msg{"Error! device_conv with the specified compilation parameters does "
"not support this Conv problem"};
if(err.what() != err_msg)
{
return false;
}
return true;
}
std::cout << "Error: Failure checking oversized tensor!" << std::endl;
return false;
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<3>(conv_ptrs);
auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
nullptr,
nullptr,
params.N,
params.K,
params.C,
params.input_spatial_lengths,
params.filter_spatial_lengths,
params.GetOutputSpatialLengths(),
params.conv_filter_strides,
params.conv_filter_dilations,
params.input_left_pads,
params.input_right_pads,
PassThrough{},
PassThrough{},
PassThrough{});
return !(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
template <typename T>
bool test_conv3d_ndhwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptrs)
bool test_conv3d_ndhwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs)
{
ck::utils::conv::ConvParams params;
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
conv::ConvParams params;
params.N = 64;
params.num_dim_spatial = 3;
params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3, 2};
......@@ -216,53 +178,36 @@ bool test_conv3d_ndhwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_p
params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
auto host_tensors =
ck::utils::conv::get_host_tensors<T,
T,
T,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(params);
const Tensor<T>& input = std::get<0>(host_tensors);
const Tensor<T>& weights = std::get<1>(host_tensors);
Tensor<T>& host_output = std::get<2>(host_tensors);
Tensor<T>& device_output = std::get<3>(host_tensors);
conv::ConvFwdOpInstance<T, T, T, ctl::NDHWC, ctl::KZYXC, ctl::NDHWK> conv_instance(params);
ck::utils::conv::run_reference_convolution_forward<3>(params, input, weights, host_output);
return ck::utils::conv::run_convolution_forward_instances<3>(
params, conv_ptrs, input, weights, device_output, host_output);
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<3, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
return run_engine.Test(conv_ptrs);
}
bool test_conv3d_ndhwc_bf16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_bf16_instances(conv_ptrs);
return test_conv3d_ndhwc_instances<ck::bhalf_t>(conv_ptrs);
return test_conv3d_ndhwc_instances<ck::bhalf_t>(
ck::utils::conv::ConvolutionFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>::Get<3>());
}
bool test_conv3d_ndhwc_f16_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f16_instances(conv_ptrs);
return test_conv3d_ndhwc_instances<ck::half_t>(conv_ptrs);
return test_conv3d_ndhwc_instances<ck::half_t>(
ck::utils::conv::ConvolutionFwdInstances<ck::half_t, ck::half_t, ck::half_t>::Get<3>());
}
bool test_conv3d_ndhwc_f32_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f32_instances(conv_ptrs);
return test_conv3d_ndhwc_instances<float>(conv_ptrs);
return test_conv3d_ndhwc_instances<float>(
ck::utils::conv::ConvolutionFwdInstances<float, float, float>::Get<3>());
}
bool test_conv3d_ndhwc_int8_instances()
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
ck::tensor_operation::device::device_conv3d_fwd_instance::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instances(conv_ptrs);
return test_conv3d_ndhwc_instances<int8_t>(conv_ptrs);
return test_conv3d_ndhwc_instances<int8_t>(
ck::utils::conv::ConvolutionFwdInstances<int8_t, int8_t, int8_t>::Get<3>());
}
} // anonymous namespace
......@@ -293,7 +238,7 @@ int main()
std::cout << "\ntest_conv3d_ndhwc_f32_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = test_conv3d_ndhwc_int8_instances();
std::cout << "\ntest_conv3d_ndhw_cint_8instances ..... " << (res ? "SUCCESS" : "FAILURE")
std::cout << "\ntest_conv3d_ndhwc_int8_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
return res ? 0 : 1;
......
......@@ -10,7 +10,8 @@
#include "host_tensor.hpp"
#include "sequence.hpp"
namespace {
namespace test {
namespace conv {
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
......@@ -19,6 +20,9 @@ using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
using DeviceConvFwdNoOpPtr =
ck::tensor_operation::device::DeviceConvFwdPtr<InElementOp, WeiElementOp, OutElementOp>;
static constexpr auto ConvFwdDefault =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
......@@ -62,26 +66,14 @@ using DeviceConvNDFwdInstance = ck::tensor_operation::device::
1>; // CThreadTransferDstScalarPerVector
// clang-format on
} // namespace
namespace test {
namespace conv {
template <ck::index_t NDim,
typename InDataType = float,
typename WeiDataType = float,
typename OutDataType = float>
void RunConv(const ck::utils::conv::ConvParams& params,
const Tensor<InDataType>& input,
const Tensor<WeiDataType>& weights,
Tensor<OutDataType>& output)
void get_test_convolution_fwd_instance(std::vector<DeviceConvFwdNoOpPtr>& instances)
{
ck::utils::conv::run_convolution_forward<NDim,
InDataType,
WeiDataType,
OutDataType,
DeviceConvNDFwdInstance>(
params, input, weights, output);
using ConvInstanceT = DeviceConvNDFwdInstance<NDim, InDataType, WeiDataType, OutDataType>;
instances.emplace_back(std::make_unique<ConvInstanceT>());
}
} // namespace conv
......
add_test_executable(test_reference_conv_fwd reference_conv_fwd.cpp)
target_link_libraries(test_reference_conv_fwd PRIVATE host_tensor)
target_link_libraries(test_reference_conv_fwd PRIVATE host_tensor conv_fwd_util)
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <half.hpp>
......@@ -10,6 +9,7 @@
#include "config.hpp"
#include "conv_fwd_util.hpp"
#include "element_wise_operation.hpp"
#include "fill.hpp"
#include "host_tensor.hpp"
#include "reference_conv_fwd.hpp"
#include "tensor_layout.hpp"
......@@ -19,35 +19,6 @@ using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
template <typename T>
struct FillMonotonicSeq
{
    T m_init_value{0};
    T m_step{1};

    // Writes an arithmetic sequence into [first, last):
    // m_init_value, m_init_value + m_step, m_init_value + 2*m_step, ...
    template <typename ForwardIter>
    void operator()(ForwardIter first, ForwardIter last) const
    {
        T current = m_init_value;
        for(; first != last; ++first)
        {
            *first = current;
            current += m_step;
        }
    }
};
template <typename T>
struct FillConstant
{
    T m_value{0};

    // Assigns m_value to every element in [first, last).
    template <typename ForwardIter>
    void operator()(ForwardIter first, ForwardIter last) const
    {
        for(; first != last; ++first)
        {
            *first = m_value;
        }
    }
};
template <ck::index_t NDim,
typename InDataType = float,
typename WeiDataType = float,
......@@ -55,8 +26,8 @@ template <ck::index_t NDim,
typename InLayout = ck::tensor_layout::convolution::NHWC,
typename WeiLayout = ck::tensor_layout::convolution::KYXC,
typename OutLayout = ck::tensor_layout::convolution::NHWK,
typename FillInputOp = FillMonotonicSeq<InDataType>,
typename FillWeightsOp = FillConstant<WeiDataType>>
typename FillInputOp = ck::utils::FillMonotonicSeq<InDataType>,
typename FillWeightsOp = ck::utils::FillConstant<WeiDataType>>
Tensor<OutDataType>
run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
const FillInputOp& fill_input_op = FillInputOp{},
......@@ -251,7 +222,7 @@ bool test_conv1d_nwc()
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
params, FillMonotonicSeq<float>{0.f, 0.1f});
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
ref_dims = std::vector<std::size_t>{2, 16, 16};
ref_data = std::vector<float>{
......@@ -349,7 +320,7 @@ bool test_conv3d_ncdhw()
ck::tensor_layout::convolution::NCDHW,
ck::tensor_layout::convolution::KCZYX,
ck::tensor_layout::convolution::NKDHW>(
params, FillMonotonicSeq<float>{0.f, 0.1f});
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
std::vector<std::size_t> ref_dims{1, 1, 4, 4, 4};
std::vector<float> ref_data{
407.7, 410.40002, 413.09998, 415.80002, 423.90002, 426.6, 429.30002, 432.,
......@@ -383,7 +354,7 @@ bool test_conv3d_ncdhw()
ck::tensor_layout::convolution::NCDHW,
ck::tensor_layout::convolution::KCZYX,
ck::tensor_layout::convolution::NKDHW>(
params, FillMonotonicSeq<float>{0.f, 0.1f});
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
ref_dims = std::vector<std::size_t>{1, 2, 4, 4, 4};
ref_data = std::vector<float>{
2756.7002, 2764.7998, 2772.9001, 2781., 2853.9001, 2862., 2870.1, 2878.2002,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment