Commit b134b7d6 authored by carlushuang's avatar carlushuang
Browse files

Merge remote-tracking branch 'origin/develop' into cpu_avx2

parents 090ba885 9f71ff48
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "host_gemm.hpp" #include "host_gemm.hpp"
#include "device_tensor.hpp" #include "device_tensor.hpp"
#include "device_gemm_xdl.hpp" #include "device_gemm_xdl.hpp"
#include "device_gemm_xdl_c_shuffle.hpp" #include "device_gemm_xdl_cshuffle.hpp"
#include "element_wise_operation.hpp" #include "element_wise_operation.hpp"
#include "reference_gemm.hpp" #include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "gemm_specialization.hpp"
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "host_gemm.hpp" #include "host_gemm.hpp"
#include "device_tensor.hpp" #include "device_tensor.hpp"
#include "device_gemm_xdl.hpp" #include "device_gemm_xdl.hpp"
#include "device_gemm_xdl_c_shuffle.hpp" #include "device_gemm_xdl_cshuffle.hpp"
#include "element_wise_operation.hpp" #include "element_wise_operation.hpp"
#include "reference_gemm.hpp" #include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "gemm_specialization.hpp"
......
#ifndef GEMM_UTILS_HPP #ifndef GEMM_UTILS_HPP
#define GEMM_UTILS_HPP #define GEMM_UTILS_HPP
#include "check_err.hpp" #include "check_err.hpp"
#include "config.hpp" #include "config.hpp"
#include "device.hpp" #include "device.hpp"
#include "host_tensor.hpp" #include "host_tensor.hpp"
#include "host_tensor_generator.hpp" #include "host_tensor_generator.hpp"
#include "reference_gemm.hpp" #include "reference_gemm.hpp"
#include "tensor_layout.hpp" #include "tensor_layout.hpp"
namespace ck { namespace ck {
namespace gemm_util { namespace gemm_util {
struct GemmParams struct GemmParams
{ {
GemmParams() GemmParams()
: M(1024), N(1024), K(1024), StrideA(1024), StrideB(1024), StrideC(1024), alpha(1), beta(0) : M(1024), N(1024), K(1024), StrideA(1024), StrideB(1024), StrideC(1024), alpha(1), beta(0)
{ {
} }
ck::index_t M; ck::index_t M;
ck::index_t N; ck::index_t N;
ck::index_t K; ck::index_t K;
ck::index_t StrideA; ck::index_t StrideA;
ck::index_t StrideB; ck::index_t StrideB;
ck::index_t StrideC; ck::index_t StrideC;
float alpha; float alpha;
float beta; float beta;
}; };
template <typename GemmInstance, template <typename GemmInstance,
typename ADataType, typename ADataType,
typename BDataType, typename BDataType,
typename CDataType, typename CDataType,
typename AElementwiseOperation, typename AElementwiseOperation,
typename BElementwiseOperation, typename BElementwiseOperation,
typename CElementwiseOperation> typename CElementwiseOperation>
void RunHostGEMM(const Tensor<ADataType>& A, void RunHostGEMM(const Tensor<ADataType>& A,
const Tensor<BDataType>& B, const Tensor<BDataType>& B,
Tensor<CDataType>& C, Tensor<CDataType>& C,
AElementwiseOperation a_element_op, AElementwiseOperation a_element_op,
BElementwiseOperation b_element_op, BElementwiseOperation b_element_op,
CElementwiseOperation c_element_op) CElementwiseOperation c_element_op)
{ {
auto ref_gemm = GemmInstance{}; auto ref_gemm = GemmInstance{};
auto ref_invoker = ref_gemm.MakeInvoker(); auto ref_invoker = ref_gemm.MakeInvoker();
auto ref_argument = ref_gemm.MakeArgument(A, B, C, a_element_op, b_element_op, c_element_op); auto ref_argument = ref_gemm.MakeArgument(A, B, C, a_element_op, b_element_op, c_element_op);
ref_invoker.Run(ref_argument); ref_invoker.Run(ref_argument);
} }
template <typename DeviceGemmPtr_, template <typename DeviceGemmPtr_,
typename ADataType, typename ADataType,
typename BDataType, typename BDataType,
typename CDataType, typename CDataType,
typename AElementwiseOperation, typename AElementwiseOperation,
typename BElementwiseOperation, typename BElementwiseOperation,
typename CElementwiseOperation> typename CElementwiseOperation>
void RunDeviceGEMM(DeviceGemmPtr_& gemmPtr, void RunDeviceGEMM(DeviceGemmPtr_& gemmPtr,
const ck::gemm_util::GemmParams& params, const ck::gemm_util::GemmParams& params,
const Tensor<ADataType>& A, const Tensor<ADataType>& A,
const Tensor<BDataType>& B, const Tensor<BDataType>& B,
Tensor<CDataType>& C, Tensor<CDataType>& C,
AElementwiseOperation a_element_op, AElementwiseOperation a_element_op,
BElementwiseOperation b_element_op, BElementwiseOperation b_element_op,
CElementwiseOperation c_element_op) CElementwiseOperation c_element_op)
{ {
DeviceMem a_m_k_device_buf(sizeof(ADataType) * A.mDesc.GetElementSpace()); DeviceMem a_m_k_device_buf(sizeof(ADataType) * A.mDesc.GetElementSpace());
DeviceMem b_k_n_device_buf(sizeof(BDataType) * B.mDesc.GetElementSpace()); DeviceMem b_k_n_device_buf(sizeof(BDataType) * B.mDesc.GetElementSpace());
DeviceMem c_m_n_device_buf(sizeof(CDataType) * C.mDesc.GetElementSpace()); DeviceMem c_m_n_device_buf(sizeof(CDataType) * C.mDesc.GetElementSpace());
a_m_k_device_buf.ToDevice(A.mData.data()); a_m_k_device_buf.ToDevice(A.mData.data());
b_k_n_device_buf.ToDevice(B.mData.data()); b_k_n_device_buf.ToDevice(B.mData.data());
auto invoker_ptr = gemmPtr->MakeInvokerPointer(); auto invoker_ptr = gemmPtr->MakeInvokerPointer();
auto argument_ptr = auto argument_ptr =
gemmPtr->MakeArgumentPointer(static_cast<ADataType*>(a_m_k_device_buf.GetDeviceBuffer()), gemmPtr->MakeArgumentPointer(static_cast<ADataType*>(a_m_k_device_buf.GetDeviceBuffer()),
static_cast<BDataType*>(b_k_n_device_buf.GetDeviceBuffer()), static_cast<BDataType*>(b_k_n_device_buf.GetDeviceBuffer()),
static_cast<CDataType*>(c_m_n_device_buf.GetDeviceBuffer()), static_cast<CDataType*>(c_m_n_device_buf.GetDeviceBuffer()),
params.M, params.M,
params.N, params.N,
params.K, params.K,
params.StrideA, params.StrideA,
params.StrideB, params.StrideB,
params.StrideC, params.StrideC,
a_element_op, a_element_op,
b_element_op, b_element_op,
c_element_op); c_element_op);
if(!gemmPtr->IsSupportedArgument(argument_ptr.get())) if(!gemmPtr->IsSupportedArgument(argument_ptr.get()))
{ {
throw std::runtime_error( throw std::runtime_error(
"wrong! device_gemm with the specified compilation parameters does " "wrong! device_gemm with the specified compilation parameters does "
"not support this GEMM problem"); "not support this GEMM problem");
} }
invoker_ptr->Run(argument_ptr.get()); invoker_ptr->Run(argument_ptr.get());
c_m_n_device_buf.FromDevice(C.mData.data()); c_m_n_device_buf.FromDevice(C.mData.data());
} }
template <typename DeviceGemmPtr_, template <typename DeviceGemmPtr_,
typename ADataType, typename ADataType,
typename BDataType, typename BDataType,
typename CDataType, typename CDataType,
typename ALayout, typename ALayout,
typename BLayout, typename BLayout,
typename CLayout, typename CLayout,
typename AElementwiseOperation, typename AElementwiseOperation,
typename BElementwiseOperation, typename BElementwiseOperation,
typename CElementwiseOperation> typename CElementwiseOperation>
struct TestGemm struct TestGemm
{ {
auto PrepareGemmTensor(const ck::gemm_util::GemmParams& params) auto PrepareGemmTensor(const ck::gemm_util::GemmParams& params)
{ {
auto f_host_tensor_descriptor = auto f_host_tensor_descriptor =
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) { [](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
if(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value) if(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value)
{ {
return HostTensorDescriptor(std::vector<std::size_t>({row, col}), return HostTensorDescriptor(std::vector<std::size_t>({row, col}),
std::vector<std::size_t>({stride, 1})); std::vector<std::size_t>({stride, 1}));
} }
else else
{ {
return HostTensorDescriptor(std::vector<std::size_t>({row, col}), return HostTensorDescriptor(std::vector<std::size_t>({row, col}),
std::vector<std::size_t>({1, stride})); std::vector<std::size_t>({1, stride}));
} }
}; };
Tensor<ADataType> a_m_k( Tensor<ADataType> a_m_k(
f_host_tensor_descriptor(params.M, params.K, params.StrideA, ALayout{})); f_host_tensor_descriptor(params.M, params.K, params.StrideA, ALayout{}));
Tensor<BDataType> b_k_n( Tensor<BDataType> b_k_n(
f_host_tensor_descriptor(params.K, params.N, params.StrideB, BLayout{})); f_host_tensor_descriptor(params.K, params.N, params.StrideB, BLayout{}));
Tensor<CDataType> c_m_n_host_result( Tensor<CDataType> c_m_n_host_result(
f_host_tensor_descriptor(params.M, params.N, params.StrideC, CLayout{})); f_host_tensor_descriptor(params.M, params.N, params.StrideC, CLayout{}));
Tensor<CDataType> c_m_n_device_result( Tensor<CDataType> c_m_n_device_result(
f_host_tensor_descriptor(params.M, params.N, params.StrideC, CLayout{})); f_host_tensor_descriptor(params.M, params.N, params.StrideC, CLayout{}));
auto f_generate_tensor_value = [](auto desc, auto type) { auto f_generate_tensor_value = [](auto& tensor, auto type) {
using dataType = decltype(type); using dataType = decltype(type);
if(std::is_same<dataType, int8_t>::value) tensor.GenerateTensorValue(GeneratorTensor_2<dataType>{-5, 5});
{ };
desc.GenerateTensorValue(GeneratorTensor_2<int8_t>{-5, 5});
} f_generate_tensor_value(a_m_k, ADataType{});
else f_generate_tensor_value(b_k_n, BDataType{});
{
desc.GenerateTensorValue(GeneratorTensor_3<dataType>{-0.5, 0.5}); return std::make_tuple(a_m_k, b_k_n, c_m_n_host_result, c_m_n_device_result);
} }
};
auto operator()(DeviceGemmPtr_& gemmPtr)
f_generate_tensor_value(a_m_k, ADataType{}); {
f_generate_tensor_value(b_k_n, BDataType{}); std::cout << "ALayout = " << ALayout{}.name << ", BLayout = " << BLayout{}.name
<< ", CLayout = " << CLayout{}.name << std::endl;
return std::make_tuple(a_m_k, b_k_n, c_m_n_host_result, c_m_n_device_result); std::cout << gemmPtr->GetTypeString() << std::endl;
}
// Arrange
auto operator()(DeviceGemmPtr_& gemmPtr) ck::gemm_util::GemmParams params;
{ params.M = 1024;
std::cout << "ALayout = " << ALayout{}.name << ", BLayout = " << BLayout{}.name params.N = 1024;
<< ", CLayout = " << CLayout{}.name << std::endl; params.K = 1024;
std::cout << gemmPtr->GetTypeString() << std::endl; params.StrideA = 1024;
params.StrideB = 1024;
// Arrange params.StrideC = 1024;
ck::gemm_util::GemmParams params;
params.M = 1024; auto host_tensors = PrepareGemmTensor(params);
params.N = 1024;
params.K = 1024; const Tensor<ADataType>& a = std::get<0>(host_tensors);
params.StrideA = 1024; const Tensor<BDataType>& b = std::get<1>(host_tensors);
params.StrideB = 1024; Tensor<CDataType>& c_host = std::get<2>(host_tensors);
params.StrideC = 1024; Tensor<CDataType>& c_device = std::get<3>(host_tensors);
auto host_tensors = PrepareGemmTensor(params); auto a_element_op = AElementwiseOperation{};
auto b_element_op = BElementwiseOperation{};
const Tensor<ADataType>& a = std::get<0>(host_tensors); auto c_element_op = CElementwiseOperation{};
const Tensor<BDataType>& b = std::get<1>(host_tensors);
Tensor<CDataType>& c_host = std::get<2>(host_tensors); using ReferenceGemmInstance =
Tensor<CDataType>& c_device = std::get<3>(host_tensors); ck::tensor_operation::host::ReferenceGemm<ADataType,
BDataType,
auto a_element_op = AElementwiseOperation{}; CDataType,
auto b_element_op = BElementwiseOperation{}; AElementwiseOperation,
auto c_element_op = CElementwiseOperation{}; BElementwiseOperation,
CElementwiseOperation>;
using ReferenceGemmInstance = ck::gemm_util::RunHostGEMM<ReferenceGemmInstance>(
ck::tensor_operation::host::ReferenceGemm<ADataType, a, b, c_host, a_element_op, b_element_op, c_element_op);
BDataType,
CDataType, // Act
AElementwiseOperation, ck::gemm_util::RunDeviceGEMM(
BElementwiseOperation, gemmPtr, params, a, b, c_device, a_element_op, b_element_op, c_element_op);
CElementwiseOperation>;
ck::gemm_util::RunHostGEMM<ReferenceGemmInstance>( // Assert
a, b, c_host, a_element_op, b_element_op, c_element_op); bool res = false;
if(std::is_same<CDataType, float>::value)
// Act {
ck::gemm_util::RunDeviceGEMM( res = ck::utils::check_err(c_device.mData, c_host.mData);
gemmPtr, params, a, b, c_device, a_element_op, b_element_op, c_element_op); std::cout << (res ? "SUCCESS" : "FAILURE") << std::endl;
}
// Assert else if(std::is_same<CDataType, ck::half_t>::value)
bool res = false; {
if(std::is_same<CDataType, float>::value) res = ck::utils::check_err(c_device.mData, c_host.mData);
{ std::cout << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = ck::utils::check_err(c_device.mData, c_host.mData); }
std::cout << (res ? "SUCCESS" : "FAILURE") << std::endl; else if(std::is_same<CDataType, int8_t>::value)
} {
else if(std::is_same<CDataType, ck::half_t>::value) res = ck::utils::check_err(c_device.mData, c_host.mData);
{ std::cout << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = ck::utils::check_err(c_device.mData, c_host.mData); }
std::cout << (res ? "SUCCESS" : "FAILURE") << std::endl;
} return res;
else if(std::is_same<CDataType, int8_t>::value) }
{ };
res = ck::utils::check_err(c_device.mData, c_host.mData);
std::cout << (res ? "SUCCESS" : "FAILURE") << std::endl; template <typename DeviceGemmPtr_,
} typename ALayout,
typename BLayout,
return res; typename CLayout,
} typename AElementwiseOperation,
}; typename BElementwiseOperation,
typename CElementwiseOperation>
template <typename DeviceGemmPtr_, struct TestGemmBF16
typename ALayout, {
typename BLayout, using BF16 = ck::bhalf_t;
typename CLayout,
typename AElementwiseOperation, auto PrepareGemmTensorBF16(const ck::gemm_util::GemmParams& params)
typename BElementwiseOperation, {
typename CElementwiseOperation> auto f_host_tensor_descriptor =
struct TestGemmBF16 [](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
{ if(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value)
using BF16 = ck::bhalf_t; {
return HostTensorDescriptor(std::vector<std::size_t>({row, col}),
auto PrepareGemmTensorBF16(const ck::gemm_util::GemmParams& params) std::vector<std::size_t>({stride, 1}));
{ }
auto f_host_tensor_descriptor = else
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) { {
if(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value) return HostTensorDescriptor(std::vector<std::size_t>({row, col}),
{ std::vector<std::size_t>({1, stride}));
return HostTensorDescriptor(std::vector<std::size_t>({row, col}), }
std::vector<std::size_t>({stride, 1})); };
}
else // use fp32 host kernel to verify bf16 device kernel
{ Tensor<BF16> a_m_k_bf16(
return HostTensorDescriptor(std::vector<std::size_t>({row, col}), f_host_tensor_descriptor(params.M, params.K, params.StrideA, ALayout{}));
std::vector<std::size_t>({1, stride})); Tensor<BF16> b_k_n_bf16(
} f_host_tensor_descriptor(params.K, params.N, params.StrideB, BLayout{}));
}; Tensor<BF16> c_m_n_device_bf16(
f_host_tensor_descriptor(params.M, params.N, params.StrideC, CLayout{}));
// use fp32 host kernel to verify bf16 device kernel
Tensor<BF16> a_m_k_bf16( Tensor<float> a_m_k_fp32(
f_host_tensor_descriptor(params.M, params.K, params.StrideA, ALayout{})); f_host_tensor_descriptor(params.M, params.K, params.StrideA, ALayout{}));
Tensor<BF16> b_k_n_bf16( Tensor<float> b_k_n_fp32(
f_host_tensor_descriptor(params.K, params.N, params.StrideB, BLayout{})); f_host_tensor_descriptor(params.K, params.N, params.StrideB, BLayout{}));
Tensor<BF16> c_m_n_device_bf16( Tensor<float> c_m_n_host_fp32(
f_host_tensor_descriptor(params.M, params.N, params.StrideC, CLayout{})); f_host_tensor_descriptor(params.M, params.N, params.StrideC, CLayout{}));
Tensor<float> c_m_n_device_fp32(
Tensor<float> a_m_k_fp32( f_host_tensor_descriptor(params.M, params.N, params.StrideC, CLayout{}));
f_host_tensor_descriptor(params.M, params.K, params.StrideA, ALayout{}));
Tensor<float> b_k_n_fp32( a_m_k_bf16.GenerateTensorValue(GeneratorTensor_3<BF16>{-0.5, 0.5});
f_host_tensor_descriptor(params.K, params.N, params.StrideB, BLayout{})); b_k_n_bf16.GenerateTensorValue(GeneratorTensor_3<BF16>{-0.5, 0.5});
Tensor<float> c_m_n_host_fp32(
f_host_tensor_descriptor(params.M, params.N, params.StrideC, CLayout{})); bf16_to_f32_(a_m_k_bf16, a_m_k_fp32);
Tensor<float> c_m_n_device_fp32( bf16_to_f32_(b_k_n_bf16, b_k_n_fp32);
f_host_tensor_descriptor(params.M, params.N, params.StrideC, CLayout{}));
return std::make_tuple(a_m_k_bf16,
a_m_k_bf16.GenerateTensorValue(GeneratorTensor_3<BF16>{-0.5, 0.5}); b_k_n_bf16,
b_k_n_bf16.GenerateTensorValue(GeneratorTensor_3<BF16>{-0.5, 0.5}); c_m_n_device_bf16,
a_m_k_fp32,
bf16_to_f32_(a_m_k_bf16, a_m_k_fp32); b_k_n_fp32,
bf16_to_f32_(b_k_n_bf16, b_k_n_fp32); c_m_n_host_fp32,
c_m_n_device_fp32);
return std::make_tuple(a_m_k_bf16, }
b_k_n_bf16,
c_m_n_device_bf16, auto operator()(DeviceGemmPtr_& gemmPtr)
a_m_k_fp32, {
b_k_n_fp32, // Arrange
c_m_n_host_fp32, ck::gemm_util::GemmParams params;
c_m_n_device_fp32); params.M = 1024;
} params.N = 1024;
params.K = 1024;
auto operator()(DeviceGemmPtr_& gemmPtr) params.StrideA = 1024;
{ params.StrideB = 1024;
// Arrange params.StrideC = 1024;
ck::gemm_util::GemmParams params;
params.M = 1024; auto host_tensors = PrepareGemmTensorBF16(params);
params.N = 1024; const Tensor<BF16>& a_bf16 = std::get<0>(host_tensors);
params.K = 1024; const Tensor<BF16>& b_bf16 = std::get<1>(host_tensors);
params.StrideA = 1024; Tensor<BF16>& c_device_bf16 = std::get<2>(host_tensors);
params.StrideB = 1024; Tensor<float>& a_fp32 = std::get<3>(host_tensors);
params.StrideC = 1024; Tensor<float>& b_fp32 = std::get<4>(host_tensors);
Tensor<float>& c_host_fp32 = std::get<5>(host_tensors);
auto host_tensors = PrepareGemmTensorBF16(params); Tensor<float>& c_device_fp32 = std::get<6>(host_tensors);
const Tensor<BF16>& a_bf16 = std::get<0>(host_tensors);
const Tensor<BF16>& b_bf16 = std::get<1>(host_tensors); auto a_element_op = AElementwiseOperation{};
Tensor<BF16>& c_device_bf16 = std::get<2>(host_tensors); auto b_element_op = BElementwiseOperation{};
Tensor<float>& a_fp32 = std::get<3>(host_tensors); auto c_element_op = CElementwiseOperation{};
Tensor<float>& b_fp32 = std::get<4>(host_tensors);
Tensor<float>& c_host_fp32 = std::get<5>(host_tensors); // use fp32 host kernel to verify bf16 device kernel
Tensor<float>& c_device_fp32 = std::get<6>(host_tensors); using ReferenceGemmInstance =
ck::tensor_operation::host::ReferenceGemm<float,
auto a_element_op = AElementwiseOperation{}; float,
auto b_element_op = BElementwiseOperation{}; float,
auto c_element_op = CElementwiseOperation{}; AElementwiseOperation,
BElementwiseOperation,
// use fp32 host kernel to verify bf16 device kernel CElementwiseOperation>;
using ReferenceGemmInstance = ck::gemm_util::RunHostGEMM<ReferenceGemmInstance>(
ck::tensor_operation::host::ReferenceGemm<float, a_fp32, b_fp32, c_host_fp32, a_element_op, b_element_op, c_element_op);
float,
float, // Act
AElementwiseOperation, ck::gemm_util::RunDeviceGEMM(gemmPtr,
BElementwiseOperation, params,
CElementwiseOperation>; a_bf16,
ck::gemm_util::RunHostGEMM<ReferenceGemmInstance>( b_bf16,
a_fp32, b_fp32, c_host_fp32, a_element_op, b_element_op, c_element_op); c_device_bf16,
a_element_op,
// Act b_element_op,
ck::gemm_util::RunDeviceGEMM(gemmPtr, c_element_op);
params,
a_bf16, bf16_to_f32_(c_device_bf16, c_device_fp32);
b_bf16,
c_device_bf16, // Assert
a_element_op, bool res = ck::utils::check_err(
b_element_op, c_device_fp32.mData, c_host_fp32.mData, "Error: incorrect results!", 1e-2f, 1e-3f);
c_element_op); std::cout << (res ? "SUCCESS" : "FAILURE") << std::endl;
bf16_to_f32_(c_device_bf16, c_device_fp32); return res;
};
// Assert };
bool res = ck::utils::check_err(
c_device_fp32.mData, c_host_fp32.mData, "Error: incorrect results!", 1e-2f, 1e-3f); } // namespace gemm_util
std::cout << (res ? "SUCCESS" : "FAILURE") << std::endl; } // namespace ck
#endif
return res;
};
};
} // namespace gemm_util
} // namespace ck
#endif
...@@ -16,22 +16,22 @@ int main() ...@@ -16,22 +16,22 @@ int main()
pass = pass && pass = pass &&
ck::profiler:: ck::profiler::
profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Row, Row, Row>( profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Row, Row, Row>(
true, 1, false, 1, M, N, K, K, N, N); true, 1, false, false, M, N, K, K, N, N);
pass = pass && pass = pass &&
ck::profiler:: ck::profiler::
profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Row, Col, Row>( profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Row, Col, Row>(
true, 1, false, 1, M, N, K, K, K, N); true, 1, false, false, M, N, K, K, K, N);
pass = pass && pass = pass &&
ck::profiler:: ck::profiler::
profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Col, Row, Row>( profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Col, Row, Row>(
true, 1, false, 1, M, N, K, M, N, N); true, 1, false, false, M, N, K, M, N, N);
pass = pass && pass = pass &&
ck::profiler:: ck::profiler::
profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Col, Col, Row>( profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Col, Col, Row>(
true, 1, false, 1, M, N, K, M, K, N); true, 1, false, false, M, N, K, M, K, N);
if(pass) if(pass)
{ {
......
...@@ -45,7 +45,7 @@ static bool check_out(const Tensor<T>& ref, const Tensor<T>& result) ...@@ -45,7 +45,7 @@ static bool check_out(const Tensor<T>& ref, const Tensor<T>& result)
{ {
float max_diff = 1e-6; float max_diff = 1e-6;
for(int i = 0; i < ref.mData.size(); ++i) for(std::size_t i = 0; i < ref.mData.size(); ++i)
{ {
float diff = std::abs(double(ref.mData[i]) - double(result.mData[i])); float diff = std::abs(double(ref.mData[i]) - double(result.mData[i]));
if(max_diff < diff) if(max_diff < diff)
...@@ -187,9 +187,10 @@ int test_gemm(const gemmArgs& args) ...@@ -187,9 +187,10 @@ int test_gemm(const gemmArgs& args)
if(gemm_ptr->IsSupportedArgument(argument_ptr.get())) if(gemm_ptr->IsSupportedArgument(argument_ptr.get()))
{ {
invoker_ptr->Run(argument_ptr.get(), 0); invoker_ptr->Run(argument_ptr.get());
c_device_buf.FromDevice(c_m_n_device_result.mData.data()); c_device_buf.FromDevice(c_m_n_device_result.mData.data());
if(!check_out(c_m_n_host_result, c_m_n_device_result)) if(!check_out(c_m_n_host_result, c_m_n_device_result))
{ {
success = false; success = false;
......
...@@ -104,7 +104,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr) ...@@ -104,7 +104,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
b_tensors_device.reserve(group_count); b_tensors_device.reserve(group_count);
c_tensors_device.reserve(group_count); c_tensors_device.reserve(group_count);
for(int i = 0; i < gemm_shapes.size(); i++) for(std::size_t i = 0; i < gemm_shapes.size(); i++)
{ {
a_tensors.emplace_back(Tensor<ADataType>(f_host_tensor_descriptor( a_tensors.emplace_back(Tensor<ADataType>(f_host_tensor_descriptor(
gemm_shapes[i].M, gemm_shapes[i].K, gemm_shapes[i].StrideA, ALayout{}))); gemm_shapes[i].M, gemm_shapes[i].K, gemm_shapes[i].StrideA, ALayout{})));
...@@ -119,7 +119,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr) ...@@ -119,7 +119,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
b_tensors[i].GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 5}); b_tensors[i].GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 5});
} }
for(int i = 0; i < gemm_shapes.size(); i++) for(std::size_t i = 0; i < gemm_shapes.size(); i++)
{ {
a_tensors_device.emplace_back( a_tensors_device.emplace_back(
std::make_unique<DeviceMem>(sizeof(ADataType) * a_tensors[i].mDesc.GetElementSize())); std::make_unique<DeviceMem>(sizeof(ADataType) * a_tensors[i].mDesc.GetElementSize()));
...@@ -147,7 +147,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr) ...@@ -147,7 +147,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
invoker_ptr->Run(argument_ptr.get()); invoker_ptr->Run(argument_ptr.get());
for(int i = 0; i < gemm_shapes.size(); i++) for(std::size_t i = 0; i < gemm_shapes.size(); i++)
{ {
c_tensors_device[i]->FromDevice(c_device_tensors[i].mData.data()); c_tensors_device[i]->FromDevice(c_device_tensors[i].mData.data());
......
...@@ -460,7 +460,7 @@ class SimpleAppArgs ...@@ -460,7 +460,7 @@ class SimpleAppArgs
int processArgs(int argc, char* argv[]) int processArgs(int argc, char* argv[])
{ {
unsigned int ch; int ch;
while(1) while(1)
{ {
......
...@@ -9,7 +9,7 @@ namespace reduce_util { ...@@ -9,7 +9,7 @@ namespace reduce_util {
template <typename T> template <typename T>
void to_f32_vector(const Tensor<T>& src, Tensor<float>& dst) void to_f32_vector(const Tensor<T>& src, Tensor<float>& dst)
{ {
for(int i = 0; i < src.mData.size(); ++i) for(std::size_t i = 0; i < src.mData.size(); ++i)
dst.mData[i] = type_convert<float>(src.mData[i]); dst.mData[i] = type_convert<float>(src.mData[i]);
} }
......
...@@ -463,7 +463,7 @@ class SimpleAppArgs ...@@ -463,7 +463,7 @@ class SimpleAppArgs
int processArgs(int argc, char* argv[]) int processArgs(int argc, char* argv[])
{ {
unsigned int ch; int ch;
while(1) while(1)
{ {
......
add_test_executable(test_reference_conv_fwd reference_conv_fwd.cpp) add_gtest_executable(test_reference_conv_fwd reference_conv_fwd.cpp)
target_link_libraries(test_reference_conv_fwd PRIVATE host_tensor conv_fwd_util) target_link_libraries(test_reference_conv_fwd PRIVATE host_tensor conv_util)
...@@ -4,10 +4,11 @@ ...@@ -4,10 +4,11 @@
#include <numeric> #include <numeric>
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
#include "gtest/gtest.h"
#include "check_err.hpp" #include "check_err.hpp"
#include "config.hpp" #include "config.hpp"
#include "conv_fwd_util.hpp" #include "conv_util.hpp"
#include "element_wise_operation.hpp" #include "element_wise_operation.hpp"
#include "fill.hpp" #include "fill.hpp"
#include "host_tensor.hpp" #include "host_tensor.hpp"
...@@ -33,21 +34,21 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParams& params, ...@@ -33,21 +34,21 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
const FillInputOp& fill_input_op = FillInputOp{}, const FillInputOp& fill_input_op = FillInputOp{},
const FillWeightsOp& fill_weights_op = FillWeightsOp{0.5f}) const FillWeightsOp& fill_weights_op = FillWeightsOp{0.5f})
{ {
std::vector<std::size_t> input_dims{static_cast<std::size_t>(params.N), std::vector<std::size_t> input_dims{static_cast<std::size_t>(params.N_),
static_cast<std::size_t>(params.C)}; static_cast<std::size_t>(params.C_)};
input_dims.insert(std::end(input_dims), input_dims.insert(std::end(input_dims),
std::begin(params.input_spatial_lengths), std::begin(params.input_spatial_lengths_),
std::end(params.input_spatial_lengths)); std::end(params.input_spatial_lengths_));
std::vector<std::size_t> filter_dims{static_cast<std::size_t>(params.K), std::vector<std::size_t> filter_dims{static_cast<std::size_t>(params.K_),
static_cast<std::size_t>(params.C)}; static_cast<std::size_t>(params.C_)};
filter_dims.insert(std::end(filter_dims), filter_dims.insert(std::end(filter_dims),
std::begin(params.filter_spatial_lengths), std::begin(params.filter_spatial_lengths_),
std::end(params.filter_spatial_lengths)); std::end(params.filter_spatial_lengths_));
const std::vector<ck::index_t>& output_spatial_lengths = params.GetOutputSpatialLengths(); const std::vector<ck::index_t>& output_spatial_lengths = params.GetOutputSpatialLengths();
std::vector<std::size_t> output_dims{static_cast<std::size_t>(params.N), std::vector<std::size_t> output_dims{static_cast<std::size_t>(params.N_),
static_cast<std::size_t>(params.K)}; static_cast<std::size_t>(params.K_)};
output_dims.insert(std::end(output_dims), output_dims.insert(std::end(output_dims),
std::begin(output_spatial_lengths), std::begin(output_spatial_lengths),
std::end(output_spatial_lengths)); std::end(output_spatial_lengths));
...@@ -73,32 +74,32 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParams& params, ...@@ -73,32 +74,32 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
auto ref_argument = ref_conv.MakeArgument(input, auto ref_argument = ref_conv.MakeArgument(input,
weights, weights,
host_output, host_output,
params.conv_filter_strides, params.conv_filter_strides_,
params.conv_filter_dilations, params.conv_filter_dilations_,
params.input_left_pads, params.input_left_pads_,
params.input_right_pads, params.input_right_pads_,
InElementOp{}, InElementOp{},
WeiElementOp{}, WeiElementOp{},
OutElementOp{}); OutElementOp{});
ref_invoker.Run(ref_argument); ref_invoker.Run(ref_argument);
// std::cout <<"output: " << host_output.mDesc << std::endl << host_output.mData << std::endl;
return host_output; return host_output;
} }
bool test_conv2d_nhwc() } // anonymous namespace
TEST(ReferenceConvolutionFWD, Conv2DNHWC)
{ {
bool res{true};
ck::utils::conv::ConvParams params; ck::utils::conv::ConvParams params;
params.N = 1; params.N_ = 1;
params.K = 1; params.K_ = 1;
params.C = 2; params.C_ = 2;
params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3};
params.input_spatial_lengths = std::vector<ck::index_t>{6, 6}; params.input_spatial_lengths_ = std::vector<ck::index_t>{6, 6};
params.conv_filter_strides = std::vector<ck::index_t>{1, 1}; params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1};
params.conv_filter_dilations = std::vector<ck::index_t>{1, 1}; params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1};
params.input_left_pads = std::vector<ck::index_t>{0, 0}; params.input_left_pads_ = std::vector<ck::index_t>{0, 0};
params.input_right_pads = std::vector<ck::index_t>{0, 0}; params.input_right_pads_ = std::vector<ck::index_t>{0, 0};
auto out_tensor = run_reference_convolution_forward<2>(params); auto out_tensor = run_reference_convolution_forward<2>(params);
std::vector<std::size_t> ref_dims{1, 1, 4, 4}; std::vector<std::size_t> ref_dims{1, 1, 4, 4};
...@@ -118,51 +119,50 @@ bool test_conv2d_nhwc() ...@@ -118,51 +119,50 @@ bool test_conv2d_nhwc()
472.5, 472.5,
490.5, 490.5,
508.5}; 508.5};
res = res && ck::utils::check_err(out_tensor.mDesc.GetLengths(), EXPECT_TRUE(ck::utils::check_err(
ref_dims, out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
"Error: wrong output tensor dimensions!"); EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
res = res && ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"); }
params.N = 1; TEST(ReferenceConvolutionFWD, Conv2DNHWCStridesDilationsPadding)
params.K = 2; {
params.C = 2; ck::utils::conv::ConvParams params;
params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3}; params.N_ = 1;
params.input_spatial_lengths = std::vector<ck::index_t>{12, 12}; params.K_ = 2;
params.conv_filter_strides = std::vector<ck::index_t>{2, 2}; params.C_ = 2;
params.conv_filter_dilations = std::vector<ck::index_t>{2, 2}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3};
params.input_left_pads = std::vector<ck::index_t>{1, 1}; params.input_spatial_lengths_ = std::vector<ck::index_t>{12, 12};
params.input_right_pads = std::vector<ck::index_t>{1, 1}; params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2};
params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
params.input_left_pads_ = std::vector<ck::index_t>{1, 1};
params.input_right_pads_ = std::vector<ck::index_t>{1, 1};
out_tensor = run_reference_convolution_forward<2>(params); auto out_tensor = run_reference_convolution_forward<2>(params);
ref_dims = std::vector<std::size_t>{1, 2, 5, 5}; std::vector<std::size_t> ref_dims = std::vector<std::size_t>{1, 2, 5, 5};
ref_data = std::vector<float>{ std::vector<float> ref_data{
210., 210., 327., 327., 351., 351., 375., 375., 399., 399., 210., 210., 327., 327., 351., 351., 375., 375., 399., 399.,
459., 459., 706.5, 706.5, 742.5, 742.5, 778.5, 778.5, 814.5, 814.5, 459., 459., 706.5, 706.5, 742.5, 742.5, 778.5, 778.5, 814.5, 814.5,
747., 747., 1138.5, 1138.5, 1174.5, 1174.5, 1210.5, 1210.5, 1246.5, 1246.5, 747., 747., 1138.5, 1138.5, 1174.5, 1174.5, 1210.5, 1210.5, 1246.5, 1246.5,
1035., 1035., 1570.5, 1570.5, 1606.5, 1606.5, 1642.5, 1642.5, 1678.5, 1678.5, 1035., 1035., 1570.5, 1570.5, 1606.5, 1606.5, 1642.5, 1642.5, 1678.5, 1678.5,
1323., 1323., 2002.5, 2002.5, 2038.5, 2038.5, 2074.5, 2074.5, 2110.5, 2110.5}; 1323., 1323., 2002.5, 2002.5, 2038.5, 2038.5, 2074.5, 2074.5, 2110.5, 2110.5};
res = res && ck::utils::check_err(out_tensor.mDesc.GetLengths(), EXPECT_TRUE(ck::utils::check_err(
ref_dims, out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
"Error: wrong output tensor dimensions!"); EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
res = res && ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!");
return res;
} }
bool test_conv1d_nwc() TEST(ReferenceConvolutionFWD, Conv1DNWC)
{ {
bool res{true};
ck::utils::conv::ConvParams params; ck::utils::conv::ConvParams params;
params.num_dim_spatial = 1; params.num_dim_spatial_ = 1;
params.N = 1; params.N_ = 1;
params.K = 1; params.K_ = 1;
params.C = 2; params.C_ = 2;
params.filter_spatial_lengths = std::vector<ck::index_t>{3}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
params.input_spatial_lengths = std::vector<ck::index_t>{6}; params.input_spatial_lengths_ = std::vector<ck::index_t>{6};
params.conv_filter_strides = std::vector<ck::index_t>{1}; params.conv_filter_strides_ = std::vector<ck::index_t>{1};
params.conv_filter_dilations = std::vector<ck::index_t>{1}; params.conv_filter_dilations_ = std::vector<ck::index_t>{1};
params.input_left_pads = std::vector<ck::index_t>{0}; params.input_left_pads_ = std::vector<ck::index_t>{0};
params.input_right_pads = std::vector<ck::index_t>{0}; params.input_right_pads_ = std::vector<ck::index_t>{0};
auto out_tensor = auto out_tensor =
run_reference_convolution_forward<1, run_reference_convolution_forward<1,
...@@ -174,46 +174,53 @@ bool test_conv1d_nwc() ...@@ -174,46 +174,53 @@ bool test_conv1d_nwc()
ck::tensor_layout::convolution::NWK>(params); ck::tensor_layout::convolution::NWK>(params);
std::vector<std::size_t> ref_dims{1, 1, 4}; std::vector<std::size_t> ref_dims{1, 1, 4};
std::vector<float> ref_data{7.5, 13.5, 19.5, 25.5}; std::vector<float> ref_data{7.5, 13.5, 19.5, 25.5};
res = res && ck::utils::check_err(out_tensor.mDesc.GetLengths(), EXPECT_TRUE(ck::utils::check_err(
ref_dims, out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
"Error: wrong output tensor dimensions!"); EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
res = res && ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"); }
params.num_dim_spatial = 1; TEST(ReferenceConvolutionFWD, Conv1DNWCStridesDilationsPadding)
params.N = 1; {
params.K = 2; ck::utils::conv::ConvParams params;
params.C = 2; params.num_dim_spatial_ = 1;
params.filter_spatial_lengths = std::vector<ck::index_t>{3}; params.N_ = 1;
params.input_spatial_lengths = std::vector<ck::index_t>{12}; params.K_ = 2;
params.conv_filter_strides = std::vector<ck::index_t>{2}; params.C_ = 2;
params.conv_filter_dilations = std::vector<ck::index_t>{2}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
params.input_left_pads = std::vector<ck::index_t>{1}; params.input_spatial_lengths_ = std::vector<ck::index_t>{12};
params.input_right_pads = std::vector<ck::index_t>{1}; params.conv_filter_strides_ = std::vector<ck::index_t>{2};
params.conv_filter_dilations_ = std::vector<ck::index_t>{2};
params.input_left_pads_ = std::vector<ck::index_t>{1};
params.input_right_pads_ = std::vector<ck::index_t>{1};
out_tensor = run_reference_convolution_forward<1, auto out_tensor =
float, run_reference_convolution_forward<1,
float, float,
float, float,
ck::tensor_layout::convolution::NWC, float,
ck::tensor_layout::convolution::KXC, ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::NWK>(params); ck::tensor_layout::convolution::KXC,
ref_dims = std::vector<std::size_t>{1, 2, 5}; ck::tensor_layout::convolution::NWK>(params);
ref_data = std::vector<float>{9., 9., 19.5, 19.5, 31.5, 31.5, 43.5, 43.5, 55.5, 55.5}; std::vector<std::size_t> ref_dims{1, 2, 5};
res = res && ck::utils::check_err(out_tensor.mDesc.GetLengths(), std::vector<float> ref_data{9., 9., 19.5, 19.5, 31.5, 31.5, 43.5, 43.5, 55.5, 55.5};
ref_dims, EXPECT_TRUE(ck::utils::check_err(
"Error: wrong output tensor dimensions!"); out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
res = res && ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"); EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
}
params.num_dim_spatial = 1; TEST(ReferenceConvolutionFWD, Conv1DNWCSameOutputSize)
params.N = 2; {
params.K = 16; ck::utils::conv::ConvParams params;
params.C = 4; params.num_dim_spatial_ = 1;
params.filter_spatial_lengths = std::vector<ck::index_t>{3}; params.N_ = 2;
params.input_spatial_lengths = std::vector<ck::index_t>{16}; params.K_ = 16;
params.conv_filter_strides = std::vector<ck::index_t>{1}; params.C_ = 4;
params.conv_filter_dilations = std::vector<ck::index_t>{1}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
params.input_left_pads = std::vector<ck::index_t>{1}; params.input_spatial_lengths_ = std::vector<ck::index_t>{16};
params.input_right_pads = std::vector<ck::index_t>{1}; params.conv_filter_strides_ = std::vector<ck::index_t>{1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1};
params.input_left_pads_ = std::vector<ck::index_t>{1};
params.input_right_pads_ = std::vector<ck::index_t>{1};
auto out_tensor2 = run_reference_convolution_forward<1, auto out_tensor2 = run_reference_convolution_forward<1,
float, float,
...@@ -224,8 +231,8 @@ bool test_conv1d_nwc() ...@@ -224,8 +231,8 @@ bool test_conv1d_nwc()
ck::tensor_layout::convolution::NWK>( ck::tensor_layout::convolution::NWK>(
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f}); params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
ref_dims = std::vector<std::size_t>{2, 16, 16}; std::vector<std::size_t> ref_dims{2, 16, 16};
ref_data = std::vector<float>{ std::vector<float> ref_data{
1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3,
...@@ -290,28 +297,24 @@ bool test_conv1d_nwc() ...@@ -290,28 +297,24 @@ bool test_conv1d_nwc()
72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9,
49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4,
49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4}; 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4};
res = res && ck::utils::check_err(out_tensor2.mDesc.GetLengths(), EXPECT_TRUE(ck::utils::check_err(
ref_dims, out_tensor2.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
"Error: wrong output tensor dimensions!"); EXPECT_TRUE(ck::utils::check_err(out_tensor2.mData, ref_data, "Error: incorrect results!"));
res = res && ck::utils::check_err(out_tensor2.mData, ref_data, "Error: incorrect results!");
return res;
} }
bool test_conv3d_ncdhw() TEST(ReferenceConvolutionFWD, Conv3DNCDHW)
{ {
bool res{true};
ck::utils::conv::ConvParams params; ck::utils::conv::ConvParams params;
params.num_dim_spatial = 3; params.num_dim_spatial_ = 3;
params.N = 1; params.N_ = 1;
params.K = 1; params.K_ = 1;
params.C = 2; params.C_ = 2;
params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3, 3}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
params.input_spatial_lengths = std::vector<ck::index_t>{6, 6, 6}; params.input_spatial_lengths_ = std::vector<ck::index_t>{6, 6, 6};
params.conv_filter_strides = std::vector<ck::index_t>{1, 1, 1}; params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
params.conv_filter_dilations = std::vector<ck::index_t>{1, 1, 1}; params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
params.input_left_pads = std::vector<ck::index_t>{0, 0, 0}; params.input_left_pads_ = std::vector<ck::index_t>{0, 0, 0};
params.input_right_pads = std::vector<ck::index_t>{0, 0, 0}; params.input_right_pads_ = std::vector<ck::index_t>{0, 0, 0};
auto out_tensor = run_reference_convolution_forward<3, auto out_tensor = run_reference_convolution_forward<3,
float, float,
...@@ -331,32 +334,37 @@ bool test_conv3d_ncdhw() ...@@ -331,32 +334,37 @@ bool test_conv3d_ncdhw()
634.5, 637.2, 639.9, 642.60004, 650.7, 653.4, 656.10004, 658.8, 634.5, 637.2, 639.9, 642.60004, 650.7, 653.4, 656.10004, 658.8,
699.3, 702., 704.7, 707.4, 715.5, 718.2, 720.9, 723.60004, 699.3, 702., 704.7, 707.4, 715.5, 718.2, 720.9, 723.60004,
731.7, 734.4001, 737.10004, 739.8, 747.9001, 750.60004, 753.3, 756.}; 731.7, 734.4001, 737.10004, 739.8, 747.9001, 750.60004, 753.3, 756.};
res = res && ck::utils::check_err(out_tensor.mDesc.GetLengths(), EXPECT_TRUE(ck::utils::check_err(out_tensor.mDesc.GetLengths(),
ref_dims, ref_dims,
"Error [case 1]: wrong output tensor dimensions!"); "Error [case 1]: wrong output tensor dimensions!"));
res = res && EXPECT_TRUE(
ck::utils::check_err(out_tensor.mData, ref_data, "Error [case 1]: incorrect results!"); ck::utils::check_err(out_tensor.mData, ref_data, "Error [case 1]: incorrect results!"));
}
params.N = 1; TEST(ReferenceConvolutionFWD, Conv3DNCDHWStridesDilations)
params.K = 2; {
params.C = 2; ck::utils::conv::ConvParams params;
params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3, 3}; params.num_dim_spatial_ = 3;
params.input_spatial_lengths = std::vector<ck::index_t>{12, 12, 12}; params.N_ = 1;
params.conv_filter_strides = std::vector<ck::index_t>{3, 3, 3}; params.K_ = 2;
params.conv_filter_dilations = std::vector<ck::index_t>{1, 1, 1}; params.C_ = 2;
params.input_left_pads = std::vector<ck::index_t>{0, 0, 0}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
params.input_right_pads = std::vector<ck::index_t>{0, 0, 0}; params.input_spatial_lengths_ = std::vector<ck::index_t>{12, 12, 12};
params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3, 3};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
params.input_left_pads_ = std::vector<ck::index_t>{0, 0, 0};
params.input_right_pads_ = std::vector<ck::index_t>{0, 0, 0};
out_tensor = run_reference_convolution_forward<3, auto out_tensor = run_reference_convolution_forward<3,
float, float,
float, float,
float, float,
ck::tensor_layout::convolution::NCDHW, ck::tensor_layout::convolution::NCDHW,
ck::tensor_layout::convolution::KCZYX, ck::tensor_layout::convolution::KCZYX,
ck::tensor_layout::convolution::NKDHW>( ck::tensor_layout::convolution::NKDHW>(
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f}); params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
ref_dims = std::vector<std::size_t>{1, 2, 4, 4, 4}; std::vector<std::size_t> ref_dims{1, 2, 4, 4, 4};
ref_data = std::vector<float>{ std::vector<float> ref_data{
2756.7002, 2764.7998, 2772.9001, 2781., 2853.9001, 2862., 2870.1, 2878.2002, 2756.7002, 2764.7998, 2772.9001, 2781., 2853.9001, 2862., 2870.1, 2878.2002,
2951.1, 2959.2002, 2967.2998, 2975.4001, 3048.2998, 3056.4001, 3064.5, 3072.6, 2951.1, 2959.2002, 2967.2998, 2975.4001, 3048.2998, 3056.4001, 3064.5, 3072.6,
3923.1, 3931.2, 3939.2998, 3947.4, 4020.2998, 4028.4001, 4036.5002, 4044.5999, 3923.1, 3931.2, 3939.2998, 3947.4, 4020.2998, 4028.4001, 4036.5002, 4044.5999,
...@@ -373,26 +381,9 @@ bool test_conv3d_ncdhw() ...@@ -373,26 +381,9 @@ bool test_conv3d_ncdhw()
5283.9004, 5292., 5300.0996, 5308.2, 5381.0996, 5389.2, 5397.3, 5405.4004, 5283.9004, 5292., 5300.0996, 5308.2, 5381.0996, 5389.2, 5397.3, 5405.4004,
6255.9004, 6264.0005, 6272.1, 6280.2, 6353.1, 6361.2, 6369.301, 6377.4, 6255.9004, 6264.0005, 6272.1, 6280.2, 6353.1, 6361.2, 6369.301, 6377.4,
6450.301, 6458.4, 6466.5, 6474.6, 6547.5, 6555.6, 6563.699, 6571.801}; 6450.301, 6458.4, 6466.5, 6474.6, 6547.5, 6555.6, 6563.699, 6571.801};
res = res && ck::utils::check_err(out_tensor.mDesc.GetLengths(), EXPECT_TRUE(ck::utils::check_err(out_tensor.mDesc.GetLengths(),
ref_dims, ref_dims,
"Error [case 2]: wrong output tensor dimensions!"); "Error [case 2]: wrong output tensor dimensions!"));
res = EXPECT_TRUE(ck::utils::check_err(
res && ck::utils::check_err( out_tensor.mData, ref_data, "Error [case 2]: incorrect results!", 1e-4f, 1e-6f));
out_tensor.mData, ref_data, "Error [case 2]: incorrect results!", 1e-4f, 1e-6f);
return res;
}
} // anonymous namespace
int main(void)
{
bool res{true};
res = test_conv2d_nhwc();
std::cout << "test_conv2d_nhwc ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = test_conv1d_nwc();
std::cout << "TestConv1DNHWC ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = test_conv3d_ncdhw();
std::cout << "test_conv3d_ncdhw ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment