Commit aea62819 authored by Chaitanya Inumella

Rebase branch 'develop' of https://github.com/ROCmSoftwarePlatform/composable_kernel into contraction_hipTENSOR
parents 75af5450 75ab874e
add_custom_target(test_layernorm)
add_gtest_executable(test_layernorm_fp32 test_layernorm_fp32.cpp)
add_gtest_executable(test_layernorm_fp16 test_layernorm_fp16.cpp)
target_link_libraries(test_layernorm_fp32 PRIVATE utility)
target_link_libraries(test_layernorm_fp16 PRIVATE utility)
add_dependencies(test_layernorm test_layernorm_fp32)
add_dependencies(test_layernorm test_layernorm_fp16)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "test_layernorm_util.hpp"
template <ck::index_t N>
using I = ck::Number<N>;
template <typename Tuple>
class TestLayernormFP16 : public ck::TestLayernorm<Tuple>
{
};
// clang-format off
using KernelTypes = ::testing::Types<
// XDataType, GammaDataType, BetaDataType, AccDataType, YDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorSize, BetaSrcVectorSize, YDstVectorSize>
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<8>, I<32>, I<2>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<4>, I<64>, I<2>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<2>, I<128>, I<2>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<1>, I<256>, I<2>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>
>;
// clang-format on
TYPED_TEST_SUITE(TestLayernormFP16, KernelTypes);
TYPED_TEST(TestLayernormFP16, Test_FP16) { this->Run(); }
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "test_layernorm_util.hpp"
template <ck::index_t N>
using I = ck::Number<N>;
template <typename Tuple>
class TestLayernormFP32 : public ck::TestLayernorm<Tuple>
{
};
// clang-format off
using KernelTypes = ::testing::Types<
// XDataType, GammaDataType, BetaDataType, AccDataType, YDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorSize, BetaSrcVectorSize, YDstVectorSize>
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<8>, I<32>, I<2>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<4>, I<64>, I<2>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<2>, I<128>, I<2>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<1>, I<256>, I<2>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>
>;
// clang-format on
TYPED_TEST_SUITE(TestLayernormFP32, KernelTypes);
TYPED_TEST(TestLayernormFP32, Test_FP32) { this->Run(); }
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include <iostream>
#include <gtest/gtest.h>
#include "ck/ck.hpp"
#include "ck/utility/number.hpp"
#include "ck/tensor_operation/gpu/device/device_layernorm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp"
namespace ck {
template <typename Range>
std::string serialize_range(const Range& range)
{
std::stringstream ss;
for(auto& r : range)
{
ss << r << ", ";
}
std::string str = ss.str();
// Guard against an empty range before stripping the trailing ", ".
return str.empty() ? str : std::string(str.begin(), str.end() - 2);
}
template <typename Tuple>
class TestLayernorm : public ::testing::Test
{
protected:
using XDataType = std::tuple_element_t<0, Tuple>;
using GammaDataType = std::tuple_element_t<1, Tuple>;
using BetaDataType = std::tuple_element_t<2, Tuple>;
using AccDataType = std::tuple_element_t<3, Tuple>;
using YDataType = std::tuple_element_t<4, Tuple>;
static constexpr index_t Rank = std::tuple_element_t<5, Tuple>{}.value;
static constexpr index_t NumReduceDim = std::tuple_element_t<6, Tuple>{}.value;
static constexpr index_t BlockSize = std::tuple_element_t<7, Tuple>{}.value;
static constexpr index_t MThreadClusterSize = std::tuple_element_t<8, Tuple>{}.value;
static constexpr index_t KThreadClusterSize = std::tuple_element_t<9, Tuple>{}.value;
static constexpr index_t MThreadSliceSize = std::tuple_element_t<10, Tuple>{}.value;
static constexpr index_t KThreadSliceSize = std::tuple_element_t<11, Tuple>{}.value;
static constexpr index_t XYSrcVectorDim = std::tuple_element_t<12, Tuple>{}.value;
static constexpr index_t XSrcVectorSize = std::tuple_element_t<13, Tuple>{}.value;
static constexpr index_t GammaSrcVectorSize = std::tuple_element_t<14, Tuple>{}.value;
static constexpr index_t BetaSrcVectorSize = std::tuple_element_t<15, Tuple>{}.value;
static constexpr index_t YDstVectorSize = std::tuple_element_t<16, Tuple>{}.value;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using ReferenceInstance = tensor_operation::host::ReferenceLayernorm<XDataType,
GammaDataType,
BetaDataType,
YDataType,
AccDataType,
PassThrough,
Rank,
NumReduceDim>;
using DeviceInstance = tensor_operation::device::DeviceLayernorm<XDataType,
GammaDataType,
BetaDataType,
AccDataType,
YDataType,
PassThrough,
Rank,
NumReduceDim,
BlockSize,
MThreadClusterSize,
KThreadClusterSize,
MThreadSliceSize,
KThreadSliceSize,
XYSrcVectorDim,
XSrcVectorSize,
GammaSrcVectorSize,
BetaSrcVectorSize,
YDstVectorSize>;
TestLayernorm() : ref_instance_invoker_(ReferenceInstance{}.MakeInvoker()) {}
void RunSingle(std::vector<index_t> lengths, std::vector<index_t> reduceDims)
{
std::vector<index_t> reduceLength(reduceDims.size());
for(int i = 0; i < NumReduceDim; ++i)
{
reduceLength[i] = lengths[reduceDims[i]];
}
Tensor<XDataType> x(lengths);
Tensor<GammaDataType> gamma(reduceLength);
Tensor<BetaDataType> beta(reduceLength);
Tensor<YDataType> y(lengths);
Tensor<YDataType> y_ref(lengths);
x.GenerateTensorValue(GeneratorTensor_3<XDataType>{0.0, 1.0});
gamma.GenerateTensorValue(GeneratorTensor_3<GammaDataType>{0.0, 1.0});
beta.GenerateTensorValue(GeneratorTensor_3<BetaDataType>{0.0, 1.0});
DeviceMem x_dev(sizeof(XDataType) * x.mDesc.GetElementSpaceSize());
DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpaceSize());
DeviceMem beta_dev(sizeof(BetaDataType) * beta.mDesc.GetElementSpaceSize());
DeviceMem y_dev(sizeof(YDataType) * y.mDesc.GetElementSpaceSize());
x_dev.ToDevice(x.mData.data());
gamma_dev.ToDevice(gamma.mData.data());
beta_dev.ToDevice(beta.mData.data());
auto device_instance = DeviceInstance{};
auto argument_ptr = device_instance.MakeArgumentPointer(
lengths,
std::vector<ck::index_t>{x.mDesc.GetStrides().begin(), x.mDesc.GetStrides().end()},
std::vector<ck::index_t>{gamma.mDesc.GetStrides().begin(),
gamma.mDesc.GetStrides().end()},
std::vector<ck::index_t>{beta.mDesc.GetStrides().begin(),
beta.mDesc.GetStrides().end()},
reduceDims,
1e-4,
x_dev.GetDeviceBuffer(),
gamma_dev.GetDeviceBuffer(),
beta_dev.GetDeviceBuffer(),
y_dev.GetDeviceBuffer(),
PassThrough{});
if(!device_instance.IsSupportedArgument(argument_ptr.get()))
{
return;
}
auto invoker_ptr = device_instance.MakeInvokerPointer();
invoker_ptr->Run(argument_ptr.get());
ref_instance_invoker_.Run(
{x, gamma, beta, y_ref, PassThrough{}, lengths, reduceDims, 1e-4});
y_dev.FromDevice(y.mData.data());
bool pass;
if(std::is_same<XDataType, int8_t>::value)
{
EXPECT_TRUE(pass = ck::utils::check_err(
y.mData, y_ref.mData, "Error: Incorrect results!", 0, 1));
}
else
{
EXPECT_TRUE(pass = ck::utils::check_err(
y.mData, y_ref.mData, "Error: Incorrect results!", 1e-3, 1e-3));
}
if(!pass)
{
FAIL() << "Failure in input lengths = [" << serialize_range(lengths) << "], "
<< "reduce dim = [" << serialize_range(reduceDims) << "].";
}
}
void Run()
{
for(auto length : this->lengths_)
{
this->RunSingle(length, reduceDims_[0]);
}
}
std::vector<std::vector<index_t>> lengths_ = {
{4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
std::vector<std::vector<index_t>> reduceDims_ = {{1}};
typename ReferenceInstance::Invoker ref_instance_invoker_;
};
} // namespace ck
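For orientation, the normalization these cases verify is the standard layernorm: each row is centered by its mean, scaled by the inverse standard deviation (with the epsilon passed above, 1e-4), then scaled and shifted by gamma and beta. Below is a minimal host-side sketch for the 2D [M, K] shapes in lengths_; it is illustrative only, not the actual ck::tensor_operation::host::ReferenceLayernorm implementation, and the function name is made up.

// Minimal host-side layernorm for an [M, K] tensor reduced over K — an
// illustrative stand-in for what the reference instance computes.
#include <cmath>
#include <vector>

std::vector<float> naive_layernorm_2d(const std::vector<float>& x,
                                      const std::vector<float>& gamma,
                                      const std::vector<float>& beta,
                                      int M,
                                      int K,
                                      float eps = 1e-4f)
{
    std::vector<float> y(x.size());
    for(int m = 0; m < M; ++m)
    {
        // Mean and (biased) variance over the reduce dimension.
        float mean = 0.f;
        for(int k = 0; k < K; ++k)
            mean += x[m * K + k];
        mean /= K;
        float var = 0.f;
        for(int k = 0; k < K; ++k)
        {
            const float d = x[m * K + k] - mean;
            var += d * d;
        }
        var /= K;
        // Normalize, then apply the per-column scale (gamma) and shift (beta).
        const float inv_std = 1.f / std::sqrt(var + eps);
        for(int k = 0; k < K; ++k)
            y[m * K + k] = gamma[k] * (x[m * K + k] - mean) * inv_std + beta[k];
    }
    return y;
}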
 add_test_executable(test_magic_number_division magic_number_division.cpp)
-target_link_libraries(test_magic_number_division PRIVATE host_tensor)
+target_link_libraries(test_magic_number_division PRIVATE utility)
@@ -9,9 +9,9 @@
 #include "ck/ck.hpp"
 #include "ck/utility/magic_division.hpp"
 #include "ck/library/utility/check_err.hpp"
-#include "ck/library/host_tensor/device_memory.hpp"
-#include "ck/library/host_tensor/host_tensor.hpp"
-#include "ck/library/host_tensor/host_tensor_generator.hpp"
+#include "ck/library/utility/device_memory.hpp"
+#include "ck/library/utility/host_tensor.hpp"
+#include "ck/library/utility/host_tensor_generator.hpp"
 __global__ void gpu_magic_number_division(uint32_t magic_multiplier,
                                           uint32_t magic_shift,
...
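For context, the kernel above exercises "magic number" division: integer division by a fixed divisor d is replaced with one widening multiply and one shift, using a precomputed (multiplier, shift) pair like the magic_multiplier/magic_shift arguments in the kernel signature. The following is a minimal standalone sketch of the classic Granlund-Montgomery precomputation; it is illustrative only — the real helper is in ck/utility/magic_division.hpp and its API differs — and MagicDivider is a hypothetical name.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Precompute (multiplier, shift) such that n / d == (n * multiplier) >> shift
// for every uint32_t n. Hypothetical helper, not the ck::magic_division API.
struct MagicDivider
{
    uint64_t multiplier;
    uint32_t shift;

    explicit MagicDivider(uint32_t d)
    {
        uint32_t l = 0; // l = ceil(log2(d))
        while((uint64_t{1} << l) < d)
            ++l;
        shift = 32 + l;
        // multiplier = ceil(2^shift / d); may need more than 64 bits of
        // intermediate precision, so compute in 128-bit (GCC/Clang extension).
        multiplier = static_cast<uint64_t>(((__uint128_t{1} << shift) + d - 1) / d);
    }

    uint32_t divide(uint32_t n) const
    {
        // One widening multiply and one shift instead of an integer divide.
        return static_cast<uint32_t>((__uint128_t{n} * multiplier) >> shift);
    }
};

int main()
{
    for(uint32_t d : {1u, 3u, 7u, 255u, 1000u, 65537u})
    {
        const MagicDivider magic(d);
        for(uint32_t n : {0u, 1u, 2u, 99u, 4096u, 123456789u, 0xFFFFFFFFu})
            assert(magic.divide(n) == n / d);
    }
    std::puts("magic number division matches plain integer division");
    return 0;
}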
 add_test_executable(test_reduce_no_index reduce_no_index.cpp)
 add_test_executable(test_reduce_with_index reduce_with_index.cpp)
-target_link_libraries(test_reduce_no_index PRIVATE host_tensor)
+target_link_libraries(test_reduce_no_index PRIVATE utility)
 target_link_libraries(test_reduce_no_index PRIVATE device_reduce_instance)
-target_link_libraries(test_reduce_with_index PRIVATE host_tensor)
+target_link_libraries(test_reduce_with_index PRIVATE utility)
 target_link_libraries(test_reduce_with_index PRIVATE device_reduce_instance)
@@ -3,7 +3,7 @@
 #include <getopt.h>
-#include "ck/library/host_tensor/host_common_util.hpp"
+#include "ck/library/utility/host_common_util.hpp"
 #include "profiler/include/profile_reduce_impl.hpp"
 using namespace ck;
...
@@ -3,7 +3,7 @@
 #include <getopt.h>
-#include "ck/library/host_tensor/host_common_util.hpp"
+#include "ck/library/utility/host_common_util.hpp"
 #include "profiler/include/profile_reduce_impl.hpp"
 using namespace ck;
...
 add_gtest_executable(test_reference_conv_fwd reference_conv_fwd.cpp)
-target_link_libraries(test_reference_conv_fwd PRIVATE host_tensor conv_util)
+target_link_libraries(test_reference_conv_fwd PRIVATE utility)
@@ -13,74 +13,64 @@
 #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
 #include "ck/library/utility/check_err.hpp"
-#include "ck/library/utility/conv_util.hpp"
 #include "ck/library/utility/fill.hpp"
-#include "ck/library/host_tensor/host_tensor.hpp"
+#include "ck/library/utility/host_tensor.hpp"
+#include "ck/library/utility/convolution_parameter.hpp"
+#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
 #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
 namespace {
 using InElementOp  = ck::tensor_operation::element_wise::PassThrough;
 using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
 using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
-template <ck::index_t NDim,
+template <ck::index_t NDimSpatial,
           typename InDataType  = float,
           typename WeiDataType = float,
           typename OutDataType = float,
-          typename InLayout    = ck::tensor_layout::convolution::NHWC,
-          typename WeiLayout   = ck::tensor_layout::convolution::KYXC,
-          typename OutLayout   = ck::tensor_layout::convolution::NHWK,
+          typename InLayout    = ck::tensor_layout::convolution::GNHWC,
+          typename WeiLayout   = ck::tensor_layout::convolution::GKYXC,
+          typename OutLayout   = ck::tensor_layout::convolution::GNHWK,
           typename FillInputOp   = ck::utils::FillMonotonicSeq<InDataType>,
           typename FillWeightsOp = ck::utils::FillConstant<WeiDataType>>
 Tensor<OutDataType>
-run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
+run_reference_convolution_forward(const ck::utils::conv::ConvParam& conv_param,
                                   const FillInputOp& fill_input_op     = FillInputOp{},
                                   const FillWeightsOp& fill_weights_op = FillWeightsOp{0.5f})
 {
-    std::vector<std::size_t> input_dims{static_cast<std::size_t>(params.N_),
-                                        static_cast<std::size_t>(params.C_)};
-    input_dims.insert(std::end(input_dims),
-                      std::begin(params.input_spatial_lengths_),
-                      std::end(params.input_spatial_lengths_));
-    std::vector<std::size_t> filter_dims{static_cast<std::size_t>(params.K_),
-                                         static_cast<std::size_t>(params.C_)};
-    filter_dims.insert(std::end(filter_dims),
-                       std::begin(params.filter_spatial_lengths_),
-                       std::end(params.filter_spatial_lengths_));
-    const std::vector<ck::index_t>& output_spatial_lengths = params.GetOutputSpatialLengths();
-    std::vector<std::size_t> output_dims{static_cast<std::size_t>(params.N_),
-                                         static_cast<std::size_t>(params.K_)};
-    output_dims.insert(std::end(output_dims),
-                       std::begin(output_spatial_lengths),
-                       std::end(output_spatial_lengths));
-    Tensor<InDataType> input(ck::utils::conv::get_host_tensor_descriptor(input_dims, InLayout{}));
-    Tensor<WeiDataType> weights(
-        ck::utils::conv::get_host_tensor_descriptor(filter_dims, WeiLayout{}));
-    Tensor<OutDataType> host_output(
-        ck::utils::conv::get_host_tensor_descriptor(output_dims, OutLayout{}));
+    const auto in_g_n_c_wis_desc =
+        ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(conv_param);
+    const auto wei_g_k_c_xs_desc =
+        ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(conv_param);
+    const auto out_g_n_k_wos_desc =
+        ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(conv_param);
+    Tensor<InDataType> input(in_g_n_c_wis_desc);
+    Tensor<WeiDataType> weights(wei_g_k_c_xs_desc);
+    Tensor<OutDataType> host_output(out_g_n_k_wos_desc);
     fill_input_op(input.begin(), input.end());
     fill_weights_op(weights.begin(), weights.end());
     std::fill(host_output.begin(), host_output.end(), OutDataType(0.f));
-    auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<InDataType,
+    auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
+                                                                 InDataType,
                                                                  WeiDataType,
                                                                  OutDataType,
                                                                  InElementOp,
                                                                  WeiElementOp,
-                                                                 OutElementOp,
-                                                                 NDim>();
+                                                                 OutElementOp>();
     auto ref_invoker  = ref_conv.MakeInvoker();
     auto ref_argument = ref_conv.MakeArgument(input,
                                               weights,
                                               host_output,
-                                              params.conv_filter_strides_,
-                                              params.conv_filter_dilations_,
-                                              params.input_left_pads_,
-                                              params.input_right_pads_,
+                                              conv_param.conv_filter_strides_,
+                                              conv_param.conv_filter_dilations_,
+                                              conv_param.input_left_pads_,
+                                              conv_param.input_right_pads_,
                                               InElementOp{},
                                               WeiElementOp{},
                                               OutElementOp{});
@@ -91,21 +81,29 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParam& conv_param,
 } // anonymous namespace
-TEST(ReferenceConvolutionFWD, Conv2DNHWC)
+// Reference convolution assumes the dimensions of the tensor descriptors are in
+// GNCDHW/GKCZYX/GNKDHW order, regardless of the physical tensor layouts in memory.
+// Some tests below assume the tensor descriptor dimensions can be in a different
+// order and are therefore disabled.
+// TODO: add more tests that comply with the dimension-order assumption of the
+// reference convolution, and add tests for more physical layouts.
+#if 0
+TEST(ReferenceConvolutionFWD, Conv2DGNHWC)
 {
-    ck::utils::conv::ConvParams params;
-    params.N_                      = 1;
-    params.K_                      = 1;
-    params.C_                      = 2;
-    params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3};
-    params.input_spatial_lengths_  = std::vector<ck::index_t>{6, 6};
-    params.conv_filter_strides_    = std::vector<ck::index_t>{1, 1};
-    params.conv_filter_dilations_  = std::vector<ck::index_t>{1, 1};
-    params.input_left_pads_        = std::vector<ck::index_t>{0, 0};
-    params.input_right_pads_       = std::vector<ck::index_t>{0, 0};
-    auto out_tensor = run_reference_convolution_forward<2>(params);
-    std::vector<std::size_t> ref_dims{1, 1, 4, 4};
+    ck::utils::conv::ConvParam conv_param(2,
+                                          1,
+                                          1,
+                                          1,
+                                          2,
+                                          std::vector<ck::index_t>{3, 3},
+                                          std::vector<ck::index_t>{6, 6},
+                                          std::vector<ck::index_t>{1, 1},
+                                          std::vector<ck::index_t>{1, 1},
+                                          std::vector<ck::index_t>{0, 0},
+                                          std::vector<ck::index_t>{0, 0});
+    auto out_tensor = run_reference_convolution_forward<2>(conv_param);
+    std::vector<std::size_t> ref_dims{1, 1, 4, 4, 1};
     std::vector<float> ref_data{130.5,
                                 148.5,
                                 166.5,
@@ -127,21 +125,22 @@ TEST(ReferenceConvolutionFWD, Conv2DGNHWC)
     EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
 }
-TEST(ReferenceConvolutionFWD, Conv2DNHWCStridesDilationsPadding)
+TEST(ReferenceConvolutionFWD, Conv2DGNHWCStridesDilationsPadding)
 {
-    ck::utils::conv::ConvParams params;
-    params.N_                      = 1;
-    params.K_                      = 2;
-    params.C_                      = 2;
-    params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3};
-    params.input_spatial_lengths_  = std::vector<ck::index_t>{12, 12};
-    params.conv_filter_strides_    = std::vector<ck::index_t>{2, 2};
-    params.conv_filter_dilations_  = std::vector<ck::index_t>{2, 2};
-    params.input_left_pads_        = std::vector<ck::index_t>{1, 1};
-    params.input_right_pads_       = std::vector<ck::index_t>{1, 1};
-    auto out_tensor = run_reference_convolution_forward<2>(params);
-    std::vector<std::size_t> ref_dims = std::vector<std::size_t>{1, 2, 5, 5};
+    ck::utils::conv::ConvParam conv_param(2,
+                                          1,
+                                          1,
+                                          2,
+                                          2,
+                                          std::vector<ck::index_t>{3, 3},
+                                          std::vector<ck::index_t>{12, 12},
+                                          std::vector<ck::index_t>{2, 2},
+                                          std::vector<ck::index_t>{2, 2},
+                                          std::vector<ck::index_t>{1, 1},
+                                          std::vector<ck::index_t>{1, 1});
+    auto out_tensor = run_reference_convolution_forward<2>(conv_param);
+    std::vector<std::size_t> ref_dims = std::vector<std::size_t>{1, 5, 5, 2};
     std::vector<float> ref_data{
         210., 210., 327., 327., 351., 351., 375., 375., 399., 399.,
         459., 459., 706.5, 706.5, 742.5, 742.5, 778.5, 778.5, 814.5, 814.5,
@@ -153,88 +152,88 @@ TEST(ReferenceConvolutionFWD, Conv2DGNHWCStridesDilationsPadding)
     EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
 }
-TEST(ReferenceConvolutionFWD, Conv1DNWC)
+TEST(ReferenceConvolutionFWD, Conv1DGNWC)
 {
-    ck::utils::conv::ConvParams params;
-    params.num_dim_spatial_        = 1;
-    params.N_                      = 1;
-    params.K_                      = 1;
-    params.C_                      = 2;
-    params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
-    params.input_spatial_lengths_  = std::vector<ck::index_t>{6};
-    params.conv_filter_strides_    = std::vector<ck::index_t>{1};
-    params.conv_filter_dilations_  = std::vector<ck::index_t>{1};
-    params.input_left_pads_        = std::vector<ck::index_t>{0};
-    params.input_right_pads_       = std::vector<ck::index_t>{0};
+    ck::utils::conv::ConvParam conv_param(1,
+                                          1,
+                                          1,
+                                          1,
+                                          2,
+                                          std::vector<ck::index_t>{3},
+                                          std::vector<ck::index_t>{6},
+                                          std::vector<ck::index_t>{1},
+                                          std::vector<ck::index_t>{1},
+                                          std::vector<ck::index_t>{0},
+                                          std::vector<ck::index_t>{0});
     auto out_tensor =
         run_reference_convolution_forward<1,
                                           float,
                                           float,
                                           float,
-                                          ck::tensor_layout::convolution::NWC,
-                                          ck::tensor_layout::convolution::KXC,
-                                          ck::tensor_layout::convolution::NWK>(params);
-    std::vector<std::size_t> ref_dims{1, 1, 4};
+                                          ck::tensor_layout::convolution::GNWC,
+                                          ck::tensor_layout::convolution::GKXC,
+                                          ck::tensor_layout::convolution::GNWK>(conv_param);
+    std::vector<std::size_t> ref_dims{1, 1, 4, 1};
     std::vector<float> ref_data{7.5, 13.5, 19.5, 25.5};
     EXPECT_TRUE(ck::utils::check_err(
         out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
     EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
 }
-TEST(ReferenceConvolutionFWD, Conv1DNWCStridesDilationsPadding)
+TEST(ReferenceConvolutionFWD, Conv1DGNWCStridesDilationsPadding)
 {
-    ck::utils::conv::ConvParams params;
-    params.num_dim_spatial_        = 1;
-    params.N_                      = 1;
-    params.K_                      = 2;
-    params.C_                      = 2;
-    params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
-    params.input_spatial_lengths_  = std::vector<ck::index_t>{12};
-    params.conv_filter_strides_    = std::vector<ck::index_t>{2};
-    params.conv_filter_dilations_  = std::vector<ck::index_t>{2};
-    params.input_left_pads_        = std::vector<ck::index_t>{1};
-    params.input_right_pads_       = std::vector<ck::index_t>{1};
+    ck::utils::conv::ConvParam conv_param(1,
                                          1,
                                          1,
                                          2,
                                          2,
+                                          std::vector<ck::index_t>{3},
+                                          std::vector<ck::index_t>{12},
+                                          std::vector<ck::index_t>{2},
+                                          std::vector<ck::index_t>{2},
+                                          std::vector<ck::index_t>{1},
+                                          std::vector<ck::index_t>{1});
    auto out_tensor =
        run_reference_convolution_forward<1,
                                          float,
                                          float,
                                          float,
-                                          ck::tensor_layout::convolution::NWC,
-                                          ck::tensor_layout::convolution::KXC,
-                                          ck::tensor_layout::convolution::NWK>(params);
-    std::vector<std::size_t> ref_dims{1, 2, 5};
+                                          ck::tensor_layout::convolution::GNWC,
+                                          ck::tensor_layout::convolution::GKXC,
+                                          ck::tensor_layout::convolution::GNWK>(conv_param);
+    std::vector<std::size_t> ref_dims{1, 1, 5, 2};
     std::vector<float> ref_data{9., 9., 19.5, 19.5, 31.5, 31.5, 43.5, 43.5, 55.5, 55.5};
     EXPECT_TRUE(ck::utils::check_err(
         out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
     EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
 }
-TEST(ReferenceConvolutionFWD, Conv1DNWCSameOutputSize)
+TEST(ReferenceConvolutionFWD, Conv1DGNWCSameOutputSize)
 {
-    ck::utils::conv::ConvParams params;
-    params.num_dim_spatial_        = 1;
-    params.N_                      = 2;
-    params.K_                      = 16;
-    params.C_                      = 4;
-    params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
-    params.input_spatial_lengths_  = std::vector<ck::index_t>{16};
-    params.conv_filter_strides_    = std::vector<ck::index_t>{1};
-    params.conv_filter_dilations_  = std::vector<ck::index_t>{1};
-    params.input_left_pads_        = std::vector<ck::index_t>{1};
-    params.input_right_pads_       = std::vector<ck::index_t>{1};
+    ck::utils::conv::ConvParam conv_param(1,
+                                          1,
+                                          2,
+                                          16,
+                                          4,
+                                          std::vector<ck::index_t>{3},
+                                          std::vector<ck::index_t>{16},
+                                          std::vector<ck::index_t>{1},
+                                          std::vector<ck::index_t>{1},
+                                          std::vector<ck::index_t>{1},
+                                          std::vector<ck::index_t>{1});
    auto out_tensor2 = run_reference_convolution_forward<1,
                                                         float,
                                                         float,
                                                         float,
-                                                         ck::tensor_layout::convolution::NWC,
-                                                         ck::tensor_layout::convolution::KXC,
-                                                         ck::tensor_layout::convolution::NWK>(
-        params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
-    std::vector<std::size_t> ref_dims{2, 16, 16};
+                                                         ck::tensor_layout::convolution::GNWC,
+                                                         ck::tensor_layout::convolution::GKXC,
+                                                         ck::tensor_layout::convolution::GNWK>(
+        conv_param, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
+    std::vector<std::size_t> ref_dims{1, 2, 16, 16};
     std::vector<float> ref_data{
         1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
         1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
@@ -304,30 +303,31 @@ TEST(ReferenceConvolutionFWD, Conv1DGNWCSameOutputSize)
     EXPECT_TRUE(ck::utils::check_err(
         out_tensor2.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
     EXPECT_TRUE(ck::utils::check_err(out_tensor2.mData, ref_data, "Error: incorrect results!"));
 }
+#endif
-TEST(ReferenceConvolutionFWD, Conv3DNCDHW)
+TEST(ReferenceConvolutionFWD, Conv3DGNCDHW)
 {
-    ck::utils::conv::ConvParams params;
-    params.num_dim_spatial_        = 3;
-    params.N_                      = 1;
-    params.K_                      = 1;
-    params.C_                      = 2;
-    params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
-    params.input_spatial_lengths_  = std::vector<ck::index_t>{6, 6, 6};
-    params.conv_filter_strides_    = std::vector<ck::index_t>{1, 1, 1};
-    params.conv_filter_dilations_  = std::vector<ck::index_t>{1, 1, 1};
-    params.input_left_pads_        = std::vector<ck::index_t>{0, 0, 0};
-    params.input_right_pads_       = std::vector<ck::index_t>{0, 0, 0};
+    ck::utils::conv::ConvParam conv_param(3,
+                                          1,
+                                          1,
+                                          1,
+                                          2,
+                                          std::vector<ck::index_t>{3, 3, 3},
+                                          std::vector<ck::index_t>{6, 6, 6},
+                                          std::vector<ck::index_t>{1, 1, 1},
+                                          std::vector<ck::index_t>{1, 1, 1},
+                                          std::vector<ck::index_t>{0, 0, 0},
+                                          std::vector<ck::index_t>{0, 0, 0});
    auto out_tensor = run_reference_convolution_forward<3,
                                                        float,
                                                        float,
                                                        float,
-                                                        ck::tensor_layout::convolution::NCDHW,
-                                                        ck::tensor_layout::convolution::KCZYX,
-                                                        ck::tensor_layout::convolution::NKDHW>(
-        params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
-    std::vector<std::size_t> ref_dims{1, 1, 4, 4, 4};
+                                                        ck::tensor_layout::convolution::GNCDHW,
+                                                        ck::tensor_layout::convolution::GKCZYX,
+                                                        ck::tensor_layout::convolution::GNKDHW>(
+        conv_param, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
+    std::vector<std::size_t> ref_dims{1, 1, 1, 4, 4, 4};
     std::vector<float> ref_data{
         407.7, 410.40002, 413.09998, 415.80002, 423.90002, 426.6, 429.30002, 432.,
         440.1, 442.80002, 445.5, 448.2, 456.30002, 459., 461.7, 464.40002,
@@ -344,29 +344,29 @@ TEST(ReferenceConvolutionFWD, Conv3DGNCDHW)
     EXPECT_TRUE(
         ck::utils::check_err(out_tensor.mData, ref_data, "Error [case 1]: incorrect results!"));
 }
-TEST(ReferenceConvolutionFWD, Conv3DNCDHWStridesDilations)
+TEST(ReferenceConvolutionFWD, Conv3DGNCDHWStridesDilations)
 {
-    ck::utils::conv::ConvParams params;
-    params.num_dim_spatial_        = 3;
-    params.N_                      = 1;
-    params.K_                      = 2;
-    params.C_                      = 2;
-    params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
-    params.input_spatial_lengths_  = std::vector<ck::index_t>{12, 12, 12};
-    params.conv_filter_strides_    = std::vector<ck::index_t>{3, 3, 3};
-    params.conv_filter_dilations_  = std::vector<ck::index_t>{1, 1, 1};
-    params.input_left_pads_        = std::vector<ck::index_t>{0, 0, 0};
-    params.input_right_pads_       = std::vector<ck::index_t>{0, 0, 0};
+    ck::utils::conv::ConvParam conv_param(3,
+                                          1,
+                                          1,
+                                          2,
+                                          2,
+                                          std::vector<ck::index_t>{3, 3, 3},
+                                          std::vector<ck::index_t>{12, 12, 12},
+                                          std::vector<ck::index_t>{3, 3, 3},
+                                          std::vector<ck::index_t>{1, 1, 1},
+                                          std::vector<ck::index_t>{0, 0, 0},
+                                          std::vector<ck::index_t>{0, 0, 0});
    auto out_tensor = run_reference_convolution_forward<3,
                                                        float,
                                                        float,
                                                        float,
-                                                        ck::tensor_layout::convolution::NCDHW,
-                                                        ck::tensor_layout::convolution::KCZYX,
-                                                        ck::tensor_layout::convolution::NKDHW>(
-        params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
-    std::vector<std::size_t> ref_dims{1, 2, 4, 4, 4};
+                                                        ck::tensor_layout::convolution::GNCDHW,
+                                                        ck::tensor_layout::convolution::GKCZYX,
+                                                        ck::tensor_layout::convolution::GNKDHW>(
+        conv_param, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
+    std::vector<std::size_t> ref_dims{1, 1, 2, 4, 4, 4};
     std::vector<float> ref_data{
         2756.7002, 2764.7998, 2772.9001, 2781., 2853.9001, 2862., 2870.1, 2878.2002,
         2951.1, 2959.2002, 2967.2998, 2975.4001, 3048.2998, 3056.4001, 3064.5, 3072.6,
...
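As a sanity check on the hard-coded ref_dims above, the output spatial lengths follow the standard convolution size formula (this is the relationship the old ConvParams::GetOutputSpatialLengths encoded). A standalone sketch under that assumption, with a made-up helper name:

#include <cassert>

// Standard convolution output-size formula, applied per spatial dimension.
int conv_out_length(int in, int filter, int stride, int dilation, int pad_left, int pad_right)
{
    const int effective_filter = dilation * (filter - 1) + 1;
    return (in + pad_left + pad_right - effective_filter) / stride + 1;
}

int main()
{
    assert(conv_out_length(6, 3, 1, 1, 0, 0) == 4);   // Conv2DGNHWC: 6x6 -> 4x4
    assert(conv_out_length(12, 3, 2, 2, 1, 1) == 5);  // Conv2DGNHWCStridesDilationsPadding: 12x12 -> 5x5
    assert(conv_out_length(16, 3, 1, 1, 1, 1) == 16); // Conv1DGNWCSameOutputSize: 16 -> 16
    assert(conv_out_length(12, 3, 3, 1, 0, 0) == 4);  // Conv3DGNCDHWStridesDilations: 12^3 -> 4^3
    return 0;
}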
@@ -3,9 +3,9 @@ add_custom_target(test_softmax)
 add_gtest_executable(test_softmax_fp32 test_softmax_fp32.cpp)
 add_gtest_executable(test_softmax_fp16 test_softmax_fp16.cpp)
 add_gtest_executable(test_softmax_int8 test_softmax_int8.cpp)
-target_link_libraries(test_softmax_fp32 PRIVATE host_tensor)
-target_link_libraries(test_softmax_fp16 PRIVATE host_tensor)
-target_link_libraries(test_softmax_int8 PRIVATE host_tensor)
+target_link_libraries(test_softmax_fp32 PRIVATE utility)
+target_link_libraries(test_softmax_fp16 PRIVATE utility)
+target_link_libraries(test_softmax_int8 PRIVATE utility)
 add_dependencies(test_softmax test_softmax_fp32)
 add_dependencies(test_softmax test_softmax_fp16)
 add_dependencies(test_softmax test_softmax_int8)
\ No newline at end of file
@@ -12,8 +12,8 @@
 #include "ck/tensor_operation/gpu/device/device_softmax.hpp"
 #include "ck/library/utility/check_err.hpp"
-#include "ck/library/host_tensor/host_tensor.hpp"
-#include "ck/library/host_tensor/device_memory.hpp"
+#include "ck/library/utility/host_tensor.hpp"
+#include "ck/library/utility/device_memory.hpp"
 #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp"
 namespace ck {
@@ -80,8 +80,8 @@ class TestSoftmax : public ::testing::Test
 Tensor<OutDataType> out_ref(out);
-DeviceMem in_dev(sizeof(InDataType) * in.mDesc.GetElementSpace());
-DeviceMem out_dev(sizeof(OutDataType) * out.mDesc.GetElementSpace());
+DeviceMem in_dev(sizeof(InDataType) * in.mDesc.GetElementSpaceSize());
+DeviceMem out_dev(sizeof(OutDataType) * out.mDesc.GetElementSpaceSize());
 in_dev.ToDevice(in.mData.data());
 out_dev.ToDevice(out.mData.data());
...