Commit 7a3b49e5 authored by Chao Liu's avatar Chao Liu
Browse files

Merge remote-tracking branch 'origin/develop' into contraction

parents e07b3d8e d3051d75
include_directories(BEFORE
${PROJECT_SOURCE_DIR}/profiler/include
${PROJECT_SOURCE_DIR}/external/include/half
)
add_test_executable(test_conv2d_bwd_weight conv2d_bwd_weight.cpp) add_test_executable(test_conv2d_bwd_weight conv2d_bwd_weight.cpp)
target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor device_conv2d_bwd_weight_instance conv_util) target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor device_conv2d_bwd_weight_instance conv_util)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include <vector> #include <vector>
#include "conv_util.hpp" #include "test/convnd_fwd/conv_util.hpp"
#include "profile_conv_bwd_weight_impl.hpp" #include "profiler/include/profile_conv_bwd_weight_impl.hpp"
int test_self() int test_self()
{ {
......
#include <iostream> // SPDX-License-Identifier: MIT
#include <string> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include <gtest/gtest.h> #include <iostream>
#include <string>
#include "config.hpp" #include <vector>
#include "conv_util.hpp" #include <gtest/gtest.h>
#include "tensor_layout.hpp"
#include "check_err.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
namespace {
#include "ck/library/utility/check_err.hpp"
class TestConvUtil : public ::testing::Test #include "ck/library/utility/conv_util.hpp"
{
public: namespace {
void SetNDParams(std::size_t ndims)
{ class TestConvUtil : public ::testing::Test
conv_params.num_dim_spatial_ = ndims; {
conv_params.filter_spatial_lengths_ = std::vector<ck::index_t>(ndims, 3); public:
conv_params.input_spatial_lengths_ = std::vector<ck::index_t>(ndims, 71); void SetNDParams(std::size_t ndims)
conv_params.conv_filter_strides_ = std::vector<ck::index_t>(ndims, 2); {
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>(ndims, 1); conv_params.num_dim_spatial_ = ndims;
conv_params.input_left_pads_ = std::vector<ck::index_t>(ndims, 1); conv_params.filter_spatial_lengths_ = std::vector<ck::index_t>(ndims, 3);
conv_params.input_right_pads_ = std::vector<ck::index_t>(ndims, 1); conv_params.input_spatial_lengths_ = std::vector<ck::index_t>(ndims, 71);
} conv_params.conv_filter_strides_ = std::vector<ck::index_t>(ndims, 2);
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>(ndims, 1);
protected: conv_params.input_left_pads_ = std::vector<ck::index_t>(ndims, 1);
// ------- default 2D ------- conv_params.input_right_pads_ = std::vector<ck::index_t>(ndims, 1);
// input NCHW {128,192,71,71}, }
// weights KCYX {256,192,3,3},
// stride {2,2}, protected:
// dilations {1,1}, // ------- default 2D -------
// padding {{1,1}, {1,1}} // input NCHW {128,192,71,71},
ck::utils::conv::ConvParams conv_params; // weights KCYX {256,192,3,3},
}; // stride {2,2},
// dilations {1,1},
} // namespace // padding {{1,1}, {1,1}}
ck::utils::conv::ConvParams conv_params;
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D) };
{
ck::utils::conv::ConvParams conv_params; } // namespace
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D)
std::vector<ck::index_t>{36, 36}, {
"Error: ConvParams 2D default constructor.")); ck::utils::conv::ConvParams conv_params;
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1}; EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
out_spatial_len = conv_params.GetOutputSpatialLengths(); std::vector<ck::index_t>{36, 36},
EXPECT_TRUE(ck::utils::check_err( "Error: ConvParams 2D default constructor."));
out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1};
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2}; out_spatial_len = conv_params.GetOutputSpatialLengths();
conv_params.input_left_pads_ = std::vector<ck::index_t>{2, 2}; EXPECT_TRUE(ck::utils::check_err(
conv_params.input_right_pads_ = std::vector<ck::index_t>{2, 2}; out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}."));
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2};
std::vector<ck::index_t>{37, 37}, conv_params.input_left_pads_ = std::vector<ck::index_t>{2, 2};
"Error: ConvParams 2D padding left/right {2,2}.")); conv_params.input_right_pads_ = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2}; EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
out_spatial_len = conv_params.GetOutputSpatialLengths(); std::vector<ck::index_t>{37, 37},
EXPECT_TRUE(ck::utils::check_err( "Error: ConvParams 2D padding left/right {2,2}."));
out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}."));
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3}; out_spatial_len = conv_params.GetOutputSpatialLengths();
conv_params.input_left_pads_ = std::vector<ck::index_t>{1, 1}; EXPECT_TRUE(ck::utils::check_err(
conv_params.input_right_pads_ = std::vector<ck::index_t>{1, 1}; out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}."));
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths(); conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3};
EXPECT_TRUE( conv_params.input_left_pads_ = std::vector<ck::index_t>{1, 1};
ck::utils::check_err(out_spatial_len, conv_params.input_right_pads_ = std::vector<ck::index_t>{1, 1};
std::vector<ck::index_t>{23, 23}, conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
"Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}.")); out_spatial_len = conv_params.GetOutputSpatialLengths();
} EXPECT_TRUE(
ck::utils::check_err(out_spatial_len,
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths1D) std::vector<ck::index_t>{23, 23},
{ "Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}."));
SetNDParams(1); }
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths(); TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths1D)
EXPECT_TRUE(ck::utils::check_err( {
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D.")); SetNDParams(1);
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1}; std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
out_spatial_len = conv_params.GetOutputSpatialLengths(); EXPECT_TRUE(ck::utils::check_err(
EXPECT_TRUE(ck::utils::check_err( out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D."));
out_spatial_len, std::vector<ck::index_t>{71}, "Error: ConvParams 1D stride {1}."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1};
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2}; out_spatial_len = conv_params.GetOutputSpatialLengths();
conv_params.input_left_pads_ = std::vector<ck::index_t>{2}; EXPECT_TRUE(ck::utils::check_err(
conv_params.input_right_pads_ = std::vector<ck::index_t>{2}; out_spatial_len, std::vector<ck::index_t>{71}, "Error: ConvParams 1D stride {1}."));
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2};
std::vector<ck::index_t>{37}, conv_params.input_left_pads_ = std::vector<ck::index_t>{2};
"Error: ConvParams 1D padding left/right {2}.")); conv_params.input_right_pads_ = std::vector<ck::index_t>{2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2}; EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
out_spatial_len = conv_params.GetOutputSpatialLengths(); std::vector<ck::index_t>{37},
EXPECT_TRUE(ck::utils::check_err( "Error: ConvParams 1D padding left/right {2}."));
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D dilation {2}."));
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2};
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3}; out_spatial_len = conv_params.GetOutputSpatialLengths();
conv_params.input_left_pads_ = std::vector<ck::index_t>{1}; EXPECT_TRUE(ck::utils::check_err(
conv_params.input_right_pads_ = std::vector<ck::index_t>{1}; out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D dilation {2}."));
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2};
out_spatial_len = conv_params.GetOutputSpatialLengths(); conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3};
EXPECT_TRUE( conv_params.input_left_pads_ = std::vector<ck::index_t>{1};
ck::utils::check_err(out_spatial_len, conv_params.input_right_pads_ = std::vector<ck::index_t>{1};
std::vector<ck::index_t>{23}, conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2};
"Error: ConvParams 1D strides{3}, padding {1}, dilations {2}.")); out_spatial_len = conv_params.GetOutputSpatialLengths();
} EXPECT_TRUE(
ck::utils::check_err(out_spatial_len,
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D) std::vector<ck::index_t>{23},
{ "Error: ConvParams 1D strides{3}, padding {1}, dilations {2}."));
SetNDParams(3); }
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths(); TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D)
EXPECT_TRUE(ck::utils::check_err( {
out_spatial_len, std::vector<ck::index_t>{36, 36, 36}, "Error: ConvParams 3D.")); SetNDParams(3);
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1}; std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
out_spatial_len = conv_params.GetOutputSpatialLengths(); EXPECT_TRUE(ck::utils::check_err(
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, out_spatial_len, std::vector<ck::index_t>{36, 36, 36}, "Error: ConvParams 3D."));
std::vector<ck::index_t>{71, 71, 71},
"Error: ConvParams 3D stride {1, 1, 1}.")); conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2, 2}; EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
conv_params.input_left_pads_ = std::vector<ck::index_t>{2, 2, 2}; std::vector<ck::index_t>{71, 71, 71},
conv_params.input_right_pads_ = std::vector<ck::index_t>{2, 2, 2}; "Error: ConvParams 3D stride {1, 1, 1}."));
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2, 2};
std::vector<ck::index_t>{37, 37, 37}, conv_params.input_left_pads_ = std::vector<ck::index_t>{2, 2, 2};
"Error: ConvParams 3D padding left/right {2, 2, 2}.")); conv_params.input_right_pads_ = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2, 2}; EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
out_spatial_len = conv_params.GetOutputSpatialLengths(); std::vector<ck::index_t>{37, 37, 37},
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, "Error: ConvParams 3D padding left/right {2, 2, 2}."));
std::vector<ck::index_t>{36, 36, 36},
"Error: ConvParams 3D dilation {2, 2, 2}.")); conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3, 3}; EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
conv_params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1}; std::vector<ck::index_t>{36, 36, 36},
conv_params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1}; "Error: ConvParams 3D dilation {2, 2, 2}."));
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths(); conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3, 3};
EXPECT_TRUE(ck::utils::check_err( conv_params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1};
out_spatial_len, conv_params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1};
std::vector<ck::index_t>{23, 23, 23}, conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2, 2};
"Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}.")); out_spatial_len = conv_params.GetOutputSpatialLengths();
} EXPECT_TRUE(ck::utils::check_err(
out_spatial_len,
TEST(ConvUtil, GetHostTensorDescriptor) std::vector<ck::index_t>{23, 23, 23},
{ "Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}."));
namespace tl = ck::tensor_layout::convolution; }
std::vector<std::size_t> dims{2, 3, 4, 5};
HostTensorDescriptor h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NHWC{}); TEST(ConvUtil, GetHostTensorDescriptor)
EXPECT_TRUE(ck::utils::check_err( {
h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!")); namespace tl = ck::tensor_layout::convolution;
EXPECT_TRUE(ck::utils::check_err( std::vector<std::size_t> dims{2, 3, 4, 5};
h.GetStrides(), {3 * 4 * 5, 1, 3 * 5, 3}, "Error: wrong NHWC dimensions strides!")); HostTensorDescriptor h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NHWC{});
EXPECT_TRUE(ck::utils::check_err(
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCHW{}); h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err( EXPECT_TRUE(ck::utils::check_err(
h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!")); h.GetStrides(), {3 * 4 * 5, 1, 3 * 5, 3}, "Error: wrong NHWC dimensions strides!"));
EXPECT_TRUE(ck::utils::check_err(
h.GetStrides(), {3 * 4 * 5, 4 * 5, 5, 1}, "Error: wrong NCHW dimensions strides!")); h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCHW{});
EXPECT_TRUE(ck::utils::check_err(
dims = std::vector<std::size_t>{2, 3, 4}; h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!"));
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NWC{}); EXPECT_TRUE(ck::utils::check_err(
EXPECT_TRUE( h.GetStrides(), {3 * 4 * 5, 4 * 5, 5, 1}, "Error: wrong NCHW dimensions strides!"));
ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err( dims = std::vector<std::size_t>{2, 3, 4};
h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!")); h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NWC{});
EXPECT_TRUE(
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCW{}); ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!"));
EXPECT_TRUE( EXPECT_TRUE(ck::utils::check_err(
ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!")); h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!"));
EXPECT_TRUE(ck::utils::check_err(
h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!")); h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCW{});
EXPECT_TRUE(
dims = std::vector<std::size_t>{2, 3, 4, 5, 6}; ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!"));
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NDHWC{}); EXPECT_TRUE(ck::utils::check_err(
EXPECT_TRUE( h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!"));
ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(h.GetStrides(), dims = std::vector<std::size_t>{2, 3, 4, 5, 6};
{3 * 4 * 5 * 6, // N h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NDHWC{});
1, // C EXPECT_TRUE(
3 * 5 * 6, // D ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!"));
3 * 6, // H EXPECT_TRUE(ck::utils::check_err(h.GetStrides(),
3}, // W {3 * 4 * 5 * 6, // N
"Error: wrong NDHWC dimensions strides!")); 1, // C
3 * 5 * 6, // D
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCDHW{}); 3 * 6, // H
EXPECT_TRUE( 3}, // W
ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!")); "Error: wrong NDHWC dimensions strides!"));
EXPECT_TRUE(ck::utils::check_err(h.GetStrides(),
{3 * 4 * 5 * 6, // N h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCDHW{});
4 * 5 * 6, // C EXPECT_TRUE(
5 * 6, // D ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!"));
6, // H EXPECT_TRUE(ck::utils::check_err(h.GetStrides(),
1}, // W {3 * 4 * 5 * 6, // N
"Error: wrong NCDHW dimensions strides!")); 4 * 5 * 6, // C
} 5 * 6, // D
6, // H
1}, // W
"Error: wrong NCDHW dimensions strides!"));
}
include_directories(BEFORE
${PROJECT_SOURCE_DIR}/profiler/include
${PROJECT_SOURCE_DIR}/external/include/half
)
add_test_executable(test_convnd_bwd_data convnd_bwd_data.cpp) add_test_executable(test_convnd_bwd_data convnd_bwd_data.cpp)
target_link_libraries(test_convnd_bwd_data PRIVATE host_tensor device_convnd_bwd_data_instance conv_util) target_link_libraries(test_convnd_bwd_data PRIVATE host_tensor device_convnd_bwd_data_instance conv_util)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include <vector> #include <vector>
#include "profile_convnd_bwd_data_impl.hpp" #include "profiler/include/profile_convnd_bwd_data_impl.hpp"
int main() int main()
{ {
......
...@@ -5,7 +5,7 @@ target_link_libraries(test_conv1d_fwd PRIVATE host_tensor device_conv1d_fwd_inst ...@@ -5,7 +5,7 @@ target_link_libraries(test_conv1d_fwd PRIVATE host_tensor device_conv1d_fwd_inst
add_dependencies(test_convnd_fwd test_conv1d_fwd) add_dependencies(test_convnd_fwd test_conv1d_fwd)
add_gtest_executable(test_conv2d_fwd conv2d_fwd.cpp) add_gtest_executable(test_conv2d_fwd conv2d_fwd.cpp)
target_link_libraries(test_conv2d_fwd PRIVATE host_tensor device_conv2d_fwd_instance conv_util) target_link_libraries(test_conv2d_fwd PRIVATE host_tensor device_conv2d_fwd_instance device_convnd_2d_fwd_instance conv_util)
add_dependencies(test_convnd_fwd test_conv2d_fwd) add_dependencies(test_convnd_fwd test_conv2d_fwd)
add_gtest_executable(test_conv3d_fwd conv3d_fwd.cpp) add_gtest_executable(test_conv3d_fwd conv3d_fwd.cpp)
......
#include <iostream> // SPDX-License-Identifier: MIT
#include <stdexcept> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include <vector> #include <iostream>
#include "gtest/gtest.h" #include <tuple>
#include <vector>
#include "data_type.hpp" #include <gtest/gtest.h>
#include "element_wise_operation.hpp"
#include "library/include/ck/library/utility/conv_util.hpp" #include "ck/utility/data_type.hpp"
#include "conv_util.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/conv_util.hpp"
namespace { #include "test/convnd_fwd/conv_util.hpp"
template <typename T> namespace {
bool test_conv1d_nwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs)
{ class Conv1dFwdNWCInstances : public ::testing::Test
using namespace std::placeholders; {
using namespace ck::utils; public:
namespace ctl = ck::tensor_layout::convolution; template <typename T>
bool test_conv1d_nwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs,
ck::utils::conv::ConvParams params; const ck::utils::conv::ConvParams& params)
params.num_dim_spatial_ = 1; {
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3}; using namespace std::placeholders;
params.input_spatial_lengths_ = std::vector<ck::index_t>{71}; using namespace ck::utils;
params.conv_filter_strides_ = std::vector<ck::index_t>{2}; namespace ctl = ck::tensor_layout::convolution;
params.conv_filter_dilations_ = std::vector<ck::index_t>{1};
params.input_left_pads_ = std::vector<ck::index_t>{1}; conv::ConvFwdOpInstance<T,
params.input_right_pads_ = std::vector<ck::index_t>{1}; T,
T,
conv::ConvFwdOpInstance<T, T, T, ctl::NWC, ctl::KCX, ctl::NWK> conv_instance(params); ctl::NWC,
ctl::KXC,
auto reference_conv_fwd_fun = ctl::NWK,
std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3); ck::tensor_operation::element_wise::PassThrough,
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun); ck::tensor_operation::element_wise::PassThrough,
return run_engine.Test(conv_ptrs); ck::tensor_operation::element_wise::PassThrough,
} FillUniformDistributionIntegerValue<T>,
FillUniformDistributionIntegerValue<T>>
} // anonymous namespace conv_instance(params,
true,
TEST(Conv1DFwdNWC, TestConv1D) FillUniformDistributionIntegerValue<T>{},
{ FillUniformDistributionIntegerValue<T>{});
using namespace std::placeholders; auto reference_conv_fwd_fun =
using namespace ck::utils; std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
namespace ctl = ck::tensor_layout::convolution; OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(atol_);
ck::utils::conv::ConvParams params; run_engine.SetRtol(rtol_);
params.num_dim_spatial_ = 1; return run_engine.Test(conv_ptrs);
params.N_ = 2; }
params.K_ = 16;
params.C_ = 4; template <typename T>
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3}; bool test_default()
params.input_spatial_lengths_ = std::vector<ck::index_t>{16}; {
params.conv_filter_strides_ = std::vector<ck::index_t>{1}; return test_conv1d_nwc_instances<T>(
params.conv_filter_dilations_ = std::vector<ck::index_t>{1}; ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<1>(), params_default_);
params.input_left_pads_ = std::vector<ck::index_t>{1}; }
params.input_right_pads_ = std::vector<ck::index_t>{1};
template <typename T>
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs; bool test_filter1x1_stride1_pad0()
test::conv::get_test_convolution_fwd_instance<1>(conv_ptrs); {
conv::ConvFwdOpInstance<float, float, float, ctl::NWC, ctl::KCX, ctl::NWK> conv_instance( return test_conv1d_nwc_instances<T>(
params); ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<1>(),
params_filter1x1_stride1_pad0_);
auto reference_conv_fwd_fun = std::bind( }
conv::run_reference_convolution_forward<1, float, float, float>, params, _1, _2, _3);
OpInstanceRunEngine<float, float, float> run_engine(conv_instance, reference_conv_fwd_fun); template <typename T>
run_engine.SetAtol(1e-5); bool test_filter1x1_pad0()
run_engine.SetRtol(1e-4); {
EXPECT_TRUE(run_engine.Test(conv_ptrs)); return test_conv1d_nwc_instances<T>(
} ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<1>(),
params_filter1x1_pad0_);
TEST(Conv1DFwdNWC, Bf16Iinstances) }
{
EXPECT_TRUE(test_conv1d_nwc_instances<ck::bhalf_t>( static inline ck::utils::conv::ConvParams params_default_{
ck::utils::conv::ConvolutionFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>::Get<1>())); 1, 4, 256, 64, {3}, {71}, {2}, {2}, {2}, {2}};
} static inline ck::utils::conv::ConvParams params_filter1x1_stride1_pad0_{
1, 4, 256, 64, {1}, {28}, {1}, {1}, {0}, {0}};
TEST(Conv1DFwdNWC, F16Instances) static inline ck::utils::conv::ConvParams params_filter1x1_pad0_{
{ 1, 4, 256, 64, {1}, {28}, {2}, {1}, {0}, {0}};
EXPECT_TRUE(test_conv1d_nwc_instances<ck::half_t>(
ck::utils::conv::ConvolutionFwdInstances<ck::half_t, ck::half_t, ck::half_t>::Get<1>())); private:
} double atol_{1e-5};
double rtol_{1e-4};
TEST(Conv1DFwdNWC, F32Instances) };
{
EXPECT_TRUE(test_conv1d_nwc_instances<float>( } // anonymous namespace
ck::utils::conv::ConvolutionFwdInstances<float, float, float>::Get<1>()));
} TEST(Conv1DFwdNWC, IntegerValues)
{
TEST(Conv1DFwdNWC, Int8Instances) using namespace std::placeholders;
{ using namespace ck::utils;
EXPECT_TRUE(test_conv1d_nwc_instances<int8_t>( namespace ctl = ck::tensor_layout::convolution;
ck::utils::conv::ConvolutionFwdInstances<int8_t, int8_t, int8_t>::Get<1>())); using T = float;
}
ck::utils::conv::ConvParams params{1, 4, 256, 64, {3}, {36}, {1}, {2}, {2}, {2}};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<1, T, T, T, T>(conv_ptrs);
conv::ConvFwdOpInstance<T,
T,
T,
ctl::NWC,
ctl::KXC,
ctl::NWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistributionIntegerValue<T>,
FillUniformDistributionIntegerValue<T>>
conv_instance(params,
true,
FillUniformDistributionIntegerValue<T>{},
FillUniformDistributionIntegerValue<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-5);
run_engine.SetRtol(1e-4);
EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
TEST(Conv1DFwdNWC, FloatingPointValues)
{
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
using T = ck::half_t;
ck::utils::conv::ConvParams params{1, 4, 256, 64, {3}, {36}, {1}, {2}, {2}, {2}};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<1, T, T, T, float>(conv_ptrs);
conv::ConvFwdOpInstance<T,
T,
T,
ctl::NWC,
ctl::KXC,
ctl::NWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistribution<T>,
FillUniformDistribution<T>>
conv_instance(params, true, FillUniformDistribution<T>{}, FillUniformDistribution<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(0.1);
run_engine.SetRtol(1e-2);
EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
TEST_F(Conv1dFwdNWCInstances, BF16_default) { EXPECT_TRUE(this->test_default<ck::bhalf_t>()); }
TEST_F(Conv1dFwdNWCInstances, BF16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>());
}
TEST_F(Conv1dFwdNWCInstances, BF16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>());
}
TEST_F(Conv1dFwdNWCInstances, F16_default) { EXPECT_TRUE(this->test_default<ck::half_t>()); }
TEST_F(Conv1dFwdNWCInstances, F16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>());
}
TEST_F(Conv1dFwdNWCInstances, F16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>());
}
TEST_F(Conv1dFwdNWCInstances, F32_default) { EXPECT_TRUE(this->test_default<float>()); }
TEST_F(Conv1dFwdNWCInstances, F32_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>());
}
TEST_F(Conv1dFwdNWCInstances, F32_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<float>());
}
TEST_F(Conv1dFwdNWCInstances, I8_default) { EXPECT_TRUE(this->test_default<int8_t>()); }
TEST_F(Conv1dFwdNWCInstances, I8_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>());
}
TEST_F(Conv1dFwdNWCInstances, I8_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>());
}
#include <half.hpp> // SPDX-License-Identifier: MIT
#include <iostream> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include <vector> #include <tuple>
#include "gtest/gtest.h" #include <vector>
#include <gtest/gtest.h>
#include "data_type.hpp"
#include "element_wise_operation.hpp" #include "ck/utility/data_type.hpp"
#include "ck/library/utility/conv_util.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "conv_util.hpp" #include "ck/library/utility/conv_util.hpp"
#include "test/convnd_fwd/conv_util.hpp"
namespace {
namespace {
template <typename T>
bool test_conv2d_nhwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs) class Conv2dFwdNHWCInstances : public ::testing::Test
{ {
using namespace std::placeholders; public:
using namespace ck::utils; template <typename T>
bool test_conv2d_nhwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs,
conv::ConvParams params; const ck::utils::conv::ConvParams& params)
params.num_dim_spatial_ = 2; {
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3}; using namespace std::placeholders;
params.input_spatial_lengths_ = std::vector<ck::index_t>{71, 71}; using namespace ck::utils;
params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1}; conv::ConvFwdOpInstance<T,
params.input_left_pads_ = std::vector<ck::index_t>{1, 1}; T,
params.input_right_pads_ = std::vector<ck::index_t>{1, 1}; T,
ck::tensor_layout::convolution::NHWC,
conv::ConvFwdOpInstance<T, T, T> conv_instance(params); ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
auto reference_conv_fwd_fun = ck::tensor_operation::element_wise::PassThrough,
std::bind(conv::run_reference_convolution_forward<2, T, T, T>, params, _1, _2, _3); ck::tensor_operation::element_wise::PassThrough,
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun); ck::tensor_operation::element_wise::PassThrough,
return run_engine.Test(conv_ptrs); FillUniformDistributionIntegerValue<T>,
} FillUniformDistributionIntegerValue<T>>
conv_instance(params,
} // anonymous namespace true,
FillUniformDistributionIntegerValue<T>{},
TEST(Conv2DFwdNHWC, TestConv2D) FillUniformDistributionIntegerValue<T>{});
{ auto reference_conv_fwd_fun =
using namespace std::placeholders; std::bind(conv::run_reference_convolution_forward<2, T, T, T>, params, _1, _2, _3);
using namespace ck::utils; OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(atol_);
ck::utils::conv::ConvParams params; run_engine.SetRtol(rtol_);
params.N_ = 2; return run_engine.Test(conv_ptrs);
params.K_ = 16; }
params.C_ = 4;
params.input_spatial_lengths_ = std::vector<ck::index_t>{16, 16}; template <typename T>
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1}; bool test_default(bool use_convnd = false)
{
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs; if(use_convnd)
test::conv::get_test_convolution_fwd_instance<2>(conv_ptrs); {
conv::ConvFwdOpInstance<float, float, float> conv_instance(params); return test_conv2d_nhwc_instances<T>(
test::conv::ConvolutionNDFwdInstances<T, T, T>::Get(2), params_default_);
auto reference_conv_fwd_fun = std::bind( }
conv::run_reference_convolution_forward<2, float, float, float>, params, _1, _2, _3); else
OpInstanceRunEngine<float, float, float> run_engine(conv_instance, reference_conv_fwd_fun); {
run_engine.SetAtol(1e-5); return test_conv2d_nhwc_instances<T>(
run_engine.SetRtol(1e-4); ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(),
EXPECT_TRUE(run_engine.Test(conv_ptrs)); params_default_);
} }
}
TEST(Conv2DFwdNHWC, Bf16Instances)
{ template <typename T>
EXPECT_TRUE(test_conv2d_nhwc_instances<ck::bhalf_t>( bool test_filter1x1_stride1_pad0(bool use_convnd = false)
ck::utils::conv::ConvolutionFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>::Get<2>())); {
} if(use_convnd)
{
TEST(Conv2DFwdNHWC, F16Instances) return test_conv2d_nhwc_instances<T>(
{ test::conv::ConvolutionNDFwdInstances<T, T, T>::Get(2),
EXPECT_TRUE(test_conv2d_nhwc_instances<ck::half_t>( params_filter1x1_stride1_pad0_);
ck::utils::conv::ConvolutionFwdInstances<ck::half_t, ck::half_t, ck::half_t>::Get<2>())); }
} else
{
TEST(Conv2DFwdNHWC, BF32Instances) return test_conv2d_nhwc_instances<T>(
{ ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(),
EXPECT_TRUE(test_conv2d_nhwc_instances<float>( params_filter1x1_stride1_pad0_);
ck::utils::conv::ConvolutionFwdInstances<float, float, float>::Get<2>())); }
} }
TEST(Conv2DFwdNHWC, F32Instances) template <typename T>
{ bool test_filter1x1_pad0(bool use_convnd = false)
EXPECT_TRUE(test_conv2d_nhwc_instances<float>( {
ck::utils::conv::ConvolutionFwdInstances<float, float, float>::Get<2>())); if(use_convnd)
} {
return test_conv2d_nhwc_instances<T>(
TEST(Conv2DFwdNHWC, Int8Instances) test::conv::ConvolutionNDFwdInstances<T, T, T>::Get(2), params_filter1x1_pad0_);
{ }
EXPECT_TRUE(test_conv2d_nhwc_instances<int8_t>( else
ck::utils::conv::ConvolutionFwdInstances<int8_t, int8_t, int8_t>::Get<2>())); {
} return test_conv2d_nhwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(),
params_filter1x1_pad0_);
}
}
template <typename T>
bool test_oddC()
{
return test_conv2d_nhwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(), params_oddC_);
}
static inline ck::utils::conv::ConvParams params_default_{
2, 4, 256, 64, {3, 3}, {36, 36}, {2, 2}, {2, 2}, {2, 2}, {2, 2}};
static inline ck::utils::conv::ConvParams params_filter1x1_stride1_pad0_{
2, 4, 256, 64, {1, 1}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
static inline ck::utils::conv::ConvParams params_filter1x1_pad0_{
2, 4, 256, 64, {1, 1}, {28, 28}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
static inline ck::utils::conv::ConvParams params_oddC_{
2, 4, 256, 3, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
private:
double atol_{1e-5};
double rtol_{1e-4};
};
} // anonymous namespace
TEST(Conv2DFwdNHWC, IntegerValues)
{
using namespace std::placeholders;
using namespace ck::utils;
using T = float;
ck::utils::conv::ConvParams params{
2, 4, 256, 64, {3, 3}, {36, 36}, {1, 1}, {2, 2}, {2, 2}, {2, 2}};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<2, T, T, T, T>(conv_ptrs);
conv::ConvFwdOpInstance<T,
T,
T,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistributionIntegerValue<T>,
FillUniformDistributionIntegerValue<T>>
conv_instance(params,
true,
FillUniformDistributionIntegerValue<T>{},
FillUniformDistributionIntegerValue<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<2, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-5);
run_engine.SetRtol(1e-4);
EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
TEST(Conv2DFwdNHWC, FloatingPointValues)
{
using namespace std::placeholders;
using namespace ck::utils;
using T = ck::half_t;
ck::utils::conv::ConvParams params{
2, 4, 256, 64, {3, 3}, {36, 36}, {2, 2}, {2, 2}, {2, 2}, {2, 2}};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<2, T, T, T, float>(conv_ptrs);
conv::ConvFwdOpInstance<T,
T,
T,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistribution<T>,
FillUniformDistribution<T>>
conv_instance(params, true, FillUniformDistribution<T>{}, FillUniformDistribution<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<2, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(2e-4);
run_engine.SetRtol(1e-3);
EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
TEST_F(Conv2dFwdNHWCInstances, BF16_default) { EXPECT_TRUE(this->test_default<ck::bhalf_t>()); }
TEST_F(Conv2dFwdNHWCInstances, BF16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>());
}
TEST_F(Conv2dFwdNHWCInstances, BF16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>());
}
TEST_F(Conv2dFwdNHWCInstances, F16_default) { EXPECT_TRUE(this->test_default<ck::half_t>()); }
TEST_F(Conv2dFwdNHWCInstances, F16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>());
}
TEST_F(Conv2dFwdNHWCInstances, F16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>());
}
TEST_F(Conv2dFwdNHWCInstances, F16_oddC) { EXPECT_TRUE(this->test_oddC<ck::half_t>()); }
TEST_F(Conv2dFwdNHWCInstances, F32_default) { EXPECT_TRUE(this->test_default<float>()); }
TEST_F(Conv2dFwdNHWCInstances, F32_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>());
}
TEST_F(Conv2dFwdNHWCInstances, F32_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<float>());
}
TEST_F(Conv2dFwdNHWCInstances, I8_default) { EXPECT_TRUE(this->test_default<int8_t>()); }
TEST_F(Conv2dFwdNHWCInstances, I8_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>());
}
TEST_F(Conv2dFwdNHWCInstances, I8_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>());
}
TEST_F(Conv2dFwdNHWCInstances, ND_BF16_default)
{
EXPECT_TRUE(this->test_default<ck::bhalf_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_BF16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_BF16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F16_default)
{
EXPECT_TRUE(this->test_default<ck::half_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F32_default) { EXPECT_TRUE(this->test_default<float>(true)); }
TEST_F(Conv2dFwdNHWCInstances, ND_F32_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F32_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<float>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_I8_default) { EXPECT_TRUE(this->test_default<int8_t>(true)); }
TEST_F(Conv2dFwdNHWCInstances, ND_I8_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_I8_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>(true));
}
#include <half.hpp> // SPDX-License-Identifier: MIT
#include <iostream> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <stdexcept>
#include <tuple> #include <iostream>
#include <vector> #include <stdexcept>
#include "gtest/gtest.h" #include <tuple>
#include <vector>
#include "data_type.hpp" #include <gtest/gtest.h>
#include "element_wise_operation.hpp"
#include "library/include/ck/library/utility/conv_util.hpp" #include "ck/utility/data_type.hpp"
#include "conv_util.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
namespace { #include "ck/library/utility/conv_util.hpp"
template <typename T> #include "test/convnd_fwd/conv_util.hpp"
bool test_conv3d_ndhwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs)
{ namespace {
using namespace std::placeholders;
using namespace ck::utils; class Conv3dFwdNDHWCInstances : public ::testing::Test
namespace ctl = ck::tensor_layout::convolution; {
public:
conv::ConvParams params; template <typename T>
params.N_ = 64; bool test_conv3d_nwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs,
params.num_dim_spatial_ = 3; const ck::utils::conv::ConvParams& params)
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 2}; {
params.input_spatial_lengths_ = std::vector<ck::index_t>{32, 32, 2}; using namespace std::placeholders;
params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2, 2}; using namespace ck::utils;
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1}; namespace ctl = ck::tensor_layout::convolution;
params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1}; conv::ConvFwdOpInstance<T,
T,
conv::ConvFwdOpInstance<T, T, T, ctl::NDHWC, ctl::KZYXC, ctl::NDHWK> conv_instance(params); T,
ctl::NDHWC,
auto reference_conv_fwd_fun = ctl::KZYXC,
std::bind(conv::run_reference_convolution_forward<3, T, T, T>, params, _1, _2, _3); ctl::NDHWK,
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun); ck::tensor_operation::element_wise::PassThrough,
return run_engine.Test(conv_ptrs); ck::tensor_operation::element_wise::PassThrough,
} ck::tensor_operation::element_wise::PassThrough,
FillUniformDistributionIntegerValue<T>,
} // anonymous namespace FillUniformDistributionIntegerValue<T>>
conv_instance(params,
TEST(Conv3DFwdNDHWC, TestConv3D) true,
{ FillUniformDistributionIntegerValue<T>{},
using namespace std::placeholders; FillUniformDistributionIntegerValue<T>{});
using namespace ck::utils; auto reference_conv_fwd_fun =
namespace ctl = ck::tensor_layout::convolution; std::bind(conv::run_reference_convolution_forward<3, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
conv::ConvParams params; run_engine.SetAtol(atol_);
params.num_dim_spatial_ = 3; run_engine.SetRtol(rtol_);
params.N_ = 2; return run_engine.Test(conv_ptrs);
params.K_ = 16; }
params.C_ = 4;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3}; template <typename T>
params.input_spatial_lengths_ = std::vector<ck::index_t>{16, 16, 16}; bool test_default()
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1}; {
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1}; return test_conv3d_nwc_instances<T>(
params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1}; ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<3>(), params_default_);
params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1}; }
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs; template <typename T>
test::conv::get_test_convolution_fwd_instance<3>(conv_ptrs); bool test_filter1x1_stride1_pad0()
conv::ConvFwdOpInstance<float, float, float, ctl::NDHWC, ctl::KZYXC, ctl::NDHWK> conv_instance( {
params); return test_conv3d_nwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<3>(),
auto reference_conv_fwd_fun = std::bind( params_filter1x1_stride1_pad0_);
conv::run_reference_convolution_forward<3, float, float, float>, params, _1, _2, _3); }
OpInstanceRunEngine<float, float, float> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-5); template <typename T>
run_engine.SetRtol(1e-4); bool test_filter1x1_pad0()
EXPECT_TRUE(run_engine.Test(conv_ptrs)); {
} return test_conv3d_nwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<3>(),
TEST(Conv3DFwdNDHWC, InputOver2GB) params_filter1x1_pad0_);
{ }
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using namespace ck::utils; static inline ck::utils::conv::ConvParams params_default_{
3, 4, 256, 64, {3, 3, 3}, {28, 28, 28}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}};
// >2GB Input static inline ck::utils::conv::ConvParams params_filter1x1_stride1_pad0_{
conv::ConvParams params; 3, 4, 256, 64, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
params.num_dim_spatial_ = 3; static inline ck::utils::conv::ConvParams params_filter1x1_pad0_{
params.N_ = 2; 3, 4, 256, 64, {1, 1, 1}, {28, 28, 28}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
params.K_ = 16;
params.C_ = 32; private:
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3}; double atol_{1e-5};
params.input_spatial_lengths_ = std::vector<ck::index_t>{32, 1000, 1000}; double rtol_{1e-4};
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1}; };
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1}; } // anonymous namespace
params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1};
TEST(Conv3DFwdNDHWC, IntegerValues)
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs; {
test::conv::get_test_convolution_fwd_instance<3>(conv_ptrs); using namespace std::placeholders;
using namespace ck::utils;
auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr, namespace ctl = ck::tensor_layout::convolution;
nullptr, using T = float;
nullptr,
params.N_, ck::utils::conv::ConvParams params{
params.K_, 3, 4, 256, 64, {3, 3, 3}, {18, 18, 18}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}};
params.C_,
params.input_spatial_lengths_, std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
params.filter_spatial_lengths_, test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
params.GetOutputSpatialLengths(), conv::ConvFwdOpInstance<T,
params.conv_filter_strides_, T,
params.conv_filter_dilations_, T,
params.input_left_pads_, ctl::NDHWC,
params.input_right_pads_, ctl::KZYXC,
PassThrough{}, ctl::NDHWK,
PassThrough{}, ck::tensor_operation::element_wise::PassThrough,
PassThrough{}); ck::tensor_operation::element_wise::PassThrough,
EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get())); ck::tensor_operation::element_wise::PassThrough,
} FillUniformDistributionIntegerValue<T>,
FillUniformDistributionIntegerValue<T>>
TEST(Conv3DFwdNDHWC, FiltersOver2GB) conv_instance(params,
{ true,
using PassThrough = ck::tensor_operation::element_wise::PassThrough; FillUniformDistributionIntegerValue<T>{},
using namespace ck::utils; FillUniformDistributionIntegerValue<T>{});
// >2GB Filters auto reference_conv_fwd_fun =
conv::ConvParams params; std::bind(conv::run_reference_convolution_forward<3, T, T, T>, params, _1, _2, _3);
params.num_dim_spatial_ = 3; OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
params.N_ = 2; run_engine.SetAtol(1e-5);
params.K_ = 16; run_engine.SetRtol(1e-3);
params.C_ = 32; EXPECT_TRUE(run_engine.Test(conv_ptrs));
params.filter_spatial_lengths_ = std::vector<ck::index_t>{4, 1000, 1000}; }
params.input_spatial_lengths_ = std::vector<ck::index_t>{16, 16, 16};
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1}; TEST(Conv3DFwdNDHWC, FloatingPointValues)
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1}; {
params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1}; using namespace std::placeholders;
params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1}; using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs; using T = ck::half_t;
test::conv::get_test_convolution_fwd_instance<3>(conv_ptrs);
ck::utils::conv::ConvParams params{
auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr, 3, 4, 256, 64, {3, 3, 3}, {18, 18, 18}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}};
nullptr,
nullptr, std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
params.N_, test::conv::get_test_convolution_fwd_instance<3, T, T, T, float>(conv_ptrs);
params.K_, conv::ConvFwdOpInstance<T,
params.C_, T,
params.input_spatial_lengths_, T,
params.filter_spatial_lengths_, ctl::NDHWC,
params.GetOutputSpatialLengths(), ctl::KZYXC,
params.conv_filter_strides_, ctl::NDHWK,
params.conv_filter_dilations_, ck::tensor_operation::element_wise::PassThrough,
params.input_left_pads_, ck::tensor_operation::element_wise::PassThrough,
params.input_right_pads_, ck::tensor_operation::element_wise::PassThrough,
PassThrough{}, FillUniformDistribution<T>,
PassThrough{}, FillUniformDistribution<T>>
PassThrough{}); conv_instance(params, true, FillUniformDistribution<T>{}, FillUniformDistribution<T>{});
EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get()));
} auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<3, T, T, T>, params, _1, _2, _3);
TEST(Conv3DFwdNDHWC, OutputOver2GB) OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
{ run_engine.SetAtol(1e-3);
using PassThrough = ck::tensor_operation::element_wise::PassThrough; run_engine.SetRtol(1e-3);
using namespace ck::utils; EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
// >2GB Output
conv::ConvParams params; TEST(Conv3DFwdNDHWC, InputOver2GB)
params.num_dim_spatial_ = 3; {
params.N_ = 2; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
params.K_ = 16; using namespace ck::utils;
params.C_ = 2; using T = float;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{1, 1, 1};
params.input_spatial_lengths_ = std::vector<ck::index_t>{1000, 1000, 30}; // >2GB Input
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1}; conv::ConvParams params;
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1}; params.num_dim_spatial_ = 3;
params.input_left_pads_ = std::vector<ck::index_t>{2, 2, 2}; params.N_ = 2;
params.input_right_pads_ = std::vector<ck::index_t>{2, 2, 2}; params.K_ = 16;
params.C_ = 32;
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
test::conv::get_test_convolution_fwd_instance<3>(conv_ptrs); params.input_spatial_lengths_ = std::vector<ck::index_t>{32, 1000, 1000};
auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr, params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
nullptr, params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
nullptr, params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1};
params.N_, params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1};
params.K_,
params.C_, std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
params.input_spatial_lengths_, test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
params.filter_spatial_lengths_, auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
params.GetOutputSpatialLengths(), nullptr,
params.conv_filter_strides_, nullptr,
params.conv_filter_dilations_, params.N_,
params.input_left_pads_, params.K_,
params.input_right_pads_, params.C_,
PassThrough{}, params.input_spatial_lengths_,
PassThrough{}, params.filter_spatial_lengths_,
PassThrough{}); params.GetOutputSpatialLengths(),
EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get())); params.conv_filter_strides_,
} params.conv_filter_dilations_,
params.input_left_pads_,
TEST(Conv3DFwdNDHWC, Bf16Instances) params.input_right_pads_,
{ PassThrough{},
EXPECT_TRUE(test_conv3d_ndhwc_instances<ck::bhalf_t>( PassThrough{},
ck::utils::conv::ConvolutionFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>::Get<3>())); PassThrough{});
} EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
TEST(Conv3DFwdNDHWC, F16Instances)
{ TEST(Conv3DFwdNDHWC, FiltersOver2GB)
EXPECT_TRUE(test_conv3d_ndhwc_instances<ck::half_t>( {
ck::utils::conv::ConvolutionFwdInstances<ck::half_t, ck::half_t, ck::half_t>::Get<3>())); using PassThrough = ck::tensor_operation::element_wise::PassThrough;
} using namespace ck::utils;
using T = float;
TEST(Conv3DFwdNDHWC, F32Instances)
{ // >2GB Filters
EXPECT_TRUE(test_conv3d_ndhwc_instances<float>( conv::ConvParams params;
ck::utils::conv::ConvolutionFwdInstances<float, float, float>::Get<3>())); params.num_dim_spatial_ = 3;
} params.N_ = 2;
params.K_ = 16;
TEST(Conv3DFwdNDHWC, Int8Instances) params.C_ = 32;
{ params.filter_spatial_lengths_ = std::vector<ck::index_t>{4, 1000, 1000};
EXPECT_TRUE(test_conv3d_ndhwc_instances<int8_t>( params.input_spatial_lengths_ = std::vector<ck::index_t>{16, 16, 16};
ck::utils::conv::ConvolutionFwdInstances<int8_t, int8_t, int8_t>::Get<3>())); params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
} params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
nullptr,
nullptr,
params.N_,
params.K_,
params.C_,
params.input_spatial_lengths_,
params.filter_spatial_lengths_,
params.GetOutputSpatialLengths(),
params.conv_filter_strides_,
params.conv_filter_dilations_,
params.input_left_pads_,
params.input_right_pads_,
PassThrough{},
PassThrough{},
PassThrough{});
EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
TEST(Conv3DFwdNDHWC, OutputOver2GB)
{
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using namespace ck::utils;
using T = float;
// >2GB Output
conv::ConvParams params;
params.num_dim_spatial_ = 3;
params.N_ = 2;
params.K_ = 16;
params.C_ = 2;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{1, 1, 1};
params.input_spatial_lengths_ = std::vector<ck::index_t>{1000, 1000, 30};
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
params.input_left_pads_ = std::vector<ck::index_t>{2, 2, 2};
params.input_right_pads_ = std::vector<ck::index_t>{2, 2, 2};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
nullptr,
nullptr,
params.N_,
params.K_,
params.C_,
params.input_spatial_lengths_,
params.filter_spatial_lengths_,
params.GetOutputSpatialLengths(),
params.conv_filter_strides_,
params.conv_filter_dilations_,
params.input_left_pads_,
params.input_right_pads_,
PassThrough{},
PassThrough{},
PassThrough{});
EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
TEST_F(Conv3dFwdNDHWCInstances, BF16_default) { EXPECT_TRUE(this->test_default<ck::bhalf_t>()); }
TEST_F(Conv3dFwdNDHWCInstances, BF16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, BF16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, F16_default) { EXPECT_TRUE(this->test_default<ck::half_t>()); }
TEST_F(Conv3dFwdNDHWCInstances, F16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, F16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, F32_default) { EXPECT_TRUE(this->test_default<float>()); }
TEST_F(Conv3dFwdNDHWCInstances, F32_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>());
}
TEST_F(Conv3dFwdNDHWCInstances, F32_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<float>());
}
TEST_F(Conv3dFwdNDHWCInstances, I8_default) { EXPECT_TRUE(this->test_default<int8_t>()); }
TEST_F(Conv3dFwdNDHWCInstances, I8_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, I8_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>());
}
#ifndef TEST_CONV_UTIL_HPP // SPDX-License-Identifier: MIT
#define TEST_CONV_UTIL_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <tuple> #include <tuple>
#include "config.hpp" #include "ck/ck.hpp"
#include "device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp" #include "ck/utility/sequence.hpp"
#include "element_wise_operation.hpp" #include "ck/utility/data_type.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp"
#include "sequence.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
using DeviceConvFwdNoOpPtr = DeviceConvFwdPtr<element_wise::PassThrough,
element_wise::PassThrough,
element_wise::PassThrough>;
namespace device_conv2d_fwd_instance {
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(std::vector<DeviceConvFwdNoOpPtr>&);
} // namespace device_conv2d_fwd_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
namespace test { namespace test {
namespace conv { namespace conv {
...@@ -25,57 +47,128 @@ using DeviceConvFwdNoOpPtr = ...@@ -25,57 +47,128 @@ using DeviceConvFwdNoOpPtr =
static constexpr auto ConvFwdDefault = static constexpr auto ConvFwdDefault =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default; ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
template <ck::index_t SpatialDims, typename InDataType, typename WeiDataType, typename OutDataType> template <ck::index_t SpatialDims,
typename InDataType,
typename WeiDataType,
typename OutDataType,
typename AccDataType>
using DeviceConvNDFwdInstance = ck::tensor_operation::device:: using DeviceConvNDFwdInstance = ck::tensor_operation::device::
DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K< DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K<
// clang-format off // clang-format off
InDataType, // InDataType, //
WeiDataType, // WeiDataType, //
OutDataType, // OutDataType, //
InDataType, // AccDataType, // Accumulator data type.
InElementOp, // Input Elementwise Operation InElementOp, // Input Elementwise Operation
WeiElementOp, // Weights Elementwise Operation WeiElementOp, // Weights Elementwise Operation
OutElementOp, // Output Elementwise Operation OutElementOp, // Output Elementwise Operation
ConvFwdDefault, // ConvForwardSpecialization ConvFwdDefault, // ConvForwardSpecialization
SpatialDims, // SptialDims SpatialDims, // SptialDims
64, // BlockSize 256, // BlockSize
16, // MPerBlock 128, // MPerBlock
16, // NPerBlock 256, // NPerBlock
4, // K0PerBlock 4, // K0PerBlock
1, // K1 8, // K1
16, // MPerXDL 32, // MPerXdl
16, // NPerXDL 32, // NPerXdl
1, // MXdlPerWave 2, // MXdlPerWave
1, // NXdlPerWave 4, // NXdlPerWave
S<1, 16, 1>, // ABlockTransferThreadClusterLengths_K0_M_K1 S<4, 64, 1>, // ABlockTransferThreadClusterLengths_K0_M_K1
S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // ABlockTransferSrcAccessOrder S<1, 0, 2>, // ABlockTransferSrcAccessOrder
2, // ABlockTransferSrcVectorDim 2, // ABlockTransferSrcVectorDim
1, // ABlockTransferSrcScalarPerVector 8, // ABlockTransferSrcScalarPerVector
1, // ABlockTransferDstScalarPerVector_K1 8, // ABlockTransferDstScalarPerVector_K1
true, // ABlockLdsAddExtraM true, // ABlockLdsAddExtraM
S<1, 16, 1>, // BBlockTransferThreadClusterLengths_K0_N_K1 S<4, 64, 1>, // BBlockTransferThreadClusterLengths_K0_N_K1
S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // BBlockTransferSrcAccessOrder S<1, 0, 2>, // BBlockTransferSrcAccessOrder
2, // BBlockTransferSrcVectorDim 2, // BBlockTransferSrcVectorDim
1, // BBlockTransferSrcScalarPerVector 8, // BBlockTransferSrcScalarPerVector
1, // BBlockTransferDstScalarPerVector_K1 8, // BBlockTransferDstScalarPerVector_K1
true, // BBlockTransferAddExtraN true, // BBlockLdsAddExtraN
7, // CThreadTransferSrcDstVectorDim 7, // CThreadTransferSrcDstVectorDim
1>; // CThreadTransferDstScalarPerVector 1>; // CThreadTransferDstScalarPerVector
// clang-format on // clang-format on
template <ck::index_t NDim, template <ck::index_t NDim,
typename InDataType = float, typename InDataType,
typename WeiDataType = float, typename WeiDataType,
typename OutDataType = float> typename OutDataType,
typename AccDataType>
void get_test_convolution_fwd_instance(std::vector<DeviceConvFwdNoOpPtr>& instances) void get_test_convolution_fwd_instance(std::vector<DeviceConvFwdNoOpPtr>& instances)
{ {
using ConvInstanceT = DeviceConvNDFwdInstance<NDim, InDataType, WeiDataType, OutDataType>; using ConvInstanceT =
DeviceConvNDFwdInstance<NDim, InDataType, WeiDataType, OutDataType, AccDataType>;
instances.emplace_back(std::make_unique<ConvInstanceT>()); instances.emplace_back(std::make_unique<ConvInstanceT>());
} }
// TODO (aosewski)
// Temporary solution to get all DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
// instances. When switched over to DeviceConvNDFwdXdl for 2D remove ConvolutionNDFwdInstances
// structures.
template <typename InDataType, typename WeiDataType, typename OutDataType>
struct ConvolutionNDFwdInstances;
template <>
struct ConvolutionNDFwdInstances<float, float, float>
{
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
if(num_dim_spatial == 2)
{
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(conv_ptrs);
}
return conv_ptrs;
}
};
template <>
struct ConvolutionNDFwdInstances<ck::half_t, ck::half_t, ck::half_t>
{
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
if(num_dim_spatial == 2)
{
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(conv_ptrs);
}
return conv_ptrs;
}
};
template <>
struct ConvolutionNDFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>
{
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
if(num_dim_spatial == 2)
{
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(conv_ptrs);
}
return conv_ptrs;
}
};
template <>
struct ConvolutionNDFwdInstances<int8_t, int8_t, int8_t>
{
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
if(num_dim_spatial == 2)
{
ck::tensor_operation::device::device_conv2d_fwd_instance::
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(conv_ptrs);
}
return conv_ptrs;
}
};
} // namespace conv } // namespace conv
} // namespace test } // namespace test
#endif
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <half.hpp>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "../gemm/gemm_util.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_gemm.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_dl.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "test/gemm/gemm_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "../gemm/gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_gemm.hpp"
#include "device_tensor.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_gemm_dl.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "gemm_specialization.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; #include "test/gemm/gemm_util.hpp"
using DeviceGemmNoOpPtr = using PassThrough = ck::tensor_operation::element_wise::PassThrough;
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough, using DeviceGemmNoOpPtr =
ck::tensor_operation::element_wise::PassThrough>; ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
namespace ck { ck::tensor_operation::element_wise::PassThrough>;
namespace tensor_operation {
namespace device { namespace ck {
namespace device_gemm_instance { namespace tensor_operation {
namespace device {
void add_device_gemm_dl_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device_gemm_instance {
void add_device_gemm_dl_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_dl_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_dl_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_dl_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_dl_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_dl_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device_gemm_instance void add_device_gemm_dl_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device
} // namespace tensor_operation } // namespace device_gemm_instance
} // namespace ck } // namespace device
} // namespace tensor_operation
int main() } // namespace ck
{
using ADataType = float; int main()
using BDataType = float; {
using CDataType = float; using ADataType = float;
using AccDataType = float; using BDataType = float;
using CDataType = float;
using RowMajor = ck::tensor_layout::gemm::RowMajor; using AccDataType = float;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
using RowMajor = ck::tensor_layout::gemm::RowMajor;
bool res = true; using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance:: bool res = true;
add_device_gemm_dl_f32_f32_f32_km_kn_mn_instances(gemmPtrs); std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_dl_f32_f32_f32_km_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
RowMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_dl_f32_f32_f32_km_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_dl_f32_f32_f32_km_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_dl_f32_f32_f32_mk_kn_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_dl_f32_f32_f32_mk_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
RowMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_dl_f32_f32_f32_mk_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_dl_f32_f32_f32_mk_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; }
return res ? 0 : 1;
} std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <half.hpp>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "../gemm/gemm_util.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_gemm.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_dl.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "test/gemm/gemm_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
#ifndef GEMM_UTILS_HPP // SPDX-License-Identifier: MIT
#define GEMM_UTILS_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "check_err.hpp" #pragma once
#include "config.hpp"
#include "device.hpp" #include "ck/ck.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/utility/check_err.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "tensor_layout.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace ck { namespace ck {
namespace gemm_util { namespace gemm_util {
...@@ -350,4 +352,3 @@ struct TestGemmBF16 ...@@ -350,4 +352,3 @@ struct TestGemmBF16
} // namespace gemm_util } // namespace gemm_util
} // namespace ck } // namespace ck
#endif
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_gemm.hpp"
#include "device_tensor.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "reference_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "gemm_specialization.hpp"
#include "test/gemm/gemm_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using DeviceGemmNoOpPtr =
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough, using DeviceGemmNoOpPtr =
ck::tensor_operation::element_wise::PassThrough, ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>; ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>;
namespace ck {
namespace tensor_operation { namespace ck {
namespace device { namespace tensor_operation {
namespace device_gemm_instance { namespace device {
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instances( namespace device_gemm_instance {
std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instances(
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instances( std::vector<DeviceGemmNoOpPtr>&);
std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instances(
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instances( std::vector<DeviceGemmNoOpPtr>&);
std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instances(
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instances( std::vector<DeviceGemmNoOpPtr>&);
std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instances(
} // namespace device_gemm_instance std::vector<DeviceGemmNoOpPtr>&);
} // namespace device } // namespace device_gemm_instance
} // namespace tensor_operation } // namespace device
} // namespace ck } // namespace tensor_operation
} // namespace ck
int main()
{ int main()
using RowMajor = ck::tensor_layout::gemm::RowMajor; {
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor; using RowMajor = ck::tensor_layout::gemm::RowMajor;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
bool res = true;
std::vector<DeviceGemmNoOpPtr> gemmPtrs; bool res = true;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs)
{ for(auto& gemmPtr : gemmPtrs)
res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr, {
ColumnMajor, res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr,
RowMajor, ColumnMajor,
RowMajor, RowMajor,
PassThrough, RowMajor,
PassThrough, PassThrough,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough>{}(gemmPtr);
}
gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs)
{ for(auto& gemmPtr : gemmPtrs)
res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr, {
ColumnMajor, res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr,
ColumnMajor, ColumnMajor,
RowMajor, ColumnMajor,
PassThrough, RowMajor,
PassThrough, PassThrough,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough>{}(gemmPtr);
}
gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs)
{ for(auto& gemmPtr : gemmPtrs)
res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr, {
RowMajor, res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr,
RowMajor, RowMajor,
RowMajor, RowMajor,
PassThrough, RowMajor,
PassThrough, PassThrough,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough>{}(gemmPtr);
}
gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs)
{ for(auto& gemmPtr : gemmPtrs)
res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr, {
RowMajor, res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr,
ColumnMajor, RowMajor,
RowMajor, ColumnMajor,
PassThrough, RowMajor,
PassThrough, PassThrough,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough>{}(gemmPtr);
}
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1; std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
} return res ? 0 : 1;
}
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "host_gemm.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "device_tensor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "device_gemm_xdl.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "element_wise_operation.hpp" #include "ck/library/utility/check_err.hpp"
#include "gemm_specialization.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
using DeviceGemmNoOpPtr =
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough, #include "test/gemm/gemm_util.hpp"
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
namespace ck { using DeviceGemmNoOpPtr =
namespace tensor_operation { ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
namespace device { ck::tensor_operation::element_wise::PassThrough,
namespace device_gemm_instance { ck::tensor_operation::element_wise::PassThrough>;
void add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace ck {
void add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace tensor_operation {
void add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device {
namespace device_gemm_instance {
void add_device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instances( void add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device_gemm_instance void add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device void add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace tensor_operation
} // namespace ck void add_device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instances(
std::vector<DeviceGemmNoOpPtr>&);
int main() } // namespace device_gemm_instance
{ } // namespace device
using ADataType = ck::half_t; } // namespace tensor_operation
using BDataType = ck::half_t; } // namespace ck
using CDataType = ck::half_t;
using AccDataType = float; int main()
{
using RowMajor = ck::tensor_layout::gemm::RowMajor; using ADataType = ck::half_t;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor; using BDataType = ck::half_t;
using CDataType = ck::half_t;
bool res = true; using AccDataType = float;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance:: using RowMajor = ck::tensor_layout::gemm::RowMajor;
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(gemmPtrs); using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instances(gemmPtrs); bool res = true;
ck::tensor_operation::device::device_gemm_instance:: std::vector<DeviceGemmNoOpPtr> gemmPtrs;
add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, ck::tensor_operation::device::device_gemm_instance::
ADataType, add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instances(gemmPtrs);
BDataType,
CDataType, for(auto& gemmPtr : gemmPtrs)
AccDataType, {
ColumnMajor, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
RowMajor, ADataType,
RowMajor, BDataType,
PassThrough, CDataType,
PassThrough, AccDataType,
PassThrough>{}(gemmPtr); ColumnMajor,
} RowMajor,
RowMajor,
gemmPtrs.clear(); PassThrough,
ck::tensor_operation::device::device_gemm_instance:: PassThrough,
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(gemmPtrs); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, ck::tensor_operation::device::device_gemm_instance::
ADataType, add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instances(gemmPtrs);
BDataType,
CDataType, for(auto& gemmPtr : gemmPtrs)
AccDataType, {
ColumnMajor, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ColumnMajor, ADataType,
RowMajor, BDataType,
PassThrough, CDataType,
PassThrough, AccDataType,
PassThrough>{}(gemmPtr); ColumnMajor,
} ColumnMajor,
RowMajor,
gemmPtrs.clear(); PassThrough,
ck::tensor_operation::device::device_gemm_instance:: PassThrough,
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(gemmPtrs); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, ck::tensor_operation::device::device_gemm_instance::
ADataType, add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instances(gemmPtrs);
BDataType,
CDataType, for(auto& gemmPtr : gemmPtrs)
AccDataType, {
RowMajor, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
RowMajor, ADataType,
RowMajor, BDataType,
PassThrough, CDataType,
PassThrough, AccDataType,
PassThrough>{}(gemmPtr); RowMajor,
} RowMajor,
RowMajor,
gemmPtrs.clear(); PassThrough,
ck::tensor_operation::device::device_gemm_instance:: PassThrough,
add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(gemmPtrs); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
ck::tensor_operation::device::device_gemm_instance:: add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(gemmPtrs);
add_device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, ck::tensor_operation::device::device_gemm_instance::
ADataType, add_device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instances(gemmPtrs);
BDataType,
CDataType, for(auto& gemmPtr : gemmPtrs)
AccDataType, {
RowMajor, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ColumnMajor, ADataType,
RowMajor, BDataType,
PassThrough, CDataType,
PassThrough, AccDataType,
PassThrough>{}(gemmPtr); RowMajor,
} ColumnMajor,
RowMajor,
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; PassThrough,
return res ? 0 : 1; PassThrough,
} PassThrough>{}(gemmPtr);
}
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
}
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_gemm.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "gemm_specialization.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; #include "test/gemm/gemm_util.hpp"
using DeviceGemmNoOpPtr = using PassThrough = ck::tensor_operation::element_wise::PassThrough;
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough, using DeviceGemmNoOpPtr =
ck::tensor_operation::element_wise::PassThrough>; ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
namespace ck { ck::tensor_operation::element_wise::PassThrough>;
namespace tensor_operation {
namespace device { namespace ck {
namespace device_gemm_instance { namespace tensor_operation {
void add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device {
void add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device_gemm_instance {
void add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device_gemm_instance void add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device
} // namespace tensor_operation } // namespace device_gemm_instance
} // namespace ck } // namespace device
} // namespace tensor_operation
int main() } // namespace ck
{
using ADataType = float; int main()
using BDataType = float; {
using CDataType = float; using ADataType = float;
using AccDataType = float; using BDataType = float;
using CDataType = float;
using RowMajor = ck::tensor_layout::gemm::RowMajor; using AccDataType = float;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
using RowMajor = ck::tensor_layout::gemm::RowMajor;
bool res = true; using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance:: bool res = true;
add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(gemmPtrs); std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instances(gemmPtrs); add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instances(gemmPtrs); add_device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
RowMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instances(gemmPtrs); add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instances(gemmPtrs); add_device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instances(gemmPtrs); add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instances(gemmPtrs); add_device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
RowMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instances(gemmPtrs); add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instances(gemmPtrs); add_device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; }
return res ? 0 : 1;
} std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
}
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_gemm.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/utility/check_err.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "gemm_specialization.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
#include "test/gemm/gemm_util.hpp"
using DeviceGemmNoOpPtr =
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough, using PassThrough = ck::tensor_operation::element_wise::PassThrough;
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>; using DeviceGemmNoOpPtr =
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
namespace ck { ck::tensor_operation::element_wise::PassThrough,
namespace tensor_operation { ck::tensor_operation::element_wise::PassThrough>;
namespace device {
namespace device_gemm_instance { namespace ck {
void add_device_gemm_xdl_f64_f64_f64_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace tensor_operation {
void add_device_gemm_xdl_f64_f64_f64_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device {
void add_device_gemm_xdl_f64_f64_f64_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device_gemm_instance {
void add_device_gemm_xdl_f64_f64_f64_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f64_f64_f64_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f64_f64_f64_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device_gemm_instance void add_device_gemm_xdl_f64_f64_f64_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device void add_device_gemm_xdl_f64_f64_f64_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace tensor_operation
} // namespace ck } // namespace device_gemm_instance
} // namespace device
inline std::string get_device_name() } // namespace tensor_operation
{ } // namespace ck
hipDeviceProp_t props{};
int device; inline std::string get_device_name()
auto status = hipGetDevice(&device); {
if(status != hipSuccess) hipDeviceProp_t props{};
{ int device;
return std::string(); auto status = hipGetDevice(&device);
} if(status != hipSuccess)
{
status = hipGetDeviceProperties(&props, device); return std::string();
if(status != hipSuccess) }
{
return std::string(); status = hipGetDeviceProperties(&props, device);
} if(status != hipSuccess)
const std::string name(props.gcnArchName); {
return std::string();
return name; }
} const std::string name(props.gcnArchName);
int main() return name;
{ }
if(get_device_name().find("gfx90a") == std::string::npos)
{ int main()
std::cout << "TestGemm ..... SUCCESS" << std::endl; {
return 0; if(get_device_name().find("gfx90a") == std::string::npos)
} {
using ADataType = double; std::cout << "TestGemm ..... SUCCESS" << std::endl;
using BDataType = double; return 0;
using CDataType = double; }
using AccDataType = double; using ADataType = double;
using BDataType = double;
using RowMajor = ck::tensor_layout::gemm::RowMajor; using CDataType = double;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor; using AccDataType = double;
bool res = true; using RowMajor = ck::tensor_layout::gemm::RowMajor;
std::vector<DeviceGemmNoOpPtr> gemmPtrs; using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f64_f64_f64_km_kn_mn_instances(gemmPtrs); bool res = true;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_f64_f64_f64_km_kn_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ADataType, for(auto& gemmPtr : gemmPtrs)
BDataType, {
CDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
AccDataType, ADataType,
ColumnMajor, BDataType,
RowMajor, CDataType,
RowMajor, AccDataType,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); RowMajor,
} PassThrough,
PassThrough,
gemmPtrs.clear(); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_f64_f64_f64_km_nk_mn_instances(gemmPtrs);
gemmPtrs.clear();
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_f64_f64_f64_km_nk_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ADataType, for(auto& gemmPtr : gemmPtrs)
BDataType, {
CDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
AccDataType, ADataType,
ColumnMajor, BDataType,
ColumnMajor, CDataType,
RowMajor, AccDataType,
PassThrough, ColumnMajor,
PassThrough, ColumnMajor,
PassThrough>{}(gemmPtr); RowMajor,
} PassThrough,
PassThrough,
gemmPtrs.clear(); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_f64_f64_f64_mk_kn_mn_instances(gemmPtrs);
gemmPtrs.clear();
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_f64_f64_f64_mk_kn_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ADataType, for(auto& gemmPtr : gemmPtrs)
BDataType, {
CDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
AccDataType, ADataType,
RowMajor, BDataType,
RowMajor, CDataType,
RowMajor, AccDataType,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); RowMajor,
} PassThrough,
PassThrough,
gemmPtrs.clear(); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_f64_f64_f64_mk_nk_mn_instances(gemmPtrs);
gemmPtrs.clear();
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_f64_f64_f64_mk_nk_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ADataType, for(auto& gemmPtr : gemmPtrs)
BDataType, {
CDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
AccDataType, ADataType,
RowMajor, BDataType,
ColumnMajor, CDataType,
RowMajor, AccDataType,
PassThrough, RowMajor,
PassThrough, ColumnMajor,
PassThrough>{}(gemmPtr); RowMajor,
} PassThrough,
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; PassThrough,
return res ? 0 : 1; PassThrough>{}(gemmPtr);
} }
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
}
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_gemm.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "gemm_specialization.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; #include "test/gemm/gemm_util.hpp"
using DeviceGemmNoOpPtr = using PassThrough = ck::tensor_operation::element_wise::PassThrough;
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough, using DeviceGemmNoOpPtr =
ck::tensor_operation::element_wise::PassThrough>; ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
namespace ck {
namespace tensor_operation {
namespace device {
namespace device_gemm_instance {
// Factory functions (defined in the device instance library) that append the
// int8 C-shuffle XDL GEMM instances for each A/B layout combination.
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device_gemm_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
int main() } // namespace ck
{
using ADataType = int8_t; int main()
using BDataType = int8_t; {
using CDataType = int8_t; using ADataType = int8_t;
using AccDataType = int32_t; using BDataType = int8_t;
using CDataType = int8_t;
using RowMajor = ck::tensor_layout::gemm::RowMajor; using AccDataType = int32_t;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
using RowMajor = ck::tensor_layout::gemm::RowMajor;
std::vector<DeviceGemmNoOpPtr> gemmPtrs; using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
bool res = true;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance:: bool res = true;
add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
RowMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
RowMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; }
return res ? 0 : 1;
} std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
}
# Extra include paths for the profiler helpers, the shared test headers, and
# the bundled half-precision headers used by the test source below.
include_directories(BEFORE
    ${PROJECT_SOURCE_DIR}/profiler/include
    ${PROJECT_SOURCE_DIR}/test/include
    ${PROJECT_SOURCE_DIR}/external/include/half
)
add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
target_link_libraries(test_gemm_reduce_fp16 PRIVATE host_tensor)
target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment