Commit cba8f7f2 authored by Anthony Chang
Browse files

Merge remote-tracking branch 'upstream/develop' into gemm-layernorm-4

parents cc50b687 b653c5eb
#ifndef TEST_CONV_UTIL_HPP // SPDX-License-Identifier: MIT
#define TEST_CONV_UTIL_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <tuple> #include <tuple>
#include "config.hpp" #include "ck/ck.hpp"
#include "device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp" #include "ck/utility/sequence.hpp"
#include "element_wise_operation.hpp" #include "ck/utility/data_type.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp"
#include "sequence.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
namespace ck {
namespace tensor_operation {
namespace device {

// Forward-convolution device-op pointer whose input, weight and output element-wise
// operations are all PassThrough.
using DeviceConvFwdNoOpPtr = DeviceConvFwdPtr<element_wise::PassThrough,
                                              element_wise::PassThrough,
                                              element_wise::PassThrough>;

namespace device_conv2d_fwd_instance {

// Forward declarations of the 2D NHWC/KYXC/NHWK instance factories, one per data type.
// Only declared here; the definitions live outside this header.
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(
    std::vector<DeviceConvFwdNoOpPtr>& instances);
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(
    std::vector<DeviceConvFwdNoOpPtr>& instances);
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(
    std::vector<DeviceConvFwdNoOpPtr>& instances);
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(
    std::vector<DeviceConvFwdNoOpPtr>& instances);

} // namespace device_conv2d_fwd_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
namespace test { namespace test {
namespace conv { namespace conv {
...@@ -25,57 +47,128 @@ using DeviceConvFwdNoOpPtr = ...@@ -25,57 +47,128 @@ using DeviceConvFwdNoOpPtr =
static constexpr auto ConvFwdDefault = static constexpr auto ConvFwdDefault =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default; ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
template <ck::index_t SpatialDims, typename InDataType, typename WeiDataType, typename OutDataType> template <ck::index_t SpatialDims,
typename InDataType,
typename WeiDataType,
typename OutDataType,
typename AccDataType>
using DeviceConvNDFwdInstance = ck::tensor_operation::device:: using DeviceConvNDFwdInstance = ck::tensor_operation::device::
DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K< DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K<
// clang-format off // clang-format off
InDataType, // InDataType, //
WeiDataType, // WeiDataType, //
OutDataType, // OutDataType, //
InDataType, // AccDataType, // Accumulator data type.
InElementOp, // Input Elementwise Operation InElementOp, // Input Elementwise Operation
WeiElementOp, // Weights Elementwise Operation WeiElementOp, // Weights Elementwise Operation
OutElementOp, // Output Elementwise Operation OutElementOp, // Output Elementwise Operation
ConvFwdDefault, // ConvForwardSpecialization ConvFwdDefault, // ConvForwardSpecialization
SpatialDims, // SptialDims SpatialDims, // SptialDims
64, // BlockSize 256, // BlockSize
16, // MPerBlock 128, // MPerBlock
16, // NPerBlock 256, // NPerBlock
4, // K0PerBlock 4, // K0PerBlock
1, // K1 8, // K1
16, // MPerXDL 32, // MPerXdl
16, // NPerXDL 32, // NPerXdl
1, // MXdlPerWave 2, // MXdlPerWave
1, // NXdlPerWave 4, // NXdlPerWave
S<1, 16, 1>, // ABlockTransferThreadClusterLengths_K0_M_K1 S<4, 64, 1>, // ABlockTransferThreadClusterLengths_K0_M_K1
S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // ABlockTransferSrcAccessOrder S<1, 0, 2>, // ABlockTransferSrcAccessOrder
2, // ABlockTransferSrcVectorDim 2, // ABlockTransferSrcVectorDim
1, // ABlockTransferSrcScalarPerVector 8, // ABlockTransferSrcScalarPerVector
1, // ABlockTransferDstScalarPerVector_K1 8, // ABlockTransferDstScalarPerVector_K1
true, // ABlockLdsAddExtraM true, // ABlockLdsAddExtraM
S<1, 16, 1>, // BBlockTransferThreadClusterLengths_K0_N_K1 S<4, 64, 1>, // BBlockTransferThreadClusterLengths_K0_N_K1
S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // BBlockTransferSrcAccessOrder S<1, 0, 2>, // BBlockTransferSrcAccessOrder
2, // BBlockTransferSrcVectorDim 2, // BBlockTransferSrcVectorDim
1, // BBlockTransferSrcScalarPerVector 8, // BBlockTransferSrcScalarPerVector
1, // BBlockTransferDstScalarPerVector_K1 8, // BBlockTransferDstScalarPerVector_K1
true, // BBlockTransferAddExtraN true, // BBlockLdsAddExtraN
7, // CThreadTransferSrcDstVectorDim 7, // CThreadTransferSrcDstVectorDim
1>; // CThreadTransferDstScalarPerVector 1>; // CThreadTransferDstScalarPerVector
// clang-format on // clang-format on
template <ck::index_t NDim, template <ck::index_t NDim,
typename InDataType = float, typename InDataType,
typename WeiDataType = float, typename WeiDataType,
typename OutDataType = float> typename OutDataType,
typename AccDataType>
void get_test_convolution_fwd_instance(std::vector<DeviceConvFwdNoOpPtr>& instances) void get_test_convolution_fwd_instance(std::vector<DeviceConvFwdNoOpPtr>& instances)
{ {
using ConvInstanceT = DeviceConvNDFwdInstance<NDim, InDataType, WeiDataType, OutDataType>; using ConvInstanceT =
DeviceConvNDFwdInstance<NDim, InDataType, WeiDataType, OutDataType, AccDataType>;
instances.emplace_back(std::make_unique<ConvInstanceT>()); instances.emplace_back(std::make_unique<ConvInstanceT>());
} }
// TODO (aosewski)
// Temporary solution for collecting all
// DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K instances.
// Once the 2D case has been switched over to DeviceConvNDFwdXdl, remove the
// ConvolutionNDFwdInstances structures.
//
// Primary template: declared only, never defined. Each supported
// (input, weight, output) data type combination is supplied as an explicit
// specialization exposing a static Get(num_dim_spatial) that returns the
// matching device instances.
template <typename InDataType, typename WeiDataType, typename OutDataType>
struct ConvolutionNDFwdInstances;
// float / float / float specialization.
template <>
struct ConvolutionNDFwdInstances<float, float, float>
{
    // Returns the instances appended by the f32 2D factory when num_dim_spatial == 2.
    // For every other dimensionality the returned vector is empty.
    static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
    {
        std::vector<DeviceConvFwdNoOpPtr> instances;
        if(num_dim_spatial != 2)
        {
            // Only the 2D factory is wired in here.
            return instances;
        }

        ck::tensor_operation::device::device_conv2d_fwd_instance::
            add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(instances);
        return instances;
    }
};
// ck::half_t / ck::half_t / ck::half_t specialization.
template <>
struct ConvolutionNDFwdInstances<ck::half_t, ck::half_t, ck::half_t>
{
    // Returns the instances appended by the f16 2D factory when num_dim_spatial == 2.
    // For every other dimensionality the returned vector is empty.
    static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
    {
        std::vector<DeviceConvFwdNoOpPtr> instances;
        if(num_dim_spatial != 2)
        {
            // Only the 2D factory is wired in here.
            return instances;
        }

        ck::tensor_operation::device::device_conv2d_fwd_instance::
            add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(instances);
        return instances;
    }
};
// ck::bhalf_t / ck::bhalf_t / ck::bhalf_t specialization.
template <>
struct ConvolutionNDFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>
{
    // Returns the instances appended by the bf16 2D factory when num_dim_spatial == 2.
    // For every other dimensionality the returned vector is empty.
    static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
    {
        std::vector<DeviceConvFwdNoOpPtr> instances;
        if(num_dim_spatial != 2)
        {
            // Only the 2D factory is wired in here.
            return instances;
        }

        ck::tensor_operation::device::device_conv2d_fwd_instance::
            add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(instances);
        return instances;
    }
};
// int8_t / int8_t / int8_t specialization.
template <>
struct ConvolutionNDFwdInstances<int8_t, int8_t, int8_t>
{
    // Returns the instances appended by the int8 2D factory when num_dim_spatial == 2.
    // For every other dimensionality the returned vector is empty.
    static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
    {
        std::vector<DeviceConvFwdNoOpPtr> instances;
        if(num_dim_spatial != 2)
        {
            // Only the 2D factory is wired in here.
            return instances;
        }

        ck::tensor_operation::device::device_conv2d_fwd_instance::
            add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(instances);
        return instances;
    }
};
} // namespace conv } // namespace conv
} // namespace test } // namespace test
#endif
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <half.hpp>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "../gemm/gemm_util.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_gemm.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_dl.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "test/gemm/gemm_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "../gemm/gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_gemm.hpp"
#include "device_tensor.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_gemm_dl.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "gemm_specialization.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; #include "test/gemm/gemm_util.hpp"
using DeviceGemmNoOpPtr = using PassThrough = ck::tensor_operation::element_wise::PassThrough;
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough, using DeviceGemmNoOpPtr =
ck::tensor_operation::element_wise::PassThrough>; ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
namespace ck { ck::tensor_operation::element_wise::PassThrough>;
namespace tensor_operation {
namespace device { namespace ck {
namespace device_gemm_instance { namespace tensor_operation {
namespace device {
void add_device_gemm_dl_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device_gemm_instance {
void add_device_gemm_dl_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_dl_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_dl_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_dl_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_dl_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_dl_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device_gemm_instance void add_device_gemm_dl_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device
} // namespace tensor_operation } // namespace device_gemm_instance
} // namespace ck } // namespace device
} // namespace tensor_operation
int main() } // namespace ck
{
using ADataType = float; int main()
using BDataType = float; {
using CDataType = float; using ADataType = float;
using AccDataType = float; using BDataType = float;
using CDataType = float;
using RowMajor = ck::tensor_layout::gemm::RowMajor; using AccDataType = float;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
using RowMajor = ck::tensor_layout::gemm::RowMajor;
bool res = true; using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance:: bool res = true;
add_device_gemm_dl_f32_f32_f32_km_kn_mn_instances(gemmPtrs); std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_dl_f32_f32_f32_km_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
RowMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_dl_f32_f32_f32_km_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_dl_f32_f32_f32_km_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_dl_f32_f32_f32_mk_kn_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_dl_f32_f32_f32_mk_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
RowMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_dl_f32_f32_f32_mk_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_dl_f32_f32_f32_mk_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; }
return res ? 0 : 1;
} std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <half.hpp>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "../gemm/gemm_util.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_gemm.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_dl.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "test/gemm/gemm_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
#ifndef GEMM_UTILS_HPP // SPDX-License-Identifier: MIT
#define GEMM_UTILS_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "check_err.hpp" #pragma once
#include "config.hpp"
#include "device.hpp" #include "ck/ck.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/utility/check_err.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "tensor_layout.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace ck { namespace ck {
namespace gemm_util { namespace gemm_util {
...@@ -350,4 +352,3 @@ struct TestGemmBF16 ...@@ -350,4 +352,3 @@ struct TestGemmBF16
} // namespace gemm_util } // namespace gemm_util
} // namespace ck } // namespace ck
#endif
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_gemm.hpp"
#include "device_tensor.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "reference_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "gemm_specialization.hpp"
#include "test/gemm/gemm_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using DeviceGemmNoOpPtr =
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough, using DeviceGemmNoOpPtr =
ck::tensor_operation::element_wise::PassThrough, ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>; ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>;
namespace ck {
namespace tensor_operation { namespace ck {
namespace device { namespace tensor_operation {
namespace device_gemm_instance { namespace device {
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instances( namespace device_gemm_instance {
std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instances(
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instances( std::vector<DeviceGemmNoOpPtr>&);
std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instances(
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instances( std::vector<DeviceGemmNoOpPtr>&);
std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instances(
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instances( std::vector<DeviceGemmNoOpPtr>&);
std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instances(
} // namespace device_gemm_instance std::vector<DeviceGemmNoOpPtr>&);
} // namespace device } // namespace device_gemm_instance
} // namespace tensor_operation } // namespace device
} // namespace ck } // namespace tensor_operation
} // namespace ck
int main()
{ int main()
using RowMajor = ck::tensor_layout::gemm::RowMajor; {
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor; using RowMajor = ck::tensor_layout::gemm::RowMajor;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
bool res = true;
std::vector<DeviceGemmNoOpPtr> gemmPtrs; bool res = true;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs)
{ for(auto& gemmPtr : gemmPtrs)
res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr, {
ColumnMajor, res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr,
RowMajor, ColumnMajor,
RowMajor, RowMajor,
PassThrough, RowMajor,
PassThrough, PassThrough,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough>{}(gemmPtr);
}
gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs)
{ for(auto& gemmPtr : gemmPtrs)
res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr, {
ColumnMajor, res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr,
ColumnMajor, ColumnMajor,
RowMajor, ColumnMajor,
PassThrough, RowMajor,
PassThrough, PassThrough,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough>{}(gemmPtr);
}
gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs)
{ for(auto& gemmPtr : gemmPtrs)
res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr, {
RowMajor, res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr,
RowMajor, RowMajor,
RowMajor, RowMajor,
PassThrough, RowMajor,
PassThrough, PassThrough,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough>{}(gemmPtr);
}
gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs)
{ for(auto& gemmPtr : gemmPtrs)
res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr, {
RowMajor, res &= ck::gemm_util::TestGemmBF16<DeviceGemmNoOpPtr,
ColumnMajor, RowMajor,
RowMajor, ColumnMajor,
PassThrough, RowMajor,
PassThrough, PassThrough,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough>{}(gemmPtr);
}
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1; std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
} return res ? 0 : 1;
}
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "host_gemm.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "device_tensor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "device_gemm_xdl.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "element_wise_operation.hpp" #include "ck/library/utility/check_err.hpp"
#include "gemm_specialization.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
using DeviceGemmNoOpPtr =
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough, #include "test/gemm/gemm_util.hpp"
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
namespace ck { using DeviceGemmNoOpPtr =
namespace tensor_operation { ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
namespace device { ck::tensor_operation::element_wise::PassThrough,
namespace device_gemm_instance { ck::tensor_operation::element_wise::PassThrough>;
void add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace ck {
void add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace tensor_operation {
void add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device {
namespace device_gemm_instance {
void add_device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instances( void add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device_gemm_instance void add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device void add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace tensor_operation
} // namespace ck void add_device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instances(
std::vector<DeviceGemmNoOpPtr>&);
int main() } // namespace device_gemm_instance
{ } // namespace device
using ADataType = ck::half_t; } // namespace tensor_operation
using BDataType = ck::half_t; } // namespace ck
using CDataType = ck::half_t;
using AccDataType = float; int main()
{
using RowMajor = ck::tensor_layout::gemm::RowMajor; using ADataType = ck::half_t;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor; using BDataType = ck::half_t;
using CDataType = ck::half_t;
bool res = true; using AccDataType = float;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance:: using RowMajor = ck::tensor_layout::gemm::RowMajor;
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(gemmPtrs); using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instances(gemmPtrs); bool res = true;
ck::tensor_operation::device::device_gemm_instance:: std::vector<DeviceGemmNoOpPtr> gemmPtrs;
add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, ck::tensor_operation::device::device_gemm_instance::
ADataType, add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instances(gemmPtrs);
BDataType,
CDataType, for(auto& gemmPtr : gemmPtrs)
AccDataType, {
ColumnMajor, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
RowMajor, ADataType,
RowMajor, BDataType,
PassThrough, CDataType,
PassThrough, AccDataType,
PassThrough>{}(gemmPtr); ColumnMajor,
} RowMajor,
RowMajor,
gemmPtrs.clear(); PassThrough,
ck::tensor_operation::device::device_gemm_instance:: PassThrough,
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(gemmPtrs); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, ck::tensor_operation::device::device_gemm_instance::
ADataType, add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instances(gemmPtrs);
BDataType,
CDataType, for(auto& gemmPtr : gemmPtrs)
AccDataType, {
ColumnMajor, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ColumnMajor, ADataType,
RowMajor, BDataType,
PassThrough, CDataType,
PassThrough, AccDataType,
PassThrough>{}(gemmPtr); ColumnMajor,
} ColumnMajor,
RowMajor,
gemmPtrs.clear(); PassThrough,
ck::tensor_operation::device::device_gemm_instance:: PassThrough,
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(gemmPtrs); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, ck::tensor_operation::device::device_gemm_instance::
ADataType, add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instances(gemmPtrs);
BDataType,
CDataType, for(auto& gemmPtr : gemmPtrs)
AccDataType, {
RowMajor, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
RowMajor, ADataType,
RowMajor, BDataType,
PassThrough, CDataType,
PassThrough, AccDataType,
PassThrough>{}(gemmPtr); RowMajor,
} RowMajor,
RowMajor,
gemmPtrs.clear(); PassThrough,
ck::tensor_operation::device::device_gemm_instance:: PassThrough,
add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(gemmPtrs); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: gemmPtrs.clear();
add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
ck::tensor_operation::device::device_gemm_instance:: add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(gemmPtrs);
add_device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instances(gemmPtrs); ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instances(gemmPtrs);
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, ck::tensor_operation::device::device_gemm_instance::
ADataType, add_device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instances(gemmPtrs);
BDataType,
CDataType, for(auto& gemmPtr : gemmPtrs)
AccDataType, {
RowMajor, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ColumnMajor, ADataType,
RowMajor, BDataType,
PassThrough, CDataType,
PassThrough, AccDataType,
PassThrough>{}(gemmPtr); RowMajor,
} ColumnMajor,
RowMajor,
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; PassThrough,
return res ? 0 : 1; PassThrough,
} PassThrough>{}(gemmPtr);
}
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
}
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_gemm.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "gemm_specialization.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; #include "test/gemm/gemm_util.hpp"
using DeviceGemmNoOpPtr = using PassThrough = ck::tensor_operation::element_wise::PassThrough;
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough, using DeviceGemmNoOpPtr =
ck::tensor_operation::element_wise::PassThrough>; ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
namespace ck { ck::tensor_operation::element_wise::PassThrough>;
namespace tensor_operation {
namespace device { namespace ck {
namespace device_gemm_instance { namespace tensor_operation {
void add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device {
void add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device_gemm_instance {
void add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device_gemm_instance void add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device
} // namespace tensor_operation } // namespace device_gemm_instance
} // namespace ck } // namespace device
} // namespace tensor_operation
int main() } // namespace ck
{
using ADataType = float; int main()
using BDataType = float; {
using CDataType = float; using ADataType = float;
using AccDataType = float; using BDataType = float;
using CDataType = float;
using RowMajor = ck::tensor_layout::gemm::RowMajor; using AccDataType = float;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
using RowMajor = ck::tensor_layout::gemm::RowMajor;
bool res = true; using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance:: bool res = true;
add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(gemmPtrs); std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instances(gemmPtrs); add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instances(gemmPtrs); add_device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
RowMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instances(gemmPtrs); add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instances(gemmPtrs); add_device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instances(gemmPtrs); add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instances(gemmPtrs); add_device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
RowMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instances(gemmPtrs); add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance:: ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instances(gemmPtrs); add_device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; }
return res ? 0 : 1;
} std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
}
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_gemm.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/utility/check_err.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "gemm_specialization.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
#include "test/gemm/gemm_util.hpp"
using DeviceGemmNoOpPtr =
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough, using PassThrough = ck::tensor_operation::element_wise::PassThrough;
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>; using DeviceGemmNoOpPtr =
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
namespace ck { ck::tensor_operation::element_wise::PassThrough,
namespace tensor_operation { ck::tensor_operation::element_wise::PassThrough>;
namespace device {
namespace device_gemm_instance { namespace ck {
void add_device_gemm_xdl_f64_f64_f64_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace tensor_operation {
void add_device_gemm_xdl_f64_f64_f64_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device {
void add_device_gemm_xdl_f64_f64_f64_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device_gemm_instance {
void add_device_gemm_xdl_f64_f64_f64_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_f64_f64_f64_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f64_f64_f64_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device_gemm_instance void add_device_gemm_xdl_f64_f64_f64_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device void add_device_gemm_xdl_f64_f64_f64_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace tensor_operation
} // namespace ck } // namespace device_gemm_instance
} // namespace device
inline std::string get_device_name() } // namespace tensor_operation
{ } // namespace ck
hipDeviceProp_t props{};
int device; inline std::string get_device_name()
auto status = hipGetDevice(&device); {
if(status != hipSuccess) hipDeviceProp_t props{};
{ int device;
return std::string(); auto status = hipGetDevice(&device);
} if(status != hipSuccess)
{
status = hipGetDeviceProperties(&props, device); return std::string();
if(status != hipSuccess) }
{
return std::string(); status = hipGetDeviceProperties(&props, device);
} if(status != hipSuccess)
const std::string name(props.gcnArchName); {
return std::string();
return name; }
} const std::string name(props.gcnArchName);
int main() return name;
{ }
if(get_device_name().find("gfx90a") == std::string::npos)
{ int main()
std::cout << "TestGemm ..... SUCCESS" << std::endl; {
return 0; if(get_device_name().find("gfx90a") == std::string::npos)
} {
using ADataType = double; std::cout << "TestGemm ..... SUCCESS" << std::endl;
using BDataType = double; return 0;
using CDataType = double; }
using AccDataType = double; using ADataType = double;
using BDataType = double;
using RowMajor = ck::tensor_layout::gemm::RowMajor; using CDataType = double;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor; using AccDataType = double;
bool res = true; using RowMajor = ck::tensor_layout::gemm::RowMajor;
std::vector<DeviceGemmNoOpPtr> gemmPtrs; using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_f64_f64_f64_km_kn_mn_instances(gemmPtrs); bool res = true;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_f64_f64_f64_km_kn_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ADataType, for(auto& gemmPtr : gemmPtrs)
BDataType, {
CDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
AccDataType, ADataType,
ColumnMajor, BDataType,
RowMajor, CDataType,
RowMajor, AccDataType,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); RowMajor,
} PassThrough,
PassThrough,
gemmPtrs.clear(); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_f64_f64_f64_km_nk_mn_instances(gemmPtrs);
gemmPtrs.clear();
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_f64_f64_f64_km_nk_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ADataType, for(auto& gemmPtr : gemmPtrs)
BDataType, {
CDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
AccDataType, ADataType,
ColumnMajor, BDataType,
ColumnMajor, CDataType,
RowMajor, AccDataType,
PassThrough, ColumnMajor,
PassThrough, ColumnMajor,
PassThrough>{}(gemmPtr); RowMajor,
} PassThrough,
PassThrough,
gemmPtrs.clear(); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_f64_f64_f64_mk_kn_mn_instances(gemmPtrs);
gemmPtrs.clear();
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_f64_f64_f64_mk_kn_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ADataType, for(auto& gemmPtr : gemmPtrs)
BDataType, {
CDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
AccDataType, ADataType,
RowMajor, BDataType,
RowMajor, CDataType,
RowMajor, AccDataType,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); RowMajor,
} PassThrough,
PassThrough,
gemmPtrs.clear(); PassThrough>{}(gemmPtr);
ck::tensor_operation::device::device_gemm_instance:: }
add_device_gemm_xdl_f64_f64_f64_mk_nk_mn_instances(gemmPtrs);
gemmPtrs.clear();
for(auto& gemmPtr : gemmPtrs) ck::tensor_operation::device::device_gemm_instance::
{ add_device_gemm_xdl_f64_f64_f64_mk_nk_mn_instances(gemmPtrs);
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
ADataType, for(auto& gemmPtr : gemmPtrs)
BDataType, {
CDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
AccDataType, ADataType,
RowMajor, BDataType,
ColumnMajor, CDataType,
RowMajor, AccDataType,
PassThrough, RowMajor,
PassThrough, ColumnMajor,
PassThrough>{}(gemmPtr); RowMajor,
} PassThrough,
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; PassThrough,
return res ? 0 : 1; PassThrough>{}(gemmPtr);
} }
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
}
#include <algorithm> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <iostream> #include <algorithm>
#include <numeric> #include <cstdlib>
#include <tuple> #include <iostream>
#include <vector> #include <numeric>
#include <tuple>
#include "gemm_util.hpp" #include <vector>
#include "config.hpp"
#include "print.hpp" #include "ck/ck.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_gemm.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "gemm_specialization.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; #include "test/gemm/gemm_util.hpp"
using DeviceGemmNoOpPtr = using PassThrough = ck::tensor_operation::element_wise::PassThrough;
ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough, using DeviceGemmNoOpPtr =
ck::tensor_operation::element_wise::PassThrough>; ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
namespace ck { ck::tensor_operation::element_wise::PassThrough>;
namespace tensor_operation {
namespace device { namespace ck {
namespace device_gemm_instance { namespace tensor_operation {
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device {
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); namespace device_gemm_instance {
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&); void add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device_gemm_instance void add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace device void add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
} // namespace tensor_operation } // namespace device_gemm_instance
} // namespace ck } // namespace device
} // namespace tensor_operation
int main() } // namespace ck
{
using ADataType = int8_t; int main()
using BDataType = int8_t; {
using CDataType = int8_t; using ADataType = int8_t;
using AccDataType = int32_t; using BDataType = int8_t;
using CDataType = int8_t;
using RowMajor = ck::tensor_layout::gemm::RowMajor; using AccDataType = int32_t;
using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
using RowMajor = ck::tensor_layout::gemm::RowMajor;
std::vector<DeviceGemmNoOpPtr> gemmPtrs; using ColumnMajor = ck::tensor_layout::gemm::ColumnMajor;
bool res = true;
std::vector<DeviceGemmNoOpPtr> gemmPtrs;
ck::tensor_operation::device::device_gemm_instance:: bool res = true;
add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instances(gemmPtrs);
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
RowMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
ColumnMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, ColumnMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
RowMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, RowMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
gemmPtrs.clear(); }
ck::tensor_operation::device::device_gemm_instance::
add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instances(gemmPtrs); gemmPtrs.clear();
ck::tensor_operation::device::device_gemm_instance::
for(auto& gemmPtr : gemmPtrs) add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instances(gemmPtrs);
{
res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr, for(auto& gemmPtr : gemmPtrs)
ADataType, {
BDataType, res &= ck::gemm_util::TestGemm<DeviceGemmNoOpPtr,
CDataType, ADataType,
AccDataType, BDataType,
RowMajor, CDataType,
ColumnMajor, AccDataType,
RowMajor, RowMajor,
PassThrough, ColumnMajor,
PassThrough, RowMajor,
PassThrough>{}(gemmPtr); PassThrough,
} PassThrough,
PassThrough>{}(gemmPtr);
std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl; }
return res ? 0 : 1;
} std::cout << "TestGemm ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
return res ? 0 : 1;
}
include_directories(BEFORE
${PROJECT_SOURCE_DIR}/profiler/include
${PROJECT_SOURCE_DIR}/test/include
${PROJECT_SOURCE_DIR}/external/include/half
)
add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp) add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
target_link_libraries(test_gemm_reduce_fp16 PRIVATE host_tensor) target_link_libraries(test_gemm_reduce_fp16 PRIVATE host_tensor)
target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance) target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include "profile_gemm_reduce_impl.hpp" #include "profiler/include/profile_gemm_reduce_impl.hpp"
int main() int main()
{ {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include "config.hpp" #include "ck/ck.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_xdl_splitk.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_tensor.hpp"
#include "host_gemm.hpp" #include "ck/library/utility/check_err.hpp"
#include "tensor_layout.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_gemm_xdl_splitk.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/host_tensor/host_gemm.hpp"
enum struct GemmMatrixLayout enum struct GemmMatrixLayout
{ {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/device_grouped_gemm_xdl.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device.hpp"
#include "host_tensor.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "host_gemm.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "device_grouped_gemm_xdl.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/utility/magic_division.hpp"
#include "check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "config.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "magic_division.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
__global__ void gpu_magic_number_division(uint32_t magic_multiplier, __global__ void gpu_magic_number_division(uint32_t magic_multiplier,
uint32_t magic_shift, uint32_t magic_shift,
......
#include "getopt.h" // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "host_common_util.hpp" #include <getopt.h>
#include "profile_reduce_impl.hpp"
#include "ck/library/host_tensor/host_common_util.hpp"
#include "profiler/include/profile_reduce_impl.hpp"
using namespace ck; using namespace ck;
......
#include "getopt.h" // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "host_common_util.hpp" #include <getopt.h>
#include "profile_reduce_impl.hpp"
#include "ck/library/host_tensor/host_common_util.hpp"
#include "profiler/include/profile_reduce_impl.hpp"
using namespace ck; using namespace ck;
......
#include <cmath> // SPDX-License-Identifier: MIT
#include <cstdlib> // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <half.hpp>
#include <numeric> #include <cmath>
#include <type_traits> #include <cstdlib>
#include <vector> #include <numeric>
#include "gtest/gtest.h" #include <type_traits>
#include <vector>
#include "check_err.hpp" #include <gtest/gtest.h>
#include "config.hpp"
#include "conv_util.hpp" #include "ck/ck.hpp"
#include "element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "fill.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "host_tensor.hpp"
#include "reference_conv_fwd.hpp" #include "ck/library/utility/check_err.hpp"
#include "tensor_layout.hpp" #include "ck/library/utility/conv_util.hpp"
#include "ck/library/utility/fill.hpp"
namespace { #include "ck/library/host_tensor/host_tensor.hpp"
using InElementOp = ck::tensor_operation::element_wise::PassThrough; #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough; namespace {
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
template <ck::index_t NDim, using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
typename InDataType = float, using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
typename WeiDataType = float,
typename OutDataType = float, template <ck::index_t NDim,
typename InLayout = ck::tensor_layout::convolution::NHWC, typename InDataType = float,
typename WeiLayout = ck::tensor_layout::convolution::KYXC, typename WeiDataType = float,
typename OutLayout = ck::tensor_layout::convolution::NHWK, typename OutDataType = float,
typename FillInputOp = ck::utils::FillMonotonicSeq<InDataType>, typename InLayout = ck::tensor_layout::convolution::NHWC,
typename FillWeightsOp = ck::utils::FillConstant<WeiDataType>> typename WeiLayout = ck::tensor_layout::convolution::KYXC,
Tensor<OutDataType> typename OutLayout = ck::tensor_layout::convolution::NHWK,
run_reference_convolution_forward(const ck::utils::conv::ConvParams& params, typename FillInputOp = ck::utils::FillMonotonicSeq<InDataType>,
const FillInputOp& fill_input_op = FillInputOp{}, typename FillWeightsOp = ck::utils::FillConstant<WeiDataType>>
const FillWeightsOp& fill_weights_op = FillWeightsOp{0.5f}) Tensor<OutDataType>
{ run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
std::vector<std::size_t> input_dims{static_cast<std::size_t>(params.N_), const FillInputOp& fill_input_op = FillInputOp{},
static_cast<std::size_t>(params.C_)}; const FillWeightsOp& fill_weights_op = FillWeightsOp{0.5f})
input_dims.insert(std::end(input_dims), {
std::begin(params.input_spatial_lengths_), std::vector<std::size_t> input_dims{static_cast<std::size_t>(params.N_),
std::end(params.input_spatial_lengths_)); static_cast<std::size_t>(params.C_)};
input_dims.insert(std::end(input_dims),
std::vector<std::size_t> filter_dims{static_cast<std::size_t>(params.K_), std::begin(params.input_spatial_lengths_),
static_cast<std::size_t>(params.C_)}; std::end(params.input_spatial_lengths_));
filter_dims.insert(std::end(filter_dims),
std::begin(params.filter_spatial_lengths_), std::vector<std::size_t> filter_dims{static_cast<std::size_t>(params.K_),
std::end(params.filter_spatial_lengths_)); static_cast<std::size_t>(params.C_)};
filter_dims.insert(std::end(filter_dims),
const std::vector<ck::index_t>& output_spatial_lengths = params.GetOutputSpatialLengths(); std::begin(params.filter_spatial_lengths_),
std::vector<std::size_t> output_dims{static_cast<std::size_t>(params.N_), std::end(params.filter_spatial_lengths_));
static_cast<std::size_t>(params.K_)};
output_dims.insert(std::end(output_dims), const std::vector<ck::index_t>& output_spatial_lengths = params.GetOutputSpatialLengths();
std::begin(output_spatial_lengths), std::vector<std::size_t> output_dims{static_cast<std::size_t>(params.N_),
std::end(output_spatial_lengths)); static_cast<std::size_t>(params.K_)};
output_dims.insert(std::end(output_dims),
Tensor<InDataType> input(ck::utils::conv::get_host_tensor_descriptor(input_dims, InLayout{})); std::begin(output_spatial_lengths),
Tensor<WeiDataType> weights( std::end(output_spatial_lengths));
ck::utils::conv::get_host_tensor_descriptor(filter_dims, WeiLayout{}));
Tensor<OutDataType> host_output( Tensor<InDataType> input(ck::utils::conv::get_host_tensor_descriptor(input_dims, InLayout{}));
ck::utils::conv::get_host_tensor_descriptor(output_dims, OutLayout{})); Tensor<WeiDataType> weights(
ck::utils::conv::get_host_tensor_descriptor(filter_dims, WeiLayout{}));
fill_input_op(input.begin(), input.end()); Tensor<OutDataType> host_output(
fill_weights_op(weights.begin(), weights.end()); ck::utils::conv::get_host_tensor_descriptor(output_dims, OutLayout{}));
std::fill(host_output.begin(), host_output.end(), OutDataType(0.f));
fill_input_op(input.begin(), input.end());
auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<InDataType, fill_weights_op(weights.begin(), weights.end());
WeiDataType, std::fill(host_output.begin(), host_output.end(), OutDataType(0.f));
OutDataType,
InElementOp, auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<InDataType,
WeiElementOp, WeiDataType,
OutElementOp, OutDataType,
NDim>(); InElementOp,
auto ref_invoker = ref_conv.MakeInvoker(); WeiElementOp,
auto ref_argument = ref_conv.MakeArgument(input, OutElementOp,
weights, NDim>();
host_output, auto ref_invoker = ref_conv.MakeInvoker();
params.conv_filter_strides_, auto ref_argument = ref_conv.MakeArgument(input,
params.conv_filter_dilations_, weights,
params.input_left_pads_, host_output,
params.input_right_pads_, params.conv_filter_strides_,
InElementOp{}, params.conv_filter_dilations_,
WeiElementOp{}, params.input_left_pads_,
OutElementOp{}); params.input_right_pads_,
InElementOp{},
ref_invoker.Run(ref_argument); WeiElementOp{},
return host_output; OutElementOp{});
}
ref_invoker.Run(ref_argument);
} // anonymous namespace return host_output;
}
TEST(ReferenceConvolutionFWD, Conv2DNHWC)
{ } // anonymous namespace
ck::utils::conv::ConvParams params;
params.N_ = 1; TEST(ReferenceConvolutionFWD, Conv2DNHWC)
params.K_ = 1; {
params.C_ = 2; ck::utils::conv::ConvParams params;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3}; params.N_ = 1;
params.input_spatial_lengths_ = std::vector<ck::index_t>{6, 6}; params.K_ = 1;
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1}; params.C_ = 2;
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3};
params.input_left_pads_ = std::vector<ck::index_t>{0, 0}; params.input_spatial_lengths_ = std::vector<ck::index_t>{6, 6};
params.input_right_pads_ = std::vector<ck::index_t>{0, 0}; params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1};
auto out_tensor = run_reference_convolution_forward<2>(params); params.input_left_pads_ = std::vector<ck::index_t>{0, 0};
std::vector<std::size_t> ref_dims{1, 1, 4, 4}; params.input_right_pads_ = std::vector<ck::index_t>{0, 0};
std::vector<float> ref_data{130.5,
148.5, auto out_tensor = run_reference_convolution_forward<2>(params);
166.5, std::vector<std::size_t> ref_dims{1, 1, 4, 4};
184.5, std::vector<float> ref_data{130.5,
238.5, 148.5,
256.5, 166.5,
274.5, 184.5,
292.5, 238.5,
346.5, 256.5,
364.5, 274.5,
382.5, 292.5,
400.5, 346.5,
454.5, 364.5,
472.5, 382.5,
490.5, 400.5,
508.5}; 454.5,
EXPECT_TRUE(ck::utils::check_err( 472.5,
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!")); 490.5,
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!")); 508.5};
} EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
TEST(ReferenceConvolutionFWD, Conv2DNHWCStridesDilationsPadding) EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
{ }
ck::utils::conv::ConvParams params;
params.N_ = 1; TEST(ReferenceConvolutionFWD, Conv2DNHWCStridesDilationsPadding)
params.K_ = 2; {
params.C_ = 2; ck::utils::conv::ConvParams params;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3}; params.N_ = 1;
params.input_spatial_lengths_ = std::vector<ck::index_t>{12, 12}; params.K_ = 2;
params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2}; params.C_ = 2;
params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3};
params.input_left_pads_ = std::vector<ck::index_t>{1, 1}; params.input_spatial_lengths_ = std::vector<ck::index_t>{12, 12};
params.input_right_pads_ = std::vector<ck::index_t>{1, 1}; params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2};
params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
auto out_tensor = run_reference_convolution_forward<2>(params); params.input_left_pads_ = std::vector<ck::index_t>{1, 1};
std::vector<std::size_t> ref_dims = std::vector<std::size_t>{1, 2, 5, 5}; params.input_right_pads_ = std::vector<ck::index_t>{1, 1};
std::vector<float> ref_data{
210., 210., 327., 327., 351., 351., 375., 375., 399., 399., auto out_tensor = run_reference_convolution_forward<2>(params);
459., 459., 706.5, 706.5, 742.5, 742.5, 778.5, 778.5, 814.5, 814.5, std::vector<std::size_t> ref_dims = std::vector<std::size_t>{1, 2, 5, 5};
747., 747., 1138.5, 1138.5, 1174.5, 1174.5, 1210.5, 1210.5, 1246.5, 1246.5, std::vector<float> ref_data{
1035., 1035., 1570.5, 1570.5, 1606.5, 1606.5, 1642.5, 1642.5, 1678.5, 1678.5, 210., 210., 327., 327., 351., 351., 375., 375., 399., 399.,
1323., 1323., 2002.5, 2002.5, 2038.5, 2038.5, 2074.5, 2074.5, 2110.5, 2110.5}; 459., 459., 706.5, 706.5, 742.5, 742.5, 778.5, 778.5, 814.5, 814.5,
EXPECT_TRUE(ck::utils::check_err( 747., 747., 1138.5, 1138.5, 1174.5, 1174.5, 1210.5, 1210.5, 1246.5, 1246.5,
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!")); 1035., 1035., 1570.5, 1570.5, 1606.5, 1606.5, 1642.5, 1642.5, 1678.5, 1678.5,
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!")); 1323., 1323., 2002.5, 2002.5, 2038.5, 2038.5, 2074.5, 2074.5, 2110.5, 2110.5};
} EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
TEST(ReferenceConvolutionFWD, Conv1DNWC) EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
{ }
ck::utils::conv::ConvParams params;
params.num_dim_spatial_ = 1; TEST(ReferenceConvolutionFWD, Conv1DNWC)
params.N_ = 1; {
params.K_ = 1; ck::utils::conv::ConvParams params;
params.C_ = 2; params.num_dim_spatial_ = 1;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3}; params.N_ = 1;
params.input_spatial_lengths_ = std::vector<ck::index_t>{6}; params.K_ = 1;
params.conv_filter_strides_ = std::vector<ck::index_t>{1}; params.C_ = 2;
params.conv_filter_dilations_ = std::vector<ck::index_t>{1}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
params.input_left_pads_ = std::vector<ck::index_t>{0}; params.input_spatial_lengths_ = std::vector<ck::index_t>{6};
params.input_right_pads_ = std::vector<ck::index_t>{0}; params.conv_filter_strides_ = std::vector<ck::index_t>{1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1};
auto out_tensor = params.input_left_pads_ = std::vector<ck::index_t>{0};
run_reference_convolution_forward<1, params.input_right_pads_ = std::vector<ck::index_t>{0};
float,
float, auto out_tensor =
float, run_reference_convolution_forward<1,
ck::tensor_layout::convolution::NWC, float,
ck::tensor_layout::convolution::KXC, float,
ck::tensor_layout::convolution::NWK>(params); float,
std::vector<std::size_t> ref_dims{1, 1, 4}; ck::tensor_layout::convolution::NWC,
std::vector<float> ref_data{7.5, 13.5, 19.5, 25.5}; ck::tensor_layout::convolution::KXC,
EXPECT_TRUE(ck::utils::check_err( ck::tensor_layout::convolution::NWK>(params);
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!")); std::vector<std::size_t> ref_dims{1, 1, 4};
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!")); std::vector<float> ref_data{7.5, 13.5, 19.5, 25.5};
} EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
TEST(ReferenceConvolutionFWD, Conv1DNWCStridesDilationsPadding) EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
{ }
ck::utils::conv::ConvParams params;
params.num_dim_spatial_ = 1; TEST(ReferenceConvolutionFWD, Conv1DNWCStridesDilationsPadding)
params.N_ = 1; {
params.K_ = 2; ck::utils::conv::ConvParams params;
params.C_ = 2; params.num_dim_spatial_ = 1;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3}; params.N_ = 1;
params.input_spatial_lengths_ = std::vector<ck::index_t>{12}; params.K_ = 2;
params.conv_filter_strides_ = std::vector<ck::index_t>{2}; params.C_ = 2;
params.conv_filter_dilations_ = std::vector<ck::index_t>{2}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
params.input_left_pads_ = std::vector<ck::index_t>{1}; params.input_spatial_lengths_ = std::vector<ck::index_t>{12};
params.input_right_pads_ = std::vector<ck::index_t>{1}; params.conv_filter_strides_ = std::vector<ck::index_t>{2};
params.conv_filter_dilations_ = std::vector<ck::index_t>{2};
auto out_tensor = params.input_left_pads_ = std::vector<ck::index_t>{1};
run_reference_convolution_forward<1, params.input_right_pads_ = std::vector<ck::index_t>{1};
float,
float, auto out_tensor =
float, run_reference_convolution_forward<1,
ck::tensor_layout::convolution::NWC, float,
ck::tensor_layout::convolution::KXC, float,
ck::tensor_layout::convolution::NWK>(params); float,
std::vector<std::size_t> ref_dims{1, 2, 5}; ck::tensor_layout::convolution::NWC,
std::vector<float> ref_data{9., 9., 19.5, 19.5, 31.5, 31.5, 43.5, 43.5, 55.5, 55.5}; ck::tensor_layout::convolution::KXC,
EXPECT_TRUE(ck::utils::check_err( ck::tensor_layout::convolution::NWK>(params);
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!")); std::vector<std::size_t> ref_dims{1, 2, 5};
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!")); std::vector<float> ref_data{9., 9., 19.5, 19.5, 31.5, 31.5, 43.5, 43.5, 55.5, 55.5};
} EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
TEST(ReferenceConvolutionFWD, Conv1DNWCSameOutputSize) EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
{ }
ck::utils::conv::ConvParams params;
params.num_dim_spatial_ = 1; TEST(ReferenceConvolutionFWD, Conv1DNWCSameOutputSize)
params.N_ = 2; {
params.K_ = 16; ck::utils::conv::ConvParams params;
params.C_ = 4; params.num_dim_spatial_ = 1;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3}; params.N_ = 2;
params.input_spatial_lengths_ = std::vector<ck::index_t>{16}; params.K_ = 16;
params.conv_filter_strides_ = std::vector<ck::index_t>{1}; params.C_ = 4;
params.conv_filter_dilations_ = std::vector<ck::index_t>{1}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
params.input_left_pads_ = std::vector<ck::index_t>{1}; params.input_spatial_lengths_ = std::vector<ck::index_t>{16};
params.input_right_pads_ = std::vector<ck::index_t>{1}; params.conv_filter_strides_ = std::vector<ck::index_t>{1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1};
auto out_tensor2 = run_reference_convolution_forward<1, params.input_left_pads_ = std::vector<ck::index_t>{1};
float, params.input_right_pads_ = std::vector<ck::index_t>{1};
float,
float, auto out_tensor2 = run_reference_convolution_forward<1,
ck::tensor_layout::convolution::NWC, float,
ck::tensor_layout::convolution::KXC, float,
ck::tensor_layout::convolution::NWK>( float,
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f}); ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
std::vector<std::size_t> ref_dims{2, 16, 16}; ck::tensor_layout::convolution::NWK>(
std::vector<float> ref_data{ params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, std::vector<std::size_t> ref_dims{2, 16, 16};
3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, std::vector<float> ref_data{
3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3,
8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3,
8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7, 5.7,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1,
12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1,
12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5,
15.3, 15.3, 15.3, 15.3, 15.3, 15.3, 15.3, 15.3, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5,
15.3, 15.3, 15.3, 15.3, 15.3, 15.3, 15.3, 15.3, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001,
17.7, 17.7, 17.7, 17.7, 17.7, 17.7, 17.7, 17.7, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001, 12.900001,
17.7, 17.7, 17.7, 17.7, 17.7, 17.7, 17.7, 17.7, 15.3, 15.3, 15.3, 15.3, 15.3, 15.3, 15.3, 15.3,
20.1, 20.1, 20.1, 20.1, 20.1, 20.1, 20.1, 20.1, 15.3, 15.3, 15.3, 15.3, 15.3, 15.3, 15.3, 15.3,
20.1, 20.1, 20.1, 20.1, 20.1, 20.1, 20.1, 20.1, 17.7, 17.7, 17.7, 17.7, 17.7, 17.7, 17.7, 17.7,
22.5, 22.5, 22.5, 22.5, 22.5, 22.5, 22.5, 22.5, 17.7, 17.7, 17.7, 17.7, 17.7, 17.7, 17.7, 17.7,
22.5, 22.5, 22.5, 22.5, 22.5, 22.5, 22.5, 22.5, 20.1, 20.1, 20.1, 20.1, 20.1, 20.1, 20.1, 20.1,
24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 20.1, 20.1, 20.1, 20.1, 20.1, 20.1, 20.1, 20.1,
24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 22.5, 22.5, 22.5, 22.5, 22.5, 22.5, 22.5, 22.5,
27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 22.5, 22.5, 22.5, 22.5, 22.5, 22.5, 22.5, 22.5,
27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002,
29.7, 29.7, 29.7, 29.7, 29.7, 29.7, 29.7, 29.7, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002, 24.900002,
29.7, 29.7, 29.7, 29.7, 29.7, 29.7, 29.7, 29.7, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001,
32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001, 27.300001,
32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 29.7, 29.7, 29.7, 29.7, 29.7, 29.7, 29.7, 29.7,
34.5, 34.5, 34.5, 34.5, 34.5, 34.5, 34.5, 34.5, 29.7, 29.7, 29.7, 29.7, 29.7, 29.7, 29.7, 29.7,
34.5, 34.5, 34.5, 34.5, 34.5, 34.5, 34.5, 34.5, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002,
23.8, 23.8, 23.8, 23.8, 23.8, 23.8, 23.8, 23.8, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002, 32.100002,
23.8, 23.8, 23.8, 23.8, 23.8, 23.8, 23.8, 23.8, 34.5, 34.5, 34.5, 34.5, 34.5, 34.5, 34.5, 34.5,
27., 27., 27., 27., 27., 27., 27., 27., 34.5, 34.5, 34.5, 34.5, 34.5, 34.5, 34.5, 34.5,
27., 27., 27., 27., 27., 27., 27., 27., 23.8, 23.8, 23.8, 23.8, 23.8, 23.8, 23.8, 23.8,
41.7, 41.7, 41.7, 41.7, 41.7, 41.7, 41.7, 41.7, 23.8, 23.8, 23.8, 23.8, 23.8, 23.8, 23.8, 23.8,
41.7, 41.7, 41.7, 41.7, 41.7, 41.7, 41.7, 41.7, 27., 27., 27., 27., 27., 27., 27., 27.,
44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 27., 27., 27., 27., 27., 27., 27., 27.,
44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 41.7, 41.7, 41.7, 41.7, 41.7, 41.7, 41.7, 41.7,
46.5, 46.5, 46.5, 46.5, 46.5, 46.5, 46.5, 46.5, 41.7, 41.7, 41.7, 41.7, 41.7, 41.7, 41.7, 41.7,
46.5, 46.5, 46.5, 46.5, 46.5, 46.5, 46.5, 46.5, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002,
48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002, 44.100002,
48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 46.5, 46.5, 46.5, 46.5, 46.5, 46.5, 46.5, 46.5,
51.3, 51.3, 51.3, 51.3, 51.3, 51.3, 51.3, 51.3, 46.5, 46.5, 46.5, 46.5, 46.5, 46.5, 46.5, 46.5,
51.3, 51.3, 51.3, 51.3, 51.3, 51.3, 51.3, 51.3, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998,
53.7, 53.7, 53.7, 53.7, 53.7, 53.7, 53.7, 53.7, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998, 48.899998,
53.7, 53.7, 53.7, 53.7, 53.7, 53.7, 53.7, 53.7, 51.3, 51.3, 51.3, 51.3, 51.3, 51.3, 51.3, 51.3,
56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 51.3, 51.3, 51.3, 51.3, 51.3, 51.3, 51.3, 51.3,
56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 53.7, 53.7, 53.7, 53.7, 53.7, 53.7, 53.7, 53.7,
58.5, 58.5, 58.5, 58.5, 58.5, 58.5, 58.5, 58.5, 53.7, 53.7, 53.7, 53.7, 53.7, 53.7, 53.7, 53.7,
58.5, 58.5, 58.5, 58.5, 58.5, 58.5, 58.5, 58.5, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002,
60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002, 56.100002,
60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 58.5, 58.5, 58.5, 58.5, 58.5, 58.5, 58.5, 58.5,
63.3, 63.3, 63.3, 63.3, 63.3, 63.3, 63.3, 63.3, 58.5, 58.5, 58.5, 58.5, 58.5, 58.5, 58.5, 58.5,
63.3, 63.3, 63.3, 63.3, 63.3, 63.3, 63.3, 63.3, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998,
65.7, 65.7, 65.7, 65.7, 65.7, 65.7, 65.7, 65.7, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998, 60.899998,
65.7, 65.7, 65.7, 65.7, 65.7, 65.7, 65.7, 65.7, 63.3, 63.3, 63.3, 63.3, 63.3, 63.3, 63.3, 63.3,
68.1, 68.1, 68.1, 68.1, 68.1, 68.1, 68.1, 68.1, 63.3, 63.3, 63.3, 63.3, 63.3, 63.3, 63.3, 63.3,
68.1, 68.1, 68.1, 68.1, 68.1, 68.1, 68.1, 68.1, 65.7, 65.7, 65.7, 65.7, 65.7, 65.7, 65.7, 65.7,
70.5, 70.5, 70.5, 70.5, 70.5, 70.5, 70.5, 70.5, 65.7, 65.7, 65.7, 65.7, 65.7, 65.7, 65.7, 65.7,
70.5, 70.5, 70.5, 70.5, 70.5, 70.5, 70.5, 70.5, 68.1, 68.1, 68.1, 68.1, 68.1, 68.1, 68.1, 68.1,
72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 68.1, 68.1, 68.1, 68.1, 68.1, 68.1, 68.1, 68.1,
72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 70.5, 70.5, 70.5, 70.5, 70.5, 70.5, 70.5, 70.5,
49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 70.5, 70.5, 70.5, 70.5, 70.5, 70.5, 70.5, 70.5,
49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4}; 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9,
EXPECT_TRUE(ck::utils::check_err( 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9, 72.9,
out_tensor2.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!")); 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4,
EXPECT_TRUE(ck::utils::check_err(out_tensor2.mData, ref_data, "Error: incorrect results!")); 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4};
} EXPECT_TRUE(ck::utils::check_err(
out_tensor2.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
TEST(ReferenceConvolutionFWD, Conv3DNCDHW) EXPECT_TRUE(ck::utils::check_err(out_tensor2.mData, ref_data, "Error: incorrect results!"));
{ }
ck::utils::conv::ConvParams params;
params.num_dim_spatial_ = 3; TEST(ReferenceConvolutionFWD, Conv3DNCDHW)
params.N_ = 1; {
params.K_ = 1; ck::utils::conv::ConvParams params;
params.C_ = 2; params.num_dim_spatial_ = 3;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3}; params.N_ = 1;
params.input_spatial_lengths_ = std::vector<ck::index_t>{6, 6, 6}; params.K_ = 1;
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1}; params.C_ = 2;
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
params.input_left_pads_ = std::vector<ck::index_t>{0, 0, 0}; params.input_spatial_lengths_ = std::vector<ck::index_t>{6, 6, 6};
params.input_right_pads_ = std::vector<ck::index_t>{0, 0, 0}; params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
auto out_tensor = run_reference_convolution_forward<3, params.input_left_pads_ = std::vector<ck::index_t>{0, 0, 0};
float, params.input_right_pads_ = std::vector<ck::index_t>{0, 0, 0};
float,
float, auto out_tensor = run_reference_convolution_forward<3,
ck::tensor_layout::convolution::NCDHW, float,
ck::tensor_layout::convolution::KCZYX, float,
ck::tensor_layout::convolution::NKDHW>( float,
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f}); ck::tensor_layout::convolution::NCDHW,
std::vector<std::size_t> ref_dims{1, 1, 4, 4, 4}; ck::tensor_layout::convolution::KCZYX,
std::vector<float> ref_data{ ck::tensor_layout::convolution::NKDHW>(
407.7, 410.40002, 413.09998, 415.80002, 423.90002, 426.6, 429.30002, 432., params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
440.1, 442.80002, 445.5, 448.2, 456.30002, 459., 461.7, 464.40002, std::vector<std::size_t> ref_dims{1, 1, 4, 4, 4};
504.90002, 507.6, 510.30002, 513., 521.1, 523.8, 526.5, 529.2001, std::vector<float> ref_data{
537.3, 540., 542.7001, 545.4, 553.5, 556.2001, 558.9, 561.6, 407.7, 410.40002, 413.09998, 415.80002, 423.90002, 426.6, 429.30002, 432.,
602.10004, 604.8, 607.5, 610.2, 618.3, 621., 623.7, 626.4, 440.1, 442.80002, 445.5, 448.2, 456.30002, 459., 461.7, 464.40002,
634.5, 637.2, 639.9, 642.60004, 650.7, 653.4, 656.10004, 658.8, 504.90002, 507.6, 510.30002, 513., 521.1, 523.8, 526.5, 529.2001,
699.3, 702., 704.7, 707.4, 715.5, 718.2, 720.9, 723.60004, 537.3, 540., 542.7001, 545.4, 553.5, 556.2001, 558.9, 561.6,
731.7, 734.4001, 737.10004, 739.8, 747.9001, 750.60004, 753.3, 756.}; 602.10004, 604.8, 607.5, 610.2, 618.3, 621., 623.7, 626.4,
EXPECT_TRUE(ck::utils::check_err(out_tensor.mDesc.GetLengths(), 634.5, 637.2, 639.9, 642.60004, 650.7, 653.4, 656.10004, 658.8,
ref_dims, 699.3, 702., 704.7, 707.4, 715.5, 718.2, 720.9, 723.60004,
"Error [case 1]: wrong output tensor dimensions!")); 731.7, 734.4001, 737.10004, 739.8, 747.9001, 750.60004, 753.3, 756.};
EXPECT_TRUE( EXPECT_TRUE(ck::utils::check_err(out_tensor.mDesc.GetLengths(),
ck::utils::check_err(out_tensor.mData, ref_data, "Error [case 1]: incorrect results!")); ref_dims,
} "Error [case 1]: wrong output tensor dimensions!"));
EXPECT_TRUE(
TEST(ReferenceConvolutionFWD, Conv3DNCDHWStridesDilations) ck::utils::check_err(out_tensor.mData, ref_data, "Error [case 1]: incorrect results!"));
{ }
ck::utils::conv::ConvParams params;
params.num_dim_spatial_ = 3; TEST(ReferenceConvolutionFWD, Conv3DNCDHWStridesDilations)
params.N_ = 1; {
params.K_ = 2; ck::utils::conv::ConvParams params;
params.C_ = 2; params.num_dim_spatial_ = 3;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3}; params.N_ = 1;
params.input_spatial_lengths_ = std::vector<ck::index_t>{12, 12, 12}; params.K_ = 2;
params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3, 3}; params.C_ = 2;
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1}; params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
params.input_left_pads_ = std::vector<ck::index_t>{0, 0, 0}; params.input_spatial_lengths_ = std::vector<ck::index_t>{12, 12, 12};
params.input_right_pads_ = std::vector<ck::index_t>{0, 0, 0}; params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3, 3};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
auto out_tensor = run_reference_convolution_forward<3, params.input_left_pads_ = std::vector<ck::index_t>{0, 0, 0};
float, params.input_right_pads_ = std::vector<ck::index_t>{0, 0, 0};
float,
float, auto out_tensor = run_reference_convolution_forward<3,
ck::tensor_layout::convolution::NCDHW, float,
ck::tensor_layout::convolution::KCZYX, float,
ck::tensor_layout::convolution::NKDHW>( float,
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f}); ck::tensor_layout::convolution::NCDHW,
std::vector<std::size_t> ref_dims{1, 2, 4, 4, 4}; ck::tensor_layout::convolution::KCZYX,
std::vector<float> ref_data{ ck::tensor_layout::convolution::NKDHW>(
2756.7002, 2764.7998, 2772.9001, 2781., 2853.9001, 2862., 2870.1, 2878.2002, params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
2951.1, 2959.2002, 2967.2998, 2975.4001, 3048.2998, 3056.4001, 3064.5, 3072.6, std::vector<std::size_t> ref_dims{1, 2, 4, 4, 4};
3923.1, 3931.2, 3939.2998, 3947.4, 4020.2998, 4028.4001, 4036.5002, 4044.5999, std::vector<float> ref_data{
4117.5, 4125.6, 4133.7, 4141.8, 4214.7, 4222.8, 4230.9004, 4239., 2756.7002, 2764.7998, 2772.9001, 2781., 2853.9001, 2862., 2870.1, 2878.2002,
5089.5, 5097.5996, 5105.7, 5113.8, 5186.7, 5194.8, 5202.9, 5211., 2951.1, 2959.2002, 2967.2998, 2975.4001, 3048.2998, 3056.4001, 3064.5, 3072.6,
5283.9004, 5292., 5300.0996, 5308.2, 5381.0996, 5389.2, 5397.3, 5405.4004, 3923.1, 3931.2, 3939.2998, 3947.4, 4020.2998, 4028.4001, 4036.5002, 4044.5999,
6255.9004, 6264.0005, 6272.1, 6280.2, 6353.1, 6361.2, 6369.301, 6377.4, 4117.5, 4125.6, 4133.7, 4141.8, 4214.7, 4222.8, 4230.9004, 4239.,
6450.301, 6458.4, 6466.5, 6474.6, 6547.5, 6555.6, 6563.699, 6571.801, 5089.5, 5097.5996, 5105.7, 5113.8, 5186.7, 5194.8, 5202.9, 5211.,
2756.7002, 2764.7998, 2772.9001, 2781., 2853.9001, 2862., 2870.1, 2878.2002, 5283.9004, 5292., 5300.0996, 5308.2, 5381.0996, 5389.2, 5397.3, 5405.4004,
2951.1, 2959.2002, 2967.2998, 2975.4001, 3048.2998, 3056.4001, 3064.5, 3072.6, 6255.9004, 6264.0005, 6272.1, 6280.2, 6353.1, 6361.2, 6369.301, 6377.4,
3923.1, 3931.2, 3939.2998, 3947.4, 4020.2998, 4028.4001, 4036.5002, 4044.5999, 6450.301, 6458.4, 6466.5, 6474.6, 6547.5, 6555.6, 6563.699, 6571.801,
4117.5, 4125.6, 4133.7, 4141.8, 4214.7, 4222.8, 4230.9004, 4239., 2756.7002, 2764.7998, 2772.9001, 2781., 2853.9001, 2862., 2870.1, 2878.2002,
5089.5, 5097.5996, 5105.7, 5113.8, 5186.7, 5194.8, 5202.9, 5211., 2951.1, 2959.2002, 2967.2998, 2975.4001, 3048.2998, 3056.4001, 3064.5, 3072.6,
5283.9004, 5292., 5300.0996, 5308.2, 5381.0996, 5389.2, 5397.3, 5405.4004, 3923.1, 3931.2, 3939.2998, 3947.4, 4020.2998, 4028.4001, 4036.5002, 4044.5999,
6255.9004, 6264.0005, 6272.1, 6280.2, 6353.1, 6361.2, 6369.301, 6377.4, 4117.5, 4125.6, 4133.7, 4141.8, 4214.7, 4222.8, 4230.9004, 4239.,
6450.301, 6458.4, 6466.5, 6474.6, 6547.5, 6555.6, 6563.699, 6571.801}; 5089.5, 5097.5996, 5105.7, 5113.8, 5186.7, 5194.8, 5202.9, 5211.,
EXPECT_TRUE(ck::utils::check_err(out_tensor.mDesc.GetLengths(), 5283.9004, 5292., 5300.0996, 5308.2, 5381.0996, 5389.2, 5397.3, 5405.4004,
ref_dims, 6255.9004, 6264.0005, 6272.1, 6280.2, 6353.1, 6361.2, 6369.301, 6377.4,
"Error [case 2]: wrong output tensor dimensions!")); 6450.301, 6458.4, 6466.5, 6474.6, 6547.5, 6555.6, 6563.699, 6571.801};
EXPECT_TRUE(ck::utils::check_err( EXPECT_TRUE(ck::utils::check_err(out_tensor.mDesc.GetLengths(),
out_tensor.mData, ref_data, "Error [case 2]: incorrect results!", 1e-4f, 1e-6f)); ref_dims,
} "Error [case 2]: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(
out_tensor.mData, ref_data, "Error [case 2]: incorrect results!", 1e-4f, 1e-6f));
}
# Umbrella target: building `test_softmax` builds every softmax test executable
# registered against it below.
add_custom_target(test_softmax)

# One test executable per data precision. Each is created through the project's
# add_gtest_executable helper (defined elsewhere), linked against host_tensor,
# and attached to the umbrella target.
foreach(softmax_precision fp32 fp16)
    add_gtest_executable(test_softmax_${softmax_precision} test_softmax_${softmax_precision}.cpp)
    target_link_libraries(test_softmax_${softmax_precision} PRIVATE host_tensor)
    add_dependencies(test_softmax test_softmax_${softmax_precision})
endforeach()
\ No newline at end of file
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "test_softmax_util.hpp"
// Shorthand for ck::Number<N>, so integer parameters can be passed as types
// inside the std::tuple kernel configurations listed in KernelTypes.
template <ck::index_t N>
using I = ck::Number<N>;
// Typed-test fixture for the FP16 softmax cases. It declares no members of its
// own; all behavior is inherited from ck::TestSoftmax<KernelConfig>. Its only
// role here is to give the FP16 suite a distinct fixture name.
template <typename KernelConfig>
struct TestSoftmaxFP16 : ck::TestSoftmax<KernelConfig>
{
};
// Kernel configurations the typed test suite is instantiated with. Each
// std::tuple is one configuration; its elements follow the column header below.
// All rows use half_t input/output with float accumulation, rank 3 and block
// size 256. The first four rows have NumReduceDim = 1, the last four have
// NumReduceDim = 2, and within each group the M x K thread cluster steps through
// 8x32, 4x64, 2x128 and 1x256.
// clang-format off
using KernelTypes = ::testing::Types<
// InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<8>>
>;
// clang-format on
// Instantiate the TestSoftmaxFP16 fixture once per configuration in KernelTypes.
TYPED_TEST_SUITE(TestSoftmaxFP16, KernelTypes);
// Single test body shared by every configuration. Run() is not defined in this
// file; presumably it is inherited from ck::TestSoftmax (test_softmax_util.hpp) —
// TODO confirm.
TYPED_TEST(TestSoftmaxFP16, Test_FP16) { this->Run(); }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment