Commit a4522ae3 authored by illsilin's avatar illsilin
Browse files

sync from public repo

parents 1f127242 e0594d08
...@@ -27,11 +27,6 @@ rocm_install(FILES ${MHA_HEADERS} DESTINATION include/ck_tile/ops) ...@@ -27,11 +27,6 @@ rocm_install(FILES ${MHA_HEADERS} DESTINATION include/ck_tile/ops)
# headers for building lib # headers for building lib
file(COPY ${MHA_HEADERS} DESTINATION ${FMHA_CPP_FOLDER}) file(COPY ${MHA_HEADERS} DESTINATION ${FMHA_CPP_FOLDER})
# Delete the blob file if it exists to avoid append of old content.
if(EXISTS ${FMHA_CPP_FOLDER}/blob_list.txt)
file(REMOVE ${FMHA_CPP_FOLDER}/blob_list.txt)
endif()
set(FMHA_KNOWN_APIS "fwd,fwd_splitkv,fwd_appendkv,bwd") set(FMHA_KNOWN_APIS "fwd,fwd_splitkv,fwd_appendkv,bwd")
# generate a list of kernels, but not actually emit files at config stage # generate a list of kernels, but not actually emit files at config stage
......
...@@ -271,10 +271,12 @@ bool profile_gemm_multiply_multiply_impl(int do_verification, ...@@ -271,10 +271,12 @@ bool profile_gemm_multiply_multiply_impl(int do_verification,
<< " TFlops, " << gb_per_sec << " GB/s, " << op_name << ", KBatch " << " TFlops, " << gb_per_sec << " GB/s, " << op_name << ", KBatch "
<< kbatch_curr << std::endl; << kbatch_curr << std::endl;
#if defined CK_ENABLE_FP8 #if defined CK_ENABLE_FP8 || defined CK_ENABLE_INT8
// set softer tolerances for fp8 // set softer tolerances for fp8
if constexpr(is_same_v<ADataType, f8_t> || is_same_v<BDataType, f8_t> || if constexpr((is_same_v<ADataType, f8_t> || is_same_v<BDataType, f8_t> ||
is_same_v<EDataType, f8_t>) is_same_v<EDataType, f8_t>) ||
(is_same_v<ADataType, int8_t> || is_same_v<BDataType, int8_t> ||
is_same_v<EDataType, int8_t>))
{ {
std::string msg = "Error: Incorrect results!"; std::string msg = "Error: Incorrect results!";
double rtol = 1e-1; double rtol = 1e-1;
...@@ -286,7 +288,7 @@ bool profile_gemm_multiply_multiply_impl(int do_verification, ...@@ -286,7 +288,7 @@ bool profile_gemm_multiply_multiply_impl(int do_verification,
{ {
#endif #endif
pass = pass & ck::utils::check_err(e_m_n_device_result, e_m_n_host_result); pass = pass & ck::utils::check_err(e_m_n_device_result, e_m_n_host_result);
#if defined CK_ENABLE_FP8 #if defined CK_ENABLE_FP8 || defined CK_ENABLE_INT8
} }
#endif #endif
......
...@@ -102,11 +102,22 @@ bool profile_pool3d_fwd_impl(PoolFwdInputParams& in_params, PoolFwdKernelParams& ...@@ -102,11 +102,22 @@ bool profile_pool3d_fwd_impl(PoolFwdInputParams& in_params, PoolFwdKernelParams&
Tensor<IndexDataType> out_indices_n_c_do_ho_wo_device( Tensor<IndexDataType> out_indices_n_c_do_ho_wo_device(
f_host_tensor_descriptor(N, C, Do, Ho, Wo)); f_host_tensor_descriptor(N, C, Do, Ho, Wo));
constexpr int inDataRangeTensor1{1};
constexpr int inDataRangeTensor2{5};
constexpr double inDataRangeTensor3{0.5};
switch(in_params.init_method) switch(in_params.init_method)
{ {
case 0: in_n_c_di_hi_wi.GenerateTensorValue(GeneratorTensor_1<InDataType>{}); break; case 0:
case 1: in_n_c_di_hi_wi.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5}); break; in_n_c_di_hi_wi.GenerateTensorValue(GeneratorTensor_1<InDataType>{inDataRangeTensor1});
default: in_n_c_di_hi_wi.GenerateTensorValue(GeneratorTensor_3<InDataType>{-0.5, 0.5}); break;
case 1:
in_n_c_di_hi_wi.GenerateTensorValue(
GeneratorTensor_2<InDataType>{-inDataRangeTensor2, inDataRangeTensor2});
break;
default:
in_n_c_di_hi_wi.GenerateTensorValue(
GeneratorTensor_3<InDataType>{-inDataRangeTensor3, inDataRangeTensor3});
} }
DeviceMem in_device_buf(sizeof(InDataType) * in_n_c_di_hi_wi.mDesc.GetElementSpaceSize()); DeviceMem in_device_buf(sizeof(InDataType) * in_n_c_di_hi_wi.mDesc.GetElementSpaceSize());
...@@ -229,12 +240,25 @@ bool profile_pool3d_fwd_impl(PoolFwdInputParams& in_params, PoolFwdKernelParams& ...@@ -229,12 +240,25 @@ bool profile_pool3d_fwd_impl(PoolFwdInputParams& in_params, PoolFwdKernelParams&
{ {
out_device_buf.FromDevice(out_n_c_do_ho_wo_device.mData.data()); out_device_buf.FromDevice(out_n_c_do_ho_wo_device.mData.data());
auto tolerance = 1e-3; auto absolute_error_threshold = 1.0;
bool pass = ck::utils::check_err(out_n_c_do_ho_wo_device.mData, switch(in_params.init_method)
{
case 0: absolute_error_threshold = static_cast<double>(inDataRangeTensor1); break;
case 1: absolute_error_threshold = static_cast<double>(inDataRangeTensor2); break;
default: absolute_error_threshold = inDataRangeTensor3;
}
absolute_error_threshold =
ck::utils::get_absolute_threshold<ComputeDataType, OutDataType>(
absolute_error_threshold);
auto relative_error_threshold =
ck::utils::get_relative_threshold<ComputeDataType, OutDataType>();
bool pass = ck::utils::check_err(out_n_c_do_ho_wo_device.mData,
out_n_c_do_ho_wo_host.mData, out_n_c_do_ho_wo_host.mData,
"Error: Incorrect results", "Error: Incorrect results",
tolerance, relative_error_threshold,
tolerance); absolute_error_threshold);
if constexpr(OutputIndex) if constexpr(OutputIndex)
{ {
......
...@@ -27,6 +27,7 @@ enum struct GemmDataType ...@@ -27,6 +27,7 @@ enum struct GemmDataType
F16_F8_F16, // 5 F16_F8_F16, // 5
F16_F16_F16_F8, // 6 F16_F16_F16_F8, // 6
F8_F8_BF16, // 7 F8_F8_BF16, // 7
INT8_INT8_BF16, // 8
}; };
#define OP_NAME "gemm_multiply_multiply" #define OP_NAME "gemm_multiply_multiply"
...@@ -39,7 +40,7 @@ int profile_gemm_multiply_multiply(int argc, char* argv[]) ...@@ -39,7 +40,7 @@ int profile_gemm_multiply_multiply(int argc, char* argv[])
printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8; 4: f8@f16; 5: f16@f8; 6: " printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8; 4: f8@f16; 5: f16@f8; 6: "
"f16->f8; 7: f8->bf16, " "f16->f8; 7: f8->bf16, "
"comp f8)\n"); "comp f8; 8: int8->bf16)\n");
printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n"); printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n");
printf(" 1: A[m, k] * B[n, k] = C[m, n];\n"); printf(" 1: A[m, k] * B[n, k] = C[m, n];\n");
printf(" 2: A[k, m] * B[k, n] = C[m, n];\n"); printf(" 2: A[k, m] * B[k, n] = C[m, n];\n");
...@@ -89,6 +90,8 @@ int profile_gemm_multiply_multiply(int argc, char* argv[]) ...@@ -89,6 +90,8 @@ int profile_gemm_multiply_multiply(int argc, char* argv[])
using F32 = float; using F32 = float;
using BF16 = ck::bhalf_t; using BF16 = ck::bhalf_t;
using F8 = ck::f8_t; using F8 = ck::f8_t;
using I8 = int8_t;
using I32 = int;
using Row = ck::tensor_layout::gemm::RowMajor; using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor; using Col = ck::tensor_layout::gemm::ColumnMajor;
...@@ -162,6 +165,11 @@ int profile_gemm_multiply_multiply(int argc, char* argv[]) ...@@ -162,6 +165,11 @@ int profile_gemm_multiply_multiply(int argc, char* argv[])
return profile( return profile(
F8{}, F8{}, F8{}, F32{}, F32{}, F32{}, BF16{}, Row{}, Col{}, Row{}, Col{}, Row{}); F8{}, F8{}, F8{}, F32{}, F32{}, F32{}, BF16{}, Row{}, Col{}, Row{}, Col{}, Row{});
} }
else if(data_type == GemmDataType::INT8_INT8_BF16 && layout == GemmMatrixLayout::MK_NK_MN)
{
return profile(
I8{}, I8{}, I8{}, I32{}, F32{}, F32{}, BF16{}, Row{}, Col{}, Row{}, Col{}, Row{});
}
else else
{ {
std::cout << "this data_type & layout is not implemented" << std::endl; std::cout << "this data_type & layout is not implemented" << std::endl;
......
...@@ -57,6 +57,25 @@ int profile_gemm_universal(int argc, char* argv[]) ...@@ -57,6 +57,25 @@ int profile_gemm_universal(int argc, char* argv[])
exit(1); exit(1);
} }
int M;
int N;
int StrideA;
int StrideB;
// Analyze the unsupported matrix shapes, switch the M and N number
if(std::stoi(argv[9]) % 8 != 0 && std::stoi(argv[8]) % 8 == 0)
{
M = std::stoi(argv[9]);
StrideA = std::stoi(argv[12]);
N = std::stoi(argv[8]);
StrideB = std::stoi(argv[11]);
}
else
{
M = std::stoi(argv[8]);
StrideA = std::stoi(argv[11]);
N = std::stoi(argv[9]);
StrideB = std::stoi(argv[12]);
}
const auto data_type = static_cast<GemmDataType>(std::stoi(argv[2])); const auto data_type = static_cast<GemmDataType>(std::stoi(argv[2]));
const auto layout = static_cast<GemmMatrixLayout>(std::stoi(argv[3])); const auto layout = static_cast<GemmMatrixLayout>(std::stoi(argv[3]));
const bool do_verification = std::stoi(argv[4]); const bool do_verification = std::stoi(argv[4]);
...@@ -64,12 +83,8 @@ int profile_gemm_universal(int argc, char* argv[]) ...@@ -64,12 +83,8 @@ int profile_gemm_universal(int argc, char* argv[])
const bool do_log = std::stoi(argv[6]); const bool do_log = std::stoi(argv[6]);
const bool time_kernel = std::stoi(argv[7]); const bool time_kernel = std::stoi(argv[7]);
const int M = std::stoi(argv[8]);
const int N = std::stoi(argv[9]);
const int K = std::stoi(argv[10]); const int K = std::stoi(argv[10]);
const int StrideA = std::stoi(argv[11]);
const int StrideB = std::stoi(argv[12]);
const int StrideC = std::stoi(argv[13]); const int StrideC = std::stoi(argv[13]);
const int KBatch = std::stoi(argv[14]); const int KBatch = std::stoi(argv[14]);
...@@ -86,7 +101,9 @@ int profile_gemm_universal(int argc, char* argv[]) ...@@ -86,7 +101,9 @@ int profile_gemm_universal(int argc, char* argv[])
using F32 = float; using F32 = float;
using F16 = ck::half_t; using F16 = ck::half_t;
using BF16 = ck::bhalf_t; using BF16 = ck::bhalf_t;
using F8 = ck::f8_t; #if defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH)
using F8 = ck::f8_t;
#endif
using Row = ck::tensor_layout::gemm::RowMajor; using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor; using Col = ck::tensor_layout::gemm::ColumnMajor;
...@@ -147,6 +164,7 @@ int profile_gemm_universal(int argc, char* argv[]) ...@@ -147,6 +164,7 @@ int profile_gemm_universal(int argc, char* argv[])
{ {
return profile(F16{}, F16{}, F16{}, F32{}, F16{}, Row{}, Col{}, Row{}); return profile(F16{}, F16{}, F16{}, F32{}, F16{}, Row{}, Col{}, Row{});
} }
#if defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH)
else if(data_type == GemmDataType::F16_F8_F16 && layout == GemmMatrixLayout::MK_KN_MN) else if(data_type == GemmDataType::F16_F8_F16 && layout == GemmMatrixLayout::MK_KN_MN)
{ {
return profile(F16{}, F8{}, F16{}, F32{}, F16{}, Row{}, Row{}, Row{}); return profile(F16{}, F8{}, F16{}, F32{}, F16{}, Row{}, Row{}, Row{});
...@@ -163,6 +181,7 @@ int profile_gemm_universal(int argc, char* argv[]) ...@@ -163,6 +181,7 @@ int profile_gemm_universal(int argc, char* argv[])
{ {
return profile(F8{}, F16{}, F16{}, F32{}, F16{}, Row{}, Col{}, Row{}); return profile(F8{}, F16{}, F16{}, F32{}, F16{}, Row{}, Col{}, Row{});
} }
#endif
else if(data_type == GemmDataType::BF16_BF16_BF16 && layout == GemmMatrixLayout::MK_KN_MN) else if(data_type == GemmDataType::BF16_BF16_BF16 && layout == GemmMatrixLayout::MK_KN_MN)
{ {
return profile(BF16{}, BF16{}, BF16{}, F32{}, BF16{}, Row{}, Row{}, Row{}); return profile(BF16{}, BF16{}, BF16{}, F32{}, BF16{}, Row{}, Row{}, Row{});
...@@ -179,6 +198,7 @@ int profile_gemm_universal(int argc, char* argv[]) ...@@ -179,6 +198,7 @@ int profile_gemm_universal(int argc, char* argv[])
{ {
return profile(BF16{}, BF16{}, BF16{}, F32{}, BF16{}, Col{}, Row{}, Row{}); return profile(BF16{}, BF16{}, BF16{}, F32{}, BF16{}, Col{}, Row{}, Row{});
} }
#if defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH)
else if(data_type == GemmDataType::F8_F8_BF16 && layout == GemmMatrixLayout::MK_KN_MN) else if(data_type == GemmDataType::F8_F8_BF16 && layout == GemmMatrixLayout::MK_KN_MN)
{ {
return profile(F8{}, F8{}, F8{}, F32{}, BF16{}, Row{}, Row{}, Row{}); return profile(F8{}, F8{}, F8{}, F32{}, BF16{}, Row{}, Row{}, Row{});
...@@ -187,6 +207,7 @@ int profile_gemm_universal(int argc, char* argv[]) ...@@ -187,6 +207,7 @@ int profile_gemm_universal(int argc, char* argv[])
{ {
return profile(F8{}, F8{}, F8{}, F32{}, BF16{}, Row{}, Col{}, Row{}); return profile(F8{}, F8{}, F8{}, F32{}, BF16{}, Row{}, Col{}, Row{});
} }
#endif
else else
{ {
std::cout << "this data_type & layout is not implemented" << std::endl; std::cout << "this data_type & layout is not implemented" << std::endl;
......
...@@ -25,7 +25,8 @@ enum struct ConvDataType ...@@ -25,7 +25,8 @@ enum struct ConvDataType
F16_F16_F16, // 1 F16_F16_F16, // 1
BF16_F32_BF16, // 2 BF16_F32_BF16, // 2
F16_F16_F16_BF8_F8, // 3 F16_F16_F16_BF8_F8, // 3
I8_I8_I8 // 4 I8_I8_I8, // 4
BF16_BF16_BF16, // 5
}; };
#define OP_NAME "grouped_conv_bwd_weight" #define OP_NAME "grouped_conv_bwd_weight"
...@@ -38,7 +39,8 @@ static void print_helper_msg() ...@@ -38,7 +39,8 @@ static void print_helper_msg()
<< " 1: Input fp16, Weight fp16, Output fp16\n" << " 1: Input fp16, Weight fp16, Output fp16\n"
<< " 2: Input bf16, Weight fp32, Output bf16\n" << " 2: Input bf16, Weight fp32, Output bf16\n"
<< " 3: Input fp16, Weight fp16, Output fp16, Gemm bf8@fp8\n" << " 3: Input fp16, Weight fp16, Output fp16, Gemm bf8@fp8\n"
<< " 4: Input int8, Weight int8, Output int8)\n" << " 4: Input int8, Weight int8, Output int8\n"
<< " 5: Input bf16, Weight bf16, Output bf16)\n"
<< "arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, " << "arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, "
"N, K, Ho, Wo]\n" "N, K, Ho, Wo]\n"
<< " 1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, " << " 1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, "
...@@ -180,6 +182,10 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[]) ...@@ -180,6 +182,10 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
// fp32 atomic add is used for weight tensor in bf16 kernel // fp32 atomic add is used for weight tensor in bf16 kernel
return profile(I2, NHWGC{}, GKYXC{}, NHWGK{}, BF16{}, F32{}, BF16{}, BF16{}, BF16{}); return profile(I2, NHWGC{}, GKYXC{}, NHWGK{}, BF16{}, F32{}, BF16{}, BF16{}, BF16{});
} }
if(data_type == ConvDataType::BF16_BF16_BF16)
{
return profile(I2, NHWGC{}, GKYXC{}, NHWGK{}, BF16{}, BF16{}, BF16{}, BF16{}, BF16{});
}
} }
else if(num_dim_spatial == 2 && layout == ConvLayout::NGCHW_GKYXC_NGKHW) else if(num_dim_spatial == 2 && layout == ConvLayout::NGCHW_GKYXC_NGKHW)
{ {
...@@ -187,6 +193,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[]) ...@@ -187,6 +193,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
{ {
return profile(I2, NGCHW{}, GKYXC{}, NGKHW{}, F16{}, F16{}, F16{}, F16{}, F16{}); return profile(I2, NGCHW{}, GKYXC{}, NGKHW{}, F16{}, F16{}, F16{}, F16{}, F16{});
} }
if(data_type == ConvDataType::BF16_BF16_BF16)
{
// fp32 atomic add is used for weight tensor in bf16 kernel
return profile(I2, NGCHW{}, GKYXC{}, NGKHW{}, BF16{}, BF16{}, BF16{}, BF16{}, BF16{});
}
} }
if(num_dim_spatial == 3 && layout == ConvLayout::GNHWC_GKYXC_GNHWK) if(num_dim_spatial == 3 && layout == ConvLayout::GNHWC_GKYXC_GNHWK)
{ {
...@@ -224,6 +235,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[]) ...@@ -224,6 +235,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
// fp32 atomic add is used for weight tensor in bf16 kernel // fp32 atomic add is used for weight tensor in bf16 kernel
return profile(I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, BF16{}, F32{}, BF16{}, BF16{}, BF16{}); return profile(I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, BF16{}, F32{}, BF16{}, BF16{}, BF16{});
} }
if(data_type == ConvDataType::BF16_BF16_BF16)
{
return profile(
I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, BF16{}, BF16{}, BF16{}, BF16{}, BF16{});
}
if(data_type == ConvDataType::F16_F16_F16_BF8_F8) if(data_type == ConvDataType::F16_F16_F16_BF8_F8)
{ {
return profile(I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, F16{}, F16{}, F16{}, BF8{}, F8{}); return profile(I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, F16{}, F16{}, F16{}, BF8{}, F8{});
...@@ -240,6 +256,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[]) ...@@ -240,6 +256,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
{ {
return profile(I3, NGCDHW{}, GKZYXC{}, NGKDHW{}, F16{}, F16{}, F16{}, F16{}, F16{}); return profile(I3, NGCDHW{}, GKZYXC{}, NGKDHW{}, F16{}, F16{}, F16{}, F16{}, F16{});
} }
if(data_type == ConvDataType::BF16_BF16_BF16)
{
return profile(
I3, NGCDHW{}, GKZYXC{}, NGKDHW{}, BF16{}, BF16{}, BF16{}, BF16{}, BF16{});
}
} }
std::cout << "this data_type & layout is not implemented" << std::endl; std::cout << "this data_type & layout is not implemented" << std::endl;
......
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
import logging
import os
import subprocess
from dataclasses import replace
from functools import lru_cache
from typing import List
from ..util import library_path
from .op import CKGroupedConvFwdOp
log = logging.getLogger(__name__)
def _ck_conv_instances_path():
conv_instances_path = os.path.join( # noqa: F821
library_path(),
"include",
"ck",
"library",
"tensor_operation_instance",
"gpu",
"grouped_conv_fwd",
)
if not os.path.exists(conv_instances_path):
log.error(
"CK library conv instances path %s does not exist", conv_instances_path
)
return None
return conv_instances_path
def parse_instances(str_instances: List[str]) -> List[CKGroupedConvFwdOp]:
"""
Parse the lines containing Grouped Convolution Forward template instances
into `CKGroupedConvFwdOp` instances
"""
def maybe_int(s):
try:
return int(s)
except ValueError:
return s
op_instances = []
# TODO: maybe use libclang for parsing C++ code in the future
# to avoid this hacky parsing logic below ? :) - copilot
for line in str_instances:
s_template_args = line.split("DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3")[
-1
].strip("<>, ")
template_args = []
i_current = 0
while i_current < len(s_template_args):
if s_template_args[i_current] == " ":
# skip whitespace
i_current += 1
continue
elif s_template_args[i_current : i_current + 2] == "S<":
# parse template S<Index...>
i_next = s_template_args.find(">", i_current)
template_args.append(
tuple(map(int, s_template_args[i_current + 2 : i_next].split(",")))
)
i_current = i_next + 2
else:
# all string attributes must be either type aliases or global constants in C++
i_next = s_template_args.find(",", i_current)
template_args.append(
maybe_int(
s_template_args[i_current : i_next if i_next != -1 else None]
)
)
if i_next != -1:
i_current = i_next + 1
if i_next == -1:
break
template_args[0] = -1 # n_dim_spatial
template_args[3] = tuple() # ds_layout
template_args[9] = tuple() # ds_element_dtype
new_instance = CKGroupedConvFwdOp(
*template_args, # type: ignore[arg-type]
)
op_instances.append(new_instance)
return op_instances
@lru_cache(None)
def gen_conv_ops_library() -> List[CKGroupedConvFwdOp]:
"""
Parse the Grouped Convolution Forward instances
defined in the Composable Kernel library folder.
"""
ck_library_dir = _ck_conv_instances_path()
if not ck_library_dir:
return []
grep_result = subprocess.run(
[
"grep",
"-inR",
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3",
ck_library_dir,
],
capture_output=True,
text=True,
)
op_instances = parse_instances(grep_result.stdout.strip().split("\n"))
log.debug("ck instances from library: %d", len(op_instances))
schedulers = [
"BlockGemmPipelineScheduler::Intrawave",
"BlockGemmPipelineScheduler::Interwave",
]
conv_specs = [
"ConvolutionForwardSpecialization::Default",
"ConvolutionForwardSpecialization::Filter1x1Pad0",
"ConvolutionForwardSpecialization::Filter1x1Stride1Pad0",
"ConvolutionForwardSpecialization::OddC",
]
# substitute templated args by looping through their domains
substitute_instances = []
for instance in op_instances:
sub_scheduler = (
instance.block_gemm_pipeline_scheduler == "BlkGemmPipeSched"
)
sub_spec = instance.conv_forward_specialization == "ConvSpec"
schedulers_range = (
schedulers if sub_scheduler else [instance.block_gemm_pipeline_scheduler]
)
spec_range = conv_specs if sub_spec else [instance.conv_forward_specialization]
for scheduler in schedulers_range:
for spec in spec_range:
for channels_last in [True, False]:
if channels_last:
a_layout = "NHWGC"
e_layout = "NHWGK"
else:
a_layout = "NGCHW"
e_layout = "NGKHW"
substitute_instances.append(
replace(
instance,
block_gemm_pipeline_scheduler=scheduler,
conv_forward_specialization=spec,
gemm_specialization="GemmSpecialization::MNKPadding",
n_dim_spatial=2,
a_layout=a_layout,
b_layout="GKYXC",
e_layout=e_layout,
)
)
return substitute_instances
if __name__ == "__main__":
print(gen_conv_ops_library())
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
from dataclasses import asdict, dataclass
from typing import Optional, Tuple
@dataclass
class CKGroupedConvFwdOp:
n_dim_spatial: int
a_layout: str
b_layout: str
ds_layout: Tuple[str]
e_layout: str
a_element_dtype: str
b_element_dtype: str
acc_dtype: str
c_shuffle_dtype: str
ds_element_dtype: Tuple[str]
e_element_dtype: str
a_elementwise_op: str
b_elementwise_op: str
cde_elementwise_op: str
conv_forward_specialization: str
gemm_specialization: str
block_size: int
m_per_block: int
n_per_block: int
k_per_block: int
ak1: int
bk1: int
m_per_xdl: int
n_per_xdl: int
m_xdl_per_wave: int
n_xdl_per_wave: int
a_block_transfer_thread_cluster_lengths_ak0_m_ak1: Tuple[int, int, int]
a_block_transfer_thread_cluster_arrange_order: Tuple[int, int, int]
a_block_transfer_src_access_order: Tuple[int, int, int]
a_block_transfer_src_vector_dim: int
a_block_transfer_src_scalar_per_vector: int
a_block_transfer_dst_scalar_per_vector_ak1: int
a_block_lds_extra_m: bool
b_block_transfer_thread_cluster_lengths_bk0_n_bk1: Tuple[int, int, int]
b_block_transfer_thread_cluster_arrange_order: Tuple[int, int, int]
b_block_transfer_src_access_order: Tuple[int, int, int]
b_block_transfer_src_vector_dim: int
b_block_transfer_src_scalar_per_vector: int
b_block_transfer_dst_scalar_per_vector_bk1: int
b_block_lds_extra_n: bool
c_shuffle_m_xdl_per_wave_per_shuffle: int
c_shuffle_n_xdl_per_wave_per_shuffle: int
cde_block_transfer_cluster_lengths_m_block_m_per_block_n_block_n_per_block: Tuple[ # noqa
int,
int,
int,
int,
]
cde_block_transfer_scalar_per_vector_n_per_block: int
block_gemm_pipeline_scheduler: str
block_gemm_pipeline_version: str
a_compute_dtype: Optional[str] = None
b_compute_dtype: Optional[str] = None
def name(self):
# cpp alias for template instance
return (
f"ck_device_grouped_convolution_fwd_multiple_abd_xdl_c_shuffle_v3_"
f"{self.key_name()}"
)
def key_name(self):
# TBD; must be unique per instance. Intended to use as dict key
return "_".join(
[
"K"
+ field_name.replace("_", "").lower()
+ "V"
+ (
"x".join(map(str, iter(field_value)))
if isinstance(field_value, tuple)
else str(field_value).replace(":", "")
)
for field_name, field_value in self.dict_items()
]
)
def dict_items(self):
return asdict(self).items()
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
import logging import logging
import os import os
import subprocess import subprocess
from dataclasses import fields, replace from dataclasses import replace
from functools import lru_cache, partial from functools import lru_cache, partial
from typing import List from typing import List
......
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
from dataclasses import asdict, dataclass from dataclasses import asdict, dataclass
from typing import Optional, Tuple from typing import Optional, Tuple
......
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
import functools import functools
import os import os
@functools.lru_cache(None) @functools.lru_cache(None)
def library_path(): def library_path():
return os.path.join(os.path.dirname(__file__), 'library') return os.path.join(os.path.dirname(__file__), "library")
...@@ -65,8 +65,9 @@ def parse_data_type(args): ...@@ -65,8 +65,9 @@ def parse_data_type(args):
if args.ck_profier_op == "grouped_conv_fwd": if args.ck_profier_op == "grouped_conv_fwd":
args.data_type = 3 args.data_type = 3
if args.data_type == "bfp16": if args.data_type == "bfp16":
if args.ck_profier_op == "grouped_conv_bwd_weight" or \ if args.ck_profier_op == "grouped_conv_bwd_weight":
args.ck_profier_op == "grouped_conv_bwd_data" or \ args.data_type = 5
if args.ck_profier_op == "grouped_conv_bwd_data" or \
args.ck_profier_op == "grouped_conv_fwd": args.ck_profier_op == "grouped_conv_fwd":
args.data_type = 2 args.data_type = 2
......
...@@ -64,11 +64,11 @@ function(add_test_executable TEST_NAME) ...@@ -64,11 +64,11 @@ function(add_test_executable TEST_NAME)
#only continue if there are some source files left on the list #only continue if there are some source files left on the list
if(ARGN) if(ARGN)
if(ARGN MATCHES "_xdl") if(ARGN MATCHES "_xdl")
list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201) list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx906:xnack- gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201)
elseif(ARGN MATCHES "_wmma") elseif(ARGN MATCHES "_wmma")
list(REMOVE_ITEM TEST_TARGETS gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030) list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx950)
elseif(ARGN MATCHES "_smfmac") elseif(ARGN MATCHES "_smfmac")
list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201) list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx906:xnack- gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201)
endif() endif()
set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP) set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
add_executable(${TEST_NAME} ${ARGN}) add_executable(${TEST_NAME} ${ARGN})
...@@ -141,11 +141,11 @@ function(add_gtest_executable TEST_NAME) ...@@ -141,11 +141,11 @@ function(add_gtest_executable TEST_NAME)
#only continue if there are some source files left on the list #only continue if there are some source files left on the list
if(ARGN) if(ARGN)
if(ARGN MATCHES "_xdl") if(ARGN MATCHES "_xdl")
list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201) list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx906:xnack- gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201)
elseif(ARGN MATCHES "_wmma") elseif(ARGN MATCHES "_wmma")
list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030) list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx950)
elseif(ARGN MATCHES "_smfmac") elseif(ARGN MATCHES "_smfmac")
list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201) list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx906:xnack- gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201)
endif() endif()
set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP) set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
add_executable(${TEST_NAME} ${ARGN}) add_executable(${TEST_NAME} ${ARGN})
...@@ -210,3 +210,4 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx942" AND CK_HIP_VERSION_MAJOR GREATER_EQUAL ...@@ -210,3 +210,4 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx942" AND CK_HIP_VERSION_MAJOR GREATER_EQUAL
add_subdirectory(smfmac_op) add_subdirectory(smfmac_op)
endif() endif()
add_subdirectory(position_embedding) add_subdirectory(position_embedding)
add_subdirectory(scatter_gather)
add_subdirectory(image_to_column) add_subdirectory(image_to_column)
add_subdirectory(gemm)
# Currently ck_tile is only built on gfx9
if(GPU_TARGETS MATCHES "gfx9")
add_gtest_executable(test_ck_tile_gemm_mem_pipeline test_gemm_mem_pipeline.cpp)
endif()
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include "gtest/gtest.h"
#include "ck_tile/host.hpp"
#include "test_gemm_mem_pipeline_util.hpp"
using F16 = ck_tile::half_t;
using F32 = float;
using Row = ck_tile::tensor_layout::gemm::RowMajor;
using Col = ck_tile::tensor_layout::gemm::ColumnMajor;
// clang-format off
using KernelTypes = ::testing::Types<
// ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, CDataType
std::tuple< Row, Col, Row, F16, F16, F32, F16>,
std::tuple< Col, Row, Row, F16, F16, F32, F16>,
std::tuple< Row, Row, Row, F16, F16, F32, F16>,
std::tuple< Col, Col, Row, F16, F16, F32, F16>
>;
// clang-format on
TYPED_TEST_SUITE(TestCkTileGemmMemPipeline, KernelTypes);
#include "test_gemm_mem_pipeline_ut_cases.inc"
#pragma once
TYPED_TEST(TestCkTileGemmMemPipeline, SmallM)
{
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 1024;
constexpr int K = 320;
for(int M : Ms)
this->Run(M, N, K);
}
TYPED_TEST(TestCkTileGemmMemPipeline, MidLargeM)
{
std::vector<int> Ms{127, 255, 312, 799, 1573};
constexpr int N = 1024;
constexpr int K = 320;
for(int M : Ms)
this->Run(M, N, K);
}
TYPED_TEST(TestCkTileGemmMemPipeline, PaddK)
{
std::vector<int> Ms{127};
constexpr int N = 1024;
constexpr int K = 432;
for(int M : Ms)
this->Run(M, N, K);
}
TYPED_TEST(TestCkTileGemmMemPipeline, Regular)
{
std::vector<int> Ms{512};
constexpr int N = 1024;
constexpr int K = 512;
for(int M : Ms)
this->Run(M, N, K);
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <sstream>
#include <gtest/gtest.h>
#include "ck_tile/core.hpp"
#include "ck_tile/host.hpp"
#include "ck_tile/host/kernel_launch.hpp"
#include "ck_tile/ops/epilogue.hpp"
#include "ck_tile/ops/gemm.hpp"
template <typename Tuple>
class TestCkTileGemmMemPipeline : public ::testing::Test
{
protected:
using ALayout = std::tuple_element_t<0, Tuple>;
using BLayout = std::tuple_element_t<1, Tuple>;
using CLayout = std::tuple_element_t<2, Tuple>;
using ADataType = std::tuple_element_t<3, Tuple>;
using BDataType = std::tuple_element_t<4, Tuple>;
using AccDataType = std::tuple_element_t<5, Tuple>;
using CDataType = std::tuple_element_t<6, Tuple>;
// TODO: expose tile size through test t-param ?
struct gemm_basic_args
{
const void* p_a;
const void* p_b;
void* p_c;
ck_tile::index_t kbatch;
ck_tile::index_t M;
ck_tile::index_t N;
ck_tile::index_t K;
ck_tile::index_t stride_A;
ck_tile::index_t stride_B;
ck_tile::index_t stride_C;
};
void invoke_gemm(const gemm_basic_args& args, const ck_tile::stream_config& s)
{
// TODO: This should be parameterized in tests
constexpr ck_tile::index_t M_Tile = 128;
constexpr ck_tile::index_t N_Tile = 128;
constexpr ck_tile::index_t K_Tile = 32;
constexpr ck_tile::index_t M_Warp = 2;
constexpr ck_tile::index_t N_Warp = 2;
constexpr ck_tile::index_t K_Warp = 1;
constexpr ck_tile::index_t M_Warp_Tile = 32;
constexpr ck_tile::index_t N_Warp_Tile = 32;
constexpr ck_tile::index_t K_Warp_Tile = 8;
constexpr bool kPadA = true;
constexpr bool kPadB = true;
constexpr bool kPadC = true;
constexpr int kBlockPerCu = 1;
// ===============================================
using GemmShape =
ck_tile::TileGemmShape<ck_tile::sequence<M_Tile, N_Tile, K_Tile>,
ck_tile::sequence<M_Warp, N_Warp, K_Warp>,
ck_tile::sequence<M_Warp_Tile, N_Warp_Tile, K_Warp_Tile>>;
using TilePartitioner = ck_tile::GemmTilePartitioner<GemmShape>;
using GemmEpilogue = ck_tile::Default2DEpilogue<
ck_tile::Default2DEpilogueProblem<AccDataType, CDataType, false, kPadC>>;
using Traits = ck_tile::TileGemmTraits<kPadA, kPadB, kPadC, ALayout, BLayout, CLayout>;
using BaseGemmPipeline = ck_tile::BaseGemmPipelineAgBgCrMem<
ck_tile::GemmPipelineProblem<ADataType, BDataType, AccDataType, GemmShape, Traits>>;
const ck_tile::index_t num_loop = TilePartitioner::GetLoopNum(args.K);
const bool has_hot_loop = BaseGemmPipeline::BlockHasHotloop(num_loop);
const ck_tile::TailNumber tail_num = BaseGemmPipeline::GetBlockLoopTailNum(num_loop);
const auto Run = [&](const auto has_hot_loop_, const auto tail_number_) {
constexpr bool has_hot_loop_v = has_hot_loop_.value;
constexpr auto tail_number_v = tail_number_.value;
using GemmPipeline = ck_tile::GemmPipelineAgBgCrMem<
ck_tile::UniversalGemmPipelineProblem<ADataType,
BDataType,
AccDataType,
GemmShape,
Traits,
ck_tile::GemmPipelineScheduler::Intrawave,
has_hot_loop_v,
tail_number_v>>;
using Kernel = ck_tile::GemmKernel<TilePartitioner, GemmPipeline, GemmEpilogue>;
auto kargs = Kernel::MakeKargs(args.p_a,
args.p_b,
args.p_c,
args.M,
args.N,
args.K,
args.stride_A,
args.stride_B,
args.stride_C);
const dim3 grids = Kernel::GridSize(args.M, args.N, args.kbatch);
constexpr dim3 blocks = Kernel::BlockSize();
if(s.log_level_ > 0)
{
std::cout << "Lunching kernel with args:"
<< " grid: {" << grids.x << ", " << grids.y << ", " << grids.z << "}"
<< ", blocks: {" << blocks.x << ", " << blocks.y << ", " << blocks.z
<< "}" << std::endl;
}
ck_tile::launch_kernel(
s, ck_tile::make_kernel<blocks.x, kBlockPerCu>(Kernel{}, grids, blocks, 0, kargs));
};
if(has_hot_loop)
{
// Tail pipeline One to Seven
if(tail_num == ck_tile::TailNumber::One)
{
Run(ck_tile::bool_constant<true>{},
ck_tile::integral_constant<ck_tile::TailNumber, ck_tile::TailNumber::One>{});
}
else if(tail_num == ck_tile::TailNumber::Full)
{
Run(ck_tile::bool_constant<true>{},
ck_tile::integral_constant<ck_tile::TailNumber, ck_tile::TailNumber::Full>{});
}
if constexpr(BaseGemmPipeline::PrefetchStages > 2)
{
if(tail_num == ck_tile::TailNumber::Two)
{
Run(ck_tile::bool_constant<true>{},
ck_tile::integral_constant<ck_tile::TailNumber,
ck_tile::TailNumber::Two>{});
}
}
if constexpr(BaseGemmPipeline::PrefetchStages > 3)
{
if(tail_num == ck_tile::TailNumber::Three)
{
Run(ck_tile::bool_constant<true>{},
ck_tile::integral_constant<ck_tile::TailNumber,
ck_tile::TailNumber::Three>{});
}
}
if constexpr(BaseGemmPipeline::PrefetchStages > 4)
{
if(tail_num == ck_tile::TailNumber::Four)
{
Run(ck_tile::bool_constant<true>{},
ck_tile::integral_constant<ck_tile::TailNumber,
ck_tile::TailNumber::Four>{});
}
}
if constexpr(BaseGemmPipeline::PrefetchStages > 5)
{
if(tail_num == ck_tile::TailNumber::Five)
{
Run(ck_tile::bool_constant<true>{},
ck_tile::integral_constant<ck_tile::TailNumber,
ck_tile::TailNumber::Five>{});
}
}
if constexpr(BaseGemmPipeline::PrefetchStages > 6)
{
if(tail_num == ck_tile::TailNumber::Six)
{
Run(ck_tile::bool_constant<true>{},
ck_tile::integral_constant<ck_tile::TailNumber,
ck_tile::TailNumber::Six>{});
}
}
if constexpr(BaseGemmPipeline::PrefetchStages > 7)
{
if(tail_num == ck_tile::TailNumber::Seven)
{
Run(ck_tile::bool_constant<true>{},
ck_tile::integral_constant<ck_tile::TailNumber,
ck_tile::TailNumber::Seven>{});
}
}
}
else
{
// Tail number always Full - #PrefetchStages
if(tail_num == ck_tile::TailNumber::Full)
{
Run(ck_tile::bool_constant<false>{},
ck_tile::integral_constant<ck_tile::TailNumber, ck_tile::TailNumber::Full>{});
}
else
{
std::ostringstream err;
err << "When there's no hot loop, this tail number \"" << tail_num
<< "\" is not supported! " << __FILE__ << ":" << __LINE__
<< ", in function: " << __func__;
throw std::runtime_error(err.str());
}
}
}
public:
std::vector<int> k_batches_;
void SetUp() override { k_batches_ = {1}; }
void Run(const int M,
const int N,
const int K,
const int StrideA = 0,
const int StrideB = 0,
const int StrideC = 0)
{
for(auto kb : k_batches_)
{
RunSingle(M, N, K, StrideA, StrideB, StrideC, kb);
}
}
void RunSingle(const int M,
const int N,
const int K,
const int StrideA,
const int StrideB,
const int StrideC,
int kbatch = 1)
{
using namespace ck_tile::literals;
auto f_host_tensor_descriptor = [](std::size_t row,
std::size_t col,
std::size_t stride,
auto layout) {
if constexpr(std::is_same_v<decltype(layout), ck_tile::tensor_layout::gemm::RowMajor>)
{
return ck_tile::HostTensorDescriptor({row, col}, {stride, 1_uz});
}
else
{
return ck_tile::HostTensorDescriptor({row, col}, {1_uz, stride});
}
};
auto f_get_default_stride =
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
if(stride == 0)
{
// give a chance if stride is zero, return a default packed stride
if constexpr(std::is_same_v<decltype(layout),
ck_tile::tensor_layout::gemm::RowMajor>)
{
return col;
}
else
{
return row;
}
}
else
return stride;
};
std::size_t stride_A = f_get_default_stride(M, K, StrideA, ALayout{});
std::size_t stride_B = f_get_default_stride(K, N, StrideB, BLayout{});
std::size_t stride_C = f_get_default_stride(M, N, StrideC, CLayout{});
ck_tile::HostTensor<ADataType> a_m_k(f_host_tensor_descriptor(M, K, stride_A, ALayout{}));
ck_tile::HostTensor<BDataType> b_k_n(f_host_tensor_descriptor(K, N, stride_B, BLayout{}));
ck_tile::HostTensor<CDataType> c_m_n_dev_result(
f_host_tensor_descriptor(M, N, stride_C, CLayout{}));
ck_tile::FillUniformDistributionIntegerValue<ADataType>{-5, 5}(a_m_k);
ck_tile::FillUniformDistributionIntegerValue<BDataType>{-5, 5}(b_k_n);
ck_tile::DeviceMem a_m_k_dev_buf(a_m_k.get_element_space_size_in_bytes());
ck_tile::DeviceMem b_k_n_dev_buf(b_k_n.get_element_space_size_in_bytes());
ck_tile::DeviceMem c_m_n_dev_buf(c_m_n_dev_result.get_element_space_size_in_bytes());
a_m_k_dev_buf.ToDevice(a_m_k.data());
b_k_n_dev_buf.ToDevice(b_k_n.data());
c_m_n_dev_buf.SetZero();
c_m_n_dev_result.SetZero();
gemm_basic_args args;
args.p_a = a_m_k_dev_buf.GetDeviceBuffer();
args.p_b = b_k_n_dev_buf.GetDeviceBuffer();
args.p_c = c_m_n_dev_buf.GetDeviceBuffer();
args.kbatch = kbatch;
args.M = M;
args.N = N;
args.K = K;
args.stride_A = stride_A;
args.stride_B = stride_B;
args.stride_C = stride_C;
invoke_gemm(args, ck_tile::stream_config{nullptr, false});
c_m_n_dev_buf.FromDevice(c_m_n_dev_result.data());
bool pass = true;
ck_tile::HostTensor<CDataType> c_m_n_host_ref(
f_host_tensor_descriptor(M, N, stride_C, CLayout{}));
c_m_n_host_ref.SetZero();
ck_tile::reference_gemm<ADataType, BDataType, AccDataType, CDataType>(
a_m_k, b_k_n, c_m_n_host_ref);
pass = ck_tile::check_err(c_m_n_dev_result, c_m_n_host_ref);
EXPECT_TRUE(pass);
}
};
...@@ -18,4 +18,9 @@ if(result EQUAL 0) ...@@ -18,4 +18,9 @@ if(result EQUAL 0)
target_link_libraries(test_bf8 PRIVATE utility) target_link_libraries(test_bf8 PRIVATE utility)
endif() endif()
add_gtest_executable(test_custom_type test_custom_type.cpp)
if(result EQUAL 0)
target_link_libraries(test_custom_type PRIVATE utility)
endif()
add_gtest_executable(test_type_convert_const type_convert_const.cpp) add_gtest_executable(test_type_convert_const type_convert_const.cpp)
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using ck::bf8_t;
using ck::bhalf_t;
using ck::f8_t;
using ck::half_t;
using ck::Number;
using ck::type_convert;
using ck::vector_type;
TEST(Custom_bool, TestSize)
{
struct custom_bool_t
{
bool data;
};
ASSERT_EQ(sizeof(custom_bool_t), sizeof(bool));
ASSERT_EQ(sizeof(vector_type<custom_bool_t, 2>), sizeof(vector_type<bool, 2>));
ASSERT_EQ(sizeof(vector_type<custom_bool_t, 4>), sizeof(vector_type<bool, 4>));
ASSERT_EQ(sizeof(vector_type<custom_bool_t, 8>), sizeof(vector_type<bool, 8>));
ASSERT_EQ(sizeof(vector_type<custom_bool_t, 16>), sizeof(vector_type<bool, 16>));
ASSERT_EQ(sizeof(vector_type<custom_bool_t, 32>), sizeof(vector_type<bool, 32>));
ASSERT_EQ(sizeof(vector_type<custom_bool_t, 64>), sizeof(vector_type<bool, 64>));
}
TEST(Custom_bool, TestAsType)
{
struct custom_bool_t
{
using type = bool;
type data;
custom_bool_t() : data{type{}} {}
custom_bool_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<bool> test_vec = {false, true, false, true};
// reference vector
vector_type<custom_bool_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_bool_t>()(Number<i>{}).data, false);
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_bool_t>()(Number<i>{}) = custom_bool_t{test_vec.at(i)};
});
// copy the vector
vector_type<custom_bool_t, size> left_vec{right_vec};
// check if values were copied correctly
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_bool_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_bool, TestAsTypeReshape)
{
struct custom_bool_t
{
using type = bool;
type data;
custom_bool_t() : data{type{}} {}
custom_bool_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<bool> test_vec = {false, true, false, true};
// reference vector
vector_type<custom_bool_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_bool_t>()(Number<i>{}).data, false);
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_bool_t>()(Number<i>{}) = custom_bool_t{test_vec.at(i)};
});
// copy the first half of a vector
vector_type<custom_bool_t, size / 2> left_vec{
right_vec.template AsType<vector_type<custom_bool_t, size / 2>::type>()(Number<0>{})};
// check if values were copied correctly
ck::static_for<0, size / 2, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_bool_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_int8, TestSize)
{
struct custom_int8_t
{
int8_t data;
};
ASSERT_EQ(sizeof(custom_int8_t), sizeof(int8_t));
ASSERT_EQ(sizeof(vector_type<custom_int8_t, 2>), sizeof(vector_type<int8_t, 2>));
ASSERT_EQ(sizeof(vector_type<custom_int8_t, 4>), sizeof(vector_type<int8_t, 4>));
ASSERT_EQ(sizeof(vector_type<custom_int8_t, 8>), sizeof(vector_type<int8_t, 8>));
ASSERT_EQ(sizeof(vector_type<custom_int8_t, 16>), sizeof(vector_type<int8_t, 16>));
ASSERT_EQ(sizeof(vector_type<custom_int8_t, 32>), sizeof(vector_type<int8_t, 32>));
ASSERT_EQ(sizeof(vector_type<custom_int8_t, 64>), sizeof(vector_type<int8_t, 64>));
}
TEST(Custom_int8, TestAsType)
{
struct custom_int8_t
{
using type = int8_t;
type data;
custom_int8_t() : data{type{}} {}
custom_int8_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<int8_t> test_vec = {3, -6, 8, -2};
// reference vector
vector_type<custom_int8_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_int8_t>()(Number<i>{}).data, 0);
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_int8_t>()(Number<i>{}) = custom_int8_t{test_vec.at(i)};
});
// copy the vector
vector_type<custom_int8_t, size> left_vec{right_vec};
// check if values were copied correctly
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_int8_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_int8, TestAsTypeReshape)
{
struct custom_int8_t
{
using type = int8_t;
type data;
custom_int8_t() : data{type{}} {}
custom_int8_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<int8_t> test_vec = {3, -6, 8, -2};
// reference vector
vector_type<custom_int8_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_int8_t>()(Number<i>{}).data, 0);
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_int8_t>()(Number<i>{}) = custom_int8_t{test_vec.at(i)};
});
// copy the first half of a vector
vector_type<custom_int8_t, size / 2> left_vec{
right_vec.template AsType<vector_type<custom_int8_t, size / 2>::type>()(Number<0>{})};
// check if values were copied correctly
ck::static_for<0, size / 2, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_int8_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_uint8, TestSize)
{
struct custom_uint8_t
{
uint8_t data;
};
ASSERT_EQ(sizeof(custom_uint8_t), sizeof(uint8_t));
ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 2>), sizeof(vector_type<uint8_t, 2>));
ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 4>), sizeof(vector_type<uint8_t, 4>));
ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 8>), sizeof(vector_type<uint8_t, 8>));
ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 16>), sizeof(vector_type<uint8_t, 16>));
ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 32>), sizeof(vector_type<uint8_t, 32>));
ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 64>), sizeof(vector_type<uint8_t, 64>));
}
TEST(Custom_uint8, TestAsType)
{
struct custom_uint8_t
{
using type = uint8_t;
type data;
custom_uint8_t() : data{type{}} {}
custom_uint8_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<uint8_t> test_vec = {3, 6, 8, 2};
// reference vector
vector_type<custom_uint8_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_uint8_t>()(Number<i>{}).data, 0);
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_uint8_t>()(Number<i>{}) = custom_uint8_t{test_vec.at(i)};
});
// copy the vector
vector_type<custom_uint8_t, size> left_vec{right_vec};
// check if values were copied correctly
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_uint8_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_uint8, TestAsTypeReshape)
{
struct custom_uint8_t
{
using type = uint8_t;
type data;
custom_uint8_t() : data{type{}} {}
custom_uint8_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<uint8_t> test_vec = {3, 6, 8, 2};
// reference vector
vector_type<custom_uint8_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_uint8_t>()(Number<i>{}).data, 0);
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_uint8_t>()(Number<i>{}) = custom_uint8_t{test_vec.at(i)};
});
// copy the first half of a vector
vector_type<custom_uint8_t, size / 2> left_vec{
right_vec.template AsType<vector_type<custom_uint8_t, size / 2>::type>()(Number<0>{})};
// check if values were copied correctly
ck::static_for<0, size / 2, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_uint8_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_f8, TestSize)
{
struct custom_f8_t
{
_BitInt(8) data;
};
ASSERT_EQ(sizeof(custom_f8_t), sizeof(_BitInt(8)));
ASSERT_EQ(sizeof(vector_type<custom_f8_t, 2>), sizeof(vector_type<_BitInt(8), 2>));
ASSERT_EQ(sizeof(vector_type<custom_f8_t, 4>), sizeof(vector_type<_BitInt(8), 4>));
ASSERT_EQ(sizeof(vector_type<custom_f8_t, 8>), sizeof(vector_type<_BitInt(8), 8>));
ASSERT_EQ(sizeof(vector_type<custom_f8_t, 16>), sizeof(vector_type<_BitInt(8), 16>));
ASSERT_EQ(sizeof(vector_type<custom_f8_t, 32>), sizeof(vector_type<_BitInt(8), 32>));
ASSERT_EQ(sizeof(vector_type<custom_f8_t, 64>), sizeof(vector_type<_BitInt(8), 64>));
}
TEST(Custom_f8, TestAsType)
{
struct custom_f8_t
{
using type = _BitInt(8);
type data;
custom_f8_t() : data{type{}} {}
custom_f8_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<_BitInt(8)> test_vec = {type_convert<_BitInt(8)>(0.3f),
type_convert<_BitInt(8)>(-0.6f),
type_convert<_BitInt(8)>(0.8f),
type_convert<_BitInt(8)>(-0.2f)};
// reference vector
vector_type<custom_f8_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}(
[&](auto i) { ASSERT_EQ(right_vec.template AsType<custom_f8_t>()(Number<i>{}).data, 0); });
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_f8_t>()(Number<i>{}) = custom_f8_t{test_vec.at(i)};
});
// copy the vector
vector_type<custom_f8_t, size> left_vec{right_vec};
// check if values were copied correctly
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_f8_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_f8, TestAsTypeReshape)
{
struct custom_f8_t
{
using type = _BitInt(8);
type data;
custom_f8_t() : data{type{}} {}
custom_f8_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<_BitInt(8)> test_vec = {type_convert<_BitInt(8)>(0.3f),
type_convert<_BitInt(8)>(-0.6f),
type_convert<_BitInt(8)>(0.8f),
type_convert<_BitInt(8)>(-0.2f)};
// reference vector
vector_type<custom_f8_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}(
[&](auto i) { ASSERT_EQ(right_vec.template AsType<custom_f8_t>()(Number<i>{}).data, 0); });
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_f8_t>()(Number<i>{}) = custom_f8_t{test_vec.at(i)};
});
// copy the first half of a vector
vector_type<custom_f8_t, size / 2> left_vec{
right_vec.template AsType<vector_type<custom_f8_t, size / 2>::type>()(Number<0>{})};
// check if values were copied correctly
ck::static_for<0, size / 2, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_f8_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_bf8, TestSize)
{
struct custom_bf8_t
{
unsigned _BitInt(8) data;
};
ASSERT_EQ(sizeof(custom_bf8_t), sizeof(unsigned _BitInt(8)));
ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 2>), sizeof(vector_type<unsigned _BitInt(8), 2>));
ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 4>), sizeof(vector_type<unsigned _BitInt(8), 4>));
ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 8>), sizeof(vector_type<unsigned _BitInt(8), 8>));
ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 16>), sizeof(vector_type<unsigned _BitInt(8), 16>));
ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 32>), sizeof(vector_type<unsigned _BitInt(8), 32>));
ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 64>), sizeof(vector_type<unsigned _BitInt(8), 64>));
}
TEST(Custom_bf8, TestAsType)
{
struct custom_bf8_t
{
using type = unsigned _BitInt(8);
type data;
custom_bf8_t() : data{type{}} {}
custom_bf8_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<unsigned _BitInt(8)> test_vec = {type_convert<unsigned _BitInt(8)>(0.3f),
type_convert<unsigned _BitInt(8)>(-0.6f),
type_convert<unsigned _BitInt(8)>(0.8f),
type_convert<unsigned _BitInt(8)>(-0.2f)};
// reference vector
vector_type<custom_bf8_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}(
[&](auto i) { ASSERT_EQ(right_vec.template AsType<custom_bf8_t>()(Number<i>{}).data, 0); });
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_bf8_t>()(Number<i>{}) = custom_bf8_t{test_vec.at(i)};
});
// copy the vector
vector_type<custom_bf8_t, size> left_vec{right_vec};
// check if values were copied correctly
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_bf8_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_bf8, TestAsTypeReshape)
{
struct custom_bf8_t
{
using type = unsigned _BitInt(8);
type data;
custom_bf8_t() : data{type{}} {}
custom_bf8_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<unsigned _BitInt(8)> test_vec = {type_convert<unsigned _BitInt(8)>(0.3f),
type_convert<unsigned _BitInt(8)>(-0.6f),
type_convert<unsigned _BitInt(8)>(0.8f),
type_convert<unsigned _BitInt(8)>(-0.2f)};
// reference vector
vector_type<custom_bf8_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}(
[&](auto i) { ASSERT_EQ(right_vec.template AsType<custom_bf8_t>()(Number<i>{}).data, 0); });
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_bf8_t>()(Number<i>{}) = custom_bf8_t{test_vec.at(i)};
});
// copy the first half of a vector
vector_type<custom_bf8_t, size / 2> left_vec{
right_vec.template AsType<vector_type<custom_bf8_t, size / 2>::type>()(Number<0>{})};
// check if values were copied correctly
ck::static_for<0, size / 2, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_bf8_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_half, TestSize)
{
struct custom_half_t
{
half_t data;
};
ASSERT_EQ(sizeof(custom_half_t), sizeof(half_t));
ASSERT_EQ(sizeof(vector_type<custom_half_t, 2>), sizeof(vector_type<half_t, 2>));
ASSERT_EQ(sizeof(vector_type<custom_half_t, 4>), sizeof(vector_type<half_t, 4>));
ASSERT_EQ(sizeof(vector_type<custom_half_t, 8>), sizeof(vector_type<half_t, 8>));
ASSERT_EQ(sizeof(vector_type<custom_half_t, 16>), sizeof(vector_type<half_t, 16>));
ASSERT_EQ(sizeof(vector_type<custom_half_t, 32>), sizeof(vector_type<half_t, 32>));
ASSERT_EQ(sizeof(vector_type<custom_half_t, 64>), sizeof(vector_type<half_t, 64>));
}
TEST(Custom_half, TestAsType)
{
struct custom_half_t
{
using type = half_t;
type data;
custom_half_t() : data{type{}} {}
custom_half_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<half_t> test_vec = {half_t{0.3f}, half_t{-0.6f}, half_t{0.8f}, half_t{-0.2f}};
// reference vector
vector_type<custom_half_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_half_t>()(Number<i>{}).data,
type_convert<half_t>(0.0f));
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_half_t>()(Number<i>{}) = custom_half_t{test_vec.at(i)};
});
// copy the vector
vector_type<custom_half_t, size> left_vec{right_vec};
// check if values were copied correctly
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_half_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_half, TestAsTypeReshape)
{
struct custom_half_t
{
using type = half_t;
type data;
custom_half_t() : data{type{}} {}
custom_half_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<half_t> test_vec = {half_t{0.3f}, half_t{-0.6f}, half_t{0.8f}, half_t{-0.2f}};
// reference vector
vector_type<custom_half_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_half_t>()(Number<i>{}).data,
type_convert<half_t>(0.0f));
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_half_t>()(Number<i>{}) = custom_half_t{test_vec.at(i)};
});
// copy the first half of a vector
vector_type<custom_half_t, size / 2> left_vec{
right_vec.template AsType<vector_type<custom_half_t, size / 2>::type>()(Number<0>{})};
// check if values were copied correctly
ck::static_for<0, size / 2, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_half_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_bhalf, TestSize)
{
struct custom_bhalf_t
{
bhalf_t data;
};
ASSERT_EQ(sizeof(custom_bhalf_t), sizeof(bhalf_t));
ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 2>), sizeof(vector_type<bhalf_t, 2>));
ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 4>), sizeof(vector_type<bhalf_t, 4>));
ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 8>), sizeof(vector_type<bhalf_t, 8>));
ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 16>), sizeof(vector_type<bhalf_t, 16>));
ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 32>), sizeof(vector_type<bhalf_t, 32>));
ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 64>), sizeof(vector_type<bhalf_t, 64>));
}
TEST(Custom_bhalf, TestAsType)
{
struct custom_bhalf_t
{
using type = bhalf_t;
type data;
custom_bhalf_t() : data{type{}} {}
custom_bhalf_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<bhalf_t> test_vec = {type_convert<bhalf_t>(0.3f),
type_convert<bhalf_t>(-0.6f),
type_convert<bhalf_t>(0.8f),
type_convert<bhalf_t>(-0.2f)};
// reference vector
vector_type<custom_bhalf_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_bhalf_t>()(Number<i>{}).data,
type_convert<bhalf_t>(0.0f));
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_bhalf_t>()(Number<i>{}) = custom_bhalf_t{test_vec.at(i)};
});
// copy the vector
vector_type<custom_bhalf_t, size> left_vec{right_vec};
// check if values were copied correctly
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_bhalf_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_bhalf, TestAsTypeReshape)
{
struct custom_bhalf_t
{
using type = bhalf_t;
type data;
custom_bhalf_t() : data{type{}} {}
custom_bhalf_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<bhalf_t> test_vec = {type_convert<bhalf_t>(0.3f),
type_convert<bhalf_t>(-0.6f),
type_convert<bhalf_t>(0.8f),
type_convert<bhalf_t>(-0.2f)};
// reference vector
vector_type<custom_bhalf_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_bhalf_t>()(Number<i>{}).data,
type_convert<bhalf_t>(0.0f));
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_bhalf_t>()(Number<i>{}) = custom_bhalf_t{test_vec.at(i)};
});
// copy the first half of a vector
vector_type<custom_bhalf_t, size / 2> left_vec{
right_vec.template AsType<vector_type<custom_bhalf_t, size / 2>::type>()(Number<0>{})};
// check if values were copied correctly
ck::static_for<0, size / 2, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_bhalf_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_float, TestSize)
{
struct custom_float_t
{
float data;
};
ASSERT_EQ(sizeof(custom_float_t), sizeof(float));
ASSERT_EQ(sizeof(vector_type<custom_float_t, 2>), sizeof(vector_type<float, 2>));
ASSERT_EQ(sizeof(vector_type<custom_float_t, 4>), sizeof(vector_type<float, 4>));
ASSERT_EQ(sizeof(vector_type<custom_float_t, 8>), sizeof(vector_type<float, 8>));
ASSERT_EQ(sizeof(vector_type<custom_float_t, 16>), sizeof(vector_type<float, 16>));
ASSERT_EQ(sizeof(vector_type<custom_float_t, 32>), sizeof(vector_type<float, 32>));
ASSERT_EQ(sizeof(vector_type<custom_float_t, 64>), sizeof(vector_type<float, 64>));
}
TEST(Custom_float, TestAsType)
{
struct custom_float_t
{
using type = float;
type data;
custom_float_t() : data{type{}} {}
custom_float_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<float> test_vec = {0.3f, -0.6f, 0.8f, -0.2f};
// reference vector
vector_type<custom_float_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_float_t>()(Number<i>{}).data, 0.0f);
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_float_t>()(Number<i>{}) = custom_float_t{test_vec.at(i)};
});
// copy the vector
vector_type<custom_float_t, size> left_vec{right_vec};
// check if values were copied correctly
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_float_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_float, TestAsTypeReshape)
{
struct custom_float_t
{
using type = float;
type data;
custom_float_t() : data{type{}} {}
custom_float_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<float> test_vec = {0.3f, -0.6f, 0.8f, -0.2f};
// reference vector
vector_type<custom_float_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_float_t>()(Number<i>{}).data, 0.0f);
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_float_t>()(Number<i>{}) = custom_float_t{test_vec.at(i)};
});
// copy the first half of a vector
vector_type<custom_float_t, size / 2> left_vec{
right_vec.template AsType<vector_type<custom_float_t, size / 2>::type>()(Number<0>{})};
// check if values were copied correctly
ck::static_for<0, size / 2, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_float_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_double, TestSize)
{
struct custom_double_t
{
double data;
};
ASSERT_EQ(sizeof(custom_double_t), sizeof(double));
ASSERT_EQ(sizeof(vector_type<custom_double_t, 2>), sizeof(vector_type<double, 2>));
ASSERT_EQ(sizeof(vector_type<custom_double_t, 4>), sizeof(vector_type<double, 4>));
ASSERT_EQ(sizeof(vector_type<custom_double_t, 8>), sizeof(vector_type<double, 8>));
ASSERT_EQ(sizeof(vector_type<custom_double_t, 16>), sizeof(vector_type<double, 16>));
ASSERT_EQ(sizeof(vector_type<custom_double_t, 32>), sizeof(vector_type<double, 32>));
ASSERT_EQ(sizeof(vector_type<custom_double_t, 64>), sizeof(vector_type<double, 64>));
}
TEST(Custom_double, TestAsType)
{
struct custom_double_t
{
using type = double;
type data;
custom_double_t() : data{type{}} {}
custom_double_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<double> test_vec = {0.3, 0.6, 0.8, 0.2};
// reference vector
vector_type<custom_double_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_double_t>()(Number<i>{}).data, 0.0);
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_double_t>()(Number<i>{}) = custom_double_t{test_vec.at(i)};
});
// copy the vector
vector_type<custom_double_t, size> left_vec{right_vec};
// check if values were copied correctly
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_double_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Custom_double, TestAsTypeReshape)
{
struct custom_double_t
{
using type = double;
type data;
custom_double_t() : data{type{}} {}
custom_double_t(type init) : data{init} {}
};
// test size
const int size = 4;
std::vector<double> test_vec = {0.3, 0.6, 0.8, 0.2};
// reference vector
vector_type<custom_double_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<custom_double_t>()(Number<i>{}).data, 0.0);
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<custom_double_t>()(Number<i>{}) = custom_double_t{test_vec.at(i)};
});
// copy the first half of a vector
vector_type<custom_double_t, size / 2> left_vec{
right_vec.template AsType<vector_type<custom_double_t, size / 2>::type>()(Number<0>{})};
// check if values were copied correctly
ck::static_for<0, size / 2, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<custom_double_t>()(Number<i>{}).data, test_vec.at(i));
});
}
TEST(Complex_half, TestSize)
{
struct complex_half_t
{
half_t real;
half_t img;
};
ASSERT_EQ(sizeof(complex_half_t), sizeof(half_t) + sizeof(half_t));
ASSERT_EQ(sizeof(vector_type<complex_half_t, 2>),
sizeof(vector_type<half_t, 2>) + sizeof(vector_type<half_t, 2>));
ASSERT_EQ(sizeof(vector_type<complex_half_t, 4>),
sizeof(vector_type<half_t, 4>) + sizeof(vector_type<half_t, 4>));
ASSERT_EQ(sizeof(vector_type<complex_half_t, 8>),
sizeof(vector_type<half_t, 8>) + sizeof(vector_type<half_t, 8>));
ASSERT_EQ(sizeof(vector_type<complex_half_t, 16>),
sizeof(vector_type<half_t, 16>) + sizeof(vector_type<half_t, 16>));
ASSERT_EQ(sizeof(vector_type<complex_half_t, 32>),
sizeof(vector_type<half_t, 32>) + sizeof(vector_type<half_t, 32>));
ASSERT_EQ(sizeof(vector_type<complex_half_t, 64>),
sizeof(vector_type<half_t, 64>) + sizeof(vector_type<half_t, 64>));
}
TEST(Complex_half, TestAlignment)
{
struct complex_half_t
{
half_t real;
half_t img;
};
ASSERT_EQ(alignof(vector_type<complex_half_t, 2>),
alignof(vector_type<half_t, 2>) + alignof(vector_type<half_t, 2>));
ASSERT_EQ(alignof(vector_type<complex_half_t, 4>),
alignof(vector_type<half_t, 4>) + alignof(vector_type<half_t, 4>));
ASSERT_EQ(alignof(vector_type<complex_half_t, 8>),
alignof(vector_type<half_t, 8>) + alignof(vector_type<half_t, 8>));
ASSERT_EQ(alignof(vector_type<complex_half_t, 16>),
alignof(vector_type<half_t, 16>) + alignof(vector_type<half_t, 16>));
ASSERT_EQ(alignof(vector_type<complex_half_t, 32>),
alignof(vector_type<half_t, 32>) + alignof(vector_type<half_t, 32>));
ASSERT_EQ(alignof(vector_type<complex_half_t, 64>),
alignof(vector_type<half_t, 64>) + alignof(vector_type<half_t, 64>));
}
TEST(Complex_half, TestAsType)
{
struct complex_half_t
{
using type = half_t;
type real;
type img;
complex_half_t() : real{type{}}, img{type{}} {}
complex_half_t(type real_init, type img_init) : real{real_init}, img{img_init} {}
};
// test size
const int size = 4;
// custom type number of elements
const int num_elem = sizeof(complex_half_t) / sizeof(complex_half_t::type);
std::vector<half_t> test_vec = {half_t{0.3f},
half_t{-0.6f},
half_t{0.8f},
half_t{-0.2f},
half_t{0.5f},
half_t{-0.7f},
half_t{0.9f},
half_t{-0.3f}};
// reference vector
vector_type<complex_half_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<complex_half_t>()(Number<i>{}).real,
type_convert<half_t>(0.0f));
ASSERT_EQ(right_vec.template AsType<complex_half_t>()(Number<i>{}).img,
type_convert<half_t>(0.0f));
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<complex_half_t>()(Number<i>{}) =
complex_half_t{test_vec.at(num_elem * i), test_vec.at(num_elem * i + 1)};
});
// copy the vector
vector_type<complex_half_t, size> left_vec{right_vec};
// check if values were copied correctly
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<complex_half_t>()(Number<i>{}).real,
test_vec.at(num_elem * i));
ASSERT_EQ(left_vec.template AsType<complex_half_t>()(Number<i>{}).img,
test_vec.at(num_elem * i + 1));
});
}
TEST(Complex_half, TestAsTypeReshape)
{
struct complex_half_t
{
using type = half_t;
type real;
type img;
complex_half_t() : real{type{}}, img{type{}} {}
complex_half_t(type real_init, type img_init) : real{real_init}, img{img_init} {}
};
// test size
const int size = 4;
// custom type number of elements
const int num_elem = sizeof(complex_half_t) / sizeof(complex_half_t::type);
std::vector<half_t> test_vec = {half_t{0.3f},
half_t{-0.6f},
half_t{0.8f},
half_t{-0.2f},
half_t{0.5f},
half_t{-0.7f},
half_t{0.9f},
half_t{-0.3f}};
// reference vector
vector_type<complex_half_t, size> right_vec;
// check default CTOR
ck::static_for<0, size, 1>{}([&](auto i) {
ASSERT_EQ(right_vec.template AsType<complex_half_t>()(Number<i>{}).real,
type_convert<half_t>(0.0f));
ASSERT_EQ(right_vec.template AsType<complex_half_t>()(Number<i>{}).img,
type_convert<half_t>(0.0f));
});
// assign test values to the vector
ck::static_for<0, size, 1>{}([&](auto i) {
right_vec.template AsType<complex_half_t>()(Number<i>{}) =
complex_half_t{test_vec.at(num_elem * i), test_vec.at(num_elem * i + 1)};
});
// copy the first half of a vector
vector_type<complex_half_t, size / 2> left_vec{
right_vec.template AsType<vector_type<complex_half_t, size / 2>::type>()(Number<0>{})};
// check if values were copied correctly
ck::static_for<0, size / 2, 1>{}([&](auto i) {
ASSERT_EQ(left_vec.template AsType<complex_half_t>()(Number<i>{}).real,
test_vec.at(num_elem * i));
ASSERT_EQ(left_vec.template AsType<complex_half_t>()(Number<i>{}).img,
test_vec.at(num_elem * i + 1));
});
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment