Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
a4522ae3
"official/nlp/projects/example/train.py" did not exist on "97807e630e1c38d2936d4dd4c543681f549f3085"
Commit
a4522ae3
authored
Nov 06, 2024
by
illsilin
Browse files
sync from public repo
parents
1f127242
e0594d08
Changes
425
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1648 additions
and
34 deletions
+1648
-34
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
+0
-5
profiler/include/profiler/profile_gemm_multiply_multiply_impl.hpp
.../include/profiler/profile_gemm_multiply_multiply_impl.hpp
+6
-4
profiler/include/profiler/profile_pool3d_fwd_impl.hpp
profiler/include/profiler/profile_pool3d_fwd_impl.hpp
+31
-7
profiler/src/profile_gemm_multiply_multiply.cpp
profiler/src/profile_gemm_multiply_multiply.cpp
+9
-1
profiler/src/profile_gemm_universal.cpp
profiler/src/profile_gemm_universal.cpp
+26
-5
profiler/src/profile_grouped_conv_bwd_weight.cpp
profiler/src/profile_grouped_conv_bwd_weight.cpp
+23
-2
python/ck4inductor/grouped_conv_fwd/gen_instances.py
python/ck4inductor/grouped_conv_fwd/gen_instances.py
+167
-0
python/ck4inductor/grouped_conv_fwd/op.py
python/ck4inductor/grouped_conv_fwd/op.py
+93
-0
python/ck4inductor/universal_gemm/gen_instances.py
python/ck4inductor/universal_gemm/gen_instances.py
+4
-1
python/ck4inductor/universal_gemm/op.py
python/ck4inductor/universal_gemm/op.py
+3
-0
python/ck4inductor/util.py
python/ck4inductor/util.py
+4
-1
script/convert_miopen_driver_to_profiler.py
script/convert_miopen_driver_to_profiler.py
+3
-2
test/CMakeLists.txt
test/CMakeLists.txt
+7
-6
test/ck_tile/CMakeLists.txt
test/ck_tile/CMakeLists.txt
+1
-0
test/ck_tile/gemm/CMakeLists.txt
test/ck_tile/gemm/CMakeLists.txt
+4
-0
test/ck_tile/gemm/test_gemm_mem_pipeline.cpp
test/ck_tile/gemm/test_gemm_mem_pipeline.cpp
+29
-0
test/ck_tile/gemm/test_gemm_mem_pipeline_ut_cases.inc
test/ck_tile/gemm/test_gemm_mem_pipeline_ut_cases.inc
+41
-0
test/ck_tile/gemm/test_gemm_mem_pipeline_util.hpp
test/ck_tile/gemm/test_gemm_mem_pipeline_util.hpp
+318
-0
test/data_type/CMakeLists.txt
test/data_type/CMakeLists.txt
+5
-0
test/data_type/test_custom_type.cpp
test/data_type/test_custom_type.cpp
+874
-0
No files found.
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
View file @
a4522ae3
...
...
@@ -27,11 +27,6 @@ rocm_install(FILES ${MHA_HEADERS} DESTINATION include/ck_tile/ops)
# headers for building lib
file
(
COPY
${
MHA_HEADERS
}
DESTINATION
${
FMHA_CPP_FOLDER
}
)
# Delete the blob file if it exists to avoid append of old content.
if
(
EXISTS
${
FMHA_CPP_FOLDER
}
/blob_list.txt
)
file
(
REMOVE
${
FMHA_CPP_FOLDER
}
/blob_list.txt
)
endif
()
set
(
FMHA_KNOWN_APIS
"fwd,fwd_splitkv,fwd_appendkv,bwd"
)
# generate a list of kernels, but not actually emit files at config stage
...
...
profiler/include/profiler/profile_gemm_multiply_multiply_impl.hpp
View file @
a4522ae3
...
...
@@ -271,10 +271,12 @@ bool profile_gemm_multiply_multiply_impl(int do_verification,
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
op_name
<<
", KBatch "
<<
kbatch_curr
<<
std
::
endl
;
#if defined CK_ENABLE_FP8
#if defined CK_ENABLE_FP8
|| defined CK_ENABLE_INT8
// set softer tolerances for fp8
if
constexpr
(
is_same_v
<
ADataType
,
f8_t
>
||
is_same_v
<
BDataType
,
f8_t
>
||
is_same_v
<
EDataType
,
f8_t
>
)
if
constexpr
((
is_same_v
<
ADataType
,
f8_t
>
||
is_same_v
<
BDataType
,
f8_t
>
||
is_same_v
<
EDataType
,
f8_t
>
)
||
(
is_same_v
<
ADataType
,
int8_t
>
||
is_same_v
<
BDataType
,
int8_t
>
||
is_same_v
<
EDataType
,
int8_t
>
))
{
std
::
string
msg
=
"Error: Incorrect results!"
;
double
rtol
=
1e-1
;
...
...
@@ -286,7 +288,7 @@ bool profile_gemm_multiply_multiply_impl(int do_verification,
{
#endif
pass
=
pass
&
ck
::
utils
::
check_err
(
e_m_n_device_result
,
e_m_n_host_result
);
#if defined CK_ENABLE_FP8
#if defined CK_ENABLE_FP8
|| defined CK_ENABLE_INT8
}
#endif
...
...
profiler/include/profiler/profile_pool3d_fwd_impl.hpp
View file @
a4522ae3
...
...
@@ -102,11 +102,22 @@ bool profile_pool3d_fwd_impl(PoolFwdInputParams& in_params, PoolFwdKernelParams&
Tensor
<
IndexDataType
>
out_indices_n_c_do_ho_wo_device
(
f_host_tensor_descriptor
(
N
,
C
,
Do
,
Ho
,
Wo
));
constexpr
int
inDataRangeTensor1
{
1
};
constexpr
int
inDataRangeTensor2
{
5
};
constexpr
double
inDataRangeTensor3
{
0.5
};
switch
(
in_params
.
init_method
)
{
case
0
:
in_n_c_di_hi_wi
.
GenerateTensorValue
(
GeneratorTensor_1
<
InDataType
>
{});
break
;
case
1
:
in_n_c_di_hi_wi
.
GenerateTensorValue
(
GeneratorTensor_2
<
InDataType
>
{
-
5
,
5
});
break
;
default:
in_n_c_di_hi_wi
.
GenerateTensorValue
(
GeneratorTensor_3
<
InDataType
>
{
-
0.5
,
0.5
});
case
0
:
in_n_c_di_hi_wi
.
GenerateTensorValue
(
GeneratorTensor_1
<
InDataType
>
{
inDataRangeTensor1
});
break
;
case
1
:
in_n_c_di_hi_wi
.
GenerateTensorValue
(
GeneratorTensor_2
<
InDataType
>
{
-
inDataRangeTensor2
,
inDataRangeTensor2
});
break
;
default:
in_n_c_di_hi_wi
.
GenerateTensorValue
(
GeneratorTensor_3
<
InDataType
>
{
-
inDataRangeTensor3
,
inDataRangeTensor3
});
}
DeviceMem
in_device_buf
(
sizeof
(
InDataType
)
*
in_n_c_di_hi_wi
.
mDesc
.
GetElementSpaceSize
());
...
...
@@ -229,12 +240,25 @@ bool profile_pool3d_fwd_impl(PoolFwdInputParams& in_params, PoolFwdKernelParams&
{
out_device_buf
.
FromDevice
(
out_n_c_do_ho_wo_device
.
mData
.
data
());
auto
tolerance
=
1e-3
;
bool
pass
=
ck
::
utils
::
check_err
(
out_n_c_do_ho_wo_device
.
mData
,
auto
absolute_error_threshold
=
1.0
;
switch
(
in_params
.
init_method
)
{
case
0
:
absolute_error_threshold
=
static_cast
<
double
>
(
inDataRangeTensor1
);
break
;
case
1
:
absolute_error_threshold
=
static_cast
<
double
>
(
inDataRangeTensor2
);
break
;
default:
absolute_error_threshold
=
inDataRangeTensor3
;
}
absolute_error_threshold
=
ck
::
utils
::
get_absolute_threshold
<
ComputeDataType
,
OutDataType
>
(
absolute_error_threshold
);
auto
relative_error_threshold
=
ck
::
utils
::
get_relative_threshold
<
ComputeDataType
,
OutDataType
>
();
bool
pass
=
ck
::
utils
::
check_err
(
out_n_c_do_ho_wo_device
.
mData
,
out_n_c_do_ho_wo_host
.
mData
,
"Error: Incorrect results"
,
tolerance
,
tolerance
);
relative_error_threshold
,
absolute_error_threshold
);
if
constexpr
(
OutputIndex
)
{
...
...
profiler/src/profile_gemm_multiply_multiply.cpp
View file @
a4522ae3
...
...
@@ -27,6 +27,7 @@ enum struct GemmDataType
F16_F8_F16
,
// 5
F16_F16_F16_F8
,
// 6
F8_F8_BF16
,
// 7
INT8_INT8_BF16
,
// 8
};
#define OP_NAME "gemm_multiply_multiply"
...
...
@@ -39,7 +40,7 @@ int profile_gemm_multiply_multiply(int argc, char* argv[])
printf
(
"arg1: tensor operation ("
OP_NAME
": "
OP_DESC
")
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8; 4: f8@f16; 5: f16@f8; 6: "
"f16->f8; 7: f8->bf16, "
"comp f8)
\n
"
);
"comp f8
; 8: int8->bf16
)
\n
"
);
printf
(
"arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];
\n
"
);
printf
(
" 1: A[m, k] * B[n, k] = C[m, n];
\n
"
);
printf
(
" 2: A[k, m] * B[k, n] = C[m, n];
\n
"
);
...
...
@@ -89,6 +90,8 @@ int profile_gemm_multiply_multiply(int argc, char* argv[])
using
F32
=
float
;
using
BF16
=
ck
::
bhalf_t
;
using
F8
=
ck
::
f8_t
;
using
I8
=
int8_t
;
using
I32
=
int
;
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck
::
tensor_layout
::
gemm
::
ColumnMajor
;
...
...
@@ -162,6 +165,11 @@ int profile_gemm_multiply_multiply(int argc, char* argv[])
return
profile
(
F8
{},
F8
{},
F8
{},
F32
{},
F32
{},
F32
{},
BF16
{},
Row
{},
Col
{},
Row
{},
Col
{},
Row
{});
}
else
if
(
data_type
==
GemmDataType
::
INT8_INT8_BF16
&&
layout
==
GemmMatrixLayout
::
MK_NK_MN
)
{
return
profile
(
I8
{},
I8
{},
I8
{},
I32
{},
F32
{},
F32
{},
BF16
{},
Row
{},
Col
{},
Row
{},
Col
{},
Row
{});
}
else
{
std
::
cout
<<
"this data_type & layout is not implemented"
<<
std
::
endl
;
...
...
profiler/src/profile_gemm_universal.cpp
View file @
a4522ae3
...
...
@@ -57,6 +57,25 @@ int profile_gemm_universal(int argc, char* argv[])
exit
(
1
);
}
int
M
;
int
N
;
int
StrideA
;
int
StrideB
;
// Analyze the unsupported matrix shapes, switch the M and N number
if
(
std
::
stoi
(
argv
[
9
])
%
8
!=
0
&&
std
::
stoi
(
argv
[
8
])
%
8
==
0
)
{
M
=
std
::
stoi
(
argv
[
9
]);
StrideA
=
std
::
stoi
(
argv
[
12
]);
N
=
std
::
stoi
(
argv
[
8
]);
StrideB
=
std
::
stoi
(
argv
[
11
]);
}
else
{
M
=
std
::
stoi
(
argv
[
8
]);
StrideA
=
std
::
stoi
(
argv
[
11
]);
N
=
std
::
stoi
(
argv
[
9
]);
StrideB
=
std
::
stoi
(
argv
[
12
]);
}
const
auto
data_type
=
static_cast
<
GemmDataType
>
(
std
::
stoi
(
argv
[
2
]));
const
auto
layout
=
static_cast
<
GemmMatrixLayout
>
(
std
::
stoi
(
argv
[
3
]));
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
...
...
@@ -64,12 +83,8 @@ int profile_gemm_universal(int argc, char* argv[])
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
7
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
const
int
K
=
std
::
stoi
(
argv
[
10
]);
const
int
StrideA
=
std
::
stoi
(
argv
[
11
]);
const
int
StrideB
=
std
::
stoi
(
argv
[
12
]);
const
int
StrideC
=
std
::
stoi
(
argv
[
13
]);
const
int
KBatch
=
std
::
stoi
(
argv
[
14
]);
...
...
@@ -86,7 +101,9 @@ int profile_gemm_universal(int argc, char* argv[])
using
F32
=
float
;
using
F16
=
ck
::
half_t
;
using
BF16
=
ck
::
bhalf_t
;
using
F8
=
ck
::
f8_t
;
#if defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH)
using
F8
=
ck
::
f8_t
;
#endif
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck
::
tensor_layout
::
gemm
::
ColumnMajor
;
...
...
@@ -147,6 +164,7 @@ int profile_gemm_universal(int argc, char* argv[])
{
return
profile
(
F16
{},
F16
{},
F16
{},
F32
{},
F16
{},
Row
{},
Col
{},
Row
{});
}
#if defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH)
else
if
(
data_type
==
GemmDataType
::
F16_F8_F16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
{
return
profile
(
F16
{},
F8
{},
F16
{},
F32
{},
F16
{},
Row
{},
Row
{},
Row
{});
...
...
@@ -163,6 +181,7 @@ int profile_gemm_universal(int argc, char* argv[])
{
return
profile
(
F8
{},
F16
{},
F16
{},
F32
{},
F16
{},
Row
{},
Col
{},
Row
{});
}
#endif
else
if
(
data_type
==
GemmDataType
::
BF16_BF16_BF16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
{
return
profile
(
BF16
{},
BF16
{},
BF16
{},
F32
{},
BF16
{},
Row
{},
Row
{},
Row
{});
...
...
@@ -179,6 +198,7 @@ int profile_gemm_universal(int argc, char* argv[])
{
return
profile
(
BF16
{},
BF16
{},
BF16
{},
F32
{},
BF16
{},
Col
{},
Row
{},
Row
{});
}
#if defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH)
else
if
(
data_type
==
GemmDataType
::
F8_F8_BF16
&&
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
{
return
profile
(
F8
{},
F8
{},
F8
{},
F32
{},
BF16
{},
Row
{},
Row
{},
Row
{});
...
...
@@ -187,6 +207,7 @@ int profile_gemm_universal(int argc, char* argv[])
{
return
profile
(
F8
{},
F8
{},
F8
{},
F32
{},
BF16
{},
Row
{},
Col
{},
Row
{});
}
#endif
else
{
std
::
cout
<<
"this data_type & layout is not implemented"
<<
std
::
endl
;
...
...
profiler/src/profile_grouped_conv_bwd_weight.cpp
View file @
a4522ae3
...
...
@@ -25,7 +25,8 @@ enum struct ConvDataType
F16_F16_F16
,
// 1
BF16_F32_BF16
,
// 2
F16_F16_F16_BF8_F8
,
// 3
I8_I8_I8
// 4
I8_I8_I8
,
// 4
BF16_BF16_BF16
,
// 5
};
#define OP_NAME "grouped_conv_bwd_weight"
...
...
@@ -38,7 +39,8 @@ static void print_helper_msg()
<<
" 1: Input fp16, Weight fp16, Output fp16
\n
"
<<
" 2: Input bf16, Weight fp32, Output bf16
\n
"
<<
" 3: Input fp16, Weight fp16, Output fp16, Gemm bf8@fp8
\n
"
<<
" 4: Input int8, Weight int8, Output int8)
\n
"
<<
" 4: Input int8, Weight int8, Output int8
\n
"
<<
" 5: Input bf16, Weight bf16, Output bf16)
\n
"
<<
"arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, "
"N, K, Ho, Wo]
\n
"
<<
" 1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, "
...
...
@@ -180,6 +182,10 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
// fp32 atomic add is used for weight tensor in bf16 kernel
return
profile
(
I2
,
NHWGC
{},
GKYXC
{},
NHWGK
{},
BF16
{},
F32
{},
BF16
{},
BF16
{},
BF16
{});
}
if
(
data_type
==
ConvDataType
::
BF16_BF16_BF16
)
{
return
profile
(
I2
,
NHWGC
{},
GKYXC
{},
NHWGK
{},
BF16
{},
BF16
{},
BF16
{},
BF16
{},
BF16
{});
}
}
else
if
(
num_dim_spatial
==
2
&&
layout
==
ConvLayout
::
NGCHW_GKYXC_NGKHW
)
{
...
...
@@ -187,6 +193,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
{
return
profile
(
I2
,
NGCHW
{},
GKYXC
{},
NGKHW
{},
F16
{},
F16
{},
F16
{},
F16
{},
F16
{});
}
if
(
data_type
==
ConvDataType
::
BF16_BF16_BF16
)
{
// fp32 atomic add is used for weight tensor in bf16 kernel
return
profile
(
I2
,
NGCHW
{},
GKYXC
{},
NGKHW
{},
BF16
{},
BF16
{},
BF16
{},
BF16
{},
BF16
{});
}
}
if
(
num_dim_spatial
==
3
&&
layout
==
ConvLayout
::
GNHWC_GKYXC_GNHWK
)
{
...
...
@@ -224,6 +235,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
// fp32 atomic add is used for weight tensor in bf16 kernel
return
profile
(
I3
,
NDHWGC
{},
GKZYXC
{},
NDHWGK
{},
BF16
{},
F32
{},
BF16
{},
BF16
{},
BF16
{});
}
if
(
data_type
==
ConvDataType
::
BF16_BF16_BF16
)
{
return
profile
(
I3
,
NDHWGC
{},
GKZYXC
{},
NDHWGK
{},
BF16
{},
BF16
{},
BF16
{},
BF16
{},
BF16
{});
}
if
(
data_type
==
ConvDataType
::
F16_F16_F16_BF8_F8
)
{
return
profile
(
I3
,
NDHWGC
{},
GKZYXC
{},
NDHWGK
{},
F16
{},
F16
{},
F16
{},
BF8
{},
F8
{});
...
...
@@ -240,6 +256,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
{
return
profile
(
I3
,
NGCDHW
{},
GKZYXC
{},
NGKDHW
{},
F16
{},
F16
{},
F16
{},
F16
{},
F16
{});
}
if
(
data_type
==
ConvDataType
::
BF16_BF16_BF16
)
{
return
profile
(
I3
,
NGCDHW
{},
GKZYXC
{},
NGKDHW
{},
BF16
{},
BF16
{},
BF16
{},
BF16
{},
BF16
{});
}
}
std
::
cout
<<
"this data_type & layout is not implemented"
<<
std
::
endl
;
...
...
python/ck4inductor/grouped_conv_fwd/gen_instances.py
0 → 100644
View file @
a4522ae3
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
import
logging
import
os
import
subprocess
from
dataclasses
import
replace
from
functools
import
lru_cache
from
typing
import
List
from
..util
import
library_path
from
.op
import
CKGroupedConvFwdOp
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
def _ck_conv_instances_path():
    """
    Return the directory holding the grouped conv forward instance headers
    inside the CK library tree, or None (after logging an error) when that
    directory does not exist.
    """
    relative_parts = (
        "include",
        "ck",
        "library",
        "tensor_operation_instance",
        "gpu",
        "grouped_conv_fwd",
    )
    instances_dir = os.path.join(library_path(), *relative_parts)  # noqa: F821
    if os.path.exists(instances_dir):
        return instances_dir
    log.error("CK library conv instances path %s does not exist", instances_dir)
    return None
def parse_instances(str_instances: List[str]) -> List[CKGroupedConvFwdOp]:
    """
    Parse the lines containing Grouped Convolution Forward template instances
    into `CKGroupedConvFwdOp` instances.

    For every line, the text after the last occurrence of
    ``DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3`` is treated as a
    comma-separated template argument list:

    * ``S<a, b, ...>`` arguments become tuples of ints,
    * arguments accepted by ``int()`` become ints,
    * everything else is kept as a string.

    Positional arguments 0, 3 and 9 (n_dim_spatial, ds_layout,
    ds_element_dtype) are then overwritten with placeholders before the
    instance is constructed.

    Lines that yield no template arguments are skipped. This matters for the
    ``[""]`` list obtained by splitting empty grep output, which would
    otherwise raise IndexError on the positional overrides.
    """

    def maybe_int(s):
        try:
            return int(s)
        except ValueError:
            return s

    op_instances = []
    # TODO: maybe use libclang for parsing C++ code in the future
    # to avoid this hacky parsing logic below ? :) - copilot
    for line in str_instances:
        s_template_args = line.split(
            "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3"
        )[-1].strip("<>, ")
        if not s_template_args.strip():
            # blank line or no argument list: nothing to build an instance from
            continue
        template_args = []
        i_current = 0
        while i_current < len(s_template_args):
            if s_template_args[i_current] == " ":
                # skip whitespace
                i_current += 1
                continue
            elif s_template_args[i_current : i_current + 2] == "S<":
                # parse template S<Index...>
                i_next = s_template_args.find(">", i_current)
                template_args.append(
                    tuple(map(int, s_template_args[i_current + 2 : i_next].split(",")))
                )
                # step over the closing '>' and the ',' that follows it
                i_current = i_next + 2
            else:
                # all string attributes must be either type aliases or global constants in C++
                i_next = s_template_args.find(",", i_current)
                template_args.append(
                    maybe_int(
                        s_template_args[i_current : i_next if i_next != -1 else None]
                    )
                )
                if i_next != -1:
                    i_current = i_next + 1
            if i_next == -1:
                # no further delimiter: the last argument has been consumed
                break

        template_args[0] = -1  # n_dim_spatial
        template_args[3] = tuple()  # ds_layout
        template_args[9] = tuple()  # ds_element_dtype

        new_instance = CKGroupedConvFwdOp(
            *template_args,  # type: ignore[arg-type]
        )

        op_instances.append(new_instance)
    return op_instances
@lru_cache(None)
def gen_conv_ops_library() -> List[CKGroupedConvFwdOp]:
    """
    Build the list of Grouped Convolution Forward instances found in the
    Composable Kernel library folder.

    The instance headers are searched with ``grep`` and the matching lines are
    parsed. Each parsed instance is then expanded: a placeholder scheduler
    ("BlkGemmPipeSched") or specialization ("ConvSpec") is replaced by every
    known concrete value, and every combination is emitted once per supported
    input/output layout pair. Returns an empty list when the instance
    directory is missing. The result is cached.
    """
    instances_dir = _ck_conv_instances_path()
    if not instances_dir:
        return []

    grep_cmd = [
        "grep",
        "-inR",
        "DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3",
        instances_dir,
    ]
    grep_result = subprocess.run(grep_cmd, capture_output=True, text=True)
    grep_lines = grep_result.stdout.strip().split("\n")

    parsed_ops = parse_instances(grep_lines)
    log.debug("ck instances from library: %d", len(parsed_ops))

    all_schedulers = [
        "BlockGemmPipelineScheduler::Intrawave",
        "BlockGemmPipelineScheduler::Interwave",
    ]
    all_conv_specs = [
        "ConvolutionForwardSpecialization::Default",
        "ConvolutionForwardSpecialization::Filter1x1Pad0",
        "ConvolutionForwardSpecialization::Filter1x1Stride1Pad0",
        "ConvolutionForwardSpecialization::OddC",
    ]
    # (a_layout, e_layout) pairs: channels-last first, then channels-first
    layout_pairs = (("NHWGC", "NHWGK"), ("NGCHW", "NGKHW"))

    # substitute templated args by looping through their domains
    expanded_ops = []
    for op in parsed_ops:
        if op.block_gemm_pipeline_scheduler == "BlkGemmPipeSched":
            scheduler_choices = all_schedulers
        else:
            scheduler_choices = [op.block_gemm_pipeline_scheduler]

        if op.conv_forward_specialization == "ConvSpec":
            spec_choices = all_conv_specs
        else:
            spec_choices = [op.conv_forward_specialization]

        expanded_ops.extend(
            replace(
                op,
                block_gemm_pipeline_scheduler=scheduler_choice,
                conv_forward_specialization=spec_choice,
                gemm_specialization="GemmSpecialization::MNKPadding",
                n_dim_spatial=2,
                a_layout=in_layout,
                b_layout="GKYXC",
                e_layout=out_layout,
            )
            for scheduler_choice in scheduler_choices
            for spec_choice in spec_choices
            for in_layout, out_layout in layout_pairs
        )

    return expanded_ops
if __name__ == "__main__":
    # When run as a script, print the generated instance list.
    generated_ops = gen_conv_ops_library()
    print(generated_ops)
python/ck4inductor/grouped_conv_fwd/op.py
0 → 100644
View file @
a4522ae3
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
from
dataclasses
import
asdict
,
dataclass
from
typing
import
Optional
,
Tuple
@dataclass
class CKGroupedConvFwdOp:
    """
    Template arguments describing one grouped convolution forward kernel
    instance.

    Field order is significant: the instance generator constructs this class
    positionally from a parsed template argument list, so fields must not be
    reordered. `ds_layout` and `ds_element_dtype` are variable-length tuples
    (the generator sets both to the empty tuple).
    """

    n_dim_spatial: int
    a_layout: str
    b_layout: str
    ds_layout: Tuple[str, ...]
    e_layout: str
    a_element_dtype: str
    b_element_dtype: str
    acc_dtype: str
    c_shuffle_dtype: str
    ds_element_dtype: Tuple[str, ...]
    e_element_dtype: str
    a_elementwise_op: str
    b_elementwise_op: str
    cde_elementwise_op: str
    conv_forward_specialization: str
    gemm_specialization: str
    block_size: int
    m_per_block: int
    n_per_block: int
    k_per_block: int
    ak1: int
    bk1: int
    m_per_xdl: int
    n_per_xdl: int
    m_xdl_per_wave: int
    n_xdl_per_wave: int
    a_block_transfer_thread_cluster_lengths_ak0_m_ak1: Tuple[int, int, int]
    a_block_transfer_thread_cluster_arrange_order: Tuple[int, int, int]
    a_block_transfer_src_access_order: Tuple[int, int, int]
    a_block_transfer_src_vector_dim: int
    a_block_transfer_src_scalar_per_vector: int
    a_block_transfer_dst_scalar_per_vector_ak1: int
    a_block_lds_extra_m: bool
    b_block_transfer_thread_cluster_lengths_bk0_n_bk1: Tuple[int, int, int]
    b_block_transfer_thread_cluster_arrange_order: Tuple[int, int, int]
    b_block_transfer_src_access_order: Tuple[int, int, int]
    b_block_transfer_src_vector_dim: int
    b_block_transfer_src_scalar_per_vector: int
    b_block_transfer_dst_scalar_per_vector_bk1: int
    b_block_lds_extra_n: bool
    c_shuffle_m_xdl_per_wave_per_shuffle: int
    c_shuffle_n_xdl_per_wave_per_shuffle: int
    cde_block_transfer_cluster_lengths_m_block_m_per_block_n_block_n_per_block: Tuple[  # noqa
        int,
        int,
        int,
        int,
    ]
    cde_block_transfer_scalar_per_vector_n_per_block: int
    block_gemm_pipeline_scheduler: str
    block_gemm_pipeline_version: str

    # Optional trailing arguments; None when not given.
    a_compute_dtype: Optional[str] = None
    b_compute_dtype: Optional[str] = None

    def name(self):
        """Return the C++ alias used for this template instance."""
        # cpp alias for template instance
        return (
            f"ck_device_grouped_convolution_fwd_multiple_abd_xdl_c_shuffle_v3_"
            f"{self.key_name()}"
        )

    def key_name(self):
        """
        Return a string built from every field, intended as a dict key.

        Each field contributes ``K<name>V<value>``: the field name has its
        underscores removed and is lower-cased; tuple values are joined with
        "x"; any other value is converted with ``str`` and stripped of ":".
        The per-field pieces are joined with "_".
        """

        def encode_value(field_value):
            if isinstance(field_value, tuple):
                return "x".join(map(str, iter(field_value)))
            return str(field_value).replace(":", "")

        # TBD; must be unique per instance. Intended to use as dict key
        return "_".join(
            "K" + field_name.replace("_", "").lower() + "V" + encode_value(field_value)
            for field_name, field_value in self.dict_items()
        )

    def dict_items(self):
        """Return the (field name, value) pairs of this instance, in field order."""
        return asdict(self).items()
python/ck4inductor/universal_gemm/gen_instances.py
View file @
a4522ae3
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
import
logging
import
os
import
subprocess
from
dataclasses
import
fields
,
replace
from
dataclasses
import
replace
from
functools
import
lru_cache
,
partial
from
typing
import
List
...
...
python/ck4inductor/universal_gemm/op.py
View file @
a4522ae3
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
from
dataclasses
import
asdict
,
dataclass
from
typing
import
Optional
,
Tuple
...
...
python/ck4inductor/util.py
View file @
a4522ae3
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
import
functools
import
os
@
functools
.
lru_cache
(
None
)
def
library_path
():
return
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'
library
'
)
return
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"
library
"
)
script/convert_miopen_driver_to_profiler.py
View file @
a4522ae3
...
...
@@ -65,8 +65,9 @@ def parse_data_type(args):
if
args
.
ck_profier_op
==
"grouped_conv_fwd"
:
args
.
data_type
=
3
if
args
.
data_type
==
"bfp16"
:
if
args
.
ck_profier_op
==
"grouped_conv_bwd_weight"
or
\
args
.
ck_profier_op
==
"grouped_conv_bwd_data"
or
\
if
args
.
ck_profier_op
==
"grouped_conv_bwd_weight"
:
args
.
data_type
=
5
if
args
.
ck_profier_op
==
"grouped_conv_bwd_data"
or
\
args
.
ck_profier_op
==
"grouped_conv_fwd"
:
args
.
data_type
=
2
...
...
test/CMakeLists.txt
View file @
a4522ae3
...
...
@@ -64,11 +64,11 @@ function(add_test_executable TEST_NAME)
#only continue if there are some source files left on the list
if
(
ARGN
)
if
(
ARGN MATCHES
"_xdl"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201
)
list
(
REMOVE_ITEM TEST_TARGETS
gfx900 gfx906 gfx906:xnack-
gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201
)
elseif
(
ARGN MATCHES
"_wmma"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx908 gfx90a
gfx940
gfx94
1
gfx94
2
gfx9
50
gfx1030
)
list
(
REMOVE_ITEM TEST_TARGETS gfx90
0 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx90
8 gfx90a gfx94
0
gfx94
1
gfx9
42
gfx1030
gfx950
)
elseif
(
ARGN MATCHES
"_smfmac"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201
)
list
(
REMOVE_ITEM TEST_TARGETS
gfx900 gfx906 gfx906:xnack-
gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201
)
endif
()
set_source_files_properties
(
${
ARGN
}
PROPERTIES LANGUAGE HIP
)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
...
...
@@ -141,11 +141,11 @@ function(add_gtest_executable TEST_NAME)
#only continue if there are some source files left on the list
if
(
ARGN
)
if
(
ARGN MATCHES
"_xdl"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201
)
list
(
REMOVE_ITEM TEST_TARGETS gfx900 gfx906
gfx906:xnack-
gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201
)
elseif
(
ARGN MATCHES
"_wmma"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx908 gfx90a
gfx940
gfx94
1
gfx94
2
gfx9
50
gfx1030
)
list
(
REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx90
6:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx90
8 gfx90a gfx94
0
gfx94
1
gfx9
42
gfx1030
gfx950
)
elseif
(
ARGN MATCHES
"_smfmac"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201
)
list
(
REMOVE_ITEM TEST_TARGETS
gfx900 gfx906 gfx906:xnack-
gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201
)
endif
()
set_source_files_properties
(
${
ARGN
}
PROPERTIES LANGUAGE HIP
)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
...
...
@@ -210,3 +210,4 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx942" AND CK_HIP_VERSION_MAJOR GREATER_EQUAL
add_subdirectory
(
smfmac_op
)
endif
()
add_subdirectory
(
position_embedding
)
add_subdirectory
(
scatter_gather
)
test/ck_tile/CMakeLists.txt
View file @
a4522ae3
add_subdirectory
(
image_to_column
)
add_subdirectory
(
gemm
)
test/ck_tile/gemm/CMakeLists.txt
0 → 100644
View file @
a4522ae3
# Currently ck_tile is only built on gfx9
# Register the memory-pipeline GEMM gtest only when GPU_TARGETS matches "gfx9".
if(GPU_TARGETS MATCHES "gfx9")
    add_gtest_executable(test_ck_tile_gemm_mem_pipeline test_gemm_mem_pipeline.cpp)
endif()
test/ck_tile/gemm/test_gemm_mem_pipeline.cpp
0 → 100644
View file @
a4522ae3
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include "gtest/gtest.h"
#include "ck_tile/host.hpp"
#include "test_gemm_mem_pipeline_util.hpp"
// Short aliases for the element types and matrix layouts used in the test matrix below.
using F16 = ck_tile::half_t;
using F32 = float;

using Row = ck_tile::tensor_layout::gemm::RowMajor;
using Col = ck_tile::tensor_layout::gemm::ColumnMajor;

// One tuple per tested configuration; the fixture unpacks the elements by index.
// All four A/B layout combinations are covered with fp16 inputs/outputs and fp32 accumulation.
// clang-format off
using KernelTypes = ::testing::Types<
    //         ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, CDataType
    std::tuple<    Row,     Col,     Row,       F16,       F16,         F32,       F16>,
    std::tuple<    Col,     Row,     Row,       F16,       F16,         F32,       F16>,
    std::tuple<    Row,     Row,     Row,       F16,       F16,         F32,       F16>,
    std::tuple<    Col,     Col,     Row,       F16,       F16,         F32,       F16>
    >;
// clang-format on

// Instantiate the typed test suite for every configuration in KernelTypes.
TYPED_TEST_SUITE(TestCkTileGemmMemPipeline, KernelTypes);
#include "test_gemm_mem_pipeline_ut_cases.inc"
test/ck_tile/gemm/test_gemm_mem_pipeline_ut_cases.inc
0 → 100644
View file @
a4522ae3
#pragma once
TYPED_TEST(TestCkTileGemmMemPipeline, SmallM)
{
    // Run the GEMM for M = 1..6 with N = 1024 and K = 320 held fixed.
    constexpr int small_ms[] = {1, 2, 3, 4, 5, 6};
    constexpr int n_dim      = 1024;
    constexpr int k_dim      = 320;

    for(const int m_dim : small_ms)
    {
        this->Run(m_dim, n_dim, k_dim);
    }
}
TYPED_TEST(TestCkTileGemmMemPipeline, MidLargeM)
{
    // Run the GEMM for a range of larger M values with N = 1024 and K = 320 held fixed.
    constexpr int mid_large_ms[] = {127, 255, 312, 799, 1573};
    constexpr int n_dim          = 1024;
    constexpr int k_dim          = 320;

    for(const int m_dim : mid_large_ms)
    {
        this->Run(m_dim, n_dim, k_dim);
    }
}
TYPED_TEST(TestCkTileGemmMemPipeline, PaddK)
{
    // Single problem size: M = 127, N = 1024, K = 432.
    constexpr int padded_k_ms[] = {127};
    constexpr int n_dim         = 1024;
    constexpr int k_dim         = 432;

    for(const int m_dim : padded_k_ms)
    {
        this->Run(m_dim, n_dim, k_dim);
    }
}
TYPED_TEST(TestCkTileGemmMemPipeline, Regular)
{
    // Single problem size: M = 512, N = 1024, K = 512.
    constexpr int regular_ms[] = {512};
    constexpr int n_dim        = 1024;
    constexpr int k_dim        = 512;

    for(const int m_dim : regular_ms)
    {
        this->Run(m_dim, n_dim, k_dim);
    }
}
test/ck_tile/gemm/test_gemm_mem_pipeline_util.hpp
0 → 100644
View file @
a4522ae3
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <sstream>
#include <gtest/gtest.h>
#include "ck_tile/core.hpp"
#include "ck_tile/host.hpp"
#include "ck_tile/host/kernel_launch.hpp"
#include "ck_tile/ops/epilogue.hpp"
#include "ck_tile/ops/gemm.hpp"
template
<
typename
Tuple
>
class
TestCkTileGemmMemPipeline
:
public
::
testing
::
Test
{
protected:
// Unpack the test parameter tuple by position:
// 0-2: layouts of A, B and C; 3-4: element types of A and B;
// 5: accumulator type; 6: element type of C.
using ALayout     = std::tuple_element_t<0, Tuple>;
using BLayout     = std::tuple_element_t<1, Tuple>;
using CLayout     = std::tuple_element_t<2, Tuple>;
using ADataType   = std::tuple_element_t<3, Tuple>;
using BDataType   = std::tuple_element_t<4, Tuple>;
using AccDataType = std::tuple_element_t<5, Tuple>;
using CDataType   = std::tuple_element_t<6, Tuple>;
// TODO: expose tile size through test t-param ?
// Plain bundle of the arguments consumed by invoke_gemm.
struct gemm_basic_args
{
    // Type-erased buffer pointers; A and B are read-only, C is writable.
    // NOTE(review): presumably device pointers - confirm against the caller.
    const void* p_a;
    const void* p_b;
    void* p_c;
    // Passed as the third argument of Kernel::GridSize.
    ck_tile::index_t kbatch;
    // Problem dimensions.
    ck_tile::index_t M;
    ck_tile::index_t N;
    ck_tile::index_t K;
    // Strides of the A, B and C matrices.
    ck_tile::index_t stride_A;
    ck_tile::index_t stride_B;
    ck_tile::index_t stride_C;
};
/// Builds the memory-pipeline GEMM kernel for the fixture's types and launches it.
///
/// The kernel type depends on two compile-time properties derived from the runtime
/// K dimension: whether the main loop has a "hot loop" and which tail number applies.
/// Both are computed at runtime and then dispatched to the matching instantiation.
///
/// @param args  Buffer pointers, problem sizes and strides of the GEMM.
/// @param s     Stream configuration; a positive log_level_ prints the launch geometry.
/// @throws std::runtime_error when no kernel instantiation exists for the computed
///         hot-loop / tail-number combination.
void invoke_gemm(const gemm_basic_args& args, const ck_tile::stream_config& s)
{
    // TODO: This should be parameterized in tests
    constexpr ck_tile::index_t M_Tile = 128;
    constexpr ck_tile::index_t N_Tile = 128;
    constexpr ck_tile::index_t K_Tile = 32;

    constexpr ck_tile::index_t M_Warp = 2;
    constexpr ck_tile::index_t N_Warp = 2;
    constexpr ck_tile::index_t K_Warp = 1;

    constexpr ck_tile::index_t M_Warp_Tile = 32;
    constexpr ck_tile::index_t N_Warp_Tile = 32;
    constexpr ck_tile::index_t K_Warp_Tile = 8;

    constexpr bool kPadA = true;
    constexpr bool kPadB = true;
    constexpr bool kPadC = true;

    constexpr int kBlockPerCu = 1;

    // ===============================================

    using GemmShape =
        ck_tile::TileGemmShape<ck_tile::sequence<M_Tile, N_Tile, K_Tile>,
                               ck_tile::sequence<M_Warp, N_Warp, K_Warp>,
                               ck_tile::sequence<M_Warp_Tile, N_Warp_Tile, K_Warp_Tile>>;
    using TilePartitioner = ck_tile::GemmTilePartitioner<GemmShape>;

    using GemmEpilogue = ck_tile::Default2DEpilogue<
        ck_tile::Default2DEpilogueProblem<AccDataType, CDataType, false, kPadC>>;

    using Traits = ck_tile::TileGemmTraits<kPadA, kPadB, kPadC, ALayout, BLayout, CLayout>;

    using BaseGemmPipeline = ck_tile::BaseGemmPipelineAgBgCrMem<
        ck_tile::GemmPipelineProblem<ADataType, BDataType, AccDataType, GemmShape, Traits>>;

    // Runtime values that select the kernel instantiation.
    const ck_tile::index_t num_loop    = TilePartitioner::GetLoopNum(args.K);
    const bool has_hot_loop            = BaseGemmPipeline::BlockHasHotloop(num_loop);
    const ck_tile::TailNumber tail_num = BaseGemmPipeline::GetBlockLoopTailNum(num_loop);

    // Instantiates and launches the kernel for one compile-time
    // (has_hot_loop, tail_number) combination, both passed as constant wrappers.
    const auto Run = [&](const auto has_hot_loop_, const auto tail_number_) {
        constexpr bool has_hot_loop_v = has_hot_loop_.value;
        constexpr auto tail_number_v  = tail_number_.value;

        using GemmPipeline = ck_tile::GemmPipelineAgBgCrMem<
            ck_tile::UniversalGemmPipelineProblem<ADataType,
                                                  BDataType,
                                                  AccDataType,
                                                  GemmShape,
                                                  Traits,
                                                  ck_tile::GemmPipelineScheduler::Intrawave,
                                                  has_hot_loop_v,
                                                  tail_number_v>>;
        using Kernel = ck_tile::GemmKernel<TilePartitioner, GemmPipeline, GemmEpilogue>;
        auto kargs   = Kernel::MakeKargs(args.p_a,
                                       args.p_b,
                                       args.p_c,
                                       args.M,
                                       args.N,
                                       args.K,
                                       args.stride_A,
                                       args.stride_B,
                                       args.stride_C);

        const dim3 grids      = Kernel::GridSize(args.M, args.N, args.kbatch);
        constexpr dim3 blocks = Kernel::BlockSize();

        if(s.log_level_ > 0)
        {
            std::cout << "Launching kernel with args:"
                      << " grid: {" << grids.x << ", " << grids.y << ", " << grids.z << "}"
                      << ", blocks: {" << blocks.x << ", " << blocks.y << ", " << blocks.z << "}"
                      << std::endl;
        }

        ck_tile::launch_kernel(
            s, ck_tile::make_kernel<blocks.x, kBlockPerCu>(Kernel{}, grids, blocks, 0, kargs));
    };

    // Launches the instantiation for tail_c only when it equals the runtime tail number.
    // Returns true when a kernel was launched.
    const auto RunIfTailMatches = [&](const auto has_hot_loop_c, const auto tail_c) {
        if(tail_num != tail_c.value)
        {
            return false;
        }
        Run(has_hot_loop_c, tail_c);
        return true;
    };

    using TailNumber = ck_tile::TailNumber;

    if(has_hot_loop)
    {
        // Tail pipeline One to Seven
        bool launched =
            RunIfTailMatches(ck_tile::bool_constant<true>{},
                             ck_tile::integral_constant<TailNumber, TailNumber::One>{}) ||
            RunIfTailMatches(ck_tile::bool_constant<true>{},
                             ck_tile::integral_constant<TailNumber, TailNumber::Full>{});

        // Tail numbers Two..Seven are only instantiated when the pipeline has enough
        // prefetch stages for them.
        if constexpr(BaseGemmPipeline::PrefetchStages > 2)
        {
            launched = launched ||
                       RunIfTailMatches(ck_tile::bool_constant<true>{},
                                        ck_tile::integral_constant<TailNumber, TailNumber::Two>{});
        }
        if constexpr(BaseGemmPipeline::PrefetchStages > 3)
        {
            launched =
                launched ||
                RunIfTailMatches(ck_tile::bool_constant<true>{},
                                 ck_tile::integral_constant<TailNumber, TailNumber::Three>{});
        }
        if constexpr(BaseGemmPipeline::PrefetchStages > 4)
        {
            launched = launched ||
                       RunIfTailMatches(ck_tile::bool_constant<true>{},
                                        ck_tile::integral_constant<TailNumber, TailNumber::Four>{});
        }
        if constexpr(BaseGemmPipeline::PrefetchStages > 5)
        {
            launched = launched ||
                       RunIfTailMatches(ck_tile::bool_constant<true>{},
                                        ck_tile::integral_constant<TailNumber, TailNumber::Five>{});
        }
        if constexpr(BaseGemmPipeline::PrefetchStages > 6)
        {
            launched = launched ||
                       RunIfTailMatches(ck_tile::bool_constant<true>{},
                                        ck_tile::integral_constant<TailNumber, TailNumber::Six>{});
        }
        if constexpr(BaseGemmPipeline::PrefetchStages > 7)
        {
            launched =
                launched ||
                RunIfTailMatches(ck_tile::bool_constant<true>{},
                                 ck_tile::integral_constant<TailNumber, TailNumber::Seven>{});
        }

        if(!launched)
        {
            // Fail loudly instead of returning without having launched any kernel.
            std::ostringstream err;
            err << "When there's a hot loop, this tail number \"" << tail_num
                << "\" is not supported! " << __FILE__ << ":" << __LINE__
                << ", in function: " << __func__;
            throw std::runtime_error(err.str());
        }
    }
    else
    {
        // Tail number always Full - #PrefetchStages
        if(tail_num == ck_tile::TailNumber::Full)
        {
            Run(ck_tile::bool_constant<false>{},
                ck_tile::integral_constant<ck_tile::TailNumber, ck_tile::TailNumber::Full>{});
        }
        else
        {
            std::ostringstream err;
            err << "When there's no hot loop, this tail number \"" << tail_num
                << "\" is not supported! " << __FILE__ << ":" << __LINE__
                << ", in function: " << __func__;
            throw std::runtime_error(err.str());
        }
    }
}
public:
std
::
vector
<
int
>
k_batches_
;
// Test fixture hook: installs the list of k-batch values that Run() iterates
// over. Only a single value, 1, is configured here.
void SetUp() override
{
    k_batches_ = std::vector<int>{1};
}
void
Run
(
const
int
M
,
const
int
N
,
const
int
K
,
const
int
StrideA
=
0
,
const
int
StrideB
=
0
,
const
int
StrideC
=
0
)
{
for
(
auto
kb
:
k_batches_
)
{
RunSingle
(
M
,
N
,
K
,
StrideA
,
StrideB
,
StrideC
,
kb
);
}
}
void
RunSingle
(
const
int
M
,
const
int
N
,
const
int
K
,
const
int
StrideA
,
const
int
StrideB
,
const
int
StrideC
,
int
kbatch
=
1
)
{
using
namespace
ck_tile
::
literals
;
auto
f_host_tensor_descriptor
=
[](
std
::
size_t
row
,
std
::
size_t
col
,
std
::
size_t
stride
,
auto
layout
)
{
if
constexpr
(
std
::
is_same_v
<
decltype
(
layout
),
ck_tile
::
tensor_layout
::
gemm
::
RowMajor
>
)
{
return
ck_tile
::
HostTensorDescriptor
({
row
,
col
},
{
stride
,
1
_uz
});
}
else
{
return
ck_tile
::
HostTensorDescriptor
({
row
,
col
},
{
1
_uz
,
stride
});
}
};
auto
f_get_default_stride
=
[](
std
::
size_t
row
,
std
::
size_t
col
,
std
::
size_t
stride
,
auto
layout
)
{
if
(
stride
==
0
)
{
// give a chance if stride is zero, return a default packed stride
if
constexpr
(
std
::
is_same_v
<
decltype
(
layout
),
ck_tile
::
tensor_layout
::
gemm
::
RowMajor
>
)
{
return
col
;
}
else
{
return
row
;
}
}
else
return
stride
;
};
std
::
size_t
stride_A
=
f_get_default_stride
(
M
,
K
,
StrideA
,
ALayout
{});
std
::
size_t
stride_B
=
f_get_default_stride
(
K
,
N
,
StrideB
,
BLayout
{});
std
::
size_t
stride_C
=
f_get_default_stride
(
M
,
N
,
StrideC
,
CLayout
{});
ck_tile
::
HostTensor
<
ADataType
>
a_m_k
(
f_host_tensor_descriptor
(
M
,
K
,
stride_A
,
ALayout
{}));
ck_tile
::
HostTensor
<
BDataType
>
b_k_n
(
f_host_tensor_descriptor
(
K
,
N
,
stride_B
,
BLayout
{}));
ck_tile
::
HostTensor
<
CDataType
>
c_m_n_dev_result
(
f_host_tensor_descriptor
(
M
,
N
,
stride_C
,
CLayout
{}));
ck_tile
::
FillUniformDistributionIntegerValue
<
ADataType
>
{
-
5
,
5
}(
a_m_k
);
ck_tile
::
FillUniformDistributionIntegerValue
<
BDataType
>
{
-
5
,
5
}(
b_k_n
);
ck_tile
::
DeviceMem
a_m_k_dev_buf
(
a_m_k
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
b_k_n_dev_buf
(
b_k_n
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
c_m_n_dev_buf
(
c_m_n_dev_result
.
get_element_space_size_in_bytes
());
a_m_k_dev_buf
.
ToDevice
(
a_m_k
.
data
());
b_k_n_dev_buf
.
ToDevice
(
b_k_n
.
data
());
c_m_n_dev_buf
.
SetZero
();
c_m_n_dev_result
.
SetZero
();
gemm_basic_args
args
;
args
.
p_a
=
a_m_k_dev_buf
.
GetDeviceBuffer
();
args
.
p_b
=
b_k_n_dev_buf
.
GetDeviceBuffer
();
args
.
p_c
=
c_m_n_dev_buf
.
GetDeviceBuffer
();
args
.
kbatch
=
kbatch
;
args
.
M
=
M
;
args
.
N
=
N
;
args
.
K
=
K
;
args
.
stride_A
=
stride_A
;
args
.
stride_B
=
stride_B
;
args
.
stride_C
=
stride_C
;
invoke_gemm
(
args
,
ck_tile
::
stream_config
{
nullptr
,
false
});
c_m_n_dev_buf
.
FromDevice
(
c_m_n_dev_result
.
data
());
bool
pass
=
true
;
ck_tile
::
HostTensor
<
CDataType
>
c_m_n_host_ref
(
f_host_tensor_descriptor
(
M
,
N
,
stride_C
,
CLayout
{}));
c_m_n_host_ref
.
SetZero
();
ck_tile
::
reference_gemm
<
ADataType
,
BDataType
,
AccDataType
,
CDataType
>
(
a_m_k
,
b_k_n
,
c_m_n_host_ref
);
pass
=
ck_tile
::
check_err
(
c_m_n_dev_result
,
c_m_n_host_ref
);
EXPECT_TRUE
(
pass
);
}
};
test/data_type/CMakeLists.txt
View file @
a4522ae3
...
...
@@ -18,4 +18,9 @@ if(result EQUAL 0)
target_link_libraries
(
test_bf8 PRIVATE utility
)
endif
()
add_gtest_executable
(
test_custom_type test_custom_type.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_custom_type PRIVATE utility
)
endif
()
add_gtest_executable
(
test_type_convert_const type_convert_const.cpp
)
test/data_type/test_custom_type.cpp
0 → 100644
View file @
a4522ae3
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using
ck
::
bf8_t
;
using
ck
::
bhalf_t
;
using
ck
::
f8_t
;
using
ck
::
half_t
;
using
ck
::
Number
;
using
ck
::
type_convert
;
using
ck
::
vector_type
;
// Wrapping bool in a single-member struct must not change the size of the
// scalar or of any vector_type built from it.
TEST(Custom_bool, TestSize)
{
    using base_t = bool;
    struct custom_bool_t
    {
        base_t data;
    };

    ASSERT_EQ(sizeof(custom_bool_t), sizeof(base_t));
    ASSERT_EQ(sizeof(vector_type<custom_bool_t, 2>), sizeof(vector_type<base_t, 2>));
    ASSERT_EQ(sizeof(vector_type<custom_bool_t, 4>), sizeof(vector_type<base_t, 4>));
    ASSERT_EQ(sizeof(vector_type<custom_bool_t, 8>), sizeof(vector_type<base_t, 8>));
    ASSERT_EQ(sizeof(vector_type<custom_bool_t, 16>), sizeof(vector_type<base_t, 16>));
    ASSERT_EQ(sizeof(vector_type<custom_bool_t, 32>), sizeof(vector_type<base_t, 32>));
    ASSERT_EQ(sizeof(vector_type<custom_bool_t, 64>), sizeof(vector_type<base_t, 64>));
}
// Element-wise access through AsType<> on a vector of a bool wrapper:
// default construction, per-element assignment and whole-vector copy.
TEST(Custom_bool, TestAsType)
{
    struct custom_bool_t
    {
        using type = bool;
        type data;
        custom_bool_t() : data{type{}} {}
        custom_bool_t(type init) : data{init} {}
    };

    // number of elements in the vectors under test
    constexpr int num_elems = 4;
    std::vector<custom_bool_t::type> expected = {false, true, false, true};

    // source vector, default-constructed
    vector_type<custom_bool_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as false
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_bool_t>()(Number<idx>{}).data, false);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_bool_t>()(Number<idx>{}) = custom_bool_t{expected.at(idx)};
    });

    // copy the vector
    vector_type<custom_bool_t, num_elems> dst_vec{src_vec};

    // the copy must hold the expected values
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_bool_t>()(Number<idx>{}).data, expected.at(idx));
    });
}
// Reinterprets a 4-element vector of a bool wrapper as half-size vectors and
// checks that the first half carries the first two values.
TEST(Custom_bool, TestAsTypeReshape)
{
    struct custom_bool_t
    {
        using type = bool;
        type data;
        custom_bool_t() : data{type{}} {}
        custom_bool_t(type init) : data{init} {}
    };

    // number of elements in the full-size vector
    constexpr int num_elems = 4;
    std::vector<custom_bool_t::type> expected = {false, true, false, true};

    // source vector, default-constructed
    vector_type<custom_bool_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as false
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_bool_t>()(Number<idx>{}).data, false);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_bool_t>()(Number<idx>{}) = custom_bool_t{expected.at(idx)};
    });

    // copy the first half of the source vector
    using half_vec_t = vector_type<custom_bool_t, num_elems / 2>;
    half_vec_t dst_vec{src_vec.template AsType<half_vec_t::type>()(Number<0>{})};

    // the half-size copy must hold the leading expected values
    ck::static_for<0, num_elems / 2, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_bool_t>()(Number<idx>{}).data, expected.at(idx));
    });
}
// Wrapping int8_t in a single-member struct must not change the size of the
// scalar or of any vector_type built from it.
TEST(Custom_int8, TestSize)
{
    using base_t = int8_t;
    struct custom_int8_t
    {
        base_t data;
    };

    ASSERT_EQ(sizeof(custom_int8_t), sizeof(base_t));
    ASSERT_EQ(sizeof(vector_type<custom_int8_t, 2>), sizeof(vector_type<base_t, 2>));
    ASSERT_EQ(sizeof(vector_type<custom_int8_t, 4>), sizeof(vector_type<base_t, 4>));
    ASSERT_EQ(sizeof(vector_type<custom_int8_t, 8>), sizeof(vector_type<base_t, 8>));
    ASSERT_EQ(sizeof(vector_type<custom_int8_t, 16>), sizeof(vector_type<base_t, 16>));
    ASSERT_EQ(sizeof(vector_type<custom_int8_t, 32>), sizeof(vector_type<base_t, 32>));
    ASSERT_EQ(sizeof(vector_type<custom_int8_t, 64>), sizeof(vector_type<base_t, 64>));
}
// Element-wise access through AsType<> on a vector of an int8_t wrapper:
// default construction, per-element assignment and whole-vector copy.
TEST(Custom_int8, TestAsType)
{
    struct custom_int8_t
    {
        using type = int8_t;
        type data;
        custom_int8_t() : data{type{}} {}
        custom_int8_t(type init) : data{init} {}
    };

    // number of elements in the vectors under test
    constexpr int num_elems = 4;
    std::vector<custom_int8_t::type> expected = {3, -6, 8, -2};

    // source vector, default-constructed
    vector_type<custom_int8_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_int8_t>()(Number<idx>{}).data, 0);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_int8_t>()(Number<idx>{}) = custom_int8_t{expected.at(idx)};
    });

    // copy the vector
    vector_type<custom_int8_t, num_elems> dst_vec{src_vec};

    // the copy must hold the expected values
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_int8_t>()(Number<idx>{}).data, expected.at(idx));
    });
}
// Reinterprets a 4-element vector of an int8_t wrapper as half-size vectors
// and checks that the first half carries the first two values.
TEST(Custom_int8, TestAsTypeReshape)
{
    struct custom_int8_t
    {
        using type = int8_t;
        type data;
        custom_int8_t() : data{type{}} {}
        custom_int8_t(type init) : data{init} {}
    };

    // number of elements in the full-size vector
    constexpr int num_elems = 4;
    std::vector<custom_int8_t::type> expected = {3, -6, 8, -2};

    // source vector, default-constructed
    vector_type<custom_int8_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_int8_t>()(Number<idx>{}).data, 0);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_int8_t>()(Number<idx>{}) = custom_int8_t{expected.at(idx)};
    });

    // copy the first half of the source vector
    using half_vec_t = vector_type<custom_int8_t, num_elems / 2>;
    half_vec_t dst_vec{src_vec.template AsType<half_vec_t::type>()(Number<0>{})};

    // the half-size copy must hold the leading expected values
    ck::static_for<0, num_elems / 2, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_int8_t>()(Number<idx>{}).data, expected.at(idx));
    });
}
// Wrapping uint8_t in a single-member struct must not change the size of the
// scalar or of any vector_type built from it.
TEST(Custom_uint8, TestSize)
{
    using base_t = uint8_t;
    struct custom_uint8_t
    {
        base_t data;
    };

    ASSERT_EQ(sizeof(custom_uint8_t), sizeof(base_t));
    ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 2>), sizeof(vector_type<base_t, 2>));
    ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 4>), sizeof(vector_type<base_t, 4>));
    ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 8>), sizeof(vector_type<base_t, 8>));
    ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 16>), sizeof(vector_type<base_t, 16>));
    ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 32>), sizeof(vector_type<base_t, 32>));
    ASSERT_EQ(sizeof(vector_type<custom_uint8_t, 64>), sizeof(vector_type<base_t, 64>));
}
// Element-wise access through AsType<> on a vector of a uint8_t wrapper:
// default construction, per-element assignment and whole-vector copy.
TEST(Custom_uint8, TestAsType)
{
    struct custom_uint8_t
    {
        using type = uint8_t;
        type data;
        custom_uint8_t() : data{type{}} {}
        custom_uint8_t(type init) : data{init} {}
    };

    // number of elements in the vectors under test
    constexpr int num_elems = 4;
    std::vector<custom_uint8_t::type> expected = {3, 6, 8, 2};

    // source vector, default-constructed
    vector_type<custom_uint8_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_uint8_t>()(Number<idx>{}).data, 0);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_uint8_t>()(Number<idx>{}) =
            custom_uint8_t{expected.at(idx)};
    });

    // copy the vector
    vector_type<custom_uint8_t, num_elems> dst_vec{src_vec};

    // the copy must hold the expected values
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_uint8_t>()(Number<idx>{}).data,
                  expected.at(idx));
    });
}
// Reinterprets a 4-element vector of a uint8_t wrapper as half-size vectors
// and checks that the first half carries the first two values.
TEST(Custom_uint8, TestAsTypeReshape)
{
    struct custom_uint8_t
    {
        using type = uint8_t;
        type data;
        custom_uint8_t() : data{type{}} {}
        custom_uint8_t(type init) : data{init} {}
    };

    // number of elements in the full-size vector
    constexpr int num_elems = 4;
    std::vector<custom_uint8_t::type> expected = {3, 6, 8, 2};

    // source vector, default-constructed
    vector_type<custom_uint8_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_uint8_t>()(Number<idx>{}).data, 0);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_uint8_t>()(Number<idx>{}) =
            custom_uint8_t{expected.at(idx)};
    });

    // copy the first half of the source vector
    using half_vec_t = vector_type<custom_uint8_t, num_elems / 2>;
    half_vec_t dst_vec{src_vec.template AsType<half_vec_t::type>()(Number<0>{})};

    // the half-size copy must hold the leading expected values
    ck::static_for<0, num_elems / 2, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_uint8_t>()(Number<idx>{}).data,
                  expected.at(idx));
    });
}
// Wrapping _BitInt(8) in a single-member struct must not change the size of
// the scalar or of any vector_type built from it.
TEST(Custom_f8, TestSize)
{
    using base_t = _BitInt(8);
    struct custom_f8_t
    {
        base_t data;
    };

    ASSERT_EQ(sizeof(custom_f8_t), sizeof(base_t));
    ASSERT_EQ(sizeof(vector_type<custom_f8_t, 2>), sizeof(vector_type<base_t, 2>));
    ASSERT_EQ(sizeof(vector_type<custom_f8_t, 4>), sizeof(vector_type<base_t, 4>));
    ASSERT_EQ(sizeof(vector_type<custom_f8_t, 8>), sizeof(vector_type<base_t, 8>));
    ASSERT_EQ(sizeof(vector_type<custom_f8_t, 16>), sizeof(vector_type<base_t, 16>));
    ASSERT_EQ(sizeof(vector_type<custom_f8_t, 32>), sizeof(vector_type<base_t, 32>));
    ASSERT_EQ(sizeof(vector_type<custom_f8_t, 64>), sizeof(vector_type<base_t, 64>));
}
// Element-wise access through AsType<> on a vector of a _BitInt(8) wrapper:
// default construction, per-element assignment and whole-vector copy.
TEST(Custom_f8, TestAsType)
{
    struct custom_f8_t
    {
        using type = _BitInt(8);
        type data;
        custom_f8_t() : data{type{}} {}
        custom_f8_t(type init) : data{init} {}
    };

    // number of elements in the vectors under test
    constexpr int num_elems = 4;
    std::vector<custom_f8_t::type> expected = {type_convert<custom_f8_t::type>(0.3f),
                                               type_convert<custom_f8_t::type>(-0.6f),
                                               type_convert<custom_f8_t::type>(0.8f),
                                               type_convert<custom_f8_t::type>(-0.2f)};

    // source vector, default-constructed
    vector_type<custom_f8_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_f8_t>()(Number<idx>{}).data, 0);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_f8_t>()(Number<idx>{}) = custom_f8_t{expected.at(idx)};
    });

    // copy the vector
    vector_type<custom_f8_t, num_elems> dst_vec{src_vec};

    // the copy must hold the expected values
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_f8_t>()(Number<idx>{}).data, expected.at(idx));
    });
}
// Reinterprets a 4-element vector of a _BitInt(8) wrapper as half-size vectors
// and checks that the first half carries the first two values.
TEST(Custom_f8, TestAsTypeReshape)
{
    struct custom_f8_t
    {
        using type = _BitInt(8);
        type data;
        custom_f8_t() : data{type{}} {}
        custom_f8_t(type init) : data{init} {}
    };

    // number of elements in the full-size vector
    constexpr int num_elems = 4;
    std::vector<custom_f8_t::type> expected = {type_convert<custom_f8_t::type>(0.3f),
                                               type_convert<custom_f8_t::type>(-0.6f),
                                               type_convert<custom_f8_t::type>(0.8f),
                                               type_convert<custom_f8_t::type>(-0.2f)};

    // source vector, default-constructed
    vector_type<custom_f8_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_f8_t>()(Number<idx>{}).data, 0);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_f8_t>()(Number<idx>{}) = custom_f8_t{expected.at(idx)};
    });

    // copy the first half of the source vector
    using half_vec_t = vector_type<custom_f8_t, num_elems / 2>;
    half_vec_t dst_vec{src_vec.template AsType<half_vec_t::type>()(Number<0>{})};

    // the half-size copy must hold the leading expected values
    ck::static_for<0, num_elems / 2, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_f8_t>()(Number<idx>{}).data, expected.at(idx));
    });
}
// Wrapping unsigned _BitInt(8) in a single-member struct must not change the
// size of the scalar or of any vector_type built from it.
TEST(Custom_bf8, TestSize)
{
    using base_t = unsigned _BitInt(8);
    struct custom_bf8_t
    {
        base_t data;
    };

    ASSERT_EQ(sizeof(custom_bf8_t), sizeof(base_t));
    ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 2>), sizeof(vector_type<base_t, 2>));
    ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 4>), sizeof(vector_type<base_t, 4>));
    ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 8>), sizeof(vector_type<base_t, 8>));
    ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 16>), sizeof(vector_type<base_t, 16>));
    ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 32>), sizeof(vector_type<base_t, 32>));
    ASSERT_EQ(sizeof(vector_type<custom_bf8_t, 64>), sizeof(vector_type<base_t, 64>));
}
// Element-wise access through AsType<> on a vector of an unsigned _BitInt(8)
// wrapper: default construction, per-element assignment and whole-vector copy.
TEST(Custom_bf8, TestAsType)
{
    struct custom_bf8_t
    {
        using type = unsigned _BitInt(8);
        type data;
        custom_bf8_t() : data{type{}} {}
        custom_bf8_t(type init) : data{init} {}
    };

    // number of elements in the vectors under test
    constexpr int num_elems = 4;
    std::vector<custom_bf8_t::type> expected = {type_convert<custom_bf8_t::type>(0.3f),
                                                type_convert<custom_bf8_t::type>(-0.6f),
                                                type_convert<custom_bf8_t::type>(0.8f),
                                                type_convert<custom_bf8_t::type>(-0.2f)};

    // source vector, default-constructed
    vector_type<custom_bf8_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_bf8_t>()(Number<idx>{}).data, 0);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_bf8_t>()(Number<idx>{}) = custom_bf8_t{expected.at(idx)};
    });

    // copy the vector
    vector_type<custom_bf8_t, num_elems> dst_vec{src_vec};

    // the copy must hold the expected values
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_bf8_t>()(Number<idx>{}).data, expected.at(idx));
    });
}
// Reinterprets a 4-element vector of an unsigned _BitInt(8) wrapper as
// half-size vectors and checks that the first half carries the first two values.
TEST(Custom_bf8, TestAsTypeReshape)
{
    struct custom_bf8_t
    {
        using type = unsigned _BitInt(8);
        type data;
        custom_bf8_t() : data{type{}} {}
        custom_bf8_t(type init) : data{init} {}
    };

    // number of elements in the full-size vector
    constexpr int num_elems = 4;
    std::vector<custom_bf8_t::type> expected = {type_convert<custom_bf8_t::type>(0.3f),
                                                type_convert<custom_bf8_t::type>(-0.6f),
                                                type_convert<custom_bf8_t::type>(0.8f),
                                                type_convert<custom_bf8_t::type>(-0.2f)};

    // source vector, default-constructed
    vector_type<custom_bf8_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_bf8_t>()(Number<idx>{}).data, 0);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_bf8_t>()(Number<idx>{}) = custom_bf8_t{expected.at(idx)};
    });

    // copy the first half of the source vector
    using half_vec_t = vector_type<custom_bf8_t, num_elems / 2>;
    half_vec_t dst_vec{src_vec.template AsType<half_vec_t::type>()(Number<0>{})};

    // the half-size copy must hold the leading expected values
    ck::static_for<0, num_elems / 2, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_bf8_t>()(Number<idx>{}).data, expected.at(idx));
    });
}
// Wrapping half_t in a single-member struct must not change the size of the
// scalar or of any vector_type built from it.
TEST(Custom_half, TestSize)
{
    using base_t = half_t;
    struct custom_half_t
    {
        base_t data;
    };

    ASSERT_EQ(sizeof(custom_half_t), sizeof(base_t));
    ASSERT_EQ(sizeof(vector_type<custom_half_t, 2>), sizeof(vector_type<base_t, 2>));
    ASSERT_EQ(sizeof(vector_type<custom_half_t, 4>), sizeof(vector_type<base_t, 4>));
    ASSERT_EQ(sizeof(vector_type<custom_half_t, 8>), sizeof(vector_type<base_t, 8>));
    ASSERT_EQ(sizeof(vector_type<custom_half_t, 16>), sizeof(vector_type<base_t, 16>));
    ASSERT_EQ(sizeof(vector_type<custom_half_t, 32>), sizeof(vector_type<base_t, 32>));
    ASSERT_EQ(sizeof(vector_type<custom_half_t, 64>), sizeof(vector_type<base_t, 64>));
}
// Element-wise access through AsType<> on a vector of a half_t wrapper:
// default construction, per-element assignment and whole-vector copy.
TEST(Custom_half, TestAsType)
{
    struct custom_half_t
    {
        using type = half_t;
        type data;
        custom_half_t() : data{type{}} {}
        custom_half_t(type init) : data{init} {}
    };

    // number of elements in the vectors under test
    constexpr int num_elems = 4;
    std::vector<custom_half_t::type> expected = {
        half_t{0.3f}, half_t{-0.6f}, half_t{0.8f}, half_t{-0.2f}};

    // source vector, default-constructed
    vector_type<custom_half_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as zero
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_half_t>()(Number<idx>{}).data,
                  type_convert<half_t>(0.0f));
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_half_t>()(Number<idx>{}) = custom_half_t{expected.at(idx)};
    });

    // copy the vector
    vector_type<custom_half_t, num_elems> dst_vec{src_vec};

    // the copy must hold the expected values
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_half_t>()(Number<idx>{}).data, expected.at(idx));
    });
}
// Reinterprets a 4-element vector of a half_t wrapper as half-size vectors
// and checks that the first half carries the first two values.
TEST(Custom_half, TestAsTypeReshape)
{
    struct custom_half_t
    {
        using type = half_t;
        type data;
        custom_half_t() : data{type{}} {}
        custom_half_t(type init) : data{init} {}
    };

    // number of elements in the full-size vector
    constexpr int num_elems = 4;
    std::vector<custom_half_t::type> expected = {
        half_t{0.3f}, half_t{-0.6f}, half_t{0.8f}, half_t{-0.2f}};

    // source vector, default-constructed
    vector_type<custom_half_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as zero
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_half_t>()(Number<idx>{}).data,
                  type_convert<half_t>(0.0f));
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_half_t>()(Number<idx>{}) = custom_half_t{expected.at(idx)};
    });

    // copy the first half of the source vector
    using half_vec_t = vector_type<custom_half_t, num_elems / 2>;
    half_vec_t dst_vec{src_vec.template AsType<half_vec_t::type>()(Number<0>{})};

    // the half-size copy must hold the leading expected values
    ck::static_for<0, num_elems / 2, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_half_t>()(Number<idx>{}).data, expected.at(idx));
    });
}
// Wrapping bhalf_t in a single-member struct must not change the size of the
// scalar or of any vector_type built from it.
TEST(Custom_bhalf, TestSize)
{
    using base_t = bhalf_t;
    struct custom_bhalf_t
    {
        base_t data;
    };

    ASSERT_EQ(sizeof(custom_bhalf_t), sizeof(base_t));
    ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 2>), sizeof(vector_type<base_t, 2>));
    ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 4>), sizeof(vector_type<base_t, 4>));
    ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 8>), sizeof(vector_type<base_t, 8>));
    ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 16>), sizeof(vector_type<base_t, 16>));
    ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 32>), sizeof(vector_type<base_t, 32>));
    ASSERT_EQ(sizeof(vector_type<custom_bhalf_t, 64>), sizeof(vector_type<base_t, 64>));
}
// Element-wise access through AsType<> on a vector of a bhalf_t wrapper:
// default construction, per-element assignment and whole-vector copy.
TEST(Custom_bhalf, TestAsType)
{
    struct custom_bhalf_t
    {
        using type = bhalf_t;
        type data;
        custom_bhalf_t() : data{type{}} {}
        custom_bhalf_t(type init) : data{init} {}
    };

    // number of elements in the vectors under test
    constexpr int num_elems = 4;
    std::vector<custom_bhalf_t::type> expected = {type_convert<bhalf_t>(0.3f),
                                                  type_convert<bhalf_t>(-0.6f),
                                                  type_convert<bhalf_t>(0.8f),
                                                  type_convert<bhalf_t>(-0.2f)};

    // source vector, default-constructed
    vector_type<custom_bhalf_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as zero
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_bhalf_t>()(Number<idx>{}).data,
                  type_convert<bhalf_t>(0.0f));
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_bhalf_t>()(Number<idx>{}) =
            custom_bhalf_t{expected.at(idx)};
    });

    // copy the vector
    vector_type<custom_bhalf_t, num_elems> dst_vec{src_vec};

    // the copy must hold the expected values
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_bhalf_t>()(Number<idx>{}).data,
                  expected.at(idx));
    });
}
// Reinterprets a 4-element vector of a bhalf_t wrapper as half-size vectors
// and checks that the first half carries the first two values.
TEST(Custom_bhalf, TestAsTypeReshape)
{
    struct custom_bhalf_t
    {
        using type = bhalf_t;
        type data;
        custom_bhalf_t() : data{type{}} {}
        custom_bhalf_t(type init) : data{init} {}
    };

    // number of elements in the full-size vector
    constexpr int num_elems = 4;
    std::vector<custom_bhalf_t::type> expected = {type_convert<bhalf_t>(0.3f),
                                                  type_convert<bhalf_t>(-0.6f),
                                                  type_convert<bhalf_t>(0.8f),
                                                  type_convert<bhalf_t>(-0.2f)};

    // source vector, default-constructed
    vector_type<custom_bhalf_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as zero
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_bhalf_t>()(Number<idx>{}).data,
                  type_convert<bhalf_t>(0.0f));
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_bhalf_t>()(Number<idx>{}) =
            custom_bhalf_t{expected.at(idx)};
    });

    // copy the first half of the source vector
    using half_vec_t = vector_type<custom_bhalf_t, num_elems / 2>;
    half_vec_t dst_vec{src_vec.template AsType<half_vec_t::type>()(Number<0>{})};

    // the half-size copy must hold the leading expected values
    ck::static_for<0, num_elems / 2, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_bhalf_t>()(Number<idx>{}).data,
                  expected.at(idx));
    });
}
// Wrapping float in a single-member struct must not change the size of the
// scalar or of any vector_type built from it.
TEST(Custom_float, TestSize)
{
    using base_t = float;
    struct custom_float_t
    {
        base_t data;
    };

    ASSERT_EQ(sizeof(custom_float_t), sizeof(base_t));
    ASSERT_EQ(sizeof(vector_type<custom_float_t, 2>), sizeof(vector_type<base_t, 2>));
    ASSERT_EQ(sizeof(vector_type<custom_float_t, 4>), sizeof(vector_type<base_t, 4>));
    ASSERT_EQ(sizeof(vector_type<custom_float_t, 8>), sizeof(vector_type<base_t, 8>));
    ASSERT_EQ(sizeof(vector_type<custom_float_t, 16>), sizeof(vector_type<base_t, 16>));
    ASSERT_EQ(sizeof(vector_type<custom_float_t, 32>), sizeof(vector_type<base_t, 32>));
    ASSERT_EQ(sizeof(vector_type<custom_float_t, 64>), sizeof(vector_type<base_t, 64>));
}
// Element-wise access through AsType<> on a vector of a float wrapper:
// default construction, per-element assignment and whole-vector copy.
TEST(Custom_float, TestAsType)
{
    struct custom_float_t
    {
        using type = float;
        type data;
        custom_float_t() : data{type{}} {}
        custom_float_t(type init) : data{init} {}
    };

    // number of elements in the vectors under test
    constexpr int num_elems = 4;
    std::vector<custom_float_t::type> expected = {0.3f, -0.6f, 0.8f, -0.2f};

    // source vector, default-constructed
    vector_type<custom_float_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0.0f
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_float_t>()(Number<idx>{}).data, 0.0f);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_float_t>()(Number<idx>{}) =
            custom_float_t{expected.at(idx)};
    });

    // copy the vector
    vector_type<custom_float_t, num_elems> dst_vec{src_vec};

    // the copy must hold the expected values
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_float_t>()(Number<idx>{}).data,
                  expected.at(idx));
    });
}
// Reinterprets a 4-element vector of a float wrapper as half-size vectors
// and checks that the first half carries the first two values.
TEST(Custom_float, TestAsTypeReshape)
{
    struct custom_float_t
    {
        using type = float;
        type data;
        custom_float_t() : data{type{}} {}
        custom_float_t(type init) : data{init} {}
    };

    // number of elements in the full-size vector
    constexpr int num_elems = 4;
    std::vector<custom_float_t::type> expected = {0.3f, -0.6f, 0.8f, -0.2f};

    // source vector, default-constructed
    vector_type<custom_float_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0.0f
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_float_t>()(Number<idx>{}).data, 0.0f);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_float_t>()(Number<idx>{}) =
            custom_float_t{expected.at(idx)};
    });

    // copy the first half of the source vector
    using half_vec_t = vector_type<custom_float_t, num_elems / 2>;
    half_vec_t dst_vec{src_vec.template AsType<half_vec_t::type>()(Number<0>{})};

    // the half-size copy must hold the leading expected values
    ck::static_for<0, num_elems / 2, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_float_t>()(Number<idx>{}).data,
                  expected.at(idx));
    });
}
// Wrapping double in a single-member struct must not change the size of the
// scalar or of any vector_type built from it.
TEST(Custom_double, TestSize)
{
    using base_t = double;
    struct custom_double_t
    {
        base_t data;
    };

    ASSERT_EQ(sizeof(custom_double_t), sizeof(base_t));
    ASSERT_EQ(sizeof(vector_type<custom_double_t, 2>), sizeof(vector_type<base_t, 2>));
    ASSERT_EQ(sizeof(vector_type<custom_double_t, 4>), sizeof(vector_type<base_t, 4>));
    ASSERT_EQ(sizeof(vector_type<custom_double_t, 8>), sizeof(vector_type<base_t, 8>));
    ASSERT_EQ(sizeof(vector_type<custom_double_t, 16>), sizeof(vector_type<base_t, 16>));
    ASSERT_EQ(sizeof(vector_type<custom_double_t, 32>), sizeof(vector_type<base_t, 32>));
    ASSERT_EQ(sizeof(vector_type<custom_double_t, 64>), sizeof(vector_type<base_t, 64>));
}
// Element-wise access through AsType<> on a vector of a double wrapper:
// default construction, per-element assignment and whole-vector copy.
TEST(Custom_double, TestAsType)
{
    struct custom_double_t
    {
        using type = double;
        type data;
        custom_double_t() : data{type{}} {}
        custom_double_t(type init) : data{init} {}
    };

    // number of elements in the vectors under test
    constexpr int num_elems = 4;
    std::vector<custom_double_t::type> expected = {0.3, 0.6, 0.8, 0.2};

    // source vector, default-constructed
    vector_type<custom_double_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0.0
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_double_t>()(Number<idx>{}).data, 0.0);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_double_t>()(Number<idx>{}) =
            custom_double_t{expected.at(idx)};
    });

    // copy the vector
    vector_type<custom_double_t, num_elems> dst_vec{src_vec};

    // the copy must hold the expected values
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_double_t>()(Number<idx>{}).data,
                  expected.at(idx));
    });
}
// Reinterprets a 4-element vector of a double wrapper as half-size vectors
// and checks that the first half carries the first two values.
TEST(Custom_double, TestAsTypeReshape)
{
    struct custom_double_t
    {
        using type = double;
        type data;
        custom_double_t() : data{type{}} {}
        custom_double_t(type init) : data{init} {}
    };

    // number of elements in the full-size vector
    constexpr int num_elems = 4;
    std::vector<custom_double_t::type> expected = {0.3, 0.6, 0.8, 0.2};

    // source vector, default-constructed
    vector_type<custom_double_t, num_elems> src_vec;

    // each element is expected to come out of the default constructor as 0.0
    // (ASSERT_* inside the lambda only leaves the lambda, not the test body)
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        ASSERT_EQ(src_vec.template AsType<custom_double_t>()(Number<idx>{}).data, 0.0);
    });

    // write the expected values into the source vector
    ck::static_for<0, num_elems, 1>{}([&](auto idx) {
        src_vec.template AsType<custom_double_t>()(Number<idx>{}) =
            custom_double_t{expected.at(idx)};
    });

    // copy the first half of the source vector
    using half_vec_t = vector_type<custom_double_t, num_elems / 2>;
    half_vec_t dst_vec{src_vec.template AsType<half_vec_t::type>()(Number<0>{})};

    // the half-size copy must hold the leading expected values
    ck::static_for<0, num_elems / 2, 1>{}([&](auto idx) {
        ASSERT_EQ(dst_vec.template AsType<custom_double_t>()(Number<idx>{}).data,
                  expected.at(idx));
    });
}
TEST(Complex_half, TestSize)
{
    // Two-component custom type built from a pair of half_t values.
    struct complex_half_t
    {
        half_t real;
        half_t img;
    };

    // The scalar custom type occupies exactly two half_t.
    ASSERT_EQ(sizeof(complex_half_t), 2 * sizeof(half_t));

    // A vector of N complex_half_t must be twice the size of a vector of N half_t,
    // checked for N = 2, 4, 8, 16, 32, 64.
    ASSERT_EQ(sizeof(vector_type<complex_half_t, 2>), 2 * sizeof(vector_type<half_t, 2>));
    ASSERT_EQ(sizeof(vector_type<complex_half_t, 4>), 2 * sizeof(vector_type<half_t, 4>));
    ASSERT_EQ(sizeof(vector_type<complex_half_t, 8>), 2 * sizeof(vector_type<half_t, 8>));
    ASSERT_EQ(sizeof(vector_type<complex_half_t, 16>), 2 * sizeof(vector_type<half_t, 16>));
    ASSERT_EQ(sizeof(vector_type<complex_half_t, 32>), 2 * sizeof(vector_type<half_t, 32>));
    ASSERT_EQ(sizeof(vector_type<complex_half_t, 64>), 2 * sizeof(vector_type<half_t, 64>));
}
TEST(Complex_half, TestAlignment)
{
    // Two-component custom type built from a pair of half_t values.
    struct complex_half_t
    {
        half_t real;
        half_t img;
    };

    // A vector of N complex_half_t must have twice the alignment of a vector of
    // N half_t, checked for N = 2, 4, 8, 16, 32, 64.
    ASSERT_EQ(alignof(vector_type<complex_half_t, 2>), 2 * alignof(vector_type<half_t, 2>));
    ASSERT_EQ(alignof(vector_type<complex_half_t, 4>), 2 * alignof(vector_type<half_t, 4>));
    ASSERT_EQ(alignof(vector_type<complex_half_t, 8>), 2 * alignof(vector_type<half_t, 8>));
    ASSERT_EQ(alignof(vector_type<complex_half_t, 16>), 2 * alignof(vector_type<half_t, 16>));
    ASSERT_EQ(alignof(vector_type<complex_half_t, 32>), 2 * alignof(vector_type<half_t, 32>));
    ASSERT_EQ(alignof(vector_type<complex_half_t, 64>), 2 * alignof(vector_type<half_t, 64>));
}
// Checks that a vector_type of a two-component custom type (complex_half_t)
// default-constructs to zeros, accepts per-element assignment through
// AsType<complex_half_t>(), and keeps all values when copy-constructed.
TEST(Complex_half, TestAsType)
{
    // Custom element type: a (real, img) pair of half_t values.
    struct complex_half_t
    {
        using type = half_t;
        type real;
        type img;

        complex_half_t() : real{type{}}, img{type{}} {}
        complex_half_t(type real_init, type img_init) : real{real_init}, img{img_init} {}
    };

    // test size
    const int size = 4;
    // number of scalar components per custom element
    const int num_elem = sizeof(complex_half_t) / sizeof(complex_half_t::type);

    // Flat test data: element i uses entries [num_elem * i] (real) and
    // [num_elem * i + 1] (img).
    std::vector<half_t> test_vec = {half_t{0.3f},
                                    half_t{-0.6f},
                                    half_t{0.8f},
                                    half_t{-0.2f},
                                    half_t{0.5f},
                                    half_t{-0.7f},
                                    half_t{0.9f},
                                    half_t{-0.3f}};

    // reference vector
    vector_type<complex_half_t, size> right_vec;

    // check default CTOR: both components of every element must be zero
    // NOTE(review): per GoogleTest docs a failing ASSERT_* returns only from the
    // enclosing function, which here is the lambda — confirm this is acceptable.
    ck::static_for<0, size, 1>{}([&](auto i) {
        ASSERT_EQ(right_vec.template AsType<complex_half_t>()(Number<i>{}).real,
                  type_convert<half_t>(0.0f));
        ASSERT_EQ(right_vec.template AsType<complex_half_t>()(Number<i>{}).img,
                  type_convert<half_t>(0.0f));
    });

    // assign test values to the vector
    ck::static_for<0, size, 1>{}([&](auto i) {
        right_vec.template AsType<complex_half_t>()(Number<i>{}) =
            complex_half_t{test_vec.at(num_elem * i), test_vec.at(num_elem * i + 1)};
    });

    // copy the vector
    vector_type<complex_half_t, size> left_vec{right_vec};

    // check if values were copied correctly
    ck::static_for<0, size, 1>{}([&](auto i) {
        ASSERT_EQ(left_vec.template AsType<complex_half_t>()(Number<i>{}).real,
                  test_vec.at(num_elem * i));
        ASSERT_EQ(left_vec.template AsType<complex_half_t>()(Number<i>{}).img,
                  test_vec.at(num_elem * i + 1));
    });
}
// Checks that a vector_type of a two-component custom type (complex_half_t)
// can be viewed as half-width vectors through AsType<>(), and that a vector
// copy-constructed from the first half-width view holds the first size / 2
// elements.
TEST(Complex_half, TestAsTypeReshape)
{
    // Custom element type: a (real, img) pair of half_t values.
    struct complex_half_t
    {
        using type = half_t;
        type real;
        type img;

        complex_half_t() : real{type{}}, img{type{}} {}
        complex_half_t(type real_init, type img_init) : real{real_init}, img{img_init} {}
    };

    // test size
    const int size = 4;
    // number of scalar components per custom element
    const int num_elem = sizeof(complex_half_t) / sizeof(complex_half_t::type);

    // Flat test data: element i uses entries [num_elem * i] (real) and
    // [num_elem * i + 1] (img).
    std::vector<half_t> test_vec = {half_t{0.3f},
                                    half_t{-0.6f},
                                    half_t{0.8f},
                                    half_t{-0.2f},
                                    half_t{0.5f},
                                    half_t{-0.7f},
                                    half_t{0.9f},
                                    half_t{-0.3f}};

    // reference vector
    vector_type<complex_half_t, size> right_vec;

    // check default CTOR: both components of every element must be zero
    // NOTE(review): per GoogleTest docs a failing ASSERT_* returns only from the
    // enclosing function, which here is the lambda — confirm this is acceptable.
    ck::static_for<0, size, 1>{}([&](auto i) {
        ASSERT_EQ(right_vec.template AsType<complex_half_t>()(Number<i>{}).real,
                  type_convert<half_t>(0.0f));
        ASSERT_EQ(right_vec.template AsType<complex_half_t>()(Number<i>{}).img,
                  type_convert<half_t>(0.0f));
    });

    // assign test values to the vector
    ck::static_for<0, size, 1>{}([&](auto i) {
        right_vec.template AsType<complex_half_t>()(Number<i>{}) =
            complex_half_t{test_vec.at(num_elem * i), test_vec.at(num_elem * i + 1)};
    });

    // copy the first half of a vector: view right_vec as vectors of size / 2
    // elements and copy-construct from the view at index 0
    vector_type<complex_half_t, size / 2> left_vec{
        right_vec.template AsType<vector_type<complex_half_t, size / 2>::type>()(Number<0>{})};

    // check if values were copied correctly
    ck::static_for<0, size / 2, 1>{}([&](auto i) {
        ASSERT_EQ(left_vec.template AsType<complex_half_t>()(Number<i>{}).real,
                  test_vec.at(num_elem * i));
        ASSERT_EQ(left_vec.template AsType<complex_half_t>()(Number<i>{}).img,
                  test_vec.at(num_elem * i + 1));
    });
}
Prev
1
…
17
18
19
20
21
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment