Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
c8c016dd
Commit
c8c016dd
authored
Dec 13, 2024
by
aska-0096
Browse files
Merge branch 'develop' of
https://github.com/ROCm/composable_kernel
into update_cka8w8
parents
e8ca3daf
4e731776
Changes
399
Show whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
1511 additions
and
383 deletions
+1511
-383
test/ck_tile/grouped_gemm/CMakeLists.txt
test/ck_tile/grouped_gemm/CMakeLists.txt
+4
-0
test/ck_tile/grouped_gemm/test_grouped_gemm.cpp
test/ck_tile/grouped_gemm/test_grouped_gemm.cpp
+29
-0
test/ck_tile/grouped_gemm/test_grouped_gemm_ut_cases.inc
test/ck_tile/grouped_gemm/test_grouped_gemm_ut_cases.inc
+25
-0
test/ck_tile/grouped_gemm/test_grouped_gemm_util.hpp
test/ck_tile/grouped_gemm/test_grouped_gemm_util.hpp
+282
-0
test/data_type/CMakeLists.txt
test/data_type/CMakeLists.txt
+31
-6
test/data_type/test_bf8_fnuz.cpp
test/data_type/test_bf8_fnuz.cpp
+73
-62
test/data_type/test_bf8_ocp.cpp
test/data_type/test_bf8_ocp.cpp
+268
-0
test/data_type/test_custom_type.cpp
test/data_type/test_custom_type.cpp
+214
-20
test/data_type/test_fp8_fnuz.cpp
test/data_type/test_fp8_fnuz.cpp
+83
-66
test/data_type/test_fp8_ocp.cpp
test/data_type/test_fp8_ocp.cpp
+250
-0
test/grouped_convnd_bwd_data/CMakeLists.txt
test/grouped_convnd_bwd_data/CMakeLists.txt
+6
-2
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_wmma.cpp
...ped_convnd_bwd_data/test_grouped_convnd_bwd_data_wmma.cpp
+108
-0
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_xdl.cpp
...uped_convnd_bwd_data/test_grouped_convnd_bwd_data_xdl.cpp
+25
-14
test/grouped_gemm/CMakeLists.txt
test/grouped_gemm/CMakeLists.txt
+0
-6
test/grouped_gemm/test_grouped_gemm_splitk_xdl.cpp
test/grouped_gemm/test_grouped_gemm_splitk_xdl.cpp
+28
-18
test/grouped_gemm/test_grouped_gemm_ut_cases.inc
test/grouped_gemm/test_grouped_gemm_ut_cases.inc
+13
-118
test/grouped_gemm/test_grouped_gemm_util.hpp
test/grouped_gemm/test_grouped_gemm_util.hpp
+70
-69
test/pool/test_avg_pool2d_fwd.cpp
test/pool/test_avg_pool2d_fwd.cpp
+1
-1
test/pool/test_max_pool2d_fwd.cpp
test/pool/test_max_pool2d_fwd.cpp
+1
-1
No files found.
test/ck_tile/grouped_gemm/CMakeLists.txt
0 → 100644
View file @
c8c016dd
# Currently ck_tile is only built on gfx9
if
(
GPU_TARGETS MATCHES
"gfx9"
)
add_gtest_executable
(
test_ck_tile_grouped_gemm test_grouped_gemm.cpp
)
endif
()
test/ck_tile/grouped_gemm/test_grouped_gemm.cpp
0 → 100644
View file @
c8c016dd
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include "gtest/gtest.h"
#include "ck_tile/host.hpp"
#include "test_grouped_gemm_util.hpp"
using
F16
=
ck_tile
::
half_t
;
using
F32
=
float
;
using
Row
=
ck_tile
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck_tile
::
tensor_layout
::
gemm
::
ColumnMajor
;
// clang-format off
using
KernelTypes
=
::
testing
::
Types
<
// ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, CDataType
std
::
tuple
<
Row
,
Row
,
Row
,
F16
,
F16
,
F32
,
F16
>
,
//std::tuple< Col, Row, Row, F16, F16, F32, F16>,
std
::
tuple
<
Row
,
Col
,
Row
,
F16
,
F16
,
F32
,
F16
>
//,
//std::tuple< Col, Col, Row, F16, F16, F32, F16>
>
;
// clang-format on
TYPED_TEST_SUITE
(
TestCkTileGroupedGemm
,
KernelTypes
);
#include "test_grouped_gemm_ut_cases.inc"
test/ck_tile/grouped_gemm/test_grouped_gemm_ut_cases.inc
0 → 100644
View file @
c8c016dd
#pragma once
TYPED_TEST
(
TestCkTileGroupedGemm
,
Basic
)
{
const
int
group_count
=
16
;
std
::
vector
<
int
>
Ms
;
std
::
vector
<
int
>
Ns
;
std
::
vector
<
int
>
Ks
;
std
::
vector
<
int
>
stride_As
;
std
::
vector
<
int
>
stride_Bs
;
std
::
vector
<
int
>
stride_Cs
;
for
(
int
i
=
0
;
i
<
group_count
;
i
++
)
{
Ms
.
push_back
(
256
+
256
*
i
);
Ns
.
push_back
(
128
+
128
*
i
);
Ks
.
push_back
(
128
+
64
*
i
);
stride_As
.
push_back
(
Ks
[
i
]);
stride_Bs
.
push_back
(
Ks
[
i
]);
stride_Cs
.
push_back
(
Ns
[
i
]);
}
this
->
Run
(
Ms
,
Ns
,
Ks
,
stride_As
,
stride_Bs
,
stride_Cs
,
group_count
);
}
test/ck_tile/grouped_gemm/test_grouped_gemm_util.hpp
0 → 100644
View file @
c8c016dd
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <sstream>
#include <gtest/gtest.h>
#include "ck_tile/core.hpp"
#include "ck_tile/host.hpp"
#include "ck_tile/host/kernel_launch.hpp"
#include "ck_tile/ops/epilogue.hpp"
#include "ck_tile/ops/gemm.hpp"
#include "ck_tile/ops/gemm/kernel/grouped_gemm_kernel.hpp"
template
<
typename
Tuple
>
class
TestCkTileGroupedGemm
:
public
::
testing
::
Test
{
protected:
using
ALayout
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
using
BLayout
=
std
::
tuple_element_t
<
1
,
Tuple
>
;
using
CLayout
=
std
::
tuple_element_t
<
2
,
Tuple
>
;
using
ADataType
=
std
::
tuple_element_t
<
3
,
Tuple
>
;
using
BDataType
=
std
::
tuple_element_t
<
4
,
Tuple
>
;
using
AccDataType
=
std
::
tuple_element_t
<
5
,
Tuple
>
;
using
CDataType
=
std
::
tuple_element_t
<
6
,
Tuple
>
;
struct
GroupedGemKernelParam
{
static
const
bool
kPadM
=
false
;
static
const
bool
kPadN
=
false
;
static
const
bool
kPadK
=
false
;
static
const
bool
kTilePermute
=
false
;
static
const
ck_tile
::
index_t
kOutputRank
=
2
;
static
const
int
kBlockPerCu
=
1
;
static
const
ck_tile
::
index_t
M_Tile
=
128
;
static
const
ck_tile
::
index_t
N_Tile
=
128
;
static
const
ck_tile
::
index_t
K_Tile
=
32
;
static
const
ck_tile
::
index_t
M_Warp
=
2
;
static
const
ck_tile
::
index_t
N_Warp
=
2
;
static
const
ck_tile
::
index_t
K_Warp
=
1
;
static
const
ck_tile
::
index_t
M_Warp_Tile
=
32
;
static
const
ck_tile
::
index_t
N_Warp_Tile
=
32
;
static
const
ck_tile
::
index_t
K_Warp_Tile
=
8
;
};
using
CodegenGemmShape
=
ck_tile
::
TileGemmShape
<
ck_tile
::
sequence
<
GroupedGemKernelParam
::
M_Tile
,
GroupedGemKernelParam
::
N_Tile
,
GroupedGemKernelParam
::
K_Tile
>
,
ck_tile
::
sequence
<
GroupedGemKernelParam
::
M_Warp
,
GroupedGemKernelParam
::
N_Warp
,
GroupedGemKernelParam
::
K_Warp
>
,
ck_tile
::
sequence
<
GroupedGemKernelParam
::
M_Warp_Tile
,
GroupedGemKernelParam
::
N_Warp_Tile
,
GroupedGemKernelParam
::
K_Warp_Tile
>>
;
using
TilePartitioner
=
ck_tile
::
GemmTile1DPartitioner
<
CodegenGemmShape
>
;
template
<
typename
CLayout
>
using
GemmEpilogue
=
std
::
conditional_t
<
std
::
is_same_v
<
CLayout
,
ck_tile
::
tensor_layout
::
gemm
::
ColumnMajor
>
,
ck_tile
::
CShuffleEpilogue
<
ck_tile
::
CShuffleEpilogueProblem
<
AccDataType
,
CDataType
,
GroupedGemKernelParam
::
kPadM
,
GroupedGemKernelParam
::
kPadN
,
GroupedGemKernelParam
::
kTilePermute
,
GroupedGemKernelParam
::
kOutputRank
,
1
,
0
,
TilePartitioner
::
MPerBlock
,
TilePartitioner
::
NPerBlock
>>
,
ck_tile
::
Default2DEpilogue
<
ck_tile
::
Default2DEpilogueProblem
<
AccDataType
,
CDataType
,
GroupedGemKernelParam
::
kPadM
,
GroupedGemKernelParam
::
kPadN
>>>
;
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
using
CodegenGemmTraits
=
ck_tile
::
TileGemmTraits
<
GroupedGemKernelParam
::
kPadM
,
GroupedGemKernelParam
::
kPadN
,
GroupedGemKernelParam
::
kPadK
,
ALayout
,
BLayout
,
CLayout
>
;
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
using
CodegenPipelineProblem
=
ck_tile
::
GemmPipelineProblem
<
ADataType
,
BDataType
,
AccDataType
,
CodegenGemmShape
,
CodegenGemmTraits
<
ALayout
,
BLayout
,
CLayout
>>
;
using
CodegenGemmPolicy
=
ck_tile
::
UniversalGemmPipelineAgBgCrPolicy
;
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
using
CodegenGemmPipeline
=
ck_tile
::
GemmPipelineAGmemBGmemCRegV1
<
CodegenPipelineProblem
<
ALayout
,
BLayout
,
CLayout
>
,
CodegenGemmPolicy
>
;
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
using
Kernel
=
ck_tile
::
GroupedGemmKernel
<
TilePartitioner
,
CodegenGemmPipeline
<
ALayout
,
BLayout
,
CLayout
>
,
GemmEpilogue
<
CLayout
>>
;
using
grouped_gemm_kargs
=
ck_tile
::
GroupedGemmHostArgs
;
std
::
size_t
GetWorkspaceSize
(
const
std
::
vector
<
grouped_gemm_kargs
>&
gemm_descs
)
{
return
Kernel
<
std
::
nullptr_t
,
std
::
nullptr_t
,
std
::
nullptr_t
>::
GetWorkSpaceSize
(
gemm_descs
);
}
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
void
invoke_grouped_gemm
(
const
std
::
vector
<
grouped_gemm_kargs
>&
gemm_descs
,
const
ck_tile
::
stream_config
&
s
,
void
*
p_workspace_
)
{
using
GroupedGemmKernel
=
Kernel
<
ALayout
,
BLayout
,
CLayout
>
;
auto
arguments
=
GroupedGemmKernel
::
MakeKargs
(
gemm_descs
);
const
dim3
grids
=
GroupedGemmKernel
::
GridSize
(
gemm_descs
);
constexpr
dim3
blocks
=
GroupedGemmKernel
::
BlockSize
();
ck_tile
::
hip_check_error
(
hipMemcpyWithStream
(
p_workspace_
,
arguments
.
data
(),
arguments
.
size
()
*
sizeof
(
typename
GroupedGemmKernel
::
GemmTransKernelArg
),
hipMemcpyHostToDevice
,
s
.
stream_id_
));
if
(
s
.
log_level_
>
0
)
{
std
::
cout
<<
"Launching kernel with args:"
<<
" grid: {"
<<
grids
.
x
<<
", "
<<
grids
.
y
<<
", "
<<
grids
.
z
<<
"}"
<<
", blocks: {"
<<
blocks
.
x
<<
", "
<<
blocks
.
y
<<
", "
<<
blocks
.
z
<<
"}"
<<
std
::
endl
;
}
ck_tile
::
launch_kernel
(
s
,
ck_tile
::
make_kernel
<
blocks
.
x
,
GroupedGemKernelParam
::
kBlockPerCu
>
(
GroupedGemmKernel
{},
grids
,
blocks
,
0
,
ck_tile
::
cast_pointer_to_constant_address_space
(
p_workspace_
),
gemm_descs
.
size
()));
}
public:
void
Run
(
const
std
::
vector
<
int
>&
Ms
,
const
std
::
vector
<
int
>&
Ns
,
const
std
::
vector
<
int
>&
Ks
,
std
::
vector
<
int
>&
stride_As
,
std
::
vector
<
int
>&
stride_Bs
,
std
::
vector
<
int
>&
stride_Cs
,
const
int
group_count
=
16
)
{
using
namespace
ck_tile
::
literals
;
auto
f_host_tensor_descriptor
=
[](
std
::
size_t
row
,
std
::
size_t
col
,
std
::
size_t
stride
,
auto
layout
)
{
if
constexpr
(
std
::
is_same_v
<
decltype
(
layout
),
ck_tile
::
tensor_layout
::
gemm
::
RowMajor
>
)
{
return
ck_tile
::
HostTensorDescriptor
({
row
,
col
},
{
stride
,
1
_uz
});
}
else
{
return
ck_tile
::
HostTensorDescriptor
({
row
,
col
},
{
1
_uz
,
stride
});
}
};
auto
f_get_default_stride
=
[](
std
::
size_t
row
,
std
::
size_t
col
,
std
::
size_t
stride
,
auto
layout
)
{
if
(
stride
==
0
)
{
if
constexpr
(
std
::
is_same_v
<
decltype
(
layout
),
ck_tile
::
tensor_layout
::
gemm
::
RowMajor
>
)
{
return
col
;
}
else
{
return
row
;
}
}
else
return
stride
;
};
std
::
vector
<
ck_tile
::
HostTensor
<
ADataType
>>
a_m_k_tensors
;
std
::
vector
<
ck_tile
::
HostTensor
<
BDataType
>>
b_k_n_tensors
;
std
::
vector
<
ck_tile
::
HostTensor
<
CDataType
>>
c_m_n_tensors
;
a_m_k_tensors
.
reserve
(
group_count
);
b_k_n_tensors
.
reserve
(
group_count
);
c_m_n_tensors
.
reserve
(
group_count
);
std
::
vector
<
std
::
unique_ptr
<
ck_tile
::
DeviceMem
>>
a_m_k_dev_buf
;
std
::
vector
<
std
::
unique_ptr
<
ck_tile
::
DeviceMem
>>
b_k_n_dev_buf
;
std
::
vector
<
std
::
unique_ptr
<
ck_tile
::
DeviceMem
>>
c_m_n_dev_buf
;
a_m_k_dev_buf
.
reserve
(
group_count
);
b_k_n_dev_buf
.
reserve
(
group_count
);
c_m_n_dev_buf
.
reserve
(
group_count
);
std
::
vector
<
grouped_gemm_kargs
>
gemm_descs
;
gemm_descs
.
reserve
(
group_count
);
for
(
int
i
=
0
;
i
<
group_count
;
++
i
)
{
const
ck_tile
::
index_t
M
=
Ms
[
i
];
const
ck_tile
::
index_t
N
=
Ns
[
i
];
const
ck_tile
::
index_t
K
=
Ks
[
i
];
stride_As
[
i
]
=
f_get_default_stride
(
M
,
N
,
stride_As
[
i
],
ALayout
{});
stride_Bs
[
i
]
=
f_get_default_stride
(
K
,
N
,
stride_Bs
[
i
],
BLayout
{});
stride_Cs
[
i
]
=
f_get_default_stride
(
M
,
N
,
stride_Cs
[
i
],
CLayout
{});
a_m_k_tensors
.
push_back
(
ck_tile
::
HostTensor
<
ADataType
>
(
f_host_tensor_descriptor
(
M
,
K
,
stride_As
[
i
],
ALayout
{})));
b_k_n_tensors
.
push_back
(
ck_tile
::
HostTensor
<
BDataType
>
(
f_host_tensor_descriptor
(
K
,
N
,
stride_Bs
[
i
],
BLayout
{})));
c_m_n_tensors
.
push_back
(
ck_tile
::
HostTensor
<
CDataType
>
(
f_host_tensor_descriptor
(
M
,
N
,
stride_Cs
[
i
],
CLayout
{})));
std
::
cout
<<
"gemm["
<<
i
<<
"]"
<<
" a_m_k: "
<<
a_m_k_tensors
[
i
].
mDesc
<<
" b_k_n: "
<<
b_k_n_tensors
[
i
].
mDesc
<<
" c_m_n: "
<<
c_m_n_tensors
[
i
].
mDesc
<<
std
::
endl
;
ck_tile
::
FillUniformDistribution
<
ADataType
>
{
-
5.
f
,
5.
f
}(
a_m_k_tensors
[
i
]);
ck_tile
::
FillUniformDistribution
<
BDataType
>
{
-
5.
f
,
5.
f
}(
b_k_n_tensors
[
i
]);
a_m_k_dev_buf
.
push_back
(
std
::
make_unique
<
ck_tile
::
DeviceMem
>
(
a_m_k_tensors
[
i
].
get_element_space_size_in_bytes
()));
b_k_n_dev_buf
.
push_back
(
std
::
make_unique
<
ck_tile
::
DeviceMem
>
(
b_k_n_tensors
[
i
].
get_element_space_size_in_bytes
()));
c_m_n_dev_buf
.
push_back
(
std
::
make_unique
<
ck_tile
::
DeviceMem
>
(
c_m_n_tensors
[
i
].
get_element_space_size_in_bytes
()));
a_m_k_dev_buf
[
i
]
->
ToDevice
(
a_m_k_tensors
[
i
].
data
());
b_k_n_dev_buf
[
i
]
->
ToDevice
(
b_k_n_tensors
[
i
].
data
());
c_m_n_dev_buf
[
i
]
->
SetZero
();
c_m_n_tensors
[
i
].
SetZero
();
const
void
*
p_a
=
a_m_k_dev_buf
[
i
]
->
GetDeviceBuffer
();
const
void
*
p_b
=
b_k_n_dev_buf
[
i
]
->
GetDeviceBuffer
();
void
*
p_c
=
c_m_n_dev_buf
[
i
]
->
GetDeviceBuffer
();
gemm_descs
.
push_back
(
{
p_a
,
p_b
,
p_c
,
M
,
N
,
K
,
stride_As
[
i
],
stride_Bs
[
i
],
stride_Cs
[
i
]});
}
ck_tile
::
DeviceMem
gemm_workspace
;
gemm_workspace
.
Realloc
(
GetWorkspaceSize
(
gemm_descs
));
invoke_grouped_gemm
<
ALayout
,
BLayout
,
CLayout
>
(
gemm_descs
,
ck_tile
::
stream_config
{
nullptr
,
false
},
gemm_workspace
.
GetDeviceBuffer
());
for
(
int
i
=
0
;
i
<
group_count
;
i
++
)
{
c_m_n_dev_buf
[
i
]
->
FromDevice
(
c_m_n_tensors
[
i
].
data
());
}
bool
pass
{
true
};
for
(
int
i
=
0
;
i
<
group_count
;
++
i
)
{
ck_tile
::
HostTensor
<
CDataType
>
c_m_n_host_ref
(
f_host_tensor_descriptor
(
Ms
[
i
],
Ns
[
i
],
stride_Cs
[
i
],
CLayout
{}));
c_m_n_host_ref
.
SetZero
();
ck_tile
::
reference_gemm
<
ADataType
,
BDataType
,
AccDataType
,
CDataType
>
(
a_m_k_tensors
[
i
],
b_k_n_tensors
[
i
],
c_m_n_host_ref
);
pass
&=
ck_tile
::
check_err
(
c_m_n_tensors
[
i
],
c_m_n_host_ref
);
}
EXPECT_TRUE
(
pass
);
}
};
test/data_type/CMakeLists.txt
View file @
c8c016dd
...
...
@@ -9,13 +9,38 @@ if (USE_BITINT_EXTENSION_INT4)
endif
()
endif
()
add_gtest_executable
(
test_fp8 test_fp8.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_fp8 PRIVATE utility
)
add_custom_target
(
test_fp8
)
if
(
CK_USE_OCP_FP8
)
add_gtest_executable
(
test_fp8_ocp test_fp8_ocp.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_fp8_ocp PRIVATE utility
)
endif
()
add_gtest_executable
(
test_bf8_ocp test_bf8_ocp.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_bf8_ocp PRIVATE utility
)
endif
()
add_dependencies
(
test_fp8 test_fp8_ocp
)
add_dependencies
(
test_fp8 test_bf8_ocp
)
endif
()
add_gtest_executable
(
test_bf8 test_bf8.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_bf8 PRIVATE utility
)
if
(
CK_USE_FNUZ_FP8
)
add_gtest_executable
(
test_fp8_fnuz test_fp8_fnuz.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_fp8_fnuz PRIVATE utility
)
endif
()
add_gtest_executable
(
test_bf8_fnuz test_bf8_fnuz.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_bf8_fnuz PRIVATE utility
)
endif
()
add_dependencies
(
test_fp8 test_fp8_fnuz
)
add_dependencies
(
test_fp8 test_bf8_fnuz
)
endif
()
add_gtest_executable
(
test_custom_type test_custom_type.cpp
)
...
...
test/data_type/test_bf8.cpp
→
test/data_type/test_bf8
_fnuz
.cpp
View file @
c8c016dd
...
...
@@ -5,158 +5,169 @@
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using
ck
::
bf8_t
;
using
ck
::
bf8_
fnuz_
t
;
using
ck
::
f8_convert_rne
;
using
ck
::
f8_convert_sr
;
using
ck
::
half_t
;
using
ck
::
type_convert
;
TEST
(
BF8
,
NumericLimits
)
TEST
(
BF8
FNUZ
,
NumericLimits
)
{
// constants given for negative zero nan mode
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_t
>::
Min
(),
type_convert
<
bf8_t
>
(
0x04
));
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_t
>::
Max
(),
type_convert
<
bf8_t
>
(
0x7F
));
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_t
>::
Lowest
(),
type_convert
<
bf8_t
>
(
0xFF
));
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_t
>::
QuietNaN
(),
type_convert
<
bf8_t
>
(
0x80
));
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_
fnuz_
t
>::
Min
(),
type_convert
<
bf8_
fnuz_
t
>
(
0x04
));
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_
fnuz_
t
>::
Max
(),
type_convert
<
bf8_
fnuz_
t
>
(
0x7F
));
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_
fnuz_
t
>::
Lowest
(),
type_convert
<
bf8_
fnuz_
t
>
(
0xFF
));
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_
fnuz_
t
>::
QuietNaN
(),
type_convert
<
bf8_
fnuz_
t
>
(
0x80
));
}
TEST
(
BF8
,
ConvertFP32Nearest
)
TEST
(
BF8
FNUZ
,
ConvertFP32Nearest
)
{
// fix the tolerance value
float
abs_tol
=
1e-6
;
// convert 0 float to bf8 and back, check if holds
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_t
>
(
0.0
f
)),
abs_tol
);
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
0.0
f
)),
abs_tol
);
// don't run the next test on gfx11 devices
#ifndef CK_SKIP_FLAKY_F8_TEST
// convert minimal float to bf8 and back, check if holds
ASSERT_NEAR
(
std
::
numeric_limits
<
float
>::
min
(),
type_convert
<
float
>
(
f8_convert_rne
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
min
())),
type_convert
<
float
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
std
::
numeric_limits
<
float
>::
min
())),
abs_tol
);
#endif
// convert maximal bf8_t to float and check if equal to 57344.0
ASSERT_NEAR
(
57344.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_t
>
(
57344.0
f
)),
abs_tol
);
const
auto
max_bf8_t_float
=
type_convert
<
float
>
(
ck
::
NumericLimits
<
bf8_fnuz_t
>::
Max
());
// convert maximal bf8_fnuz_t to float and check if equal to 57344.0
ASSERT_NEAR
(
max_bf8_t_float
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_fnuz_t
>
(
max_bf8_t_float
)),
abs_tol
);
// convert maximal float to bf8 and back, check if clipped to 57344.0
ASSERT_NEAR
(
57344.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
ASSERT_NEAR
(
max_bf8_t_float
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
std
::
numeric_limits
<
float
>::
max
())),
abs_tol
);
// convert inf float to bf8_t and check if it is qNan
ASSERT_NEAR
(
type_convert
<
bf8_t
>
(
0x80
),
f8_convert_rne
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
// convert inf float to bf8_
fnuz_
t and check if it is qNan
ASSERT_NEAR
(
ck
::
NumericLimits
<
bf8_fnuz_t
>::
QuietNaN
(
),
f8_convert_rne
<
bf8_
fnuz_
t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
abs_tol
);
// positive norm float value to bf8 and back, check if holds
float
pos_float
=
0.0000762939
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
pos_float
)),
abs_tol
);
// negative norm float value to bf8 and back, check if holds
float
neg_float
=
-
0.0000610351
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
neg_float
)),
abs_tol
);
// positive subnorm float value to bf8 and back, check if holds
pos_float
=
0.0000305175
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
pos_float
)),
abs_tol
);
// negative subnorm float value to bf8 and back, check if holds
neg_float
=
-
0.0000152587
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
neg_float
)),
abs_tol
);
}
TEST
(
BF8
,
ConvertFP32Stochastic
)
TEST
(
BF8
FNUZ
,
ConvertFP32Stochastic
)
{
// fix the tolerance value
float
abs_tol
=
1e-6
;
// convert 0 float to bf8 and back, check if holds
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_t
>
(
0.0
f
)),
abs_tol
);
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
0.0
f
)),
abs_tol
);
// convert minimal float to bf8 and back, check if holds
ASSERT_NEAR
(
std
::
numeric_limits
<
float
>::
min
(),
type_convert
<
float
>
(
f8_convert_sr
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
min
())),
type_convert
<
float
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
std
::
numeric_limits
<
float
>::
min
())),
abs_tol
);
// convert maximal bf8_t to float and check if equal to 57344.0
ASSERT_NEAR
(
57344.0
f
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_t
>
(
57344.0
f
)),
abs_tol
);
const
auto
max_bf8_t_float
=
type_convert
<
float
>
(
ck
::
NumericLimits
<
bf8_fnuz_t
>::
Max
());
// convert maximal bf8_fnuz_t to float and check if equal to 57344.0
ASSERT_NEAR
(
max_bf8_t_float
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_fnuz_t
>
(
max_bf8_t_float
)),
abs_tol
);
// convert maximal float to bf8 and back, check if clipped to 57344.0
ASSERT_NEAR
(
57344.0
f
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
ASSERT_NEAR
(
max_bf8_t_float
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
std
::
numeric_limits
<
float
>::
max
())),
abs_tol
);
// convert inf float to bf8_t and check if it is qNan
ASSERT_NEAR
(
type_convert
<
bf8_t
>
(
0x80
),
f8_convert_sr
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
// convert inf float to bf8_
fnuz_
t and check if it is qNan
ASSERT_NEAR
(
ck
::
NumericLimits
<
bf8_fnuz_t
>::
QuietNaN
(
),
f8_convert_sr
<
bf8_
fnuz_
t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
abs_tol
);
// positive norm float value to bf8 and back, check if holds
float
pos_float
=
0.0000762939
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
pos_float
)),
abs_tol
);
// negative norm float value to bf8 and back, check if holds
float
neg_float
=
-
0.0000610351
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
neg_float
)),
abs_tol
);
// positive subnorm float value to bf8 and back, check if holds
pos_float
=
0.0000305175
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
pos_float
)),
abs_tol
);
// negative subnorm float value to bf8 and back, check if holds
neg_float
=
-
0.0000152587
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
neg_float
)),
abs_tol
);
}
TEST
(
BF8
,
ConvertFP16Nearest
)
TEST
(
BF8
FNUZ
,
ConvertFP16Nearest
)
{
// fix the tolerance value
float
abs_tol
=
1e-3
;
// convert 0 fp16 to bf8 and back, check if holds
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_t
>
(
half_t
{
0.0
})),
abs_tol
);
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_fnuz_t
>
(
half_t
{
0.0
})),
abs_tol
);
// convert minimal fp16 to bf8 and back, check if holds
ASSERT_NEAR
(
ck
::
NumericLimits
<
half_t
>::
Min
(),
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
abs_tol
);
// convert maximal bf8_t to fp16 and check if equal to 57344.0
const
auto
max_bf8_t_half
=
type_convert
<
half_t
>
(
ck
::
NumericLimits
<
bf8_fnuz_t
>::
Max
());
// convert maximal bf8_fnuz_t to fp16 and check if equal to 57344.0
ASSERT_NEAR
(
half_t
{
57344.0
}
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_
t
>
(
half_t
{
57344.0
}
)),
abs_tol
);
max_bf8_t_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_
fnuz_t
>
(
max_bf8_t_half
)),
abs_tol
);
// convert maximal fp16 to bf8 and back, check if clipped to 57344.0
ASSERT_NEAR
(
half_t
{
57344.0
}
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
ASSERT_NEAR
(
max_bf8_t_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
abs_tol
);
// convert QuietNaN fp16 to bf8_t and check if it is QuietNaN
ASSERT_NEAR
(
type_convert
<
bf8_t
>
(
0x80
),
f8_convert_rne
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
// convert QuietNaN fp16 to bf8_
fnuz_
t and check if it is QuietNaN
ASSERT_NEAR
(
ck
::
NumericLimits
<
bf8_fnuz_t
>::
QuietNaN
(
),
f8_convert_rne
<
bf8_
fnuz_
t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
abs_tol
);
// positive norm fp16 value to bf8 and back, check if holds
half_t
pos_half
=
half_t
{
0.0000762939
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
pos_half
)),
abs_tol
);
// negative norm fp16 value to bf8 and back, check if holds
half_t
neg_half
=
half_t
{
-
0.0000610351
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
neg_half
)),
abs_tol
);
// positive subnorm fp16 value to bf8 and back, check if holds
pos_half
=
half_t
{
0.0000305175
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
pos_half
)),
abs_tol
);
// negative subnorm fp16 value to bf8 and back, check if holds
neg_half
=
half_t
{
-
0.0000152587
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_
fnuz_
t
>
(
neg_half
)),
abs_tol
);
}
TEST
(
BF8
,
ConvertFP16Stochastic
)
TEST
(
BF8
FNUZ
,
ConvertFP16Stochastic
)
{
// fix the tolerance value
float
abs_tol
=
1e-3
;
// convert 0 fp16 to bf8 and back, check if holds
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_t
>
(
half_t
{
0.0
})),
abs_tol
);
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
half_t
{
0.0
})),
abs_tol
);
// convert minimal fp16 to bf8 and back, check if holds
ASSERT_NEAR
(
ck
::
NumericLimits
<
half_t
>::
Min
(),
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
abs_tol
);
// convert maximal bf8_t to fp16 and check if equal to 57344.0
const
auto
max_bf8_t_half
=
type_convert
<
half_t
>
(
ck
::
NumericLimits
<
bf8_fnuz_t
>::
Max
());
// convert maximal bf8_fnuz_t to fp16 and check if equal to 57344.0
ASSERT_NEAR
(
half_t
{
57344.0
}
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_
t
>
(
half_t
{
57344.0
}
)),
abs_tol
);
max_bf8_t_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_
fnuz_t
>
(
max_bf8_t_half
)),
abs_tol
);
// convert maximal fp16 to bf8 and back, check if clipped to 57344.0
ASSERT_NEAR
(
half_t
{
57344.0
}
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
ASSERT_NEAR
(
max_bf8_t_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
abs_tol
);
// convert QuietNaN fp16 to bf8_t and check if it is QuietNaN
ASSERT_NEAR
(
type_convert
<
bf8_t
>
(
0x80
),
f8_convert_sr
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
// convert QuietNaN fp16 to bf8_
fnuz_
t and check if it is QuietNaN
ASSERT_NEAR
(
ck
::
NumericLimits
<
bf8_fnuz_t
>::
QuietNaN
(
),
f8_convert_sr
<
bf8_
fnuz_
t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
abs_tol
);
// positive norm fp16 value to bf8 and back, check if holds
half_t
pos_half
=
half_t
{
0.0000762939
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
pos_half
)),
abs_tol
);
// negative norm fp16 value to bf8 and back, check if holds
half_t
neg_half
=
half_t
{
-
0.0000610351
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
neg_half
)),
abs_tol
);
// positive subnorm fp16 value to bf8 and back, check if holds
pos_half
=
half_t
{
0.0000305175
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
pos_half
)),
abs_tol
);
// negative subnorm fp16 value to bf8 and back, check if holds
neg_half
=
half_t
{
-
0.0000152587
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_
fnuz_
t
>
(
neg_half
)),
abs_tol
);
}
test/data_type/test_bf8_ocp.cpp
0 → 100644
View file @
c8c016dd
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using
ck
::
bf8_ocp_t
;
using
ck
::
f8_convert_rne
;
using
ck
::
f8_convert_sr
;
using
ck
::
half_t
;
using
ck
::
type_convert
;
TEST
(
BF8OCP
,
NumericLimits
)
{
// constants given for OCP FP8
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_ocp_t
>::
Min
(),
type_convert
<
bf8_ocp_t
>
(
0x04
));
// 0b00000100 = 2^-14
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_ocp_t
>::
Max
(),
type_convert
<
bf8_ocp_t
>
(
0x7B
));
// 0b01111011 = 57344
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_ocp_t
>::
Lowest
(),
type_convert
<
bf8_ocp_t
>
(
0xFB
));
// 0b11111011 = -57344
EXPECT_EQ
(
ck
::
NumericLimits
<
bf8_ocp_t
>::
QuietNaN
().
data
,
type_convert
<
bf8_ocp_t
>
(
0x7D
).
data
);
// 0b01111101
EXPECT_FALSE
(
ck
::
NumericLimits
<
bf8_ocp_t
>::
QuietNaN
()
==
ck
::
NumericLimits
<
bf8_ocp_t
>::
QuietNaN
());
EXPECT_TRUE
(
ck
::
fp8_is_inf
(
type_convert
<
bf8_ocp_t
>
(
0xFC
))
&&
ck
::
fp8_is_inf
(
type_convert
<
bf8_ocp_t
>
(
0x7C
)));
}
TEST(BF8OCP, ConvertFP32Nearest)
{
    // tolerance for the round-trip comparisons
    float tol = 1e-6;

    // 0.0f must survive a float -> bf8 -> float round trip exactly
    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<bf8_ocp_t>(0.0f)), 0.0f);

    // the smallest normal float collapses to (nearly) zero in bf8
    ASSERT_NEAR(std::numeric_limits<float>::min(),
                type_convert<float>(f8_convert_rne<bf8_ocp_t>(std::numeric_limits<float>::min())),
                tol);

    const auto bf8_max_as_float = type_convert<float>(ck::NumericLimits<bf8_ocp_t>::Max());

    // the bf8 maximum converted to float must round-trip exactly
    ASSERT_NEAR(
        bf8_max_as_float, type_convert<float>(f8_convert_rne<bf8_ocp_t>(bf8_max_as_float)), 0.0f);

    // float max must clip to the bf8 maximum (saturation to finite)
    ASSERT_NEAR(bf8_max_as_float,
                type_convert<float>(f8_convert_rne<bf8_ocp_t>(std::numeric_limits<float>::max())),
                0.0f);

    // float infinity must also saturate to the bf8 maximum (saturation to finite)
    ASSERT_EQ(ck::NumericLimits<bf8_ocp_t>::Max(),
              f8_convert_rne<bf8_ocp_t>(std::numeric_limits<float>::infinity()));

    // a representable positive normal value round-trips within tolerance
    float pos_normal = 0.0000762939f; // 10*2^-17
    ASSERT_NEAR(pos_normal, type_convert<float>(f8_convert_rne<bf8_ocp_t>(pos_normal)), tol);

    // the negative smallest normal bf8 value round-trips exactly
    constexpr auto neg_min_normal = -0.00006103515625f; //-2^-14
    ASSERT_NEAR(
        neg_min_normal, type_convert<float>(f8_convert_rne<bf8_ocp_t>(neg_min_normal)), 0.0f);

    // a positive subnormal bf8 value round-trips exactly
    constexpr auto pos_subnorm = 0.000030517578125f; // 2^-15
    ASSERT_NEAR(pos_subnorm, type_convert<float>(f8_convert_rne<bf8_ocp_t>(pos_subnorm)), 0.0f);

    // the smallest-magnitude (negative) subnormal bf8 value round-trips exactly
    constexpr auto neg_min_subnorm = -0.0000152587890625f; //-2^-16
    ASSERT_NEAR(
        neg_min_subnorm, type_convert<float>(f8_convert_rne<bf8_ocp_t>(neg_min_subnorm)), 0.0f);

    // anything below the smallest subnormal must round to zero
    constexpr auto below_min_subnorm = 0.00000762939453125f; // 2^-17
    ASSERT_EQ(0.0f, type_convert<float>(f8_convert_rne<bf8_ocp_t>(below_min_subnorm)));

    // a float quiet NaN must convert to a bf8 NaN encoding
    const auto bf8_nan = f8_convert_rne<bf8_ocp_t>(std::numeric_limits<float>::quiet_NaN());
    ASSERT_TRUE(ck::fp8_impl::ocp_bf8_is_nan(bf8_nan.data));
}
TEST(BF8OCP, ConvertFP32Stochastic)
{
    // tolerance for the round-trip comparisons
    float tol = 1e-6;

    // 0.0f must survive a float -> bf8 -> float round trip exactly
    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_sr<bf8_ocp_t>(0.0f)), 0.0f);

    // the smallest normal float collapses to (nearly) zero in bf8
    ASSERT_NEAR(std::numeric_limits<float>::min(),
                type_convert<float>(f8_convert_sr<bf8_ocp_t>(std::numeric_limits<float>::min())),
                tol);

    const auto bf8_max_as_float = type_convert<float>(ck::NumericLimits<bf8_ocp_t>::Max());

    // the bf8 maximum converted to float must round-trip exactly
    ASSERT_NEAR(
        bf8_max_as_float, type_convert<float>(f8_convert_sr<bf8_ocp_t>(bf8_max_as_float)), 0.0f);

    // float max must clip to the bf8 maximum (saturation to finite)
    ASSERT_NEAR(bf8_max_as_float,
                type_convert<float>(f8_convert_sr<bf8_ocp_t>(std::numeric_limits<float>::max())),
                0.0f);

    // float infinity must also saturate to the bf8 maximum (saturation to finite)
    ASSERT_EQ(ck::NumericLimits<bf8_ocp_t>::Max(),
              f8_convert_sr<bf8_ocp_t>(std::numeric_limits<float>::infinity()));

    // a representable positive normal value round-trips within tolerance
    float pos_normal = 0.0000762939f; // 10*2^-17
    ASSERT_NEAR(pos_normal, type_convert<float>(f8_convert_sr<bf8_ocp_t>(pos_normal)), tol);

    // the negative smallest normal bf8 value round-trips exactly
    constexpr auto neg_min_normal = -0.00006103515625f; //-2^-14
    ASSERT_NEAR(
        neg_min_normal, type_convert<float>(f8_convert_sr<bf8_ocp_t>(neg_min_normal)), 0.0f);

    // a positive subnormal bf8 value round-trips exactly
    constexpr auto pos_subnorm = 0.000030517578125f; // 2^-15
    ASSERT_NEAR(pos_subnorm, type_convert<float>(f8_convert_sr<bf8_ocp_t>(pos_subnorm)), 0.0f);

    // the smallest-magnitude (negative) subnormal bf8 value round-trips exactly
    constexpr auto neg_min_subnorm = -0.0000152587890625f; //-2^-16
    ASSERT_NEAR(
        neg_min_subnorm, type_convert<float>(f8_convert_sr<bf8_ocp_t>(neg_min_subnorm)), 0.0f);

    // below the smallest subnormal, stochastic rounding may land on 0 or 2^-16
    constexpr auto below_min_subnorm = 0.00000762939453125f; // 2^-17
    ASSERT_NEAR(0.0f,
                type_convert<float>(f8_convert_sr<bf8_ocp_t>(below_min_subnorm)),
                0.0000152587890625f);

    // a float quiet NaN must convert to a bf8 NaN encoding
    const auto bf8_nan = f8_convert_sr<bf8_ocp_t>(std::numeric_limits<float>::quiet_NaN());
    ASSERT_TRUE(ck::fp8_impl::ocp_bf8_is_nan(bf8_nan.data));
}
TEST(BF8OCP, ConvertFP16Nearest)
{
    // tolerance and zero in half precision for the round-trip comparisons
    constexpr half_t tol  = 1e-3;
    constexpr half_t zero = 0.0;

    // half_t 0 must survive a half -> bf8 -> half round trip exactly
    ASSERT_NEAR(zero, type_convert<half_t>(f8_convert_rne<bf8_ocp_t>(zero)), zero);

    // the smallest normal half_t converts to bf8 within tolerance
    ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
                type_convert<half_t>(f8_convert_rne<bf8_ocp_t>(ck::NumericLimits<half_t>::Min())),
                tol);

    const auto bf8_max_as_half = type_convert<half_t>(ck::NumericLimits<bf8_ocp_t>::Max());

    // the bf8 maximum converted to half_t must round-trip exactly
    ASSERT_NEAR(
        bf8_max_as_half, type_convert<half_t>(f8_convert_rne<bf8_ocp_t>(bf8_max_as_half)), zero);

    // half_t max must clip to the bf8 maximum (saturation to finite)
    ASSERT_NEAR(bf8_max_as_half,
                type_convert<half_t>(f8_convert_rne<bf8_ocp_t>(ck::NumericLimits<half_t>::Max())),
                zero);

    // half_t infinity must also saturate to the bf8 maximum (saturation to finite)
    ASSERT_EQ(ck::NumericLimits<bf8_ocp_t>::Max(),
              f8_convert_rne<bf8_ocp_t>(
                  type_convert<half_t>(std::numeric_limits<float>::infinity())));

    // a representable positive normal value round-trips within tolerance
    constexpr half_t pos_normal{0.0000762939f}; // 10*2^-17
    ASSERT_NEAR(pos_normal, type_convert<half_t>(f8_convert_rne<bf8_ocp_t>(pos_normal)), tol);

    // the negative smallest normal bf8 value round-trips exactly
    constexpr half_t neg_min_normal{-0.00006103515625f}; //-2^-14
    ASSERT_NEAR(
        neg_min_normal, type_convert<half_t>(f8_convert_rne<bf8_ocp_t>(neg_min_normal)), zero);

    // a positive subnormal bf8 value round-trips exactly
    constexpr half_t pos_subnorm{0.000030517578125f}; // 2^-15
    ASSERT_NEAR(pos_subnorm, type_convert<half_t>(f8_convert_rne<bf8_ocp_t>(pos_subnorm)), zero);

    // the smallest-magnitude (negative) subnormal bf8 value round-trips exactly
    constexpr half_t neg_min_subnorm{-0.0000152587890625f}; //-2^-16
    ASSERT_NEAR(
        neg_min_subnorm, type_convert<half_t>(f8_convert_rne<bf8_ocp_t>(neg_min_subnorm)), zero);

    // anything below the smallest subnormal must round to zero
    constexpr half_t below_min_subnorm{0.00000762939453125f}; // 2^-17
    ASSERT_EQ(zero, type_convert<half_t>(f8_convert_rne<bf8_ocp_t>(below_min_subnorm)));

    // a half_t quiet NaN must convert to a bf8 NaN encoding
    const auto bf8_nan = f8_convert_rne<bf8_ocp_t>(ck::NumericLimits<half_t>::QuietNaN());
    ASSERT_TRUE(ck::fp8_impl::ocp_bf8_is_nan(bf8_nan.data));
}
TEST(BF8OCP, ConvertFP16Stochastic)
{
    // tolerance and zero in half precision for the round-trip comparisons
    constexpr half_t tol  = 1e-3;
    constexpr half_t zero = 0.0;
    // smallest-magnitude bf8 subnormal, used both as a test value and a bound
    constexpr auto min_subnorm_bf8 = 0.0000152587890625f; // 2^-16

    // half_t 0 must survive a half -> bf8 -> half round trip exactly
    ASSERT_NEAR(zero, type_convert<half_t>(f8_convert_sr<bf8_ocp_t>(zero)), zero);

    // the smallest normal half_t (6.103515625e-05) round-trips exactly
    ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
                type_convert<half_t>(f8_convert_sr<bf8_ocp_t>(ck::NumericLimits<half_t>::Min())),
                zero);

    const auto bf8_max_as_half = type_convert<half_t>(ck::NumericLimits<bf8_ocp_t>::Max());

    // the bf8 maximum converted to half_t must round-trip exactly
    ASSERT_NEAR(
        bf8_max_as_half, type_convert<half_t>(f8_convert_sr<bf8_ocp_t>(bf8_max_as_half)), zero);

    // half_t max must clip to the bf8 maximum (saturation to finite)
    ASSERT_NEAR(bf8_max_as_half,
                type_convert<half_t>(f8_convert_sr<bf8_ocp_t>(ck::NumericLimits<half_t>::Max())),
                zero);

    // half_t infinity must also saturate to the bf8 maximum (saturation to finite)
    ASSERT_EQ(ck::NumericLimits<bf8_ocp_t>::Max(),
              f8_convert_sr<bf8_ocp_t>(
                  type_convert<half_t>(std::numeric_limits<float>::infinity())));

    // a representable positive normal value round-trips within tolerance
    constexpr half_t pos_normal{0.0000762939f}; // 10*2^-17
    ASSERT_NEAR(pos_normal, type_convert<half_t>(f8_convert_sr<bf8_ocp_t>(pos_normal)), tol);

    // the negative smallest normal bf8 value round-trips exactly
    constexpr half_t neg_min_normal{-0.00006103515625f}; //-2^-14
    ASSERT_NEAR(
        neg_min_normal, type_convert<half_t>(f8_convert_sr<bf8_ocp_t>(neg_min_normal)), zero);

    // a positive subnormal bf8 value round-trips exactly
    constexpr half_t pos_subnorm{0.000030517578125f}; // 2^-15
    ASSERT_NEAR(pos_subnorm, type_convert<half_t>(f8_convert_sr<bf8_ocp_t>(pos_subnorm)), zero);

    // the smallest-magnitude (negative) subnormal bf8 value round-trips exactly
    ASSERT_NEAR(half_t{-min_subnorm_bf8},
                type_convert<half_t>(f8_convert_sr<bf8_ocp_t>(half_t{-min_subnorm_bf8})),
                zero);

    // below the smallest subnormal, stochastic rounding may land on 0 or 2^-16
    constexpr half_t below_min_subnorm{0.00000762939453125f}; // 2^-17
    ASSERT_NEAR(zero,
                type_convert<half_t>(f8_convert_sr<bf8_ocp_t>(below_min_subnorm)),
                half_t{min_subnorm_bf8});

    // a half_t quiet NaN must convert to a bf8 NaN encoding
    const auto bf8_nan = f8_convert_sr<bf8_ocp_t>(ck::NumericLimits<half_t>::QuietNaN());
    ASSERT_TRUE(ck::fp8_impl::ocp_bf8_is_nan(bf8_nan.data));
}
test/data_type/test_custom_type.cpp
View file @
c8c016dd
...
...
@@ -51,8 +51,11 @@ TEST(Custom_bool, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_bool_t
>()(
Number
<
i
>
{})
=
custom_bool_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_bool_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_bool_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_bool_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bool_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -129,8 +132,11 @@ TEST(Custom_int8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_int8_t
>()(
Number
<
i
>
{})
=
custom_int8_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_int8_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_int8_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_int8_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_int8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -207,8 +213,11 @@ TEST(Custom_uint8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_uint8_t
>()(
Number
<
i
>
{})
=
custom_uint8_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_uint8_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_uint8_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_uint8_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_uint8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -287,8 +296,11 @@ TEST(Custom_f8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_f8_t
>()(
Number
<
i
>
{})
=
custom_f8_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_f8_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_f8_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_f8_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_f8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -369,8 +381,11 @@ TEST(Custom_bf8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_bf8_t
>()(
Number
<
i
>
{})
=
custom_bf8_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_bf8_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_bf8_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_bf8_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bf8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -450,8 +465,11 @@ TEST(Custom_half, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_half_t
>()(
Number
<
i
>
{})
=
custom_half_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_half_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_half_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_half_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_half_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -533,8 +551,11 @@ TEST(Custom_bhalf, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_bhalf_t
>()(
Number
<
i
>
{})
=
custom_bhalf_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_bhalf_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_bhalf_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_bhalf_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bhalf_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -615,8 +636,11 @@ TEST(Custom_float, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_float_t
>()(
Number
<
i
>
{})
=
custom_float_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_float_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_float_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_float_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_float_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -693,8 +717,11 @@ TEST(Custom_double, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_double_t
>()(
Number
<
i
>
{})
=
custom_double_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_double_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_double_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_double_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_double_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -813,8 +840,11 @@ TEST(Complex_half, TestAsType)
right_vec
.
template
AsType
<
complex_half_t
>()(
Number
<
i
>
{})
=
complex_half_t
{
test_vec
.
at
(
num_elem
*
i
),
test_vec
.
at
(
num_elem
*
i
+
1
)};
});
// copy the vector
vector_type
<
complex_half_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
complex_half_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
complex_half_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
complex_half_t
>()(
Number
<
i
>
{}).
real
,
...
...
@@ -872,3 +902,167 @@ TEST(Complex_half, TestAsTypeReshape)
test_vec
.
at
(
num_elem
*
i
+
1
));
});
}
#if CK_USE_OCP_FP8
TEST(FP8OCP, TestSize)
{
    static_assert(std::is_same_v<f8_t, ck::f8_ocp_t>, "OCP FP8 is not enabled");

    // f8_t must share its storage layout with fp8_storage_t, both as a scalar
    // and for every supported vector width
    ASSERT_EQ(sizeof(f8_t), sizeof(ck::fp8_storage_t));
    ASSERT_EQ(sizeof(vector_type<f8_t, 2>), sizeof(vector_type<ck::fp8_storage_t, 2>));
    ASSERT_EQ(sizeof(vector_type<f8_t, 4>), sizeof(vector_type<ck::fp8_storage_t, 4>));
    ASSERT_EQ(sizeof(vector_type<f8_t, 8>), sizeof(vector_type<ck::fp8_storage_t, 8>));
    ASSERT_EQ(sizeof(vector_type<f8_t, 16>), sizeof(vector_type<ck::fp8_storage_t, 16>));
    ASSERT_EQ(sizeof(vector_type<f8_t, 32>), sizeof(vector_type<ck::fp8_storage_t, 32>));
    ASSERT_EQ(sizeof(vector_type<f8_t, 64>), sizeof(vector_type<ck::fp8_storage_t, 64>));
}
TEST(FP8OCP, TestAsType)
{
    static_assert(std::is_same_v<f8_t, ck::f8_ocp_t>, "OCP FP8 is not enabled");

    // values exactly representable in e4m3, so round trips are exact
    std::array<float, 8> test_vec = {-4, -2, -0.5, -0.25, 1.0 / 8.0, 1, 1.5, 16};
    constexpr int size            = test_vec.size();

    // source vector; the default CTOR must zero-initialize every lane
    vector_type<f8_t, size> right_vec;
    ck::static_for<0, size, 1>{}(
        [&](auto i) { ASSERT_EQ(right_vec.template AsType<f8_t>()(Number<i>{}), f8_t{0}); });

    // write the test values lane by lane through AsType
    ck::static_for<0, size, 1>{}([&](auto i) {
        right_vec.template AsType<f8_t>()(Number<i>{}) = ck::type_convert<f8_t>(test_vec.at(i));
    });

    vector_type<f8_t, size> left_vec;
    // exercise the copy-assignment operator
    left_vec = right_vec;
    // wipe the source to prove left_vec holds an independent copy
    right_vec = vector_type<f8_t, size>{};
    ck::static_for<0, size, 1>{}([&](auto i) {
        ASSERT_EQ(left_vec.template AsType<f8_t>()(Number<i>{}),
                  ck::type_convert<f8_t>(test_vec.at(i)));
    });

    // broadcast CTOR of the non-native 2-wide vector must fill both lanes
    ck::non_native_vector_base<ck::f8_ocp_t, 2> nnvb_f8x2(ck::type_convert<f8_t>(-10.0f));
    ASSERT_EQ(nnvb_f8x2.template AsType<f8_t>()(Number<0>{}), ck::type_convert<f8_t>(-10.0f));
    ASSERT_EQ(nnvb_f8x2.template AsType<f8_t>()(Number<1>{}), ck::type_convert<f8_t>(-10.0f));
}
TEST(FP8OCP, TestAsTypeReshape)
{
    static_assert(std::is_same_v<f8_t, ck::f8_ocp_t>, "OCP FP8 is not enabled");
    // test size
    // NOTE: use 1.0 / 256.0 (2^-8, an exactly representable e4m3 subnormal);
    // the previous `1 / 256` was integer division and silently tested 0.0f
    std::array<float, 8> test_vec = {-8, -0.5, -0.25, 1.0 / 8.0, 1.0 / 256.0, 1, 1.5, 16};
    constexpr int size            = test_vec.size();
    // reference vector
    vector_type<f8_t, size> right_vec;
    // check default CTOR: every lane must be zero-initialized
    ck::static_for<0, size, 1>{}(
        [&](auto i) { ASSERT_EQ(right_vec.template AsType<f8_t>()(Number<i>{}), f8_t{0}); });
    // assign test values to the vector
    ck::static_for<0, size, 1>{}([&](auto i) {
        right_vec.template AsType<f8_t>()(Number<i>{}) = ck::type_convert<f8_t>(test_vec.at(i));
    });
    // copy the first half of a vector by reinterpreting it as a half-width vector type
    vector_type<f8_t, size / 2> left_vec{
        right_vec.template AsType<vector_type<f8_t, size / 2>::type>()(Number<0>{})};
    // check if values were copied correctly
    ck::static_for<0, size / 2, 1>{}([&](auto i) {
        ASSERT_EQ(left_vec.template AsType<f8_t>()(Number<i>{}),
                  ck::type_convert<f8_t>(test_vec.at(i)));
    });
}
TEST(BF8OCP, TestSize)
{
    static_assert(std::is_same_v<bf8_t, ck::bf8_ocp_t>, "OCP BF8 is not enabled");

    // bf8_t must share its storage layout with fp8_storage_t, both as a scalar
    // and for every supported vector width
    ASSERT_EQ(sizeof(bf8_t), sizeof(ck::fp8_storage_t));
    ASSERT_EQ(sizeof(vector_type<bf8_t, 2>), sizeof(vector_type<ck::fp8_storage_t, 2>));
    ASSERT_EQ(sizeof(vector_type<bf8_t, 4>), sizeof(vector_type<ck::fp8_storage_t, 4>));
    ASSERT_EQ(sizeof(vector_type<bf8_t, 8>), sizeof(vector_type<ck::fp8_storage_t, 8>));
    ASSERT_EQ(sizeof(vector_type<bf8_t, 16>), sizeof(vector_type<ck::fp8_storage_t, 16>));
    ASSERT_EQ(sizeof(vector_type<bf8_t, 32>), sizeof(vector_type<ck::fp8_storage_t, 32>));
    ASSERT_EQ(sizeof(vector_type<bf8_t, 64>), sizeof(vector_type<ck::fp8_storage_t, 64>));
}
TEST(BF8OCP, TestAsType)
{
    static_assert(std::is_same_v<bf8_t, ck::bf8_ocp_t>, "OCP BF8 is not enabled");
    // test size
    std::array<float, 8> test_vec = {-4, -2, -0.5, -0.25, 1.0 / 8.0, 1, 1.5, 16};
    constexpr int size            = test_vec.size();
    // reference vector
    vector_type<bf8_t, size> right_vec;
    // check default CTOR: every lane must be zero-initialized
    ck::static_for<0, size, 1>{}(
        [&](auto i) { ASSERT_EQ(right_vec.template AsType<bf8_t>()(Number<i>{}), bf8_t{0}); });
    // assign test values to the vector
    ck::static_for<0, size, 1>{}([&](auto i) {
        right_vec.template AsType<bf8_t>()(Number<i>{}) = ck::type_convert<bf8_t>(test_vec.at(i));
    });
    // default-construct the destination so the copy-assignment below is what is
    // actually verified (copy-initializing from right_vec made the assignment
    // check vacuous); this also matches the FP8OCP variant of this test
    vector_type<bf8_t, size> left_vec;
    // check copy assignment op
    left_vec = right_vec;
    // overwrite right_vec with 0s to prove left_vec holds an independent copy
    right_vec = vector_type<bf8_t, size>{};
    // check if values were copied correctly
    ck::static_for<0, size, 1>{}([&](auto i) {
        ASSERT_EQ(left_vec.template AsType<bf8_t>()(Number<i>{}),
                  ck::type_convert<bf8_t>(test_vec.at(i)));
    });
    // broadcast CTOR of the non-native 2-wide vector must fill both lanes
    ck::non_native_vector_base<bf8_t, 2> nnvb_bf8x2(ck::type_convert<bf8_t>(-10.0f));
    ASSERT_EQ(nnvb_bf8x2.template AsType<bf8_t>()(Number<0>{}), ck::type_convert<bf8_t>(-10.0f));
    ASSERT_EQ(nnvb_bf8x2.template AsType<bf8_t>()(Number<1>{}), ck::type_convert<bf8_t>(-10.0f));
}
TEST(BF8OCP, TestAsTypeReshape)
{
    static_assert(std::is_same_v<bf8_t, ck::bf8_ocp_t>, "OCP BF8 is not enabled");
    // test size
    // NOTE: use 1.0 / 256.0 (2^-8, an exactly representable e5m2 normal);
    // the previous `1 / 256` was integer division and silently tested 0.0f
    std::array<float, 8> test_vec = {-8, -0.5, -0.25, 1.0 / 8.0, 1.0 / 256.0, 1, 1.5, 16};
    constexpr int size            = test_vec.size();
    // reference vector
    vector_type<bf8_t, size> right_vec;
    // check default CTOR: every lane must be zero-initialized
    ck::static_for<0, size, 1>{}(
        [&](auto i) { ASSERT_EQ(right_vec.template AsType<bf8_t>()(Number<i>{}), bf8_t{0}); });
    // assign test values to the vector
    ck::static_for<0, size, 1>{}([&](auto i) {
        right_vec.template AsType<bf8_t>()(Number<i>{}) = ck::type_convert<bf8_t>(test_vec.at(i));
    });
    // copy the first half of a vector by reinterpreting it as a half-width vector type
    vector_type<bf8_t, size / 2> left_vec{
        right_vec.template AsType<vector_type<bf8_t, size / 2>::type>()(Number<0>{})};
    // check if values were copied correctly
    ck::static_for<0, size / 2, 1>{}([&](auto i) {
        ASSERT_EQ(left_vec.template AsType<bf8_t>()(Number<i>{}),
                  ck::type_convert<bf8_t>(test_vec.at(i)));
    });
}
#endif
test/data_type/test_fp8.cpp
→
test/data_type/test_fp8
_fnuz
.cpp
View file @
c8c016dd
...
...
@@ -7,154 +7,171 @@
using
ck
::
f8_convert_rne
;
using
ck
::
f8_convert_sr
;
using
ck
::
f8_t
;
using
ck
::
f8_
fnuz_
t
;
using
ck
::
half_t
;
using
ck
::
type_convert
;
TEST
(
FP8
,
NumericLimits
)
TEST
(
FP8
FNUZ
,
NumericLimits
)
{
// constants given for negative zero nan mode
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_t
>::
Min
(),
type_convert
<
f8_t
>
(
0x08
));
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_t
>::
Max
(),
type_convert
<
f8_t
>
(
0x7F
));
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_t
>::
Lowest
(),
type_convert
<
f8_t
>
(
0xFF
));
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_t
>::
QuietNaN
(),
type_convert
<
f8_t
>
(
0x80
));
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_
fnuz_
t
>::
Min
(),
type_convert
<
f8_
fnuz_
t
>
(
0x08
));
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_
fnuz_
t
>::
Max
(),
type_convert
<
f8_
fnuz_
t
>
(
0x7F
));
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_
fnuz_
t
>::
Lowest
(),
type_convert
<
f8_
fnuz_
t
>
(
0xFF
));
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_
fnuz_
t
>::
QuietNaN
(),
type_convert
<
f8_
fnuz_
t
>
(
0x80
));
}
TEST
(
FP8
,
ConvertFP32Nearest
)
TEST
(
FP8
FNUZ
,
ConvertFP32Nearest
)
{
// fix the tolerance value
float
abs_tol
=
1e-6
;
// convert 0 float to fp8 and back, check if holds
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_t
>
(
0.0
f
)),
abs_tol
);
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
0.0
f
)),
abs_tol
);
// don't run the next test on gfx11 devices
#ifndef CK_SKIP_FLAKY_F8_TEST
// convert minimal float to fp8 and back, check if holds
ASSERT_NEAR
(
std
::
numeric_limits
<
float
>::
min
(),
type_convert
<
float
>
(
f8_convert_rne
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
min
())),
type_convert
<
float
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
std
::
numeric_limits
<
float
>::
min
())),
abs_tol
);
#endif
// convert maximal f8_t to float and check if equal to 240.0
ASSERT_NEAR
(
240.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_t
>
(
240.0
f
)),
abs_tol
);
// convert maximal float to fp8 and back, check if clipped to 240.0
ASSERT_NEAR
(
240.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
const
auto
max_f8_t_float
=
type_convert
<
float
>
(
ck
::
NumericLimits
<
f8_fnuz_t
>::
Max
());
// convert maximal f8_fnuz_t to float and check if equal to fp8 max
ASSERT_NEAR
(
max_f8_t_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_fnuz_t
>
(
max_f8_t_float
)),
abs_tol
);
// XXX: FNUZ f8_convert_rne behavior is inconsistent.
// Clipping large values to fp8 max (saturation to finite) contradicts converting inf float to
// fp8 qNAN (no saturation).
// convert maximal float to fp8 and back, check if clipped to fp8 max
ASSERT_NEAR
(
max_f8_t_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_fnuz_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
abs_tol
);
// convert inf float to f8_t and check if it is qNan
ASSERT_NEAR
(
type_convert
<
f8_t
>
(
0x80
),
f8_convert_rne
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
// convert inf float to f8_
fnuz_
t and check if it is qNan
ASSERT_NEAR
(
ck
::
NumericLimits
<
f8_fnuz_t
>::
QuietNaN
(
),
f8_convert_rne
<
f8_
fnuz_
t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
abs_tol
);
// positive norm float value to fp8 and back, check if holds
float
pos_float
=
0.017578125
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
pos_float
)),
abs_tol
);
// negative norm float value to fp8 and back, check if holds
float
neg_float
=
-
0.015625
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
neg_float
)),
abs_tol
);
// positive subnorm float value to fp8 and back, check if holds
pos_float
=
0.00390625
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
pos_float
)),
abs_tol
);
// negative subnorm float value to fp8 and back, check if holds
neg_float
=
-
0.001953125
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
neg_float
)),
abs_tol
);
}
TEST
(
FP8
,
ConvertFP32Stochastic
)
TEST
(
FP8
FNUZ
,
ConvertFP32Stochastic
)
{
// fix the tolerance value
float
abs_tol
=
1e-6
;
// convert 0 float to fp8 and back, check if holds
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_t
>
(
0.0
f
)),
abs_tol
);
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
0.0
f
)),
abs_tol
);
// convert minimal float to fp8 and back, check if holds
ASSERT_NEAR
(
std
::
numeric_limits
<
float
>::
min
(),
type_convert
<
float
>
(
f8_convert_sr
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
min
())),
type_convert
<
float
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
std
::
numeric_limits
<
float
>::
min
())),
abs_tol
);
// convert maximal f8_t to float and check if equal to 240.0
ASSERT_NEAR
(
240.0
f
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_t
>
(
240.0
f
)),
abs_tol
);
// convert maximal float to fp8 and back, check if clipped to 240.0
ASSERT_NEAR
(
240.0
f
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
const
auto
max_f8_t_float
=
type_convert
<
float
>
(
ck
::
NumericLimits
<
f8_fnuz_t
>::
Max
());
// convert maximal f8_fnuz_t to float and check if equal to fp8 max
ASSERT_NEAR
(
max_f8_t_float
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_fnuz_t
>
(
max_f8_t_float
)),
abs_tol
);
// convert maximal float to fp8 and back, check if clipped to fp8 max
ASSERT_NEAR
(
max_f8_t_float
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_fnuz_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
abs_tol
);
// convert inf float to f8_t and check if it is qNan
ASSERT_NEAR
(
type_convert
<
f8_t
>
(
0x80
),
f8_convert_sr
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
// convert inf float to f8_
fnuz_
t and check if it is qNan
ASSERT_NEAR
(
ck
::
NumericLimits
<
f8_fnuz_t
>::
QuietNaN
(
),
f8_convert_sr
<
f8_
fnuz_
t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
abs_tol
);
// positive norm float value to fp8 and back, check if holds
float
pos_float
=
0.017578125
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
pos_float
)),
abs_tol
);
// negative norm float value to fp8 and back, check if holds
float
neg_float
=
-
0.015625
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
neg_float
)),
abs_tol
);
// positive subnorm float value to fp8 and back, check if holds
pos_float
=
0.00390625
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
pos_float
)),
abs_tol
);
// negative subnorm float value to fp8 and back, check if holds
neg_float
=
-
0.001953125
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
neg_float
)),
abs_tol
);
}
TEST
(
FP8
,
ConvertFP16Nearest
)
TEST
(
FP8
FNUZ
,
ConvertFP16Nearest
)
{
// fix the tolerance value
float
abs_tol
=
1e-3
;
// convert 0 fp16 to fp8 and back, check if holds
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_t
>
(
half_t
{
0.0
})),
abs_tol
);
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
half_t
{
0.0
})),
abs_tol
);
// convert minimal fp16 to fp8 and back, check if holds
ASSERT_NEAR
(
ck
::
NumericLimits
<
half_t
>::
Min
(),
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
abs_tol
);
// convert maximal f8_t to fp16 and check if equal to 240.0
ASSERT_NEAR
(
half_t
{
240.0
},
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_t
>
(
half_t
{
240.0
})),
abs_tol
);
// convert maximal fp16 to fp8 and back, check if clipped to 240.0
ASSERT_NEAR
(
half_t
{
240.0
},
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
const
auto
max_f8_t_half
=
type_convert
<
half_t
>
(
ck
::
NumericLimits
<
f8_fnuz_t
>::
Max
());
// convert maximal f8_fnuz_t to fp16 and check if equal to fp8 max
ASSERT_NEAR
(
max_f8_t_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_fnuz_t
>
(
max_f8_t_half
)),
abs_tol
);
// convert maximal fp16 to fp8 and back, check if clipped to fp8 max
ASSERT_NEAR
(
max_f8_t_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_fnuz_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
abs_tol
);
// convert QuietNaN fp16 to f8_t and check if it is QuietNaN
ASSERT_NEAR
(
type_convert
<
f8_t
>
(
0x80
),
f8_convert_rne
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
// convert QuietNaN fp16 to f8_
fnuz_
t and check if it is QuietNaN
ASSERT_NEAR
(
ck
::
NumericLimits
<
f8_fnuz_t
>::
QuietNaN
(
),
f8_convert_rne
<
f8_
fnuz_
t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
abs_tol
);
// positive norm fp16 value to fp8 and back, check if holds
half_t
pos_half
=
half_t
{
0.017578125
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
pos_half
)),
abs_tol
);
// negative norm fp16 value to fp8 and back, check if holds
half_t
neg_half
=
half_t
{
-
0.015625
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
neg_half
)),
abs_tol
);
// positive subnorm fp16 value to fp8 and back, check if holds
pos_half
=
half_t
{
0.00390625
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
pos_half
)),
abs_tol
);
// negative subnorm fp16 value to fp8 and back, check if holds
neg_half
=
half_t
{
-
0.001953125
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_rne
<
f8_
fnuz_
t
>
(
neg_half
)),
abs_tol
);
}
TEST
(
FP8
,
ConvertFP16Stochastic
)
TEST
(
FP8
FNUZ
,
ConvertFP16Stochastic
)
{
// fix the tolerance value
float
abs_tol
=
1e-3
;
// convert 0 fp16 to fp8 and back, check if holds
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_t
>
(
half_t
{
0.0
})),
abs_tol
);
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
half_t
{
0.0
})),
abs_tol
);
// convert minimal fp16 to fp8 and back, check if holds
ASSERT_NEAR
(
ck
::
NumericLimits
<
half_t
>::
Min
(),
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
abs_tol
);
// convert maximal f8_t to fp16 and check if equal to 240.0
ASSERT_NEAR
(
half_t
{
240.0
},
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_t
>
(
half_t
{
240.0
})),
abs_tol
);
// convert maximal fp16 to fp8 and back, check if clipped to 240.0
ASSERT_NEAR
(
half_t
{
240.0
},
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
const
auto
max_f8_t_half
=
type_convert
<
half_t
>
(
ck
::
NumericLimits
<
f8_fnuz_t
>::
Max
());
// convert maximal f8_fnuz_t to fp16 and check if equal to fp8 max
ASSERT_NEAR
(
max_f8_t_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_fnuz_t
>
(
max_f8_t_half
)),
abs_tol
);
// convert maximal fp16 to fp8 and back, check if clipped to fp8 max
ASSERT_NEAR
(
max_f8_t_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_fnuz_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
abs_tol
);
// convert QuietNaN fp16 to f8_t and check if it is QuietNaN
ASSERT_NEAR
(
type_convert
<
f8_t
>
(
0x80
),
f8_convert_sr
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
// convert QuietNaN fp16 to f8_
fnuz_
t and check if it is QuietNaN
ASSERT_NEAR
(
ck
::
NumericLimits
<
f8_fnuz_t
>::
QuietNaN
(
),
f8_convert_sr
<
f8_
fnuz_
t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
abs_tol
);
// positive norm fp16 value to fp8 and back, check if holds
half_t
pos_half
=
half_t
{
0.017578125
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
pos_half
)),
abs_tol
);
// negative norm fp16 value to fp8 and back, check if holds
half_t
neg_half
=
half_t
{
-
0.015625
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
neg_half
)),
abs_tol
);
// positive subnorm fp16 value to fp8 and back, check if holds
pos_half
=
half_t
{
0.00390625
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
pos_half
)),
abs_tol
);
// negative subnorm fp16 value to fp8 and back, check if holds
neg_half
=
half_t
{
-
0.001953125
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8_convert_sr
<
f8_
fnuz_
t
>
(
neg_half
)),
abs_tol
);
}
test/data_type/test_fp8_ocp.cpp
0 → 100644
View file @
c8c016dd
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using
ck
::
f8_convert_rne
;
using
ck
::
f8_convert_sr
;
using
ck
::
f8_ocp_t
;
using
ck
::
half_t
;
using
ck
::
type_convert
;
// Verify the numeric-limit constants declared for OCP FP8 (E4M3) against
// their raw bit patterns.
TEST(FP8OCP, NumericLimits)
{
    // smallest positive normal: 0b00001000 = 2^-6
    EXPECT_EQ(ck::NumericLimits<f8_ocp_t>::Min(), type_convert<f8_ocp_t>(0x08));
    // largest finite: 0b01111110 = 448
    EXPECT_EQ(ck::NumericLimits<f8_ocp_t>::Max(), type_convert<f8_ocp_t>(0x7E));
    // most negative finite: 0b11111110 = -448
    EXPECT_EQ(ck::NumericLimits<f8_ocp_t>::Lowest(), type_convert<f8_ocp_t>(0xFE));
    // quiet NaN encoding: 0b01111111 (compare raw bits, since NaN != NaN)
    EXPECT_EQ(ck::NumericLimits<f8_ocp_t>::QuietNaN().data,
              type_convert<f8_ocp_t>(0x7F).data);
    // NaN must not compare equal to itself
    EXPECT_FALSE(ck::NumericLimits<f8_ocp_t>::QuietNaN() ==
                 ck::NumericLimits<f8_ocp_t>::QuietNaN());
}
// Round-trip float -> f8_ocp_t -> float with round-to-nearest-even conversion.
TEST(FP8OCP, ConvertFP32Nearest)
{
    // fix the tolerance value
    float abs_tol = 1e-6;

    // convert 0 float to fp8 and back, check if holds
    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<f8_ocp_t>(0.0f)), 0.0f);

    // convert minimal float to fp8 and back, check if holds
    ASSERT_NEAR(std::numeric_limits<float>::min(),
                type_convert<float>(f8_convert_rne<f8_ocp_t>(std::numeric_limits<float>::min())),
                abs_tol);

    const auto max_f8_t_float = type_convert<float>(ck::NumericLimits<f8_ocp_t>::Max());

    // convert maximal f8_ocp_t to float and check if equal to fp8 max
    ASSERT_NEAR(
        max_f8_t_float, type_convert<float>(f8_convert_rne<f8_ocp_t>(max_f8_t_float)), 0.0f);

    // convert maximal float to fp8 and back, check if clipped to fp8 max (saturation to finite)
    ASSERT_NEAR(max_f8_t_float,
                type_convert<float>(f8_convert_rne<f8_ocp_t>(std::numeric_limits<float>::max())),
                0.0f);

    // convert float infinity to f8_ocp_t and check if it is max value (saturation to finite)
    ASSERT_EQ(ck::NumericLimits<f8_ocp_t>::Max(),
              f8_convert_rne<f8_ocp_t>(std::numeric_limits<float>::infinity()));

    // positive norm float value to fp8 and back, check if holds
    float pos_float = 0.017578125f;
    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_ocp_t>(pos_float)), abs_tol);

    // smallest normal fp8 value to fp8 and back, check if holds
    float neg_float = -0.015625f; //-2^-6
    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_ocp_t>(neg_float)), 0.0f);

    // positive subnorm float value to fp8 and back, check if holds
    pos_float = 0.00390625f;
    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_ocp_t>(pos_float)), abs_tol);

    // min subnorm fp8 value to fp8 and back, check if holds
    neg_float = -0.001953125f; //-2^-9
    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_ocp_t>(neg_float)), 0.0f);

    // smaller than min subnorm fp8 value to fp8 must round to zero
    auto less_than_min_subnorm = 0.0009765625f; // 2^-10
    ASSERT_EQ(0.0f, type_convert<float>(f8_convert_rne<f8_ocp_t>(less_than_min_subnorm)));

    // convert quiet NaN to f8_ocp_t and check if it is quiet NaN
    auto f8_nan = f8_convert_rne<f8_ocp_t>(std::numeric_limits<float>::quiet_NaN());
    ASSERT_TRUE((f8_nan.data & 0x7f) == 0x7f);
}
// Round-trip float -> f8_ocp_t -> float with stochastic-rounding conversion.
// Values outside the representable range may alternate between the two
// neighbouring representable values, hence the looser tolerances below.
TEST(FP8OCP, ConvertFP32Stochastic)
{
    // fix the tolerance value
    float abs_tol = 1e-6;

    // convert 0 float to fp8 and back, check if holds
    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_sr<f8_ocp_t>(0.0f)), 0.0f);

    // convert minimal float to fp8 and back, check if holds
    ASSERT_NEAR(std::numeric_limits<float>::min(),
                type_convert<float>(f8_convert_sr<f8_ocp_t>(std::numeric_limits<float>::min())),
                abs_tol);

    const auto max_f8_t_float = type_convert<float>(ck::NumericLimits<f8_ocp_t>::Max());

    // convert maximal f8_ocp_t to float and check if equal to fp8 max
    ASSERT_NEAR(
        max_f8_t_float, type_convert<float>(f8_convert_sr<f8_ocp_t>(max_f8_t_float)), 0.0f);

    // convert maximal float to fp8 and back, check if clipped to fp8 max (saturation to finite)
    ASSERT_NEAR(max_f8_t_float,
                type_convert<float>(f8_convert_sr<f8_ocp_t>(std::numeric_limits<float>::max())),
                0.0f);

    // convert float infinity to f8_ocp_t and check if it is max value (saturation to finite)
    ASSERT_EQ(ck::NumericLimits<f8_ocp_t>::Max(),
              f8_convert_sr<f8_ocp_t>(std::numeric_limits<float>::infinity()));

    // positive norm float value to fp8 and back, check if holds
    float pos_float = 0.017578125f;
    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_sr<f8_ocp_t>(pos_float)), abs_tol);

    // smallest normal fp8 value to fp8 and back, check if holds
    float neg_float = -0.015625f; //-2^-6
    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_sr<f8_ocp_t>(neg_float)), 0.0f);

    // positive subnorm float value to fp8 and back, check if holds
    pos_float = 0.00390625f;
    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_sr<f8_ocp_t>(pos_float)), abs_tol);

    // min subnorm fp8 value to fp8 and back, check if holds
    constexpr auto min_subnorm_fp8 = -0.001953125f; //-2^-9
    ASSERT_NEAR(
        min_subnorm_fp8, type_convert<float>(f8_convert_sr<f8_ocp_t>(min_subnorm_fp8)), 0.0f);

    // smaller than min subnorm fp8 value to fp8 alternates between 0 and 2^-9
    auto less_than_min_subnorm = 0.0009765625f; // 2^-10
    ASSERT_NEAR(0.0f,
                type_convert<float>(f8_convert_sr<f8_ocp_t>(less_than_min_subnorm)),
                0.001953125f);

    // convert quiet NaN to f8_ocp_t and check if it is quiet NaN
    auto f8_nan = f8_convert_sr<f8_ocp_t>(std::numeric_limits<float>::quiet_NaN());
    ASSERT_TRUE((f8_nan.data & 0x7f) == 0x7f);
}
// Round-trip half_t -> f8_ocp_t -> half_t with round-to-nearest-even conversion.
TEST(FP8OCP, ConvertFP16Nearest)
{
    // fix the tolerance value
    constexpr half_t half_t_tol  = 1e-3;
    constexpr half_t half_t_zero = 0.0;

    // convert 0 half_t to fp8 and back, check if holds
    ASSERT_NEAR(
        half_t_zero, type_convert<half_t>(f8_convert_rne<f8_ocp_t>(half_t_zero)), half_t_zero);

    // convert minimal half_t to fp8 and back, check if holds
    ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
                type_convert<half_t>(f8_convert_rne<f8_ocp_t>(ck::NumericLimits<half_t>::Min())),
                half_t_tol);

    const auto max_f8_t_half_t = type_convert<half_t>(ck::NumericLimits<f8_ocp_t>::Max());

    // convert maximal f8_ocp_t to half_t and check if equal to fp8 max
    ASSERT_NEAR(max_f8_t_half_t,
                type_convert<half_t>(f8_convert_rne<f8_ocp_t>(max_f8_t_half_t)),
                half_t_zero);

    // convert maximal half_t to fp8 and back, check if clipped to fp8 max (saturation to finite)
    ASSERT_NEAR(max_f8_t_half_t,
                type_convert<half_t>(f8_convert_rne<f8_ocp_t>(ck::NumericLimits<half_t>::Max())),
                half_t_zero);

    // convert half_t infinity to f8_ocp_t and check if it is max value (saturation to finite)
    ASSERT_EQ(ck::NumericLimits<f8_ocp_t>::Max(),
              f8_convert_rne<f8_ocp_t>(
                  type_convert<half_t>(std::numeric_limits<float>::infinity())));

    // positive norm half_t value to fp8 and back, check if holds
    half_t pos_half_t{0.017578125f};
    ASSERT_NEAR(
        pos_half_t, type_convert<half_t>(f8_convert_rne<f8_ocp_t>(pos_half_t)), half_t_tol);

    // smallest normal fp8 value to fp8 and back, check if holds
    half_t neg_half_t{-0.015625f}; //-2^-6
    ASSERT_NEAR(
        neg_half_t, type_convert<half_t>(f8_convert_rne<f8_ocp_t>(neg_half_t)), half_t_zero);

    // positive subnorm half_t value to fp8 and back, check if holds
    pos_half_t = half_t{0.00390625f};
    ASSERT_NEAR(
        pos_half_t, type_convert<half_t>(f8_convert_rne<f8_ocp_t>(pos_half_t)), half_t_tol);

    // min subnorm fp8 value to fp8 and back, check if holds
    neg_half_t = half_t{-0.001953125f}; //-2^-9
    ASSERT_NEAR(
        neg_half_t, type_convert<half_t>(f8_convert_rne<f8_ocp_t>(neg_half_t)), half_t_zero);

    // smaller than min subnorm fp8 value to fp8 must round to zero
    auto less_than_min_subnorm = half_t{0.0009765625f}; // 2^-10
    ASSERT_EQ(half_t_zero, type_convert<half_t>(f8_convert_rne<f8_ocp_t>(less_than_min_subnorm)));

    // convert quiet NaN to f8_ocp_t and check if it is quiet NaN
    auto f8_nan = f8_convert_rne<f8_ocp_t>(ck::NumericLimits<half_t>::QuietNaN());
    ASSERT_TRUE(ck::fp8_impl::ocp_f8_is_nan(f8_nan.data));
}
// Round-trip half_t -> f8_ocp_t -> half_t with stochastic-rounding conversion.
// Sub-representable magnitudes may land on either neighbouring value, so the
// corresponding tolerances are one fp8 subnormal step (2^-9) wide.
TEST(FP8OCP, ConvertFP16Stochastic)
{
    // fix the tolerance value
    constexpr half_t half_t_tol    = 1e-3;
    constexpr half_t half_t_zero   = 0.0;
    constexpr auto min_subnorm_fp8 = 0.001953125f; // 2^-9

    // convert 0 half_t to fp8 and back, check if holds
    ASSERT_NEAR(
        half_t_zero, type_convert<half_t>(f8_convert_sr<f8_ocp_t>(half_t_zero)), half_t_zero);

    // convert minimal half_t (6.103515625e-05) to fp8 and back
    // alternates between 0 and 2^-9 (0.001953125)
    ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
                type_convert<half_t>(f8_convert_sr<f8_ocp_t>(ck::NumericLimits<half_t>::Min())),
                type_convert<half_t>(min_subnorm_fp8));

    const auto max_f8_t_half_t = type_convert<half_t>(ck::NumericLimits<f8_ocp_t>::Max());

    // convert maximal f8_ocp_t to half_t and check if equal to fp8 max
    ASSERT_NEAR(max_f8_t_half_t,
                type_convert<half_t>(f8_convert_sr<f8_ocp_t>(max_f8_t_half_t)),
                half_t_zero);

    // convert maximal half_t to fp8 and back, check if clipped to fp8 max (saturation to finite)
    ASSERT_NEAR(max_f8_t_half_t,
                type_convert<half_t>(f8_convert_sr<f8_ocp_t>(ck::NumericLimits<half_t>::Max())),
                half_t_zero);

    // convert half_t infinity to f8_ocp_t and check if it is max value (saturation to finite)
    ASSERT_EQ(ck::NumericLimits<f8_ocp_t>::Max(),
              f8_convert_sr<f8_ocp_t>(
                  type_convert<half_t>(std::numeric_limits<float>::infinity())));

    // positive norm half_t value to fp8 and back, check if holds
    half_t pos_half_t{0.017578125f};
    ASSERT_NEAR(
        pos_half_t, type_convert<half_t>(f8_convert_sr<f8_ocp_t>(pos_half_t)), half_t_tol);

    // smallest normal fp8 value to fp8 and back, check if holds
    half_t neg_half_t{-0.015625f}; //-2^-6
    ASSERT_NEAR(
        neg_half_t, type_convert<half_t>(f8_convert_sr<f8_ocp_t>(neg_half_t)), half_t_zero);

    // positive subnorm half_t value to fp8 and back, check if holds
    pos_half_t = half_t{0.00390625f};
    ASSERT_NEAR(
        pos_half_t, type_convert<half_t>(f8_convert_sr<f8_ocp_t>(pos_half_t)), half_t_tol);

    // min subnorm fp8 value to fp8 and back, check if holds
    neg_half_t = half_t{-min_subnorm_fp8}; //-2^-9
    ASSERT_NEAR(
        neg_half_t, type_convert<half_t>(f8_convert_sr<f8_ocp_t>(neg_half_t)), half_t_zero);

    // smaller than min subnorm fp8 value to fp8 alternates between 0 and 2^-9
    auto less_than_min_subnorm = half_t{0.0009765625f}; // 2^-10
    ASSERT_NEAR(type_convert<float>(half_t_zero),
                type_convert<float>(
                    type_convert<half_t>(f8_convert_sr<f8_ocp_t>(less_than_min_subnorm))),
                min_subnorm_fp8);

    // convert quiet NaN to f8_ocp_t and check if it is quiet NaN
    auto f8_nan = f8_convert_sr<f8_ocp_t>(ck::NumericLimits<half_t>::QuietNaN());
    ASSERT_TRUE(ck::fp8_impl::ocp_f8_is_nan(f8_nan.data));
}
test/grouped_convnd_bwd_data/CMakeLists.txt
View file @
c8c016dd
add_gtest_executable
(
test_grouped_convnd_bwd_data test_grouped_convnd_bwd_data_xdl
_wmma
.cpp
)
add_gtest_executable
(
test_grouped_convnd_bwd_data
_xdl
test_grouped_convnd_bwd_data_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_grouped_convnd_bwd_data PRIVATE utility device_grouped_conv2d_bwd_data_instance device_grouped_conv3d_bwd_data_instance
)
target_link_libraries
(
test_grouped_convnd_bwd_data_xdl PRIVATE utility device_grouped_conv2d_bwd_data_instance device_grouped_conv3d_bwd_data_instance
)
endif
()
add_gtest_executable
(
test_grouped_convnd_bwd_data_wmma test_grouped_convnd_bwd_data_wmma.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_grouped_convnd_bwd_data_wmma PRIVATE utility device_grouped_conv2d_bwd_data_instance device_grouped_conv3d_bwd_data_instance
)
endif
()
add_gtest_executable
(
test_grouped_convnd_bwd_data_interface_xdl test_grouped_convnd_bwd_data_interface_xdl.cpp
)
if
(
result EQUAL 0
)
...
...
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_wmma.cpp
0 → 100644
View file @
c8c016dd
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/profile_grouped_conv_bwd_data_impl.hpp"
// Typed gtest fixture for grouped N-D backward-data convolution (WMMA path).
// Tuple = (DataType, OutLayout, WeiLayout, InLayout); test cases fill
// conv_params and then invoke Run<NDimSpatial>() to profile-and-verify each.
template <typename Tuple>
class TestGroupedConvndBwdDataWmma : public ::testing::Test
{
    protected:
    using DataType  = std::tuple_element_t<0, Tuple>;
    using OutLayout = std::tuple_element_t<1, Tuple>;
    using WeiLayout = std::tuple_element_t<2, Tuple>;
    using InLayout  = std::tuple_element_t<3, Tuple>;

    // problem descriptors populated by each TYPED_TEST before calling Run()
    std::vector<ck::utils::conv::ConvParam> conv_params;

    template <ck::index_t NDimSpatial>
    void Run()
    {
        // a test case must have registered at least one problem
        EXPECT_FALSE(conv_params.empty());

        bool pass = true;
        for(auto& param : conv_params)
        {
            pass = pass && ck::profiler::profile_grouped_conv_bwd_data_impl<NDimSpatial,
                                                                            OutLayout,
                                                                            WeiLayout,
                                                                            InLayout,
                                                                            DataType,
                                                                            DataType,
                                                                            DataType>(
                               true,  // do_verification
                               1,     // init_method: integer value
                               false, // do_log
                               false, // time_kernel
                               param);
        }
        EXPECT_TRUE(pass);
    }
};
using namespace ck::tensor_layout::convolution;

// WMMA path covers half and int8 for both grouped-first and grouped-last layouts
using KernelTypes2d = ::testing::Types<std::tuple<ck::half_t, GNHWK, GKYXC, GNHWC>,
                                       std::tuple<int8_t, GNHWK, GKYXC, GNHWC>,
                                       std::tuple<ck::half_t, NHWGK, GKYXC, NHWGC>,
                                       std::tuple<int8_t, NHWGK, GKYXC, NHWGC>>;

using KernelTypes3d = ::testing::Types<std::tuple<ck::half_t, GNDHWK, GKZYXC, GNDHWC>,
                                       std::tuple<int8_t, GNDHWK, GKZYXC, GNDHWC>,
                                       std::tuple<ck::half_t, NDHWGK, GKZYXC, NDHWGC>,
                                       std::tuple<int8_t, NDHWGK, GKZYXC, NDHWGC>>;

// dimension-specific fixtures so 2d/3d suites can be instantiated separately
template <typename Tuple>
class TestGroupedConvndBwdDataWmma2d : public TestGroupedConvndBwdDataWmma<Tuple>
{
};

template <typename Tuple>
class TestGroupedConvndBwdDataWmma3d : public TestGroupedConvndBwdDataWmma<Tuple>
{
};

TYPED_TEST_SUITE(TestGroupedConvndBwdDataWmma2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndBwdDataWmma3d, KernelTypes3d);
// 2-D grouped conv bwd-data cases. ConvParam fields:
// {ndim, G, N, K, C, filter, input, strides, dilations, left_pads, right_pads}
TYPED_TEST(TestGroupedConvndBwdDataWmma2d, Test2D)
{
    this->conv_params.clear();
    this->conv_params.push_back(
        {2, 2, 4, 192, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->conv_params.push_back(
        {2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->conv_params.push_back(
        {2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
    this->conv_params.push_back(
        {2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
    this->conv_params.push_back({2, 1, 1, 1, 32, {8, 8}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->conv_params.push_back({2, 1, 1, 64, 3, {8, 8}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->conv_params.push_back({2, 1, 1, 1, 1, {8, 8}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->template Run<2>();
}
// 3-D grouped conv bwd-data cases. ConvParam fields:
// {ndim, G, N, K, C, filter, input, strides, dilations, left_pads, right_pads}
TYPED_TEST(TestGroupedConvndBwdDataWmma3d, Test3D)
{
    this->conv_params.clear();
    this->conv_params.push_back(
        {3, 2, 16, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    this->conv_params.push_back(
        {3, 2, 2, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->conv_params.push_back(
        {3, 2, 32, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    this->conv_params.push_back(
        {3, 1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->conv_params.push_back(
        {3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->conv_params.push_back(
        {3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->template Run<3>();
}
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_xdl
_wmma
.cpp
→
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_xdl.cpp
View file @
c8c016dd
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
...
...
@@ -12,7 +12,7 @@
#include "profiler/profile_grouped_conv_bwd_data_impl.hpp"
template
<
typename
Tuple
>
class
TestGroupedConvndBwdData
:
public
::
testing
::
Test
class
TestGroupedConvndBwdData
Xdl
:
public
::
testing
::
Test
{
protected:
using
DataType
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
...
...
@@ -51,35 +51,31 @@ using namespace ck::tensor_layout::convolution;
using
KernelTypes2d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
GNHWK
,
GKYXC
,
GNHWC
>
,
std
::
tuple
<
ck
::
half_t
,
GNHWK
,
GKYXC
,
GNHWC
>
,
std
::
tuple
<
ck
::
bhalf_t
,
GNHWK
,
GKYXC
,
GNHWC
>
,
std
::
tuple
<
int8_t
,
GNHWK
,
GKYXC
,
GNHWC
>
,
std
::
tuple
<
float
,
NHWGK
,
GKYXC
,
NHWGC
>
,
std
::
tuple
<
ck
::
half_t
,
NHWGK
,
GKYXC
,
NHWGC
>
,
std
::
tuple
<
ck
::
bhalf_t
,
NHWGK
,
GKYXC
,
NHWGC
>
,
std
::
tuple
<
int8_t
,
NHWGK
,
GKYXC
,
NHWGC
>>
;
std
::
tuple
<
ck
::
bhalf_t
,
NHWGK
,
GKYXC
,
NHWGC
>>
;
using
KernelTypes3d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
GNDHWK
,
GKZYXC
,
GNDHWC
>
,
std
::
tuple
<
ck
::
half_t
,
GNDHWK
,
GKZYXC
,
GNDHWC
>
,
std
::
tuple
<
ck
::
bhalf_t
,
GNDHWK
,
GKZYXC
,
GNDHWC
>
,
std
::
tuple
<
int8_t
,
GNDHWK
,
GKZYXC
,
GNDHWC
>
,
std
::
tuple
<
float
,
NDHWGK
,
GKZYXC
,
NDHWGC
>
,
std
::
tuple
<
ck
::
half_t
,
NDHWGK
,
GKZYXC
,
NDHWGC
>
,
std
::
tuple
<
ck
::
bhalf_t
,
NDHWGK
,
GKZYXC
,
NDHWGC
>
,
std
::
tuple
<
int8_t
,
NDHWGK
,
GKZYXC
,
NDHWGC
>>
;
std
::
tuple
<
ck
::
bhalf_t
,
NDHWGK
,
GKZYXC
,
NDHWGC
>>
;
template
<
typename
Tuple
>
class
TestGroupedConvndBwdData2d
:
public
TestGroupedConvndBwdData
<
Tuple
>
class
TestGroupedConvndBwdData
Xdl
2d
:
public
TestGroupedConvndBwdData
Xdl
<
Tuple
>
{
};
template
<
typename
Tuple
>
class
TestGroupedConvndBwdData3d
:
public
TestGroupedConvndBwdData
<
Tuple
>
class
TestGroupedConvndBwdData
Xdl
3d
:
public
TestGroupedConvndBwdData
Xdl
<
Tuple
>
{
};
TYPED_TEST_SUITE
(
TestGroupedConvndBwdData2d
,
KernelTypes2d
);
TYPED_TEST_SUITE
(
TestGroupedConvndBwdData3d
,
KernelTypes3d
);
TYPED_TEST_SUITE
(
TestGroupedConvndBwdData
Xdl
2d
,
KernelTypes2d
);
TYPED_TEST_SUITE
(
TestGroupedConvndBwdData
Xdl
3d
,
KernelTypes3d
);
TYPED_TEST
(
TestGroupedConvndBwdData2d
,
Test2D
)
TYPED_TEST
(
TestGroupedConvndBwdData
Xdl
2d
,
Test2D
)
{
this
->
conv_params
.
clear
();
...
...
@@ -94,10 +90,13 @@ TYPED_TEST(TestGroupedConvndBwdData2d, Test2D)
this
->
conv_params
.
push_back
({
2
,
1
,
1
,
1
,
32
,
{
8
,
8
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
conv_params
.
push_back
({
2
,
1
,
1
,
64
,
3
,
{
8
,
8
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
conv_params
.
push_back
({
2
,
1
,
1
,
1
,
1
,
{
8
,
8
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
// SplitN case
this
->
conv_params
.
push_back
(
{
2
,
1
,
128
,
4
,
192
,
{
2
,
2
},
{
224
,
224
},
{
224
,
224
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
template
Run
<
2
>();
}
TYPED_TEST
(
TestGroupedConvndBwdData3d
,
Test3D
)
TYPED_TEST
(
TestGroupedConvndBwdData
Xdl
3d
,
Test3D
)
{
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
(
...
...
@@ -112,5 +111,17 @@ TYPED_TEST(TestGroupedConvndBwdData3d, Test3D)
{
3
,
1
,
1
,
64
,
3
,
{
3
,
3
,
3
},
{
32
,
32
,
32
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
this
->
conv_params
.
push_back
(
{
3
,
1
,
1
,
1
,
1
,
{
3
,
3
,
3
},
{
32
,
32
,
32
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
// SplitN case
this
->
conv_params
.
push_back
({
3
,
1
,
128
,
4
,
192
,
{
2
,
2
,
2
},
{
2
,
224
,
224
},
{
1
,
224
,
224
},
{
1
,
1
,
1
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}});
this
->
template
Run
<
3
>();
}
test/grouped_gemm/CMakeLists.txt
View file @
c8c016dd
...
...
@@ -6,12 +6,6 @@ if(result EQUAL 0)
add_dependencies
(
test_grouped_gemm test_grouped_gemm_splitk
)
endif
()
add_gtest_executable
(
test_grouped_gemm_two_stage_splitk test_grouped_gemm_two_stage_multiple_d_splitk_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_grouped_gemm_two_stage_splitk PRIVATE utility device_grouped_gemm_instance
)
add_dependencies
(
test_grouped_gemm test_grouped_gemm_two_stage_splitk
)
endif
()
add_gtest_executable
(
test_grouped_gemm_interface test_grouped_gemm_interface_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_grouped_gemm_interface PRIVATE utility device_grouped_gemm_instance
)
...
...
test/grouped_gemm/test_grouped_gemm_splitk_xdl.cpp
View file @
c8c016dd
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include <vector>
...
...
@@ -11,24 +11,34 @@
#include "test_grouped_gemm_util.hpp"
using
F16
=
ck
::
half_t
;
using
BF16
=
ck
::
bhalf_t
;
using
F8
=
ck
::
f8_t
;
using
I8
=
int8_t
;
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck
::
tensor_layout
::
gemm
::
ColumnMajor
;
using
RRR_F16_F16_F16
=
ck
::
test
::
TestGroupedGemm
<
std
::
tuple
<
Row
,
Row
,
Row
,
F16
,
F16
,
F16
>>
;
using
RCR_F16_F16_F16
=
ck
::
test
::
TestGroupedGemm
<
std
::
tuple
<
Row
,
Col
,
Row
,
F16
,
F16
,
F16
>>
;
using
RRR_F16_F16_F16_LargeK
=
ck
::
test
::
TestGroupedGemm
<
std
::
tuple
<
Row
,
Row
,
Row
,
F16
,
F16
,
F16
>>
;
using
RCR_F16_F16_F16_LargeK
=
ck
::
test
::
TestGroupedGemm
<
std
::
tuple
<
Row
,
Col
,
Row
,
F16
,
F16
,
F16
>>
;
const
std
::
vector
<
int
>
KBATCH
{
1
,
2
,
3
,
5
,
8
};
INSTANTIATE_TEST_SUITE_P
(
TestGroupedGemm_splitk_MK_KN
,
RRR_F16_F16_F16
,
testing
::
ValuesIn
(
KBATCH
));
INSTANTIATE_TEST_SUITE_P
(
TestGroupedGemm_splitk_MK_NK
,
RCR_F16_F16_F16
,
testing
::
ValuesIn
(
KBATCH
));
INSTANTIATE_TEST_SUITE_P
(
TestGroupedGemm_splitk_LargeK_MK_KN
,
RRR_F16_F16_F16_LargeK
,
testing
::
Values
(
32
,
64
));
INSTANTIATE_TEST_SUITE_P
(
TestGroupedGemm_splitk_LargeK_MK_NK
,
RCR_F16_F16_F16_LargeK
,
testing
::
Values
(
32
,
64
));
template
<
typename
Tuple
>
class
TestGroupedGemm
:
public
ck
::
test
::
TestGroupedGemm
<
Tuple
>
{
};
// clang-format off
using
KernelTypes
=
::
testing
::
Types
<
std
::
tuple
<
Row
,
Row
,
Row
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
Row
,
Col
,
Row
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
Col
,
Row
,
Row
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
Col
,
Col
,
Row
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
Row
,
Row
,
Row
,
BF16
,
BF16
,
BF16
>
,
std
::
tuple
<
Row
,
Col
,
Row
,
BF16
,
BF16
,
BF16
>
,
std
::
tuple
<
Col
,
Row
,
Row
,
BF16
,
BF16
,
BF16
>
,
std
::
tuple
<
Row
,
Row
,
Row
,
BF16
,
I8
,
BF16
>
,
std
::
tuple
<
Row
,
Col
,
Row
,
BF16
,
I8
,
BF16
>
,
std
::
tuple
<
Row
,
Row
,
Row
,
F16
,
F8
,
F16
>
,
std
::
tuple
<
Row
,
Row
,
Row
,
F8
,
F16
,
F16
>
>
;
// clang-format on
TYPED_TEST_SUITE
(
TestGroupedGemm
,
KernelTypes
);
#include "test_grouped_gemm_ut_cases.inc"
test/grouped_gemm/test_grouped_gemm_ut_cases.inc
View file @
c8c016dd
#pragma once
T
EST_P
(
RRR_F16_F16_F16
,
TinyCases
)
T
YPED_TEST
(
TestGroupedGemm
,
TinyCases
)
{
const
std
::
vector
<
int
>
Ms
{
0
,
1
};
constexpr
int
N
=
768
;
...
...
@@ -8,14 +8,11 @@ TEST_P(RRR_F16_F16_F16, TinyCases)
const
std
::
vector
<
int
>
Ns
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
Ks
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideAs
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideBs
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
StrideCs
(
Ms
.
size
(),
N
);
this
->
Run
(
Ms
,
Ns
,
Ks
,
StrideAs
,
StrideBs
,
StrideCs
,
this
->
GetParam
()
);
this
->
Run
(
Ms
,
Ns
,
Ks
);
}
T
EST_P
(
RRR_F16_F16_F16
,
SmallCases
)
T
YPED_TEST
(
TestGroupedGemm
,
SmallCases
)
{
const
std
::
vector
<
int
>
Ms
{
2
,
1
,
3
,
4
,
5
,
0
};
constexpr
int
N
=
768
;
...
...
@@ -23,14 +20,11 @@ TEST_P(RRR_F16_F16_F16, SmallCases)
const
std
::
vector
<
int
>
Ns
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
Ks
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideAs
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideBs
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
StrideCs
(
Ms
.
size
(),
N
);
this
->
Run
(
Ms
,
Ns
,
Ks
,
StrideAs
,
StrideBs
,
StrideCs
,
this
->
GetParam
()
);
this
->
Run
(
Ms
,
Ns
,
Ks
);
}
T
EST_P
(
RRR_F16_F16_F16
,
MidCases
)
T
YPED_TEST
(
TestGroupedGemm
,
MidCases
)
{
const
std
::
vector
<
int
>
Ms
{
167
,
183
,
177
,
153
,
139
,
204
};
constexpr
int
N
=
768
;
...
...
@@ -38,14 +32,11 @@ TEST_P(RRR_F16_F16_F16, MidCases)
const
std
::
vector
<
int
>
Ns
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
Ks
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideAs
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideBs
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
StrideCs
(
Ms
.
size
(),
N
);
this
->
Run
(
Ms
,
Ns
,
Ks
,
StrideAs
,
StrideBs
,
StrideCs
,
this
->
GetParam
()
);
this
->
Run
(
Ms
,
Ns
,
Ks
);
}
T
EST_P
(
RRR_F16_F16_F16
,
Regular
)
T
YPED_TEST
(
TestGroupedGemm
,
Regular
)
{
const
std
::
vector
<
int
>
Ms
{
64
,
128
,
256
};
constexpr
int
N
=
768
;
...
...
@@ -53,14 +44,11 @@ TEST_P(RRR_F16_F16_F16, Regular)
const
std
::
vector
<
int
>
Ns
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
Ks
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideAs
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideBs
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
StrideCs
(
Ms
.
size
(),
N
);
this
->
Run
(
Ms
,
Ns
,
Ks
,
StrideAs
,
StrideBs
,
StrideCs
,
this
->
GetParam
()
);
this
->
Run
(
Ms
,
Ns
,
Ks
);
}
T
EST_P
(
RRR_F16_F16_F16
,
MNKPadded
)
T
YPED_TEST
(
TestGroupedGemm
,
MNKPadded
)
{
const
std
::
vector
<
int
>
Ms
{
127
,
150
,
188
,
210
};
constexpr
int
N
=
136
;
...
...
@@ -68,88 +56,11 @@ TEST_P(RRR_F16_F16_F16, MNKPadded)
const
std
::
vector
<
int
>
Ns
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
Ks
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideAs
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideBs
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
StrideCs
(
Ms
.
size
(),
N
);
this
->
Run
(
Ms
,
Ns
,
Ks
,
StrideAs
,
StrideBs
,
StrideCs
,
this
->
GetParam
()
);
this
->
Run
(
Ms
,
Ns
,
Ks
);
}
// Degenerate problem sizes for the RCR fp16 grouped GEMM: one empty group
// (M == 0) and one single-row group, to exercise the empty/minimal paths.
TEST_P(RCR_F16_F16_F16, TinyCases)
{
    constexpr int N = 768;
    constexpr int K = 544;

    const std::vector<int> Ms{0, 1};
    const auto group_count = Ms.size();

    // Every group shares the same N/K; strides match the packed layout
    // used throughout this suite (A and B stride with K, C strides with N).
    const std::vector<int> Ns(group_count, N);
    const std::vector<int> Ks(group_count, K);
    const std::vector<int> StrideAs(group_count, K);
    const std::vector<int> StrideBs(group_count, K);
    const std::vector<int> StrideCs(group_count, N);

    this->Run(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs, this->GetParam());
}
// Small, irregular group sizes (including an empty group mid-list) for the
// RCR fp16 grouped GEMM.
TEST_P(RCR_F16_F16_F16, SmallCases)
{
    const std::vector<int> Ms{2, 1, 3, 4, 5, 0};

    constexpr int N = 768;
    constexpr int K = 544;
    const auto groups = Ms.size();

    // Uniform N/K across groups; packed strides (A/B stride with K,
    // C strides with N), matching the rest of this suite.
    const std::vector<int> Ns(groups, N);
    const std::vector<int> Ks(groups, K);
    const std::vector<int> StrideAs(groups, K);
    const std::vector<int> StrideBs(groups, K);
    const std::vector<int> StrideCs(groups, N);

    this->Run(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs, this->GetParam());
}
// Mid-sized, non-power-of-two group sizes for the RCR fp16 grouped GEMM.
TEST_P(RCR_F16_F16_F16, MidCases)
{
    constexpr int N = 768;
    constexpr int K = 544;

    const std::vector<int> Ms{167, 183, 177, 153, 139, 204};
    const auto group_count = Ms.size();

    // Same N/K for every group; strides follow the suite's packed
    // convention (K for A and B, N for C).
    const std::vector<int> Ns(group_count, N);
    const std::vector<int> Ks(group_count, K);
    const std::vector<int> StrideAs(group_count, K);
    const std::vector<int> StrideBs(group_count, K);
    const std::vector<int> StrideCs(group_count, N);

    this->Run(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs, this->GetParam());
}
// Power-of-two, tile-aligned group sizes for the RCR fp16 grouped GEMM —
// the "happy path" with no padding required.
TEST_P(RCR_F16_F16_F16, Regular)
{
    const std::vector<int> Ms{32, 64, 128, 256};

    constexpr int N = 768;
    constexpr int K = 320;
    const auto groups = Ms.size();

    // Uniform N/K; packed strides as elsewhere in this suite.
    const std::vector<int> Ns(groups, N);
    const std::vector<int> Ks(groups, K);
    const std::vector<int> StrideAs(groups, K);
    const std::vector<int> StrideBs(groups, K);
    const std::vector<int> StrideCs(groups, N);

    this->Run(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs, this->GetParam());
}
// M, N and K are all deliberately non-tile-aligned here, so the kernel's
// padding paths are exercised for the RCR fp16 grouped GEMM.
TEST_P(RCR_F16_F16_F16, MNKPadded)
{
    constexpr int N = 136;
    constexpr int K = 280;

    const std::vector<int> Ms{127, 150, 188, 210};
    const auto group_count = Ms.size();

    // Uniform N/K across groups; packed strides (A/B with K, C with N).
    const std::vector<int> Ns(group_count, N);
    const std::vector<int> Ks(group_count, K);
    const std::vector<int> StrideAs(group_count, K);
    const std::vector<int> StrideBs(group_count, K);
    const std::vector<int> StrideCs(group_count, N);

    this->Run(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs, this->GetParam());
}
TEST_P
(
RRR_F16_F16_F16_LargeK
,
TestLargeKBatch
)
TYPED_TEST
(
TestGroupedGemm
,
TestLargeKBatch
)
{
const
std
::
vector
<
int
>
Ms
{
188
,
210
};
constexpr
int
N
=
768
;
...
...
@@ -157,24 +68,8 @@ TEST_P(RRR_F16_F16_F16_LargeK, TestLargeKBatch)
const
std
::
vector
<
int
>
Ns
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
Ks
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideAs
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideBs
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
StrideCs
(
Ms
.
size
(),
N
);
this
->
Run
(
Ms
,
Ns
,
Ks
,
StrideAs
,
StrideBs
,
StrideCs
,
this
->
GetParam
());
}
TEST_P
(
RCR_F16_F16_F16_LargeK
,
TestLargeKBatch
)
{
const
std
::
vector
<
int
>
Ms
{
188
,
210
};
constexpr
int
N
=
768
;
constexpr
int
K
=
4096
;
const
std
::
vector
<
int
>
Ns
(
Ms
.
size
(),
N
);
const
std
::
vector
<
int
>
Ks
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideAs
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideBs
(
Ms
.
size
(),
K
);
const
std
::
vector
<
int
>
StrideCs
(
Ms
.
size
(),
N
);
this
->
k_batches_
=
{
32
,
64
};
this
->
Run
(
Ms
,
Ns
,
Ks
,
StrideAs
,
StrideBs
,
StrideCs
,
this
->
GetParam
()
);
this
->
Run
(
Ms
,
Ns
,
Ks
);
}
test/grouped_gemm/test_grouped_gemm_util.hpp
View file @
c8c016dd
...
...
@@ -22,7 +22,6 @@
#include "ck/utility/tuple.hpp"
#include "ck/utility/number.hpp"
#include "profiler/profile_grouped_gemm_impl.hpp"
#include "profiler/profile_grouped_gemm_two_stage_impl.hpp"
namespace
ck
{
namespace
test
{
...
...
@@ -40,7 +39,7 @@ std::string serialize_range(const Range& range)
}
template
<
typename
Tuple
>
class
TestGroupedGemm
:
public
testing
::
Test
WithParam
<
int
>
class
TestGroupedGemm
:
public
testing
::
Test
{
protected:
using
ALayout
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
...
...
@@ -50,77 +49,79 @@ class TestGroupedGemm : public testing::TestWithParam<int>
using
BDataType
=
std
::
tuple_element_t
<
4
,
Tuple
>
;
using
EDataType
=
std
::
tuple_element_t
<
5
,
Tuple
>
;
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck
::
tensor_layout
::
gemm
::
ColumnMajor
;
public:
static
constexpr
bool
verify_
=
true
;
static
constexpr
int
init_method_
=
1
;
//
decimal
value initialization
static
constexpr
int
init_method_
=
1
;
//
integer
value initialization
static
constexpr
bool
log_
=
false
;
static
constexpr
bool
bench_
=
false
;
// measure kernel performance
static
constexpr
int
n_warmup_
=
0
;
static
constexpr
int
n_iter_
=
1
;
std
::
vector
<
int
>
k_batches_
;
void
SetUp
()
override
{}
void
SetUp
()
override
{
k_batches_
=
{
1
,
2
,
3
,
5
,
8
};
}
private:
template
<
typename
Layout
>
void
SetStrides
(
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
rows
,
const
std
::
vector
<
int
>&
cols
)
const
{
if
(
std
::
is_same_v
<
Layout
,
Row
>
)
{
for
(
const
auto
c
:
cols
)
{
strides
.
emplace_back
(
c
);
}
}
else
if
(
std
::
is_same_v
<
Layout
,
Col
>
)
{
for
(
const
auto
r
:
rows
)
{
strides
.
emplace_back
(
r
);
}
}
}
public:
void
Run
(
const
std
::
vector
<
int
>&
Ms
,
const
std
::
vector
<
int
>&
Ns
,
const
std
::
vector
<
int
>&
Ks
,
const
std
::
vector
<
int
>&
StrideAs
,
const
std
::
vector
<
int
>&
StrideBs
,
const
std
::
vector
<
int
>&
StrideCs
,
int
kbatch
=
1
,
int
n_warmup
=
1
,
int
n_iter
=
10
)
const
std
::
vector
<
int
>&
StrideAs
=
{},
const
std
::
vector
<
int
>&
StrideBs
=
{},
const
std
::
vector
<
int
>&
StrideCs
=
{})
{
bool
pass
=
ck
::
profiler
::
profile_grouped_gemm_impl
<
ADataType
,
BDataType
,
EDataType
,
float
,
ALayout
,
BLayout
,
ELayout
>
(
verify_
,
init_method_
,
log_
,
bench_
,
Ms
,
Ns
,
Ks
,
StrideAs
,
StrideBs
,
StrideCs
,
kbatch
,
n_warmup
,
n_iter
);
EXPECT_TRUE
(
pass
);
}
};
template
<
typename
Tuple
>
class
TestGroupedGemmTwoStage
:
public
testing
::
TestWithParam
<
int
>
{
protected:
using
ALayout
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
using
BLayout
=
std
::
tuple_element_t
<
1
,
Tuple
>
;
using
ELayout
=
std
::
tuple_element_t
<
2
,
Tuple
>
;
using
ADataType
=
std
::
tuple_element_t
<
3
,
Tuple
>
;
using
BDataType
=
std
::
tuple_element_t
<
4
,
Tuple
>
;
using
EDataType
=
std
::
tuple_element_t
<
5
,
Tuple
>
;
std
::
vector
<
int
>
stride_as
=
StrideAs
;
std
::
vector
<
int
>
stride_bs
=
StrideBs
;
std
::
vector
<
int
>
stride_cs
=
StrideCs
;
public:
static
constexpr
bool
verify_
=
true
;
static
constexpr
int
init_method_
=
1
;
// decimal value initialization
static
constexpr
bool
log_
=
false
;
static
constexpr
bool
bench_
=
false
;
// measure kernel performance
if
(
stride_as
.
empty
())
{
SetStrides
<
ALayout
>
(
stride_as
,
Ms
,
Ks
);
}
if
(
stride_bs
.
empty
())
{
SetStrides
<
BLayout
>
(
stride_bs
,
Ks
,
Ns
);
}
if
(
stride_cs
.
empty
())
{
SetStrides
<
ELayout
>
(
stride_cs
,
Ms
,
Ns
);
}
void
SetUp
()
override
{}
RunSingle
(
Ms
,
Ns
,
Ks
,
stride_as
,
stride_bs
,
stride_cs
,
k_batches_
);
}
void
Run
(
const
std
::
vector
<
int
>&
Ms
,
void
Run
Single
(
const
std
::
vector
<
int
>&
Ms
,
const
std
::
vector
<
int
>&
Ns
,
const
std
::
vector
<
int
>&
Ks
,
const
std
::
vector
<
int
>&
StrideAs
,
const
std
::
vector
<
int
>&
StrideBs
,
const
std
::
vector
<
int
>&
StrideCs
,
int
kbatch
=
1
,
int
n_warmup
=
1
,
int
n_iter
=
10
)
const
std
::
vector
<
int
>&
kbatches
)
{
bool
pass
=
ck
::
profiler
::
profile_grouped_gemm_
two_stage_
impl
<
ADataType
,
bool
pass
=
ck
::
profiler
::
profile_grouped_gemm_impl
<
ADataType
,
BDataType
,
EDataType
,
float
,
...
...
@@ -136,9 +137,9 @@ class TestGroupedGemmTwoStage : public testing::TestWithParam<int>
StrideAs
,
StrideBs
,
StrideCs
,
kbatch
,
n_warmup
,
n_iter
);
kbatch
es
,
n_warmup
_
,
n_iter
_
);
EXPECT_TRUE
(
pass
);
}
};
...
...
@@ -263,7 +264,7 @@ struct DeviceGroupedGemmSplitkInstanceWrapper
p_As
,
p_Bs
,
p_Ds
,
p_Cs
,
gemm_descs
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
if
(
kbatch
>
1
)
{
ggemm_instance
.
SetKBatchSize
(
argument
,
kbatch
);
ggemm_instance
.
SetKBatchSize
(
&
argument
,
kbatch
);
}
return
ggemm_instance
.
IsSupportedArgument
(
argument
);
...
...
@@ -300,13 +301,13 @@ struct DeviceGroupedGemmSplitkInstanceWrapper
p_As
,
p_Bs
,
p_Ds
,
p_Cs
,
gemm_descs
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
if
(
kbatch
>
1
)
{
ggemm_instance
.
SetKBatchSize
(
argument
,
kbatch
);
ggemm_instance
.
SetKBatchSize
(
&
argument
,
kbatch
);
}
EXPECT_TRUE
(
ggemm_instance
.
IsSupportedArgument
(
argument
));
auto
invoker
=
ggemm_instance
.
MakeInvoker
();
DeviceMem
gemm_
desc_workspace
(
ggemm_instance
.
Get
WorkSpace
Size
(
&
argument
));
ggemm_instance
.
Set
WorkSpacePointer
(
&
argument
,
gemm_
desc_workspace
.
GetDeviceBuffer
());
DeviceMem
dev_
gemm_
kargs
(
ggemm_instance
.
Get
DeviceKernelArg
Size
(
&
argument
));
ggemm_instance
.
Set
DeviceKernelArgs
(
&
argument
,
dev_
gemm_
kargs
.
GetDeviceBuffer
());
return
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
false
});
}
};
...
...
test/pool/test_avg_pool2d_fwd.cpp
View file @
c8c016dd
...
...
@@ -138,7 +138,7 @@ TYPED_TEST_SUITE(AvgPool2D_BF16, AvgPool2D_BF16_Types);
TYPED_TEST_SUITE
(
AvgPool2D_I8
,
AvgPool2D_I8_Types
);
TYPED_TEST_SUITE
(
AvgPool2D_F8
,
AvgPool2D_F8_Types
);
TYPED_TEST
(
AvgPool2D_F32
,
AvgPool2D_
I8
_Test
)
{
this
->
Run
();
}
TYPED_TEST
(
AvgPool2D_F32
,
AvgPool2D_
F32
_Test
)
{
this
->
Run
();
}
TYPED_TEST
(
AvgPool2D_F16
,
AvgPool2D_F16_Test
)
{
this
->
Run
();
}
TYPED_TEST
(
AvgPool2D_BF16
,
AvgPool2D_BF16_Test
)
{
this
->
Run
();
}
TYPED_TEST
(
AvgPool2D_I8
,
AvgPool2D_I8_Test
)
{
this
->
Run
();
}
...
...
test/pool/test_max_pool2d_fwd.cpp
View file @
c8c016dd
...
...
@@ -143,7 +143,7 @@ TYPED_TEST_SUITE(MaxPool2D_BF16, MaxPool2D_BF16_Types);
TYPED_TEST_SUITE
(
MaxPool2D_I8
,
MaxPool2D_I8_Types
);
TYPED_TEST_SUITE
(
MaxPool2D_F8
,
MaxPool2D_F8_Types
);
TYPED_TEST
(
MaxPool2D_F32
,
MaxPool2D_
I8
_Test
)
{
this
->
Run
();
}
TYPED_TEST
(
MaxPool2D_F32
,
MaxPool2D_
F32
_Test
)
{
this
->
Run
();
}
TYPED_TEST
(
MaxPool2D_F16
,
MaxPool2D_F16_Test
)
{
this
->
Run
();
}
TYPED_TEST
(
MaxPool2D_BF16
,
MaxPool2D_BF16_Test
)
{
this
->
Run
();
}
TYPED_TEST
(
MaxPool2D_I8
,
MaxPool2D_I8_Test
)
{
this
->
Run
();
}
...
...
Prev
1
…
16
17
18
19
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment