Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
13dd3ab5
Commit
13dd3ab5
authored
Oct 10, 2024
by
Andriy Roshchenko
Browse files
Implementation of ConvertFP32Nearest in test_fp8_ocp.
parent
e2efb63c
Changes
6
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
1477 additions
and
68 deletions
+1477
-68
CMakePresets.json
CMakePresets.json
+2
-2
include/ck/utility/data_type.hpp
include/ck/utility/data_type.hpp
+1357
-65
include/ck/utility/type_convert.hpp
include/ck/utility/type_convert.hpp
+7
-1
test/data_type/test_bf8_ocp.cpp
test/data_type/test_bf8_ocp.cpp
+22
-0
test/data_type/test_fp8_fnuz.cpp
test/data_type/test_fp8_fnuz.cpp
+5
-0
test/data_type/test_fp8_ocp.cpp
test/data_type/test_fp8_ocp.cpp
+84
-0
No files found.
CMakePresets.json
View file @
13dd3ab5
...
@@ -11,7 +11,7 @@
...
@@ -11,7 +11,7 @@
"cacheVariables"
:
{
"cacheVariables"
:
{
"CMAKE_BUILD_TYPE"
:
"Debug"
,
"CMAKE_BUILD_TYPE"
:
"Debug"
,
"CMAKE_EXPORT_COMPILE_COMMANDS"
:
"ON"
,
"CMAKE_EXPORT_COMPILE_COMMANDS"
:
"ON"
,
"GPU_TARGETS"
:
"gfx90
a
"
,
"GPU_TARGETS"
:
"gfx9
5
0"
,
"BUILD_DEV"
:
"ON"
,
"BUILD_DEV"
:
"ON"
,
"CMAKE_CXX_COMPILER"
:
"/opt/rocm/llvm/bin/clang++"
,
"CMAKE_CXX_COMPILER"
:
"/opt/rocm/llvm/bin/clang++"
,
"CMAKE_PREFIX_PATH"
:
"/opt/rocm"
"CMAKE_PREFIX_PATH"
:
"/opt/rocm"
...
@@ -33,7 +33,7 @@
...
@@ -33,7 +33,7 @@
},
},
"cacheVariables"
:
{
"cacheVariables"
:
{
"CMAKE_BUILD_TYPE"
:
"Debug"
,
"CMAKE_BUILD_TYPE"
:
"Debug"
,
"CMAKE_CXX_FLAGS"
:
"-O0"
"CMAKE_CXX_FLAGS"
:
"-O0
-ggdb
"
}
}
},
},
{
{
...
...
include/ck/utility/data_type.hpp
View file @
13dd3ab5
This diff is collapsed.
Click to expand it.
include/ck/utility/type_convert.hpp
View file @
13dd3ab5
...
@@ -100,6 +100,12 @@ inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, int8_t>(int8_
...
@@ -100,6 +100,12 @@ inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, int8_t>(int8_
return
type_convert
<
bhalf_t
>
(
x_fp32
);
return
type_convert
<
bhalf_t
>
(
x_fp32
);
}
}
template
<
>
inline
__host__
__device__
constexpr
f8_ocp_t
type_convert
<
f8_ocp_t
,
int
>
(
int
x
)
{
return
f8_ocp_t
{
type_convert
<
f8_ocp_t
::
data_type
>
(
x
)};
}
// Convert X to Y
// Convert X to Y
template
<
typename
Y
,
typename
X
>
template
<
typename
Y
,
typename
X
>
__host__
__device__
constexpr
Y
type_convert_sp
(
X
x
)
__host__
__device__
constexpr
Y
type_convert_sp
(
X
x
)
...
@@ -409,7 +415,7 @@ inline __host__ __device__ float type_convert<float, f8_fnuz_t>(f8_fnuz_t x)
...
@@ -409,7 +415,7 @@ inline __host__ __device__ float type_convert<float, f8_fnuz_t>(f8_fnuz_t x)
}
}
template
<
>
template
<
>
inline
__host__
__device__
float2_t
type_convert
<
float2_t
,
f8x2_t
>
(
f8x2_t
x
)
inline
__host__
__device__
float2_t
type_convert
<
float2_t
,
f8x2_
fnuz_
t
>
(
f8x2_
fnuz_
t
x
)
{
{
#if defined(__gfx94__)
#if defined(__gfx94__)
const
auto
i16val
=
bit_cast
<
uint16_t
>
(
x
);
const
auto
i16val
=
bit_cast
<
uint16_t
>
(
x
);
...
...
test/data_type/test_bf8_ocp.cpp
0 → 100644
View file @
13dd3ab5
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using
ck
::
bf8_ocp_t
;
using
ck
::
f8_convert_rne
;
using
ck
::
f8_convert_sr
;
using
ck
::
half_t
;
using
ck
::
type_convert
;
TEST
(
BF8OCP
,
NumericLimits
)
{}
TEST
(
BF8OCP
,
ConvertFP32Nearest
)
{}
TEST
(
BF8OCP
,
ConvertFP32Stochastic
)
{}
TEST
(
BF8OCP
,
ConvertFP16Nearest
)
{}
TEST
(
BF8OCP
,
ConvertFP16Stochastic
)
{}
test/data_type/test_fp8_fnuz.cpp
View file @
13dd3ab5
...
@@ -38,6 +38,11 @@ TEST(FP8FNUZ, ConvertFP32Nearest)
...
@@ -38,6 +38,11 @@ TEST(FP8FNUZ, ConvertFP32Nearest)
// convert maximal f8_fnuz_t to float and check if equal to fp8 max
// convert maximal f8_fnuz_t to float and check if equal to fp8 max
ASSERT_NEAR
(
ASSERT_NEAR
(
max_f8_t_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_fnuz_t
>
(
max_f8_t_float
)),
abs_tol
);
max_f8_t_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_fnuz_t
>
(
max_f8_t_float
)),
abs_tol
);
// XXX: FNUZ f8_convert_rne behavior is inconsistent.
// Clipping large values to fp8 max (saturation to finite) contradicts converting inf float to
// fp8 qNAN (no saturation).
// convert maximal float to fp8 and back, check if clipped to fp8 max
// convert maximal float to fp8 and back, check if clipped to fp8 max
ASSERT_NEAR
(
max_f8_t_float
,
ASSERT_NEAR
(
max_f8_t_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_fnuz_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
type_convert
<
float
>
(
f8_convert_rne
<
f8_fnuz_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
...
...
test/data_type/test_fp8_ocp.cpp
0 → 100644
View file @
13dd3ab5
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using
ck
::
f8_convert_rne
;
using
ck
::
f8_convert_sr
;
using
ck
::
f8_ocp_t
;
using
ck
::
half_t
;
using
ck
::
type_convert
;
TEST
(
FP8OCP
,
NumericLimits
)
{
// constants given for OCP FP8
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_ocp_t
>::
Min
(),
type_convert
<
f8_ocp_t
>
(
0x08
));
// 0b00001000 = 2^-6
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_ocp_t
>::
Max
(),
type_convert
<
f8_ocp_t
>
(
0x7E
));
// 0b01111110 = 448
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_ocp_t
>::
Lowest
(),
type_convert
<
f8_ocp_t
>
(
0xFE
));
// 0b11111110 = -448
EXPECT_EQ
(
ck
::
NumericLimits
<
f8_ocp_t
>::
QuietNaN
().
data
,
type_convert
<
f8_ocp_t
>
(
0x7F
).
data
);
// 0b01111111
EXPECT_FALSE
(
ck
::
NumericLimits
<
f8_ocp_t
>::
QuietNaN
()
==
ck
::
NumericLimits
<
f8_ocp_t
>::
QuietNaN
());
}
TEST
(
FP8OCP
,
ConvertFP32Nearest
)
{
// fix the tolerance value
float
abs_tol
=
1e-6
;
// convert 0 float to fp8 and back, check if holds
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_ocp_t
>
(
0.0
f
)),
0.0
f
);
// convert minimal float to fp8 and back, check if holds
ASSERT_NEAR
(
std
::
numeric_limits
<
float
>::
min
(),
type_convert
<
float
>
(
f8_convert_rne
<
f8_ocp_t
>
(
std
::
numeric_limits
<
float
>::
min
())),
abs_tol
);
const
auto
max_f8_t_float
=
type_convert
<
float
>
(
ck
::
NumericLimits
<
f8_ocp_t
>::
Max
());
// convert maximal f8_ocp_t to float and check if equal to fp8 max
ASSERT_NEAR
(
max_f8_t_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_ocp_t
>
(
max_f8_t_float
)),
0.0
f
);
// convert maximal float to fp8 and back, check if clipped to fp8 max (saturation to finite)
ASSERT_NEAR
(
max_f8_t_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_ocp_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
0.0
f
);
// convert float infinity to f8_ocp_t and check if it is max value (saturation to finite)
ASSERT_EQ
(
ck
::
NumericLimits
<
f8_ocp_t
>::
Max
(),
f8_convert_rne
<
f8_ocp_t
>
(
std
::
numeric_limits
<
float
>::
infinity
()));
// positive norm float value to fp8 and back, check if holds
float
pos_float
=
0.017578125
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_ocp_t
>
(
pos_float
)),
abs_tol
);
// smallest normal fp8 value to fp8 and back, check if holds
float
neg_float
=
-
0.015625
f
;
//-2^-6
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_ocp_t
>
(
neg_float
)),
0.0
f
);
// positive subnorm float value to fp8 and back, check if holds
pos_float
=
0.00390625
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_ocp_t
>
(
pos_float
)),
abs_tol
);
// min subnorm fp8 value to fp8 and back, check if holds
neg_float
=
-
0.001953125
f
;
//-2^-9
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_ocp_t
>
(
neg_float
)),
0.0
f
);
// smaller than min subnorm fp8 value to fp8 must be zero
auto
less_than_min_subnorm
=
0.0009765625
f
;
// 2^-10
ASSERT_EQ
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_ocp_t
>
(
less_than_min_subnorm
)));
// convert quiet NaN to f8_ocp_t and check if it is quiet NaN
auto
f8_nan
=
f8_convert_rne
<
f8_ocp_t
>
(
std
::
numeric_limits
<
float
>::
quiet_NaN
());
ASSERT_TRUE
((
f8_nan
.
data
&
0x7f
)
==
0x7f
);
}
TEST
(
FP8OCP
,
ConvertFP32Stochastic
)
{}
TEST
(
FP8OCP
,
ConvertFP16Nearest
)
{}
TEST
(
FP8OCP
,
ConvertFP16Stochastic
)
{}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment