Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
e36b09b7
Commit
e36b09b7
authored
Oct 15, 2024
by
Andriy Roshchenko
Browse files
Implement ConvertFP16Nearest and ConvertFP16Stochastic tests.
parent
487cb570
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
160 additions
and
3 deletions
+160
-3
include/ck/utility/data_type.hpp
include/ck/utility/data_type.hpp
+34
-1
test/data_type/test_bf8_ocp.cpp
test/data_type/test_bf8_ocp.cpp
+126
-2
No files found.
include/ck/utility/data_type.hpp
View file @
e36b09b7
...
@@ -786,7 +786,8 @@ struct bf8_ocp_t
...
@@ -786,7 +786,8 @@ struct bf8_ocp_t
}
}
#if CK_USE_OCP_FP8
#if CK_USE_OCP_FP8
__host__
__device__
explicit
operator
float
()
const
{
__host__
__device__
explicit
operator
float
()
const
{
#else
#else
__host__
explicit
operator
float
()
const
__host__
explicit
operator
float
()
const
{
{
...
@@ -796,6 +797,20 @@ struct bf8_ocp_t
...
@@ -796,6 +797,20 @@ struct bf8_ocp_t
#else
#else
return
internal
::
cast_from_f8
<
float
,
wm
,
we
,
false
>
(
return
internal
::
cast_from_f8
<
float
,
wm
,
we
,
false
>
(
this
->
data
);
// XXX: clip==false must be consistent with operator half_t
this
->
data
);
// XXX: clip==false must be consistent with operator half_t
#endif
}
#if CK_USE_OCP_FP8
__host__
__device__
explicit
operator
half_t
()
const
{
#else
__host__
explicit
operator
half_t
()
const
{
#endif
#if CK_FP8_CVT_FAST_PATH
return
static_cast
<
half_t
>
(
internal
::
cast_to_f32_from_f8
<
default_interpret
>
(
this
->
data
));
#else
return
internal
::
cast_from_f8
<
half_t
,
wm
,
we
,
false
>
(
this
->
data
);
// XXX: clip==false must be consistent with operator float
#endif
#endif
}
}
}
}
...
@@ -844,6 +859,15 @@ inline __host__ __device__ f8_ocp_t f8_convert_rne<f8_ocp_t, half_t>(half_t x)
...
@@ -844,6 +859,15 @@ inline __host__ __device__ f8_ocp_t f8_convert_rne<f8_ocp_t, half_t>(half_t x)
return
f8_ocp_t
{
return
f8_ocp_t
{
internal
::
cvt_half_t_to_fp8
<
f8_ocp_t
::
default_interpret
,
f8_ocp_t
::
default_saturation
>
(
x
)};
internal
::
cvt_half_t_to_fp8
<
f8_ocp_t
::
default_interpret
,
f8_ocp_t
::
default_saturation
>
(
x
)};
}
}
template
<
>
inline
__host__
__device__
bf8_ocp_t
f8_convert_rne
<
bf8_ocp_t
,
half_t
>
(
half_t
x
)
{
return
bf8_ocp_t
{
internal
::
cvt_half_t_to_fp8
<
bf8_ocp_t
::
default_interpret
,
bf8_ocp_t
::
default_saturation
>
(
x
)};
}
// Declare a template function for fp8 conversion using RNE
// Declare a template function for fp8 conversion using RNE
template
<
typename
Y
,
typename
X
>
template
<
typename
Y
,
typename
X
>
__host__
__device__
constexpr
Y
f8_convert_sr
(
X
x
);
__host__
__device__
constexpr
Y
f8_convert_sr
(
X
x
);
...
@@ -875,6 +899,15 @@ inline __host__ __device__ f8_ocp_t f8_convert_sr<f8_ocp_t, half_t>(half_t x)
...
@@ -875,6 +899,15 @@ inline __host__ __device__ f8_ocp_t f8_convert_sr<f8_ocp_t, half_t>(half_t x)
true
>
(
x
)};
true
>
(
x
)};
}
}
// convert half_t to bf8 with stochastic rounding
template
<
>
inline
__host__
__device__
bf8_ocp_t
f8_convert_sr
<
bf8_ocp_t
,
half_t
>
(
half_t
x
)
{
return
bf8_ocp_t
{
internal
::
cvt_half_t_to_fp8
<
bf8_ocp_t
::
default_interpret
,
bf8_ocp_t
::
default_saturation
,
true
>
(
x
)};
}
#if CK_USE_OCP_FP8
#if CK_USE_OCP_FP8
using
f8_t
=
f8_ocp_t
;
using
f8_t
=
f8_ocp_t
;
using
bf8_t
=
bf8_ocp_t
;
using
bf8_t
=
bf8_ocp_t
;
...
...
test/data_type/test_bf8_ocp.cpp
View file @
e36b09b7
...
@@ -139,6 +139,130 @@ TEST(BF8OCP, ConvertFP32Stochastic)
...
@@ -139,6 +139,130 @@ TEST(BF8OCP, ConvertFP32Stochastic)
ASSERT_TRUE
(
ck
::
internal
::
ocp_bf8_is_nan
(
bf8_nan
.
data
));
ASSERT_TRUE
(
ck
::
internal
::
ocp_bf8_is_nan
(
bf8_nan
.
data
));
}
}
TEST
(
BF8OCP
,
ConvertFP16Nearest
)
{
ASSERT_TRUE
(
false
)
<<
"Not implemented"
;
}
TEST
(
BF8OCP
,
ConvertFP16Nearest
)
{
// fix the tolerance value
constexpr
half_t
half_t_tol
=
1e-3
;
constexpr
half_t
half_t_zero
=
0.0
;
// convert 0 half_t to bfp8 and back, check if holds
ASSERT_NEAR
(
half_t_zero
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_ocp_t
>
(
half_t_zero
)),
half_t_zero
);
// convert minimal half_t to bf8 and back, check if holds
ASSERT_NEAR
(
ck
::
NumericLimits
<
half_t
>::
Min
(),
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_ocp_t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
half_t_tol
);
const
auto
max_bf8_t_half_t
=
type_convert
<
half_t
>
(
ck
::
NumericLimits
<
bf8_ocp_t
>::
Max
());
// convert maximal bf8_ocp_t to half_t and check if equal to bf8 max
ASSERT_NEAR
(
max_bf8_t_half_t
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_ocp_t
>
(
max_bf8_t_half_t
)),
half_t_zero
);
// convert maximal half_t to bf8 and back, check if clipped to bf8 max (saturation to finite)
ASSERT_NEAR
(
max_bf8_t_half_t
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_ocp_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
half_t_zero
);
// convert half_t infinity to bf8_ocp_t and check if it is max value (saturation to finite)
ASSERT_EQ
(
ck
::
NumericLimits
<
bf8_ocp_t
>::
Max
(),
f8_convert_rne
<
bf8_ocp_t
>
(
type_convert
<
half_t
>
(
std
::
numeric_limits
<
float
>::
infinity
())));
// positive normal bf8 value to bf8 and back, check if holds
constexpr
half_t
pos_norm_bf8
{
0.0000762939
f
};
// 10*2^-17
ASSERT_NEAR
(
pos_norm_bf8
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_ocp_t
>
(
pos_norm_bf8
)),
half_t_tol
);
// negative smallest normal bf8 value to bf8 and back, check if holds
constexpr
half_t
neg_min_bf8
{
-
0.00006103515625
f
};
//-2^-14
ASSERT_NEAR
(
neg_min_bf8
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_ocp_t
>
(
neg_min_bf8
)),
half_t_zero
);
// positive subnorm bf8 value to bf8 and back, check if holds
constexpr
half_t
pos_subnorm_bf8
{
0.000030517578125
f
};
// 2^-15
ASSERT_NEAR
(
pos_subnorm_bf8
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_ocp_t
>
(
pos_subnorm_bf8
)),
half_t_zero
);
// min subnorm bf8 value to bf8 and back, check if holds
constexpr
half_t
min_subnorm_bf8
{
-
0.0000152587890625
f
};
//-2^-16
ASSERT_NEAR
(
min_subnorm_bf8
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_ocp_t
>
(
min_subnorm_bf8
)),
half_t_zero
);
// smaller than min subnorm bf8 value to bf8 must be zero
constexpr
half_t
less_than_min_subnorm
{
0.00000762939453125
f
};
// 2^-17
ASSERT_EQ
(
half_t_zero
,
type_convert
<
half_t
>
(
f8_convert_rne
<
bf8_ocp_t
>
(
less_than_min_subnorm
)));
// convert quiet NaN to bf8_ocp_t and check if it is quiet NaN
const
auto
bf8_nan
=
f8_convert_rne
<
bf8_ocp_t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
());
ASSERT_TRUE
(
ck
::
internal
::
ocp_bf8_is_nan
(
bf8_nan
.
data
));
}
TEST
(
BF8OCP
,
ConvertFP16Stochastic
)
{
ASSERT_TRUE
(
false
)
<<
"Not implemented"
;
}
TEST
(
BF8OCP
,
ConvertFP16Stochastic
)
{
// fix the tolerance value
constexpr
half_t
half_t_tol
=
1e-3
;
constexpr
half_t
half_t_zero
=
0.0
;
constexpr
auto
min_subnorm_bf8
=
0.0000152587890625
f
;
// 2^-16
// convert 0 half_t to bfp8 and back, check if holds
ASSERT_NEAR
(
half_t_zero
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_ocp_t
>
(
half_t_zero
)),
half_t_zero
);
// convert minimal half_t (6.103515625e-05) to fp8 and back
ASSERT_NEAR
(
ck
::
NumericLimits
<
half_t
>::
Min
(),
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_ocp_t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
half_t_zero
);
const
auto
max_bf8_t_half_t
=
type_convert
<
half_t
>
(
ck
::
NumericLimits
<
bf8_ocp_t
>::
Max
());
// convert maximal bf8_ocp_t to half_t and check if equal to bf8 max
ASSERT_NEAR
(
max_bf8_t_half_t
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_ocp_t
>
(
max_bf8_t_half_t
)),
half_t_zero
);
// convert maximal half_t to bf8 and back, check if clipped to bf8 max (saturation to finite)
ASSERT_NEAR
(
max_bf8_t_half_t
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_ocp_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
half_t_zero
);
// convert half_t infinity to bf8_ocp_t and check if it is max value (saturation to finite)
ASSERT_EQ
(
ck
::
NumericLimits
<
bf8_ocp_t
>::
Max
(),
f8_convert_sr
<
bf8_ocp_t
>
(
type_convert
<
half_t
>
(
std
::
numeric_limits
<
float
>::
infinity
())));
// positive normal bf8 value to bf8 and back, check if holds
constexpr
half_t
pos_norm_bf8
{
0.0000762939
f
};
// 10*2^-17
ASSERT_NEAR
(
pos_norm_bf8
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_ocp_t
>
(
pos_norm_bf8
)),
half_t_tol
);
// negative smallest normal bf8 value to bf8 and back, check if holds
constexpr
half_t
neg_min_bf8
{
-
0.00006103515625
f
};
//-2^-14
ASSERT_NEAR
(
neg_min_bf8
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_ocp_t
>
(
neg_min_bf8
)),
half_t_zero
);
// positive subnorm bf8 value to bf8 and back, check if holds
constexpr
half_t
pos_subnorm_bf8
{
0.000030517578125
f
};
// 2^-15
ASSERT_NEAR
(
pos_subnorm_bf8
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_ocp_t
>
(
pos_subnorm_bf8
)),
half_t_zero
);
// min subnorm bf8 value to bf8 and back, check if holds
ASSERT_NEAR
(
half_t
{
-
min_subnorm_bf8
},
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_ocp_t
>
(
half_t
{
-
min_subnorm_bf8
})),
half_t_zero
);
// smaller than min subnorm bf8 value to bf8 alternates between 0 and 2^-16
constexpr
half_t
less_than_min_subnorm
{
0.00000762939453125
f
};
// 2^-17
ASSERT_NEAR
(
half_t_zero
,
type_convert
<
half_t
>
(
f8_convert_sr
<
bf8_ocp_t
>
(
less_than_min_subnorm
)),
half_t
{
min_subnorm_bf8
});
// convert quiet NaN to bf8_ocp_t and check if it is quiet NaN
const
auto
bf8_nan
=
f8_convert_sr
<
bf8_ocp_t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
());
ASSERT_TRUE
(
ck
::
internal
::
ocp_bf8_is_nan
(
bf8_nan
.
data
));
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment