Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
28187354
"docs/source/ja/index.md" did not exist on "e0d836c81322b0ce4766cb2f95333191a299ed99"
Commit
28187354
authored
May 12, 2023
by
Rostyslav Geyyer
Browse files
Add element op
parent
653f9515
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
9 deletions
+19
-9
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
...or_operation/gpu/element/unary_element_wise_operation.hpp
+17
-0
include/ck/utility/data_type.hpp
include/ck/utility/data_type.hpp
+2
-9
No files found.
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
View file @
28187354
...
...
@@ -127,6 +127,23 @@ struct ConvertBF16RTN
}
};
// Element-wise functor: convert to fp8 using stochastic rounding (SR).
struct ConvertF8SR
{
    // Writes the SR-converted value of `x` into `y`.
    //   Y - destination type; must be f8_t (enforced at compile time).
    //   X - source type; must be float or half_t (enforced at compile time).
    template <typename Y, typename X>
    __host__ __device__ void operator()(Y& y, const X& x) const
    {
        // Evaluate both type constraints up front so each assertion reads as a
        // single named condition.
        constexpr bool is_supported_dst = is_same<Y, f8_t>::value;
        constexpr bool is_supported_src =
            is_same<X, float>::value || is_same<X, half_t>::value;

        // check Y datatype
        static_assert(is_supported_dst, "Data type is not supported by this operation!");

        // check X datatype
        static_assert(is_supported_src, "Data type is not supported by this operation!");

        // Only the destination type is given explicitly; the source type is
        // deduced from the argument.
        y = f8_convert_sr<Y>(x);
    }
};
struct
Scale
{
__host__
__device__
Scale
(
float
scale
)
:
scale_
(
scale
)
{}
...
...
include/ck/utility/data_type.hpp
View file @
28187354
...
...
@@ -1174,11 +1174,11 @@ __host__ __device__ uint32_t prand_generator(int id, T val)
// Declare a template function for fp8 conversion using SR
template
<
typename
Y
,
typename
X
>
__host__
__device__
constexpr
Y
f
p
8_convert_sr
(
X
x
);
__host__
__device__
constexpr
Y
f8_convert_sr
(
X
x
);
// convert fp32 to fp8 with stochastic rounding
template
<
>
inline
__host__
__device__
f8_t
f
p
8_convert_sr
<
f8_t
,
float
>
(
float
x
)
inline
__host__
__device__
f8_t
f8_convert_sr
<
f8_t
,
float
>
(
float
x
)
{
constexpr
bool
negative_zero_nan
=
true
;
constexpr
bool
clip
=
true
;
...
...
@@ -1188,13 +1188,6 @@ inline __host__ __device__ f8_t fp8_convert_sr<f8_t, float>(float x)
return
cast_to_f8
<
negative_zero_nan
,
clip
,
(
rm
==
f8_rounding_mode
::
stochastic
)
>
(
x
,
rng
);
}
// convert fp8 to fp32
template
<
>
inline
__host__
__device__
float
fp8_convert_sr
<
float
,
f8_t
>
(
f8_t
x
)
{
return
type_convert
<
float
>
(
x
);
}
template
<
typename
T
>
struct
NumericLimits
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment