Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
621a459f
"docs/source/en/api/pipelines/stable_diffusion/upscale.mdx" did not exist on "da31075700eb5f7aae1eb974a1c185e53b74f316"
Commit
621a459f
authored
Apr 25, 2023
by
Rostyslav Geyyer
Browse files
Update type_convert_precision -> bf16_convert_rtn
parent
5195dbbb
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
15 additions
and
12 deletions
+15
-12
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
...or_operation/gpu/element/unary_element_wise_operation.hpp
+6
-3
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
+1
-1
include/ck/utility/data_type.hpp
include/ck/utility/data_type.hpp
+8
-8
No files found.
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
View file @
621a459f
...
...
@@ -97,22 +97,25 @@ struct ConvertBF16RTN
template
<
typename
Y
,
typename
X
>
__host__
__device__
void
operator
()(
Y
&
y
,
const
X
&
x
)
const
;
// convert fp16->bf16 using rounding to nearest (rtn) via fp32
template
<
>
__host__
__device__
void
operator
()
<
bhalf_t
,
half_t
>
(
bhalf_t
&
y
,
const
half_t
&
x
)
const
{
y
=
type_convert_precision
<
bhalf_t
>
(
x
);
y
=
bf16_convert_rtn
<
bhalf_t
>
(
x
);
}
// convert fp32->bf16 using rounding to nearest (rtn)
template
<
>
__host__
__device__
void
operator
()
<
bhalf_t
,
float
>
(
bhalf_t
&
y
,
const
float
&
x
)
const
{
y
=
type_convert_precision
<
bhalf_t
>
(
x
);
y
=
bf16_convert_rtn
<
bhalf_t
>
(
x
);
}
// need to keep this specialization for fp16->fp16 ops
template
<
>
__host__
__device__
void
operator
()
<
half_t
,
half_t
>
(
half_t
&
y
,
const
half_t
&
x
)
const
{
y
=
type_convert_precision
<
half_t
>
(
x
);
y
=
type_convert
<
half_t
>
(
x
);
}
};
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
View file @
621a459f
...
...
@@ -339,7 +339,7 @@ struct ThreadwiseTensorSliceTransfer_v3r1
}
static_ford
<
SliceLengths
>
{}([
&
](
auto
idx
)
{
// apply the src elementwise op and convert under the hood if needed
// apply the src elementwise op and convert to DstData under the hood if needed
DstData
dst_v
;
src_element_op_
(
dst_v
,
src_thread_scratch_tuple_
[
thread_scratch_id
][
idx
]);
dst_thread_scratch_
(
idx
)
=
dst_v
;
...
...
include/ck/utility/data_type.hpp
View file @
621a459f
...
...
@@ -1033,16 +1033,16 @@ inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, int8_t>(int8_
// Convert X to Y with highest possible precision
template
<
typename
Y
,
typename
X
>
__host__
__device__
constexpr
Y
type_convert_precision
(
X
x
)
{
static_assert
(
!
std
::
is_reference_v
<
Y
>
&&
!
std
::
is_reference_v
<
X
>
);
__host__
__device__
constexpr
Y
bf16_convert_rtn
(
X
x
)
;
// {
//     static_assert(!std::is_reference_v<Y> && !std::is_reference_v<X>);
return
static_cast
<
Y
>
(
x
);
}
//     return static_cast<Y>(x);
// }
// Convert fp32 to bf16 with RTN if higher precision is needed
template
<
>
inline
__host__
__device__
constexpr
bhalf_t
type_convert_precision
<
bhalf_t
,
float
>
(
float
x
)
inline
__host__
__device__
constexpr
bhalf_t
bf16_convert_rtn
<
bhalf_t
,
float
>
(
float
x
)
{
union
{
...
...
@@ -1086,11 +1086,11 @@ inline __host__ __device__ constexpr bhalf_t type_convert_precision<bhalf_t, flo
// convert fp16 to bf16 via fp32 with RTN if higher precision is needed
template
<
>
inline
__host__
__device__
constexpr
bhalf_t
type_convert_precision
<
bhalf_t
,
half_t
>
(
half_t
x
)
inline
__host__
__device__
constexpr
bhalf_t
bf16_convert_rtn
<
bhalf_t
,
half_t
>
(
half_t
x
)
{
float
x_fp32
=
static_cast
<
float
>
(
x
);
return
type_convert_precision
<
bhalf_t
>
(
x_fp32
);
return
bf16_convert_rtn
<
bhalf_t
>
(
x_fp32
);
}
template
<
typename
T
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment