Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
5f1d777b
"apps/life_sci/dgllife/model/pretrain.py" did not exist on "e590feeb629ef37b7575ec973bcb2a0e0451dcb5"
Commit
5f1d777b
authored
Mar 10, 2023
by
ltqin
Browse files
fix save bfloat16x4_t
parent
27d764eb
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
10 additions
and
6 deletions
+10
-6
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
...or_operation/gpu/element/unary_element_wise_operation.hpp
+2
-1
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
+2
-2
include/ck/utility/amd_buffer_addressing.hpp
include/ck/utility/amd_buffer_addressing.hpp
+6
-3
No files found.
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
View file @
5f1d777b
...
...
@@ -41,7 +41,8 @@ struct PassThrough
}
template
<
>
__host__
__device__
void
operator
()
<
bfloat16_t
,
bfloat16_t
>
(
bfloat16_t
&
y
,
const
bfloat16_t
&
x
)
const
__host__
__device__
void
operator
()
<
bfloat16_t
,
bfloat16_t
>
(
bfloat16_t
&
y
,
const
bfloat16_t
&
x
)
const
{
y
=
x
;
}
...
...
include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
View file @
5f1d777b
...
...
@@ -544,7 +544,6 @@ struct MfmaSelector
#endif
}
template
<
>
static
constexpr
auto
GetMfma
<
int8_t
,
32
,
32
>
()
{
...
...
@@ -756,7 +755,8 @@ struct XdlopsGemm
__device__
void
Run
(
const
FloatA
&
p_a_wave
,
const
FloatB
&
p_b_wave
,
FloatC
&
p_c_thread
)
const
{
static_assert
(
is_same
<
base_type
,
double
>::
value
||
is_same
<
base_type
,
float
>::
value
||
is_same
<
base_type
,
half_t
>::
value
||
is_same
<
base_type
,
bhalf_t
>::
value
||
is_same
<
base_type
,
bfloat16_t
>::
value
||
is_same
<
base_type
,
half_t
>::
value
||
is_same
<
base_type
,
bhalf_t
>::
value
||
is_same
<
base_type
,
bfloat16_t
>::
value
||
is_same
<
base_type
,
int8_t
>::
value
,
"base base_type must be double, float, half, bfloat16, and int8_t!"
);
...
...
include/ck/utility/amd_buffer_addressing.hpp
View file @
5f1d777b
...
...
@@ -424,18 +424,21 @@ __device__ typename vector_type<T, N>::type amd_buffer_load_impl(int32x4_t src_w
{
if
constexpr
(
N
==
1
)
{
return
llvm_amdgcn_raw_buffer_load_i16
(
auto
tmp
=
llvm_amdgcn_raw_buffer_load_i16
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
return
bit_cast
<
bfloat16_t
>
(
tmp
);
}
else
if
constexpr
(
N
==
2
)
{
return
llvm_amdgcn_raw_buffer_load_i16x2
(
auto
tmp
=
llvm_amdgcn_raw_buffer_load_i16x2
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
return
bit_cast
<
bfloat16x2_t
>
(
tmp
);
}
else
if
constexpr
(
N
==
4
)
{
return
llvm_amdgcn_raw_buffer_load_i16x4
(
auto
tmp
=
llvm_amdgcn_raw_buffer_load_i16x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
return
bit_cast
<
bfloat16x4_t
>
(
tmp
);
}
else
if
constexpr
(
N
==
8
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment