Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
eb99b233
Commit
eb99b233
authored
Oct 13, 2023
by
Bartlomiej Kocot
Browse files
Fix instances dtype check
parent
a3c80265
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
12 deletions
+20
-12
include/ck/utility/amd_buffer_addressing.hpp
include/ck/utility/amd_buffer_addressing.hpp
+8
-0
library/src/tensor_operation_instance/gpu/CMakeLists.txt
library/src/tensor_operation_instance/gpu/CMakeLists.txt
+12
-12
No files found.
include/ck/utility/amd_buffer_addressing.hpp
View file @
eb99b233
...
@@ -415,8 +415,12 @@ __device__ typename vector_type<T, N>::type amd_buffer_load_impl(int32x4_t src_w
...
@@ -415,8 +415,12 @@ __device__ typename vector_type<T, N>::type amd_buffer_load_impl(int32x4_t src_w
(
is_same
<
T
,
half_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
half_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
bhalf_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
bhalf_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
int32_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
int32_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
#if defined CK_ENABLE_FP8
(
is_same
<
T
,
f8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
f8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
#endif
#if defined CK_ENABLE_BF8
(
is_same
<
T
,
bf8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
bf8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
#endif
(
is_same
<
T
,
int8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
)),
(
is_same
<
T
,
int8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
)),
"wrong! not implemented"
);
"wrong! not implemented"
);
...
@@ -537,8 +541,12 @@ __device__ void amd_buffer_store_impl(const typename vector_type<T, N>::type src
...
@@ -537,8 +541,12 @@ __device__ void amd_buffer_store_impl(const typename vector_type<T, N>::type src
(
is_same
<
T
,
half_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
half_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
bhalf_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
bhalf_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
int32_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
int32_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
#if defined CK_ENABLE_FP8
(
is_same
<
T
,
f8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
f8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
#endif
#if defined CK_ENABLE_BF8
(
is_same
<
T
,
bf8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
(
is_same
<
T
,
bf8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
))
||
#endif
(
is_same
<
T
,
int8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
)),
(
is_same
<
T
,
int8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
||
N
==
16
)),
"wrong! not implemented"
);
"wrong! not implemented"
);
...
...
library/src/tensor_operation_instance/gpu/CMakeLists.txt
View file @
eb99b233
...
@@ -87,18 +87,18 @@ IF(IS_DIRECTORY "${subdir_path}")
...
@@ -87,18 +87,18 @@ IF(IS_DIRECTORY "${subdir_path}")
message
(
"int8 instance found!"
)
message
(
"int8 instance found!"
)
set
(
add_inst 1
)
set
(
add_inst 1
)
endif
()
endif
()
if
(
NOT
"
${
cmake_instance
}
"
MATCHES
"_fp8"
OR
if
(
(
NOT
"
${
cmake_instance
}
"
MATCHES
"_fp8"
AND
NOT
"
${
cmake_instance
}
"
MATCHES
"_f8"
OR
NOT
"
${
cmake_instance
}
"
MATCHES
"_f8"
AND
NOT
"
${
cmake_instance
}
"
MATCHES
"_fp16"
OR
NOT
"
${
cmake_instance
}
"
MATCHES
"_fp16"
AND
NOT
"
${
cmake_instance
}
"
MATCHES
"_f16"
OR
NOT
"
${
cmake_instance
}
"
MATCHES
"_f16"
AND
NOT
"
${
cmake_instance
}
"
MATCHES
"_fp32"
OR
NOT
"
${
cmake_instance
}
"
MATCHES
"_fp32"
AND
NOT
"
${
cmake_instance
}
"
MATCHES
"_f32"
OR
NOT
"
${
cmake_instance
}
"
MATCHES
"_f32"
AND
NOT
"
${
cmake_instance
}
"
MATCHES
"_fp64"
OR
NOT
"
${
cmake_instance
}
"
MATCHES
"_fp64"
AND
NOT
"
${
cmake_instance
}
"
MATCHES
"_f64"
OR
NOT
"
${
cmake_instance
}
"
MATCHES
"_f64"
AND
NOT
"
${
cmake_instance
}
"
MATCHES
"_bf16"
OR
NOT
"
${
cmake_instance
}
"
MATCHES
"_bf16"
AND
NOT
"
${
cmake_instance
}
"
MATCHES
"_int8"
OR
NOT
"
${
cmake_instance
}
"
MATCHES
"_int8"
AND
NOT
"
${
cmake_instance
}
"
MATCHES
"_i8"
OR
NOT
"
${
cmake_instance
}
"
MATCHES
"_i8"
AND
NOT
"
${
cmake_instance
}
"
MATCHES
"_int4"
OR
NOT
"
${
cmake_instance
}
"
MATCHES
"_int4"
)
OR
NOT DEFINED DTYPES
)
NOT DEFINED DTYPES
)
message
(
"instance should be built for all types!"
)
message
(
"instance should be built for all types!"
)
set
(
add_inst 1
)
set
(
add_inst 1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment