Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
2662f8e5
Commit
2662f8e5
authored
Mar 10, 2021
by
root
Browse files
fixed naive
parent
61a1c170
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
5 additions
and
5 deletions
+5
-5
composable_kernel/include/tensor_operation/blockwise_gemm_v3.hpp
...ble_kernel/include/tensor_operation/blockwise_gemm_v3.hpp
+5
-5
No files found.
composable_kernel/include/tensor_operation/blockwise_gemm_v3.hpp
View file @
2662f8e5
...
@@ -130,13 +130,13 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
...
@@ -130,13 +130,13 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
// thread A, B for GEMM
// thread A, B for GEMM
constexpr
auto
a_thread_mtx
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
constexpr
auto
a_thread_mtx
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
Number
<
KPerThreadLoop
>
{},
Number
<
MPerThread
>
{});
make_tuple
(
Number
<
KPerThreadLoop
>
{},
Number
<
MPerThread
>
{})
)
;
constexpr
auto
b_thread_mtx
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
constexpr
auto
b_thread_mtx
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
Number
<
KPerThreadLoop
>
{},
Number
<
NPerThread
>
{});
make_tuple
(
Number
<
KPerThreadLoop
>
{},
Number
<
NPerThread
>
{})
)
;
FloatA
p_a_thread
[
a_thread_mtx
.
GetElementSpace
()];
FloatA
p_a_thread
[
a_thread_mtx
.
GetElementSpace
Size
()];
FloatB
p_b_thread
[
b_thread_mtx
.
GetElementSpace
()];
FloatB
p_b_thread
[
b_thread_mtx
.
GetElementSpace
Size
()];
constexpr
auto
a_thread_copy
=
ThreadwiseMatrixSliceCopy_v3
<
BlockMatrixA
,
constexpr
auto
a_thread_copy
=
ThreadwiseMatrixSliceCopy_v3
<
BlockMatrixA
,
decltype
(
a_thread_mtx
),
decltype
(
a_thread_mtx
),
...
@@ -342,7 +342,7 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
...
@@ -342,7 +342,7 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
template
<
typename
FloatA
,
typename
FloatB
,
typename
FloatC
>
template
<
typename
FloatA
,
typename
FloatB
,
typename
FloatC
>
__device__
void
Run
(
const
FloatA
*
p_a_block
,
const
FloatB
*
p_b_block
,
FloatC
*
p_c_thread
)
const
__device__
void
Run
(
const
FloatA
*
p_a_block
,
const
FloatB
*
p_b_block
,
FloatC
*
p_c_thread
)
const
{
{
#if
CK_EXPERIMENTAL_BLOCKWISE_GEMM_USE_PIPELINE
#if
0
constexpr auto I0 = Number<0>{};
constexpr auto I0 = Number<0>{};
constexpr auto I1 = Number<1>{};
constexpr auto I1 = Number<1>{};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment