Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
683e2596
Commit
683e2596
authored
Mar 18, 2021
by
root
Browse files
rename
parent
8fb97941
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
8 deletions
+8
-8
composable_kernel/include/tensor_operation/blockwise_gemm_v3.hpp
...ble_kernel/include/tensor_operation/blockwise_gemm_v3.hpp
+8
-8
No files found.
composable_kernel/include/tensor_operation/blockwise_gemm_v3.hpp
View file @
683e2596
...
@@ -18,7 +18,7 @@ template <index_t BlockSize,
...
@@ -18,7 +18,7 @@ template <index_t BlockSize,
index_t
KPerThread
,
index_t
KPerThread
,
index_t
HPerThread
,
index_t
HPerThread
,
index_t
WPerThread
,
index_t
WPerThread
,
index_t
CYX
PerThreadLoop
,
index_t
E
PerThreadLoop
,
index_t
ThreadGemmADataPerRead_K
,
index_t
ThreadGemmADataPerRead_K
,
index_t
ThreadGemmBDataPerRead_W
>
index_t
ThreadGemmBDataPerRead_W
>
struct
BlockwiseGemm_km_kn_m0m1n0n1_v3
struct
BlockwiseGemm_km_kn_m0m1n0n1_v3
...
@@ -130,14 +130,14 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
...
@@ -130,14 +130,14 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
constexpr
auto
a_block_mtx
=
BlockMatrixA
{};
constexpr
auto
a_block_mtx
=
BlockMatrixA
{};
constexpr
auto
CYX
PerBlock
=
a_block_mtx
.
GetLength
(
I0
);
constexpr
auto
E
PerBlock
=
a_block_mtx
.
GetLength
(
I0
);
// thread A, B for GEMM
// thread A, B for GEMM
constexpr
auto
a_thread_mtx
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
constexpr
auto
a_thread_mtx
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
Number
<
CYX
PerThreadLoop
>
{},
Number
<
KPerThread
>
{}));
make_tuple
(
Number
<
E
PerThreadLoop
>
{},
Number
<
KPerThread
>
{}));
constexpr
auto
b_thread_mtx
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
constexpr
auto
b_thread_mtx
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
Number
<
CYX
PerThreadLoop
>
{},
Number
<
1
>
{},
Number
<
HPerThread
>
{},
Number
<
WPerThread
>
{}));
Number
<
E
PerThreadLoop
>
{},
Number
<
1
>
{},
Number
<
HPerThread
>
{},
Number
<
WPerThread
>
{}));
constexpr
auto
c_thread_mtx
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
constexpr
auto
c_thread_mtx
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
Number
<
KPerThread
>
{},
Number
<
1
>
{},
Number
<
HPerThread
>
{},
Number
<
WPerThread
>
{}));
Number
<
KPerThread
>
{},
Number
<
1
>
{},
Number
<
HPerThread
>
{},
Number
<
WPerThread
>
{}));
...
@@ -146,7 +146,7 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
...
@@ -146,7 +146,7 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
constexpr
auto
a_thread_copy
=
ThreadwiseSliceCopy_a
<
BlockMatrixA
,
constexpr
auto
a_thread_copy
=
ThreadwiseSliceCopy_a
<
BlockMatrixA
,
decltype
(
a_thread_mtx
),
decltype
(
a_thread_mtx
),
CYX
PerThreadLoop
,
E
PerThreadLoop
,
KPerThread
,
KPerThread
,
ThreadGemmADataPerRead_K
>
{};
ThreadGemmADataPerRead_K
>
{};
...
@@ -155,15 +155,15 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
...
@@ -155,15 +155,15 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
decltype
(
c_thread_mtx
)
>
{};
decltype
(
c_thread_mtx
)
>
{};
// loop over k
// loop over k
#pragma unroll
#pragma unroll
for
(
index_t
cyx
_begin
=
0
;
cyx
_begin
<
CYX
PerBlock
;
cyx
_begin
+=
CYX
PerThreadLoop
)
for
(
index_t
e
_begin
=
0
;
e
_begin
<
E
PerBlock
;
e
_begin
+=
E
PerThreadLoop
)
{
{
a_thread_copy
.
Run
(
p_a_block
+
a_block_mtx
.
CalculateOffset
(
make_tuple
(
cyx
_begin
,
0
))
+
a_thread_copy
.
Run
(
p_a_block
+
a_block_mtx
.
CalculateOffset
(
make_tuple
(
e
_begin
,
0
))
+
mMyThreadOffsetA
,
mMyThreadOffsetA
,
p_a_thread
);
p_a_thread
);
threadwise_gemm
.
Run
(
p_a_thread
,
threadwise_gemm
.
Run
(
p_a_thread
,
p_b_thread
+
p_b_thread
+
b_thread_mtx
.
CalculateOffset
(
make_tuple
(
cyx
_begin
,
0
,
0
,
0
)),
b_thread_mtx
.
CalculateOffset
(
make_tuple
(
e
_begin
,
0
,
0
,
0
)),
p_c_thread
);
p_c_thread
);
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment