Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
c0e2c3df
"driver/driver.cpp" did not exist on "5e77650415119376712fbe7aefb2f3922af021db"
Commit
c0e2c3df
authored
Jun 09, 2022
by
ltqin
Browse files
change read order
parent
9707178f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
13 deletions
+14
-13
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp
...operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp
+12
-12
include/ck/utility/synchronization.hpp
include/ck/utility/synchronization.hpp
+2
-1
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp
View file @
c0e2c3df
...
...
@@ -113,7 +113,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1
static
constexpr
auto
I7
=
Number
<
7
>
{};
static
constexpr
auto
BaseMultK0
=
4
;
static
constexpr
auto
MultiK0
=
BaseMultK0
*
1
;
static
constexpr
auto
MultiK0
=
BaseMultK0
*
2
;
// K1 should be Number<...>
static
constexpr
auto
K1
=
Number
<
K1Value
>
{};
...
...
@@ -571,11 +571,6 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1
b_thread_3rd_buf
);
b_threadwise_copy
.
MoveSrcSliceWindow
(
b_grid_desc_k0_k1_k2_n0_n1_n2_n3_k3
,
b_thread_slice_copy_step
);
blockwise_gemm
.
Run
(
a_block_buf
,
b_thread_1st_buf
,
c_thread_buf
);
blockwise_gemm
.
MoveABlockSliceWindow
();
s_nop
();
// 2nd
b_threadwise_copy
.
Run
(
b_grid_desc_k0_k1_k2_n0_n1_n2_n3_k3
,
b_grid_buf
,
b_thread_desc_k0_k1_k2_n0_n1_n2_n3_k3
,
...
...
@@ -583,10 +578,15 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1
b_thread_4th_buf
);
b_threadwise_copy
.
MoveSrcSliceWindow
(
b_grid_desc_k0_k1_k2_n0_n1_n2_n3_k3
,
b_thread_slice_copy_step
);
s_nop
();
blockwise_gemm
.
Run
(
a_block_buf
,
b_thread_1st_buf
,
c_thread_buf
);
blockwise_gemm
.
MoveABlockSliceWindow
();
// 2nd
blockwise_gemm
.
Run
(
a_block_buf
,
b_thread_2nd_buf
,
c_thread_buf
);
blockwise_gemm
.
MoveABlockSliceWindow
();
s_nop
();
// 3rd
b_threadwise_copy
.
Run
(
b_grid_desc_k0_k1_k2_n0_n1_n2_n3_k3
,
b_grid_buf
,
...
...
@@ -595,11 +595,6 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1
b_thread_1st_buf
);
b_threadwise_copy
.
MoveSrcSliceWindow
(
b_grid_desc_k0_k1_k2_n0_n1_n2_n3_k3
,
b_thread_slice_copy_step
);
blockwise_gemm
.
Run
(
a_block_buf
,
b_thread_3rd_buf
,
c_thread_buf
);
blockwise_gemm
.
MoveABlockSliceWindow
();
s_nop
();
// 4th
b_threadwise_copy
.
Run
(
b_grid_desc_k0_k1_k2_n0_n1_n2_n3_k3
,
b_grid_buf
,
b_thread_desc_k0_k1_k2_n0_n1_n2_n3_k3
,
...
...
@@ -607,6 +602,11 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1
b_thread_2nd_buf
);
b_threadwise_copy
.
MoveSrcSliceWindow
(
b_grid_desc_k0_k1_k2_n0_n1_n2_n3_k3
,
b_thread_slice_copy_step
);
s_nop
();
blockwise_gemm
.
Run
(
a_block_buf
,
b_thread_3rd_buf
,
c_thread_buf
);
blockwise_gemm
.
MoveABlockSliceWindow
();
// 4th
blockwise_gemm
.
Run
(
a_block_buf
,
b_thread_4th_buf
,
c_thread_buf
);
blockwise_gemm
.
MoveABlockSliceWindow
();
...
...
include/ck/utility/synchronization.hpp
View file @
c0e2c3df
...
...
@@ -16,7 +16,8 @@ __device__ void block_sync_lds()
__syncthreads
();
#endif
}
__device__
void
s_nop
(){
__device__
void
s_nop
()
{
asm
volatile
(
"\
s_nop 0
\n
\
"
::
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment