Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
84b4ada5
Commit
84b4ada5
authored
Feb 27, 2023
by
aska-0096
Browse files
gemm sanity fix
parent
6a9d7b64
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
14 deletions
+14
-14
include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp
...k/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp
+1
-1
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
...operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
+13
-13
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp
View file @
84b4ada5
...
@@ -343,7 +343,7 @@ struct GridwiseGemmPipeline_v1<1, false, true>
...
@@ -343,7 +343,7 @@ struct GridwiseGemmPipeline_v1<1, false, true>
b_blockwise_copy
.
RunWrite
(
b_block_desc
,
b_block_buf
);
b_blockwise_copy
.
RunWrite
(
b_block_desc
,
b_block_buf
);
//
a_block_buf = a_block_buf_switch;
a_block_buf
=
a_block_buf_switch
;
++
i
;
++
i
;
}
while
(
i
<
(
num_loop
-
1
));
}
while
(
i
<
(
num_loop
-
1
));
}
}
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
View file @
84b4ada5
...
@@ -1398,23 +1398,23 @@ struct ThreadwiseTensorSliceTransfer_StaticToStatic_InterRow
...
@@ -1398,23 +1398,23 @@ struct ThreadwiseTensorSliceTransfer_StaticToStatic_InterRow
if
constexpr
(
IntraRowSwizzlePerm
)
if
constexpr
(
IntraRowSwizzlePerm
)
{
{
//
temp = __builtin_amdgcn_permlane16(
temp
=
__builtin_amdgcn_permlane16
(
//
temp,
temp
,
//
type_convert<int>(v_this_row),
type_convert
<
int
>
(
v_this_row
),
//
0xb3a29180,
0xb3a29180
,
//
0xf7e6d5c4,
0xf7e6d5c4
,
//
1,
1
,
//
0);
0
);
v_this_row
=
type_convert
<
SrcData
>
(
temp
);
v_this_row
=
type_convert
<
SrcData
>
(
temp
);
}
}
// apply inter-row permute.
// apply inter-row permute.
//
temp = __builtin_amdgcn_permlanex16(temp,
temp
=
__builtin_amdgcn_permlanex16
(
temp
,
//
type_convert<int>(v_this_row),
type_convert
<
int
>
(
v_this_row
),
//
LowEightRowlaneIdx,
LowEightRowlaneIdx
,
//
HighEightRowLaneIdx,
HighEightRowLaneIdx
,
//
1,
1
,
//
0);
0
);
v_theother_row
=
type_convert
<
SrcData
>
(
temp
);
v_theother_row
=
type_convert
<
SrcData
>
(
temp
);
if
(
get_thread_local_1d_id
()
%
32
<
16
)
if
(
get_thread_local_1d_id
()
%
32
<
16
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment