Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
05830053
"driver/include/host_tensor.hpp" did not exist on "acd7082fe109aa4228dfca652e87cab96bc6837f"
Commit
05830053
authored
Mar 23, 2023
by
aska-0096
Browse files
Bug found, intra-row permute off caused
parent
dc8309db
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
11 additions
and
7 deletions
+11
-7
example/01_gemm/gemm_wmma_fp16.cpp
example/01_gemm/gemm_wmma_fp16.cpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp
.../ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp
+9
-5
include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp
...k/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp
+0
-1
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
...operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
+1
-0
No files found.
example/01_gemm/gemm_wmma_fp16.cpp
View file @
05830053
...
@@ -60,7 +60,7 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
...
@@ -60,7 +60,7 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
true
,
true
,
4
,
// C shuffle (M Repeat) Per store
4
,
// C shuffle (M Repeat) Per store
1
,
// C shuffle (N Repeat) Per store
1
,
// C shuffle (N Repeat) Per store
S
<
1
,
32
,
1
,
8
>
,
S
<
1
,
16
,
1
,
16
>
,
8
>
;
8
>
;
// clang-format on
// clang-format on
...
...
include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp
View file @
05830053
...
@@ -87,11 +87,15 @@ struct DeviceGemmWmma_CShuffle : public DeviceGemm<ALayout,
...
@@ -87,11 +87,15 @@ struct DeviceGemmWmma_CShuffle : public DeviceGemm<ALayout,
static
constexpr
auto
NWaves
=
NPerBlock
/
(
NRepeat
*
NPerWmma
);
static
constexpr
auto
NWaves
=
NPerBlock
/
(
NRepeat
*
NPerWmma
);
static
constexpr
auto
WmmaK
=
16
;
static
constexpr
auto
WmmaK
=
16
;
static
constexpr
auto
AEnableLds
=
NWaves
==
1
?
false
:
true
;
static
constexpr
auto
AEnableLds_auto
=
NWaves
==
1
?
false
:
true
;
static
constexpr
auto
BEnableLds
=
MWaves
==
1
?
false
:
true
;
static
constexpr
auto
BEnableLds_auto
=
MWaves
==
1
?
false
:
true
;
// Unconditional enable double side LDS if uncommented following
// AEnableLds = true;
// If true, LDS is used unconditionally
// BEnableLds = true;
static
constexpr
auto
AEnableLds_manu
=
false
;
static
constexpr
auto
BEnableLds_manu
=
false
;
static
constexpr
auto
AEnableLds
=
AEnableLds_auto
||
AEnableLds_manu
;
static
constexpr
auto
BEnableLds
=
BEnableLds_auto
||
BEnableLds_manu
;
static
constexpr
auto
matrix_padder
=
static
constexpr
auto
matrix_padder
=
MatrixPadder
<
GemmSpec
,
index_t
,
index_t
,
index_t
>
{
MPerBlock
,
NPerBlock
,
KPerBlock
};
MatrixPadder
<
GemmSpec
,
index_t
,
index_t
,
index_t
>
{
MPerBlock
,
NPerBlock
,
KPerBlock
};
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp
View file @
05830053
...
@@ -360,7 +360,6 @@ struct GridwiseGemmPipeline_v1<1, false, true>
...
@@ -360,7 +360,6 @@ struct GridwiseGemmPipeline_v1<1, false, true>
}
}
};
};
// placeholder
template
<
>
template
<
>
struct
GridwiseGemmPipeline_v1
<
1
,
true
,
false
>
struct
GridwiseGemmPipeline_v1
<
1
,
true
,
false
>
{
{
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
View file @
05830053
...
@@ -1401,6 +1401,7 @@ struct ThreadwiseTensorSliceTransfer_StaticToStatic_InterRow
...
@@ -1401,6 +1401,7 @@ struct ThreadwiseTensorSliceTransfer_StaticToStatic_InterRow
// apply element-wise operation
// apply element-wise operation
element_op_
(
v_this_row
,
src_buf
[
Number
<
src_offset
>
{}]);
element_op_
(
v_this_row
,
src_buf
[
Number
<
src_offset
>
{}]);
// apply intra-row permute.
if
constexpr
(
IntraRowSwizzlePerm
)
if
constexpr
(
IntraRowSwizzlePerm
)
{
{
temp
=
__builtin_amdgcn_permlane16
(
temp
=
__builtin_amdgcn_permlane16
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment