Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
9d16e558
Commit
9d16e558
authored
Mar 23, 2023
by
aska-0096
Browse files
confirmed compiler sanity when skip A-LDS
parent
a3b86965
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
11 deletions
+11
-11
example/01_gemm/gemm_wmma_fp16.cpp
example/01_gemm/gemm_wmma_fp16.cpp
+7
-7
include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp
include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp
+4
-4
No files found.
example/01_gemm/gemm_wmma_fp16.cpp
View file @
9d16e558
...
@@ -36,14 +36,14 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
...
@@ -36,14 +36,14 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
CElementOp
,
CElementOp
,
GemmDefault
,
GemmDefault
,
256
,
// BlockSize
256
,
// BlockSize
128
,
// MPerBlock
256
,
// MPerBlock
128
,
// NPerBlock
32
,
// NPerBlock
32
,
// KPerBlock
32
,
// KPerBlock
8
,
// K1
8
,
// K1
16
,
// MPerWmma
16
,
// MPerWmma
16
,
// NPerWmma
16
,
// NPerWmma
1
,
// M Repeat
4
,
// M Repeat
8
,
// N-Repeat
1
,
// N-Repeat
S
<
4
,
64
,
1
>
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
...
@@ -51,15 +51,15 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
...
@@ -51,15 +51,15 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
8
,
8
,
8
,
8
,
true
,
true
,
S
<
4
,
64
,
1
>
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
2
,
8
,
8
,
8
,
8
,
true
,
true
,
1
,
// C shuffle (M Repeat) Per store
4
,
// C shuffle (M Repeat) Per store
4
,
// C shuffle (N Repeat) Per store
1
,
// C shuffle (N Repeat) Per store
S
<
1
,
64
,
1
,
4
>
,
S
<
1
,
64
,
1
,
4
>
,
8
>
;
8
>
;
// clang-format on
// clang-format on
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp
View file @
9d16e558
...
@@ -47,7 +47,7 @@ __global__ void
...
@@ -47,7 +47,7 @@ __global__ void
{
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx1100__) || defined(__gfx1101__) || \
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx1100__) || defined(__gfx1101__) || \
defined(__gfx1102__))
defined(__gfx1102__))
__shared__
char
p_shared
[
GridwiseGemm
::
Get
SharedMem
oryNumberOfByte
()
];
__shared__
char
p_shared
[
GridwiseGemm
::
SharedMem
Trait
::
lds_size
];
GridwiseGemm
::
template
Run
<
HasMainKBlockLoop
>(
p_a_grid
,
GridwiseGemm
::
template
Run
<
HasMainKBlockLoop
>(
p_a_grid
,
p_b_grid
,
p_b_grid
,
...
@@ -130,9 +130,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma
...
@@ -130,9 +130,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma
static
constexpr
auto
I5
=
Number
<
5
>
{};
static
constexpr
auto
I5
=
Number
<
5
>
{};
static
constexpr
auto
I6
=
Number
<
6
>
{};
static
constexpr
auto
I6
=
Number
<
6
>
{};
static
constexpr
auto
I7
=
Number
<
7
>
{};
static
constexpr
auto
I7
=
Number
<
7
>
{};
static
constexpr
auto
B_K0
=
BGridDesc_K0_N_K1
{}.
GetLength
(
I0
);
static
constexpr
auto
B_K1
=
BGridDesc_K0_N_K1
{}.
GetLength
(
I2
);
// FIX ME: To be deprecated
// FIX ME: To be deprecated
static
constexpr
auto
K1
=
Number
<
K1Value
>
{};
static
constexpr
auto
K1
=
Number
<
K1Value
>
{};
...
@@ -299,6 +297,8 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma
...
@@ -299,6 +297,8 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma
__host__
__device__
static
constexpr
auto
__host__
__device__
static
constexpr
auto
MakeBBlockDescriptor_K0_N0_N1_N2_K1
(
const
BBlockDesc_BK0_N_BK1
&
)
MakeBBlockDescriptor_K0_N0_N1_N2_K1
(
const
BBlockDesc_BK0_N_BK1
&
)
{
{
constexpr
auto
B_K0
=
KPerBlock
/
K1
;
constexpr
auto
B_K1
=
K1
;
return
transform_tensor_descriptor
(
return
transform_tensor_descriptor
(
BBlockDesc_BK0_N_BK1
{},
BBlockDesc_BK0_N_BK1
{},
make_tuple
(
make_pass_through_transform
(
Number
<
B_K0
>
{}),
make_tuple
(
make_pass_through_transform
(
Number
<
B_K0
>
{}),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment