Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
82e1f1b9
Commit
82e1f1b9
authored
Feb 13, 2025
by
coderfeli
Browse files
change cshuffle cluster, mi300x reach roofline
parent
568ad1e1
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
3 deletions
+7
-3
example/65_gemm_multiply_multiply/moe_gemm2.cpp
example/65_gemm_multiply_multiply/moe_gemm2.cpp
+7
-3
No files found.
example/65_gemm_multiply_multiply/moe_gemm2.cpp
View file @
82e1f1b9
...
...
@@ -122,13 +122,17 @@ using CDEElementOp = MulABScaleExpertWeight;
static
constexpr
auto
GemmSpec
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
Default
;
static
constexpr
ck
::
index_t
MPerBlock
=
64
;
static
constexpr
ck
::
index_t
BLOCKSIZE
=
256
;
static
constexpr
ck
::
index_t
NPerBlock
=
128
;
static
constexpr
ck
::
index_t
MNPerXDL
=
32
;
static
constexpr
ck
::
index_t
KPerBlock
=
256
/
sizeof
(
A0DataType
);
static
constexpr
ck
::
index_t
MXDLPerWave
=
MPerBlock
/
32
;
//todo fix this constraint
static
constexpr
ck
::
index_t
CShuffleMXDLPerWave
=
MPerBlock
/
32
;
static
constexpr
ck
::
index_t
CShuffleNLane
=
NPerBlock
/
2
;
static
constexpr
ck
::
index_t
CShuffleMLane
=
BLOCKSIZE
/
CShuffleNLane
;
static
constexpr
ck
::
index_t
AK1
=
16
/
sizeof
(
A0DataType
);
static
constexpr
ck
::
index_t
BK1
=
16
/
sizeof
(
B0DataType
);
static
constexpr
ck
::
index_t
EVec
=
16
/
sizeof
(
EDataType
)
;
static
constexpr
ck
::
index_t
EVec
=
2
;
static
constexpr
ck
::
index_t
D0Vec
=
1
;
static
constexpr
ck
::
index_t
D1Vec
=
1
;
static
constexpr
ck
::
index_t
D2Vec
=
1
;
...
...
@@ -145,7 +149,7 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceMoeGemm
<
Row
,
Col
,
DsLayout
,
ELayout
,
A0DataType
,
B0DataType
,
DsDataType
,
EDataType
,
AccDataType
,
CShuffleDataType
,
AElementOp
,
BElementOp
,
CDEElementOp
,
GemmSpec
,
//threadnum, mblock, nblock, kblock
256
,
MPerBlock
,
128
,
KPerBlock
,
BLOCKSIZE
,
MPerBlock
,
NPerBlock
,
KPerBlock
,
// ak1, bk1
AK1
,
BK1
,
// mn_perxdl
...
...
@@ -160,7 +164,7 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceMoeGemm
// CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer|
// MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector|
// PerShuffle| PerShuffle| _NBlock_NWaveNPerXdl| _NWaveNPerXdl|
CShuffleMXDLPerWave
,
1
,
S
<
1
,
16
,
1
,
16
>
,
S
<
EVec
,
D0Vec
,
D1Vec
,
D2Vec
>
,
CShuffleMXDLPerWave
,
1
,
S
<
1
,
CShuffleMLane
,
1
,
CShuffleNLane
>
,
S
<
EVec
,
D0Vec
,
D1Vec
,
D2Vec
>
,
ck
::
BlockGemmPipelineScheduler
::
Intrawave
,
ck
::
BlockGemmPipelineVersion
::
v1
,
false
,
A0DataType
>
;
// kernel 2: 128->32x128x128
// < Row, Col, DsLayout, ELayout, A0DataType, B0DataType, DsDataType, EDataType, AccDataType, CShuffleDataType, AElementOp, BElementOp, CDEElementOp, GemmSpec, 128, 32, 128, 128, 16, 16, 32, 32, 1, 2, S<8, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, S<8, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, 1, 1, S<1, 16, 1, 8>, S<8, 8, 1>, ck::BlockGemmPipelineScheduler::Interwave, ck::BlockGemmPipelineVersion::v1, EDataType>;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment