Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
afa31621
Unverified
Commit
afa31621
authored
May 31, 2023
by
Po Yen Chen
Committed by
GitHub
May 31, 2023
Browse files
Merge branch 'develop' into feature/support-readfirstlane-for-object-types
parents
1001c731
6eef0755
Changes
45
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
665 additions
and
69 deletions
+665
-69
test/grouped_gemm/grouped_gemm_fp16.cpp
test/grouped_gemm/grouped_gemm_fp16.cpp
+0
-69
test/grouped_gemm/test_grouped_gemm_interface.cpp
test/grouped_gemm/test_grouped_gemm_interface.cpp
+202
-0
test/grouped_gemm/test_grouped_gemm_splitk.cpp
test/grouped_gemm/test_grouped_gemm_splitk.cpp
+34
-0
test/grouped_gemm/test_grouped_gemm_ut_cases.inc
test/grouped_gemm/test_grouped_gemm_ut_cases.inc
+180
-0
test/grouped_gemm/test_grouped_gemm_util.hpp
test/grouped_gemm/test_grouped_gemm_util.hpp
+249
-0
No files found.
test/grouped_gemm/grouped_gemm_fp16.cpp
deleted
100644 → 0
View file @
1001c731
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <random>
#include "profiler/profile_grouped_gemm_impl.hpp"
namespace
{
using
ADataType
=
ck
::
half_t
;
using
BDataType
=
ck
::
half_t
;
using
CDataType
=
ck
::
half_t
;
using
AccDataType
=
float
;
using
Row
=
ck
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck
::
tensor_layout
::
gemm
::
ColumnMajor
;
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
bool
TestGroupedGemm
()
{
std
::
mt19937
gen
(
19391
);
std
::
uniform_int_distribution
<>
distrib
(
1
,
10
);
int
group_count
=
distrib
(
gen
);
// GEMM shape
std
::
vector
<
ck
::
tensor_operation
::
device
::
GemmDesc
>
gemm_descs
;
std
::
vector
<
const
void
*>
p_a
,
p_b
;
std
::
vector
<
void
*>
p_c
;
std
::
vector
<
int
>
Ms
,
Ns
,
Ks
,
StrideAs
,
StrideBs
,
StrideCs
;
for
(
int
i
=
0
;
i
<
group_count
;
i
++
)
{
Ms
.
push_back
(
256
+
256
*
distrib
(
gen
));
Ns
.
push_back
(
256
+
256
*
distrib
(
gen
));
Ks
.
push_back
(
128
+
128
*
distrib
(
gen
));
StrideAs
.
push_back
(
std
::
is_same
<
Row
,
ALayout
>::
value
?
Ks
[
i
]
:
Ms
[
i
]);
StrideBs
.
push_back
(
std
::
is_same
<
Row
,
BLayout
>::
value
?
Ns
[
i
]
:
Ks
[
i
]);
StrideCs
.
push_back
(
std
::
is_same
<
Row
,
CLayout
>::
value
?
Ns
[
i
]
:
Ms
[
i
]);
}
return
ck
::
profiler
::
profile_grouped_gemm_impl
<
ADataType
,
BDataType
,
CDataType
,
AccDataType
,
ALayout
,
BLayout
,
CLayout
>
(
true
,
1
,
false
,
1
,
Ms
,
Ns
,
Ks
,
StrideAs
,
StrideBs
,
StrideCs
);
}
}
// anonymous namespace
// Runs TestGroupedGemm for four A/B layout combinations (C is always
// row-major), prints an overall verdict and returns 0 on success, 1 otherwise.
// The checks are chained with &&, so once one combination fails the remaining
// ones are not executed.
int main()
{
    const bool all_passed = TestGroupedGemm<Row, Row, Row>() &&
                            TestGroupedGemm<Row, Col, Row>() &&
                            TestGroupedGemm<Col, Row, Row>() &&
                            TestGroupedGemm<Col, Col, Row>();

    const char* verdict = all_passed ? "SUCCESS" : "FAILURE";
    std::cout << "TestGroupedGemm ..... " << verdict << std::endl;

    if(all_passed)
    {
        return 0;
    }
    return 1;
}
test/grouped_gemm/test_grouped_gemm_interface.cpp
0 → 100644
View file @
afa31621
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <stdexcept>
#include <vector>
#include "gtest/gtest.h"
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "test_grouped_gemm_util.hpp"
// Test fixture for the grouped GEMM split-K interface checks with
// A row-major, B column-major and E row-major.
//
// It only provides type aliases; it holds no state and overrides nothing.
class TestGGemmSplitKInterface_MKNKMN : public ::testing::Test
{
    protected:
    using Row = ck::tensor_layout::gemm::RowMajor;
    using Col = ck::tensor_layout::gemm::ColumnMajor;

    // Layouts forwarded to the instance wrapper.
    using ALayout = Row;
    using BLayout = Col;
    using ELayout = Row;

    static constexpr ck::tensor_operation::device::GemmSpecialization GemmDefault =
        ck::tensor_operation::device::GemmSpecialization::Default;

    // Binds the fixture's layouts to the instance wrapper and leaves the
    // specialization, K tiling and vector widths to the individual tests.
    template <ck::tensor_operation::device::GemmSpecialization GemmSpec,
              ck::index_t KPerBlock,
              ck::index_t K1,
              ck::index_t ABlockTransferSrcScalarPerVector,
              ck::index_t BBlockTransferSrcScalarPerVector,
              ck::index_t CDEBlockTransferScalarPerVector_NPerBlock>
    using GGemmInstance =
        ck::test::DeviceGroupedGemmSplitkInstanceWrapper<ALayout,
                                                         BLayout,
                                                         ELayout,
                                                         GemmSpec,
                                                         KPerBlock,
                                                         K1,
                                                         ABlockTransferSrcScalarPerVector,
                                                         BBlockTransferSrcScalarPerVector,
                                                         CDEBlockTransferScalarPerVector_NPerBlock>;

    // Default specialization with KPerBlock = 32, K1 = 8 and vector widths
    // A = 4, B = 8, CDE = 8.
    using DefaultGGemmInstance = GGemmInstance<GemmDefault, 32, 8, 4, 8, 8>;
};
// Expects IsSupported() to return false when one group has an M, and then an
// N, that does not fit the instance's tile size. The tile sizes themselves are
// defined by the instance wrapper, not in this test.
TEST_F(TestGGemmSplitKInterface_MKNKMN, TileSize)
{
    constexpr int N = 256;
    constexpr int K = 128;

    // Third group carries the odd M (188).
    std::vector<int> Ms{128, 256, 188, 512};
    const auto group_count = Ms.size();

    std::vector<int> Ns(group_count, N);
    std::vector<int> Ks(group_count, K);
    std::vector<int> StrideAs(group_count, K);
    std::vector<int> StrideBs(group_count, K);
    std::vector<int> StrideCs(group_count, N);

    // M % MPerBlock
    EXPECT_FALSE(DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    // Second group now carries the odd N (177).
    // NOTE(review): StrideCs stays at 256 for every group although Ns[3] is
    // 512 - confirm the rejection is caused by the N check, not the stride.
    Ms = {256, 128, 128, 512};
    Ns = {256, 177, 128, 512};

    // N % NPerBlock
    EXPECT_FALSE(DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));
}
// Expects IsSupported() to return false for an MNK-padded instance when a
// group's K or N is not a multiple of the corresponding vector width
// (A = 4, B = 8, CDE = 8 for the instance used here).
TEST_F(TestGGemmSplitKInterface_MKNKMN, VectorLoadWidth)
{
    static constexpr auto GemmMNKPadding =
        ck::tensor_operation::device::GemmSpecialization::MNKPadding;
    using PaddedGGemmInstance = GGemmInstance<GemmMNKPadding, 32, 8, 4, 8, 8>;

    constexpr int N = 256;
    constexpr int K = 512;

    std::vector<int> Ms{128, 256, 256, 512};
    const auto group_count = Ms.size();

    std::vector<int> Ns(group_count, N);
    std::vector<int> Ks(group_count, K);
    std::vector<int> StrideAs(group_count, K);
    std::vector<int> StrideBs(group_count, K);
    std::vector<int> StrideCs(group_count, N);

    // K % ABlockTransferSrcScalarPerVector (177 is not a multiple of 4)
    Ks = {256, 177, 128, 512};
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    // K % BBlockTransferSrcScalarPerVector (164 is a multiple of 4 but not of 8)
    Ks = {256, 164, 128, 512};
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    // N % CDEBlockTransferScalarPerVector_NPerBlock (127 is not a multiple of 8)
    // NOTE(review): StrideCs stays at 256 although Ns[3] is 512 - confirm the
    // rejection is caused by the vector-width check, not the stride.
    Ks.assign(4, 128);
    Ns = {256, 127, 128, 512};
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));
}
// Exercises the split-K (kbatch = 4) path with K values that the instance is
// expected to refuse: first through IsSupported() returning false, then
// through Run() throwing std::runtime_error.
TEST_F(TestGGemmSplitKInterface_MKNKMN, KLoops)
{
    constexpr int N      = 256;
    constexpr int K      = 128;
    constexpr int kbatch = 4;

    std::vector<int> Ms{128, 256, 256, 512};
    const auto group_count = Ms.size();

    std::vector<int> Ns(group_count, N);
    std::vector<int> Ks(group_count, K);
    std::vector<int> StrideAs(group_count, K);
    std::vector<int> StrideBs(group_count, K);
    std::vector<int> StrideCs(group_count, N);

    // NOTE(review): StrideAs/StrideBs remain 128 while the Ks assigned below
    // go up to 768 - confirm the failures come from the K-loop checks and not
    // from the strides.

    // kloops % 2
    Ks = {256, 512, 320, 768};
    EXPECT_FALSE(
        DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs, kbatch));

    // Not all gemms have same value for main_k0_block_loop!
    Ks = {256, 512, 512, 512};
    EXPECT_THROW(DefaultGGemmInstance{}.Run(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs, kbatch),
                 std::runtime_error);
}
// Test fixture for the grouped GEMM split-K interface checks with
// A column-major, B row-major and E column-major.
//
// It only provides type aliases; it holds no state and overrides nothing.
class TestGGemmSplitKInterface_KMKNNM : public ::testing::Test
{
    protected:
    using Row = ck::tensor_layout::gemm::RowMajor;
    using Col = ck::tensor_layout::gemm::ColumnMajor;

    // Layouts forwarded to the instance wrapper.
    using ALayout = Col;
    using BLayout = Row;
    using ELayout = Col;

    static constexpr ck::tensor_operation::device::GemmSpecialization GemmDefault =
        ck::tensor_operation::device::GemmSpecialization::Default;

    // Binds the fixture's layouts to the instance wrapper and leaves the
    // specialization, K tiling and vector widths to the individual tests.
    template <ck::tensor_operation::device::GemmSpecialization GemmSpec,
              ck::index_t KPerBlock,
              ck::index_t K1,
              ck::index_t ABlockTransferSrcScalarPerVector,
              ck::index_t BBlockTransferSrcScalarPerVector,
              ck::index_t CDEBlockTransferScalarPerVector_NPerBlock>
    using GGemmInstance =
        ck::test::DeviceGroupedGemmSplitkInstanceWrapper<ALayout,
                                                         BLayout,
                                                         ELayout,
                                                         GemmSpec,
                                                         KPerBlock,
                                                         K1,
                                                         ABlockTransferSrcScalarPerVector,
                                                         BBlockTransferSrcScalarPerVector,
                                                         CDEBlockTransferScalarPerVector_NPerBlock>;

    // Default specialization with KPerBlock = 32, K1 = 8 and vector widths
    // A = 4, B = 8, CDE = 4.
    using DefaultGGemmInstance = GGemmInstance<GemmDefault, 32, 8, 4, 8, 4>;
};
// Expects IsSupported() to return false when one group has an M, and then an
// N, that does not fit the instance's tile size. The tile sizes themselves are
// defined by the instance wrapper, not in this test.
TEST_F(TestGGemmSplitKInterface_KMKNNM, TileSize)
{
    constexpr int N = 256;
    constexpr int K = 128;

    // Third group carries the odd M (188).
    std::vector<int> Ms{128, 256, 188, 512};
    const auto group_count = Ms.size();

    // NOTE(review): the strides use the same (K, K, N) pattern as the MKNKMN
    // fixture although the layouts here are Col/Row/Col - confirm these values
    // are intended and do not cause the rejection by themselves.
    std::vector<int> Ns(group_count, N);
    std::vector<int> Ks(group_count, K);
    std::vector<int> StrideAs(group_count, K);
    std::vector<int> StrideBs(group_count, K);
    std::vector<int> StrideCs(group_count, N);

    // M % MPerBlock
    EXPECT_FALSE(DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    // Second group now carries the odd N (177).
    Ms = {128, 256, 256, 512};
    Ns = {256, 177, 128, 512};

    // N % NPerBlock
    EXPECT_FALSE(DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));
}
// Expects IsSupported() to return false for an MNK-padded instance when a
// group's M or N is not a multiple of the corresponding vector width
// (A = 2, B = 8, CDE = 4 for the instance used here).
TEST_F(TestGGemmSplitKInterface_KMKNNM, VectorLoadWidth)
{
    static constexpr auto GemmMNKPadding =
        ck::tensor_operation::device::GemmSpecialization::MNKPadding;
    using PaddedGGemmInstance = GGemmInstance<GemmMNKPadding, 32, 8, 2, 8, 4>;

    constexpr int N = 256;
    constexpr int K = 512;

    std::vector<int> Ms{128, 256, 256, 512};
    const auto group_count = Ms.size();

    // NOTE(review): the strides use the same (K, K, N) pattern as the MKNKMN
    // fixture although the layouts here are Col/Row/Col - confirm these values
    // are intended and do not cause the rejection by themselves.
    std::vector<int> Ns(group_count, N);
    std::vector<int> Ks(group_count, K);
    std::vector<int> StrideAs(group_count, K);
    std::vector<int> StrideBs(group_count, K);
    std::vector<int> StrideCs(group_count, N);

    // M % ABlockTransferSrcScalarPerVector (177 is not a multiple of 2)
    Ms = {256, 177, 128, 512};
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    // N % BBlockTransferSrcScalarPerVector (164 is not a multiple of 8)
    Ms = {128, 256, 256, 512};
    Ns = {256, 164, 128, 512};
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    // M % CDEBlockTransferScalarPerVector_NPerBlock
    // (130 is a multiple of 2 but not of 4)
    Ns = {128, 256, 256, 512};
    Ms = {256, 130, 128, 512};
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));
}
test/grouped_gemm/test_grouped_gemm_splitk.cpp
0 → 100644
View file @
afa31621
This diff is collapsed.
Click to expand it.
test/grouped_gemm/test_grouped_gemm_ut_cases.inc
0 → 100644
View file @
afa31621
This diff is collapsed.
Click to expand it.
test/grouped_gemm/test_grouped_gemm_util.hpp
0 → 100644
View file @
afa31621
This diff is collapsed.
Click to expand it.
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment