// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include <stdexcept>
#include <vector>
#include "gtest/gtest.h"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "test_grouped_gemm_util.hpp"

Jing Zhang's avatar
Jing Zhang committed
13
class TestGGemmInterface_MKNKMN : public ::testing::Test
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
{
    protected:
    using Row = ck::tensor_layout::gemm::RowMajor;
    using Col = ck::tensor_layout::gemm::ColumnMajor;

    using ALayout = Row;
    using BLayout = Col;
    using ELayout = Row;

    static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;

    template <ck::tensor_operation::device::GemmSpecialization GemmSpec,
              ck::index_t KPerBlock,
              ck::index_t K1,
              ck::index_t ABlockTransferSrcScalarPerVector,
              ck::index_t BBlockTransferSrcScalarPerVector,
              ck::index_t CDEBlockTransferScalarPerVector_NPerBlock>
    using GGemmInstance =
Jing Zhang's avatar
Jing Zhang committed
32
33
34
35
36
37
38
39
40
        ck::test::DeviceGroupedGemmInstanceWrapper<ALayout,
                                                   BLayout,
                                                   ELayout,
                                                   GemmSpec,
                                                   KPerBlock,
                                                   K1,
                                                   ABlockTransferSrcScalarPerVector,
                                                   BBlockTransferSrcScalarPerVector,
                                                   CDEBlockTransferScalarPerVector_NPerBlock>;
41
42
43
44

    using DefaultGGemmInstance = GGemmInstance<GemmDefault, 32, 8, 4, 8, 8>;
};

Jing Zhang's avatar
Jing Zhang committed
45
TEST_F(TestGGemmInterface_MKNKMN, TileSize)
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
{
    std::vector<int> Ms{128, 256, 188, 512};
    constexpr int N = 256;
    constexpr int K = 128;

    std::vector<int> Ns(Ms.size(), N);
    std::vector<int> Ks(Ms.size(), K);
    std::vector<int> StrideAs(Ms.size(), K);
    std::vector<int> StrideBs(Ms.size(), K);
    std::vector<int> StrideCs(Ms.size(), N);

    // M % MPerBlock
    EXPECT_FALSE(DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    Ms = std::vector<int>{256, 128, 128, 512};
    Ns = std::vector<int>{256, 177, 128, 512};
    // N % NPerBlock
    EXPECT_FALSE(DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));
}

Jing Zhang's avatar
Jing Zhang committed
66
TEST_F(TestGGemmInterface_MKNKMN, VectorLoadWidth)
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
{
    static constexpr auto GemmMNKPadding =
        ck::tensor_operation::device::GemmSpecialization::MNKPadding;
    using PaddedGGemmInstance = GGemmInstance<GemmMNKPadding, 32, 8, 4, 8, 8>;

    std::vector<int> Ms{128, 256, 256, 512};
    constexpr int N = 256;
    constexpr int K = 512;

    std::vector<int> Ns(Ms.size(), N);
    std::vector<int> Ks(Ms.size(), K);
    std::vector<int> StrideAs(Ms.size(), K);
    std::vector<int> StrideBs(Ms.size(), K);
    std::vector<int> StrideCs(Ms.size(), N);

    // K % ABlockTransferSrcScalarPerVector
    Ks = std::vector<int>{256, 177, 128, 512};
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    Ks = std::vector<int>{256, 164, 128, 512};
    // K % BBlockTransferSrcScalarPerVector
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    Ks = std::vector<int>(4, 128);
    Ns = std::vector<int>{256, 127, 128, 512};
    // N % CBlockTransferScalarPerVector_NWaveNPerXDL
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));
}

Jing Zhang's avatar
Jing Zhang committed
96
class TestGGemmInterface_KMKNNM : public ::testing::Test
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
{
    protected:
    using Row = ck::tensor_layout::gemm::RowMajor;
    using Col = ck::tensor_layout::gemm::ColumnMajor;

    using ALayout = Col;
    using BLayout = Row;
    using ELayout = Col;

    static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;

    template <ck::tensor_operation::device::GemmSpecialization GemmSpec,
              ck::index_t KPerBlock,
              ck::index_t K1,
              ck::index_t ABlockTransferSrcScalarPerVector,
              ck::index_t BBlockTransferSrcScalarPerVector,
              ck::index_t CDEBlockTransferScalarPerVector_NPerBlock>
    using GGemmInstance =
Jing Zhang's avatar
Jing Zhang committed
115
116
117
118
119
120
121
122
123
        ck::test::DeviceGroupedGemmInstanceWrapper<ALayout,
                                                   BLayout,
                                                   ELayout,
                                                   GemmSpec,
                                                   KPerBlock,
                                                   K1,
                                                   ABlockTransferSrcScalarPerVector,
                                                   BBlockTransferSrcScalarPerVector,
                                                   CDEBlockTransferScalarPerVector_NPerBlock>;
124
125
126
127

    using DefaultGGemmInstance = GGemmInstance<GemmDefault, 32, 8, 4, 8, 4>;
};

Jing Zhang's avatar
Jing Zhang committed
128
TEST_F(TestGGemmInterface_KMKNNM, TileSize)
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
{
    std::vector<int> Ms{128, 256, 188, 512};
    constexpr int N = 256;
    constexpr int K = 128;

    std::vector<int> Ns(Ms.size(), N);
    std::vector<int> Ks(Ms.size(), K);
    std::vector<int> StrideAs(Ms.size(), K);
    std::vector<int> StrideBs(Ms.size(), K);
    std::vector<int> StrideCs(Ms.size(), N);

    // M % MPerBlock
    EXPECT_FALSE(DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    Ms = std::vector<int>{128, 256, 256, 512};
    Ns = std::vector<int>{256, 177, 128, 512};
    // N % NPerBlock
    EXPECT_FALSE(DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));
}

Jing Zhang's avatar
Jing Zhang committed
149
TEST_F(TestGGemmInterface_KMKNNM, VectorLoadWidth)
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
{
    static constexpr auto GemmMNKPadding =
        ck::tensor_operation::device::GemmSpecialization::MNKPadding;
    using PaddedGGemmInstance = GGemmInstance<GemmMNKPadding, 32, 8, 2, 8, 4>;

    std::vector<int> Ms{128, 256, 256, 512};
    constexpr int N = 256;
    constexpr int K = 512;

    std::vector<int> Ns(Ms.size(), N);
    std::vector<int> Ks(Ms.size(), K);
    std::vector<int> StrideAs(Ms.size(), K);
    std::vector<int> StrideBs(Ms.size(), K);
    std::vector<int> StrideCs(Ms.size(), N);

    // M % ABlockTransferSrcScalarPerVector
    Ms = std::vector<int>{256, 177, 128, 512};
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    Ms = std::vector<int>{128, 256, 256, 512};
    Ns = std::vector<int>{256, 164, 128, 512};
    // N % BBlockTransferSrcScalarPerVector
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));

    Ns = std::vector<int>{128, 256, 256, 512};
    Ms = std::vector<int>{256, 130, 128, 512};
    // M % CBlockTransferScalarPerVector_NWaveNPerXDL
    EXPECT_FALSE(PaddedGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs));
}