// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <numeric>
#include <tuple>
#include <vector>

#include "ck/ck.hpp"
#include "gtest/gtest.h"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "test/smfmac_op/smfmac_op_util.hpp"
#include "ck/host_utility/device_prop.hpp"

// Short aliases for the element types, tensor layout, and element-wise
// operation that the fixture and the KernelTypes list below refer to.
using BF16        = ck::bhalf_t;
using F16         = ck::half_t;
using F32         = float;
// Layout tag passed three times to ck::smfmac_op_util::TestSmfmac by the fixture.
using Row         = ck::tensor_layout::gemm::RowMajor;
// Element-wise operation tag passed three times to ck::smfmac_op_util::TestSmfmac.
using PassThrough = ck::tensor_operation::element_wise::PassThrough;

// Typed test fixture that exercises one smfmac matmul configuration.
//
// `Tuple` is a std::tuple that is read positionally:
//    0: Src1Type      1: Src1VecSize
//    2: Src2Type      3: Src2VecSize
//    4: DstType       5: AccVecSize
//    6: GPUAccType    7: CPUAccType
//    8: M             9: N            10: K
// The integer entries are types that are default-constructed and whose
// `value` member is read (e.g. ck::Number<>).
template <typename Tuple>
class TestSmfmac : public ::testing::Test
{
    protected:
    using Src1Type                           = std::tuple_element_t<0, Tuple>;
    static constexpr ck::index_t Src1VecSize = std::tuple_element_t<1, Tuple>{}.value;
    using Src2Type                           = std::tuple_element_t<2, Tuple>;
    static constexpr ck::index_t Src2VecSize = std::tuple_element_t<3, Tuple>{}.value;
    using DstType                            = std::tuple_element_t<4, Tuple>;
    static constexpr ck::index_t AccVecSize  = std::tuple_element_t<5, Tuple>{}.value;
    using GPUAccType                         = std::tuple_element_t<6, Tuple>;
    using CPUAccType                         = std::tuple_element_t<7, Tuple>;
    static constexpr ck::index_t M           = std::tuple_element_t<8, Tuple>{}.value;
    static constexpr ck::index_t N           = std::tuple_element_t<9, Tuple>{}.value;
    static constexpr ck::index_t K           = std::tuple_element_t<10, Tuple>{}.value;

    // Instantiates ck::smfmac_op_util::matmul for this configuration, hands it
    // to the ck::smfmac_op_util::TestSmfmac functor, and expects every kernel
    // in the container to report success.
    //
    // The kernels are only run when the device name is "gfx942" or "gfx950".
    // On any other device nothing is launched and `pass` keeps its initial
    // value, so the test passes vacuously.
    // NOTE(review): consider reporting a skip instead of a pass in that case.
    void Run()
    {
        bool pass = true;

        // Query the device name once instead of once per comparison.
        const auto device_name = ck::get_device_name();

        if(device_name == "gfx942" || device_name == "gfx950")
        {
            constexpr auto matmul_default = ck::smfmac_op_util::matmul<Src1Type,
                                                                       Src1VecSize,
                                                                       Src2Type,
                                                                       Src2VecSize,
                                                                       GPUAccType,
                                                                       AccVecSize,
                                                                       DstType,
                                                                       M,
                                                                       N,
                                                                       K>;

            // Tuple of kernels to test; currently holds the single default matmul.
            constexpr auto smfmac_kernel_container = std::make_tuple(matmul_default);

            // Compile-time loop over the container: `i` carries the index as a
            // type, so `i.value` can be used as a template argument.
            ck::static_for<0, std::tuple_size_v<decltype(smfmac_kernel_container)>, 1>{}(
                [&](auto i) {
                    pass &= ck::smfmac_op_util::TestSmfmac<
                        std::tuple_element_t<i.value, decltype(smfmac_kernel_container)>,
                        Src1Type,
                        Src2Type,
                        DstType,
                        GPUAccType,
                        CPUAccType,
                        decltype(Row{}),
                        decltype(Row{}),
                        decltype(Row{}),
                        PassThrough,
                        PassThrough,
                        PassThrough,
                        AccVecSize,
                        M,
                        N,
                        K>{}(std::get<i.value>(smfmac_kernel_container));
                });
        }

        EXPECT_TRUE(pass);
    }
};

// Shorthand for ck::Number<N>; lets KernelTypes carry integer parameters as types.
template <ck::index_t N>
using I = ck::Number<N>;

// Configurations the typed test suite is instantiated with. Each tuple is read
// positionally by the TestSmfmac fixture as:
//   <Src1Type, Src1VecSize, Src2Type, Src2VecSize, DstType, AccVecSize,
//    GPUAccType, CPUAccType, M, N, K>
// The list covers F16 and BF16 sources with F32 destination/accumulators, for
// M x N x K of 16x16x32 (AccVecSize 4) and 32x32x16 (AccVecSize 16).
using KernelTypes =
    ::testing::Types<std::tuple<F16, I<4>, F16, I<8>, F32, I<4>, F32, F32, I<16>, I<16>, I<32>>,
                     std::tuple<BF16, I<4>, BF16, I<8>, F32, I<4>, F32, F32, I<16>, I<16>, I<32>>,
                     std::tuple<F16, I<4>, F16, I<8>, F32, I<16>, F32, F32, I<32>, I<32>, I<16>>,
                     std::tuple<BF16, I<4>, BF16, I<8>, F32, I<16>, F32, F32, I<32>, I<32>, I<16>>>;

// Register the fixture once per entry in KernelTypes; each instantiation simply
// calls the fixture's Run().
TYPED_TEST_SUITE(TestSmfmac, KernelTypes);
TYPED_TEST(TestSmfmac, TestSmfmacFP16BF16) { this->Run(); }