"vscode:/vscode.git/clone" did not exist on "75235419621f38a4b53ae5c2882997a4ce7e698e"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include "common.hpp"

#include "ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp"

// Element types of the GEMM operands and result: A, B and C are all half precision.
using ADataType = ck::half_t;
using BDataType = ck::half_t;
using CDataType = ck::half_t;

// Accumulation and C-shuffle staging both use single-precision float.
using AccDataType      = float;
using CShuffleDataType = float;

// Tensor layouts: A and C are Row, B is Col.
// (Row / Col are not declared in this file; presumably provided via common.hpp.)
using ALayout = Row;
using BLayout = Col;
using CLayout = Row;

// Element-wise operations applied to A, B and C; all three are PassThrough.
// (PassThrough is not declared in this file; presumably provided via common.hpp.)
using AElementOp = PassThrough;
using BElementOp = PassThrough;
using CElementOp = PassThrough;

aska-0096's avatar
aska-0096 committed
22
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::MNKPadding;
aska-0096's avatar
aska-0096 committed
23

Jing Zhang's avatar
Jing Zhang committed
24
25
26
27
28
29
30
31
32
33
34
35
36
using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle<
    ALayout,
    BLayout,
    CLayout,
    ADataType,
    BDataType,
    CDataType,
    AccDataType,
    CShuffleDataType,
    AElementOp,
    BElementOp,
    CElementOp,
    GemmDefault,
Jing Zhang's avatar
Jing Zhang committed
37
38
39
40
41
42
43
44
45
46
47
    2,   // Prefetch stage
    256, // BlockSize
    128, // MPerBlock
    256, // NPerBlock
    64,  // KPerBlock
    8,   // K1
    16,  // MPerWmma
    16,  // NPerWmma
    4,   // M-Repeat // M-PerWmma / M-Repeat = M-Wave
    4,   // N-Repeat // N-PerWmma / N-Repeat = N-Wave
    S<4, 64, 1>,
Jing Zhang's avatar
Jing Zhang committed
48
49
50
51
52
53
    S<1, 0, 2>,
    S<1, 0, 2>,
    2,
    8,
    8,
    true,
Jing Zhang's avatar
Jing Zhang committed
54
    S<4, 64, 1>,
Jing Zhang's avatar
Jing Zhang committed
55
56
57
58
59
60
61
62
    S<1, 0, 2>,
    S<1, 0, 2>,
    2,
    8,
    8,
    true,
    1, // C shuffle (M Repeat) Per store
    1, // C shuffle (N Repeat) Per store
Jing Zhang's avatar
Jing Zhang committed
63
    S<1, 32, 1, 8>,
Jing Zhang's avatar
Jing Zhang committed
64
    8>;
65

aska-0096's avatar
aska-0096 committed
66
67
68
69
70
71
using ReferenceGemmInstance = ck::tensor_operation::host::
    ReferenceGemm<ADataType, BDataType, CDataType, AccDataType, AElementOp, BElementOp, CElementOp>;

#include "run_gemm_example.inc"

// Program entry point: forwards the command line to run_gemm_example and maps
// its result to the process exit status (truthy result -> 0, falsy result -> 1).
int main(int argc, char* argv[])
{
    const auto passed = run_gemm_example(argc, argv);
    return passed ? 0 : 1;
}