gemm_wmma_fp16.cpp 2.25 KB
Newer Older
aska-0096's avatar
aska-0096 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include "common.hpp"

#include "ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp"

// Compile-time configuration of the GEMM operands, grouped per tensor.
// Row, Col and PassThrough are not declared in this file; presumably they are
// provided by common.hpp -- TODO confirm.

// A operand: element type, layout tag, element-wise operator.
using ADataType  = ck::half_t;
using ALayout    = Row;
using AElementOp = PassThrough;

// B operand: element type, layout tag, element-wise operator.
using BDataType  = ck::half_t;
using BLayout    = Col;
using BElementOp = PassThrough;

// C result: element type, layout tag, element-wise operator.
using CDataType  = ck::half_t;
using CLayout    = Row;
using CElementOp = PassThrough;

// Intermediate types: both the accumulator and the C-shuffle type are float,
// i.e. wider than the half-precision A/B/C element type.
using AccDataType      = float;
using CShuffleDataType = float;

aska-0096's avatar
aska-0096 committed
22
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::MNKPadding;
aska-0096's avatar
aska-0096 committed
23
24

// clang-format off
25
using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
aska-0096's avatar
aska-0096 committed
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
         < ALayout,             
           BLayout,             
           CLayout,             
           ADataType, 
           BDataType, 
           CDataType, 
           AccDataType, 
           CShuffleDataType,  
           AElementOp,  
           BElementOp,  
           CElementOp,    
           GemmDefault,   
           256,         // BlockSize
           128,         // MPerBlock
           128,         // NPerBlock
aska-0096's avatar
aska-0096 committed
41
           32,          // KPerBlock
aska-0096's avatar
aska-0096 committed
42
43
44
           8,           // K1
           16,          // MPerWmma
           16,          // NPerWmma
45
46
           1,           // M-Repeat // M-PerWmma / M-Repeat = M-Wave
           8,           // N-Repeat // N-PerWmma / N-Repeat = N-Wave
aska-0096's avatar
aska-0096 committed
47
48
49
50
51
52
53
           S<4, 64, 1>,     
           S<1, 0, 2>,     
           S<1, 0, 2>,              
           2,              
           8,              
           8,      
           true,     
54
           S<4, 64, 1>,     
aska-0096's avatar
aska-0096 committed
55
56
57
58
59
60
           S<1, 0, 2>,     
           S<1, 0, 2>,             
           2,              
           8,              
           8,      
           true,           
61
62
63
           1,           // C shuffle (M Repeat) Per store
           4,           // C shuffle (N Repeat) Per store
           S<1, 32, 1,  8>,               
aska-0096's avatar
aska-0096 committed
64
           8>;
aska-0096's avatar
tidy up  
aska-0096 committed
65
// clang-format on
66

aska-0096's avatar
aska-0096 committed
67
68
69
70
71
72
// Host-side GEMM type (ck::tensor_operation::host::ReferenceGemm) instantiated with the
// A/B/C/accumulator element types and the element-wise operators aliased earlier in
// this file.
// NOTE(review): presumably consumed by run_gemm_example.inc to check the device
// result -- confirm.
using ReferenceGemmInstance =
    ck::tensor_operation::host::ReferenceGemm<ADataType,
                                              BDataType,
                                              CDataType,
                                              AccDataType,
                                              AElementOp,
                                              BElementOp,
                                              CElementOp>;

#include "run_gemm_example.inc"

// Program entry point. Forwards the command line to run_gemm_example() and maps its
// result onto the process exit status: 0 when the result is truthy, 1 otherwise.
int main(int argc, char* argv[])
{
    const bool example_result = run_gemm_example(argc, argv);
    return example_result ? 0 : 1;
}