#pragma once

#include <cstddef>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "host_conv.hpp"
#include "tensor_layout.hpp"
#include "device_tensor.hpp"
#include "element_wise_operation.hpp"
#include "device_gemm.hpp"
#include "reference_gemm.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace device_gemm_instance {

ltqin's avatar
ltqin committed
21
22
23
24
25
using DeviceGemmNoOpPtr =
    ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
                                                ck::tensor_operation::element_wise::PassThrough,
                                                ck::tensor_operation::element_wise::PassThrough>;

26
27
28
29
void add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
30

Jianfeng yan's avatar
Jianfeng yan committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instances(
    std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instances(
    std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instances(
    std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instances(
    std::vector<DeviceGemmNoOpPtr>&);

void add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);

Chao Liu's avatar
Chao Liu committed
45
46
47
48
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
Jianfeng yan's avatar
Jianfeng yan committed
49
50
51

void add_device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instances(
    std::vector<DeviceGemmNoOpPtr>&);
52

53
54
55
56
void add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
57

Jianfeng yan's avatar
Jianfeng yan committed
58
59
60
61
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
void add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instances(std::vector<DeviceGemmNoOpPtr>&);
62

63
64
65
66
67
68
69
70
71
72
73
74
75
76
} // namespace device_gemm_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

namespace ck {
namespace profiler {

template <typename ADataType,
          typename BDataType,
          typename CDataType,
          typename ALayout,
          typename BLayout,
          typename CLayout>
Chao Liu's avatar
Chao Liu committed
77
bool profile_gemm_impl(int do_verification,
Chao Liu's avatar
Chao Liu committed
78
79
80
81
82
83
84
85
                       int init_method,
                       bool do_log,
                       int nrepeat,
                       int M,
                       int N,
                       int K,
                       int StrideA,
                       int StrideB,
Chao Liu's avatar
Chao Liu committed
86
                       int StrideC)
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
{
    auto f_host_tensor_descriptor =
        [](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
            if(is_same<decltype(layout), tensor_layout::gemm::RowMajor>::value)
            {
                return HostTensorDescriptor(std::vector<std::size_t>({row, col}),
                                            std::vector<std::size_t>({stride, 1}));
            }
            else
            {
                return HostTensorDescriptor(std::vector<std::size_t>({row, col}),
                                            std::vector<std::size_t>({1, stride}));
            }
        };

    Tensor<ADataType> a_m_k(f_host_tensor_descriptor(M, K, StrideA, ALayout{}));
    Tensor<BDataType> b_k_n(f_host_tensor_descriptor(K, N, StrideB, BLayout{}));
Chao Liu's avatar
Chao Liu committed
104
    Tensor<CDataType> c_m_n_host_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
105
106
107
108
    Tensor<CDataType> c_m_n_device_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));

    std::cout << "a_m_k: " << a_m_k.mDesc << std::endl;
    std::cout << "b_k_n: " << b_k_n.mDesc << std::endl;
109
    std::cout << "c_m_n: " << c_m_n_device_result.mDesc << std::endl;
110

111
    std::size_t num_thread = 1;
112
113
114
115
    switch(init_method)
    {
    case 0: break;
    case 1:
ltqin's avatar
ltqin committed
116
117
        a_m_k.GenerateTensorValue(GeneratorTensor_2<ADataType>{-5, 5}, num_thread);
        b_k_n.GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 5}, num_thread);
118
119
        break;
    default:
ltqin's avatar
ltqin committed
120
121
        a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 1.0}, num_thread);
        b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5}, num_thread);
122
    }
Chao Liu's avatar
Chao Liu committed
123
124
125
126
127
128
129
130
131

    using AElementOp = ck::tensor_operation::element_wise::PassThrough;
    using BElementOp = ck::tensor_operation::element_wise::PassThrough;
    using CElementOp = ck::tensor_operation::element_wise::PassThrough;

    const auto a_element_op = AElementOp{};
    const auto b_element_op = BElementOp{};
    const auto c_element_op = CElementOp{};

Chao Liu's avatar
Chao Liu committed
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
    // reference calculation
    if(do_verification)
    {
        using ReferenceGemmInstance = ck::tensor_operation::host::
            ReferenceGemm<ADataType, BDataType, CDataType, AElementOp, BElementOp, CElementOp>;

        auto ref_gemm    = ReferenceGemmInstance{};
        auto ref_invoker = ref_gemm.MakeInvoker();

        auto ref_argument = ref_gemm.MakeArgument(
            a_m_k, b_k_n, c_m_n_host_result, a_element_op, b_element_op, c_element_op);

        ref_invoker.Run(ref_argument);
    }

147
148
149
150
151
152
153
154
155
    DeviceMem a_device_buf(sizeof(ADataType) * a_m_k.mDesc.GetElementSpace());
    DeviceMem b_device_buf(sizeof(BDataType) * b_k_n.mDesc.GetElementSpace());
    DeviceMem c_device_buf(sizeof(CDataType) * c_m_n_device_result.mDesc.GetElementSpace());

    a_device_buf.ToDevice(a_m_k.mData.data());
    b_device_buf.ToDevice(b_k_n.mData.data());
    c_device_buf.ToDevice(c_m_n_device_result.mData.data());

    // add device GEMM instances
Chao Liu's avatar
Chao Liu committed
156
    std::vector<ck::tensor_operation::device::device_gemm_instance::DeviceGemmNoOpPtr> gemm_ptrs;
157

ltqin's avatar
ltqin committed
158
159
160
161
162
163
164
    if constexpr(is_same<ADataType, float>::value && is_same<BDataType, float>::value &&
                 is_same<CDataType, float>::value)
    {
        if constexpr(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
                     is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
                     is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
Chao Liu's avatar
Chao Liu committed
165
166
167
168
169
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(gemm_ptrs);
            //
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instances(gemm_ptrs);
ltqin's avatar
ltqin committed
170
171
172
173
174
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
Chao Liu's avatar
Chao Liu committed
175
176
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(gemm_ptrs);
Jianfeng yan's avatar
Jianfeng yan committed
177

Chao Liu's avatar
Chao Liu committed
178
179
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instances(gemm_ptrs);
ltqin's avatar
ltqin committed
180
181
182
183
184
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
Chao Liu's avatar
Chao Liu committed
185
186
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(gemm_ptrs);
Jianfeng yan's avatar
Jianfeng yan committed
187

Chao Liu's avatar
Chao Liu committed
188
189
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instances(gemm_ptrs);
ltqin's avatar
ltqin committed
190
191
192
193
194
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
Chao Liu's avatar
Chao Liu committed
195
196
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(gemm_ptrs);
Jianfeng yan's avatar
Jianfeng yan committed
197

Chao Liu's avatar
Chao Liu committed
198
199
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instances(gemm_ptrs);
ltqin's avatar
ltqin committed
200
201
        }
    }
202
203
204
205
206
207
208
    else if constexpr(is_same<ADataType, half_t>::value && is_same<BDataType, half_t>::value &&
                      is_same<CDataType, half_t>::value)
    {
        if constexpr(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
                     is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
                     is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
Chao Liu's avatar
Chao Liu committed
209
210
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(gemm_ptrs);
211

Chao Liu's avatar
Chao Liu committed
212
213
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instances(gemm_ptrs);
214
215
216
217
218
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
Chao Liu's avatar
Chao Liu committed
219
220
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(gemm_ptrs);
221

Chao Liu's avatar
Chao Liu committed
222
223
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instances(gemm_ptrs);
Jianfeng yan's avatar
Jianfeng yan committed
224

Chao Liu's avatar
Chao Liu committed
225
226
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instances(gemm_ptrs);
227
228
229
230
231
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
Chao Liu's avatar
Chao Liu committed
232
233
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(gemm_ptrs);
234

Chao Liu's avatar
Chao Liu committed
235
236
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instances(gemm_ptrs);
237
238
239
240
241
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
Chao Liu's avatar
Chao Liu committed
242
243
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(gemm_ptrs);
244

Chao Liu's avatar
Chao Liu committed
245
246
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instances(gemm_ptrs);
247
248
249
250
251
252
        }
    }
    else if constexpr(is_same<ADataType, ck::bhalf_t>::value &&
                      is_same<BDataType, ck::bhalf_t>::value &&
                      is_same<CDataType, ck::bhalf_t>::value)
    {
Jianfeng yan's avatar
Jianfeng yan committed
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
        if constexpr(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
                     is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
                     is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instances(gemm_ptrs);
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instances(gemm_ptrs);
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instances(gemm_ptrs);
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
            ck::tensor_operation::device::device_gemm_instance::
                add_device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instances(gemm_ptrs);
        }
281
282
283
284
    }
    else if constexpr(is_same<ADataType, int8_t>::value && is_same<BDataType, int8_t>::value &&
                      is_same<CDataType, int8_t>::value)
    {
Jianfeng yan's avatar
Jianfeng yan committed
285
286
287
288
289
        if constexpr(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
                     is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
                     is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
            ck::tensor_operation::device::device_gemm_instance::
Chao Liu's avatar
Chao Liu committed
290
                add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instances(gemm_ptrs);
Jianfeng yan's avatar
Jianfeng yan committed
291
292
293
294
295
296
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
            ck::tensor_operation::device::device_gemm_instance::
Chao Liu's avatar
Chao Liu committed
297
                add_device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instances(gemm_ptrs);
Jianfeng yan's avatar
Jianfeng yan committed
298
299
300
301
302
303
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
            ck::tensor_operation::device::device_gemm_instance::
Chao Liu's avatar
Chao Liu committed
304
                add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instances(gemm_ptrs);
Jianfeng yan's avatar
Jianfeng yan committed
305
306
307
308
309
310
        }
        else if constexpr(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
                          is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
        {
            ck::tensor_operation::device::device_gemm_instance::
Chao Liu's avatar
Chao Liu committed
311
                add_device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instances(gemm_ptrs);
Jianfeng yan's avatar
Jianfeng yan committed
312
        }
313
    }
314

Chao Liu's avatar
Chao Liu committed
315
    std::cout << "found " << gemm_ptrs.size() << " instances" << std::endl;
316

Chao Liu's avatar
Chao Liu committed
317
    std::string best_gemm_name;
318
319
320
    float best_ave_time   = 0;
    float best_tflops     = 0;
    float best_gb_per_sec = 0;
Chao Liu's avatar
Chao Liu committed
321
    bool pass             = true;
322
323
324
325
326
327
328
329
330
331
332
333
334

    // profile device GEMM instances
    for(auto& gemm_ptr : gemm_ptrs)
    {
        auto argument_ptr =
            gemm_ptr->MakeArgumentPointer(static_cast<ADataType*>(a_device_buf.GetDeviceBuffer()),
                                          static_cast<BDataType*>(b_device_buf.GetDeviceBuffer()),
                                          static_cast<CDataType*>(c_device_buf.GetDeviceBuffer()),
                                          M,
                                          N,
                                          K,
                                          StrideA,
                                          StrideB,
Chao Liu's avatar
Chao Liu committed
335
336
337
                                          StrideC,
                                          ck::tensor_operation::element_wise::PassThrough{},
                                          ck::tensor_operation::element_wise::PassThrough{},
Chao Liu's avatar
Chao Liu committed
338
                                          ck::tensor_operation::element_wise::PassThrough{});
339
340
341
342
343

        auto invoker_ptr = gemm_ptr->MakeInvokerPointer();

        if(gemm_ptr->IsSupportedArgument(argument_ptr.get()))
        {
344
            // re-init C to zero before profiling next kernel
Chao Liu's avatar
Chao Liu committed
345
            c_device_buf.SetZero();
346

Chao Liu's avatar
Chao Liu committed
347
348
            std::string gemm_name = gemm_ptr->GetTypeString();

349
350
351
            float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat);

            std::size_t flop = std::size_t(2) * M * N * K;
Chao Liu's avatar
Chao Liu committed
352

353
            std::size_t num_btype =
Chao Liu's avatar
Chao Liu committed
354
                sizeof(ADataType) * M * K + sizeof(BDataType) * K * N + sizeof(CDataType) * M * N;
355
356
357
358
359

            float tflops = static_cast<float>(flop) / 1.E9 / ave_time;

            float gb_per_sec = num_btype / 1.E6 / ave_time;

Chao Liu's avatar
Chao Liu committed
360
361
            std::cout << "Perf: " << std::setw(10) << ave_time << " ms, " << tflops << " TFlops, "
                      << gb_per_sec << " GB/s, " << gemm_name << std::endl;
362
363
364

            if(tflops > best_tflops)
            {
Chao Liu's avatar
Chao Liu committed
365
                best_gemm_name  = gemm_name;
366
367
368
369
370
371
372
373
374
                best_tflops     = tflops;
                best_ave_time   = ave_time;
                best_gb_per_sec = gb_per_sec;
            }

            if(do_verification)
            {
                c_device_buf.FromDevice(c_m_n_device_result.mData.data());

Chao Liu's avatar
Chao Liu committed
375
376
                pass = pass &&
                       ck::utils::check_err(c_m_n_device_result.mData, c_m_n_host_result.mData);
377
378
379
380
381

                if(do_log)
                {
                    LogRangeAsType<float>(std::cout << "a : ", a_m_k.mData, ",") << std::endl;
                    LogRangeAsType<float>(std::cout << "b: ", b_k_n.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
382
383
                    LogRangeAsType<float>(std::cout << "c_host  : ", c_m_n_host_result.mData, ",")
                        << std::endl;
384
385
386
387
388
389
390
                    LogRangeAsType<float>(std::cout << "c_device: ", c_m_n_device_result.mData, ",")
                        << std::endl;
                }
            }
        }
        else
        {
Chao Liu's avatar
Chao Liu committed
391
            std::cout << "does not support this problem" << std::endl;
392
393
394
395
        }
    }

    std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
Chao Liu's avatar
Chao Liu committed
396
              << best_gb_per_sec << " GB/s, " << best_gemm_name << std::endl;
Chao Liu's avatar
Chao Liu committed
397
398

    return pass;
399
400
401
402
}

} // namespace profiler
} // namespace ck