// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <iostream>
#include <fstream>
#include <cstdlib>
#include <vector>
#include <stdexcept>
#include <sstream>
#include <string>
#include <getopt.h>

#include "ck/utility/reduction_enums.hpp"

#include "ck/library/utility/host_common_util.hpp"

#include "profiler/profile_reduce_impl.hpp"
#include "profiler/data_type_enum.hpp"
#include "profiler_operation_registry.hpp"

using namespace std;

using ck::ReduceTensorOp;

// Long-option table handed to getopt_long() by ReduceProfilerArgs::processArgs().
//
// Options that take a value map to the short-option character used in the parser's switch.
// The value-less flags (--half, --double, --int8, --bf16, --help) all share the val '?', so the
// parser tells them apart by looking up the matched entry's name through the long-option index.
// Note that '?' is also what getopt_long() returns for an unrecognized option.
static const struct option long_options[] = {
    {"inLengths", required_argument, nullptr, 'D'},
    {"reduceDims", required_argument, nullptr, 'R'},
    {"reduceOp", required_argument, nullptr, 'O'},
    {"compType", required_argument, nullptr, 'C'},
    {"outType", required_argument, nullptr, 'W'},
    {"nanOpt", required_argument, nullptr, 'N'},
    {"indicesOpt", required_argument, nullptr, 'I'},
    {"scales", required_argument, nullptr, 'S'},
    {"half", no_argument, nullptr, '?'},
    {"double", no_argument, nullptr, '?'},
    {"int8", no_argument, nullptr, '?'},
    {"bf16", no_argument, nullptr, '?'},
    {"dumpout", required_argument, nullptr, 'o'},
    {"verify", required_argument, nullptr, 'v'},
    {"help", no_argument, nullptr, '?'},
    {nullptr, 0, nullptr, 0}}; // terminator required by getopt_long()

// Validates the list of to-reduce dimension indices against the tensor rank.
//
// rank       - number of dimensions of the input tensor
// reduceDims - indices of the dimensions to reduce
//
// Throws std::runtime_error if any index lies outside [0, rank) or if an index appears more
// than once.
static void check_reduce_dims(const int rank, const std::vector<int>& reduceDims)
{
    for(const auto dim : reduceDims)
    {
        if(dim < 0 || dim >= rank)
            throw std::runtime_error("Invalid dimension index specified for Reducing");
    }

    // One flag per dimension rather than bits of an unsigned int: shifting by 32 or more is
    // undefined, so a bit mask would misreport duplicates for tensors with rank above 32.
    std::vector<bool> seen(static_cast<std::size_t>(rank > 0 ? rank : 0), false);

    for(const auto dim : reduceDims)
    {
        // dim was range-checked by the loop above, so the index is valid here.
        if(seen[static_cast<std::size_t>(dim)])
            throw std::runtime_error("All toReduce dimensions should be different!");
        seen[static_cast<std::size_t>(dim)] = true;
    }
}

class ReduceProfilerArgs
60
61
62
63
64
65
66
{
    private:
    int option_index = 0;

    public:
    bool use_half   = false;
    bool use_double = false;
67
68
    bool use_int8   = false;
    bool use_bf16   = false;
69
70
71

    std::vector<size_t> inLengths;
    std::vector<size_t> outLengths;
Qianfeng's avatar
Qianfeng committed
72
    std::vector<int> reduceDims;
73
74
75

    std::vector<float> scales;

76
77
78
    ReduceTensorOp reduceOp     = ReduceTensorOp::ADD;
    ck::DataTypeEnum compTypeId = ck::DataTypeEnum::Float;
    ck::DataTypeEnum outTypeId  = ck::DataTypeEnum::Float;
79
80
81
82

    bool compType_assigned = false;
    bool outType_assigned  = false;

83
84
85
86
    int nanOpt           = 0;
    int indicesOpt       = 0;
    bool do_verification = false;
    bool do_dumpout      = false;
87
88

    int init_method;
JD's avatar
JD committed
89
    bool time_kernel;
90

91
92
    ReduceProfilerArgs()  = default;
    ~ReduceProfilerArgs() = default;
93
94
95
96
97
98

    void show_usage(const char* cmd)
    {
        std::cout << "Usage of " << cmd << std::endl;
        std::cout << "--inLengths or -D, comma separated list of input tensor dimension lengths"
                  << std::endl;
Qianfeng's avatar
Qianfeng committed
99
        std::cout << "--reduceDims or -R, comma separated list of to-reduce dimensions"
100
101
102
103
104
105
106
107
108
                  << std::endl;
        std::cout << "--reduceOp or -O, enum value indicating the reduction operations"
                  << std::endl;
        std::cout << "--compType or -C, enum value indicating the type of accumulated values used "
                     "during the reduction"
                  << std::endl;
        std::cout << "--outType or -W, optional enum value indicating the type of the reduced "
                     "output, which could be float when the input data is half"
                  << std::endl;
109
110
111
112
113
        std::cout
            << "--nanOpt or -N, 1/0 value indicates the selection to use or not use Nan-Propagation"
            << std::endl;
        std::cout << "--indicesOpt or -I, 1/0 value indicates the selection to use or not use "
                     "index in reduction"
114
115
116
117
118
                  << std::endl;
        std::cout << "--scales or -S, comma separated two float values for alpha and beta"
                  << std::endl;
        std::cout << "--half, use fp16 for the input and output tensor data types" << std::endl;
        std::cout << "--double, use fp64 for the input and output tensor data types" << std::endl;
119
120
        std::cout << "--int8, use int8 for the input and output tensor data types" << std::endl;
        std::cout << "--bf16, use bfloat16 for the input and output tensor data types" << std::endl;
121
122
123
124
125
126
127
128
129
130
        std::cout << "--verify or -v, 1/0 to indicate whether to verify the reduction result by "
                     "comparing with the host-based reduction"
                  << std::endl;
        std::cout << "--dumpout or -o, 1/0 to indicate where to save the reduction result to files "
                     "for further analysis"
                  << std::endl;
    };

    int processArgs(int argc, char* argv[])
    {
131
132
        using ck::host_common::getTypeValuesFromString;

133
        int ch;
134
135
136
137
138

        optind++; // to skip the "reduce" module name

        while(1)
        {
139
            ch = getopt_long(argc, argv, "D:R:O:C:W:N:I:S:v:o:", long_options, &option_index);
140
141
142
143
144
145
146
147
148
149
150
151
152
153
            if(ch == -1)
                break;
            switch(ch)
            {
            case 'D':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

                inLengths = getTypeValuesFromString<size_t>(optarg);
                break;
            case 'R':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

Qianfeng's avatar
Qianfeng committed
154
                reduceDims = getTypeValuesFromString<int>(optarg);
155
156
157
158
159
                break;
            case 'O':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

160
                reduceOp = static_cast<ReduceTensorOp>(std::atoi(optarg));
161
162
163
164
165
                break;
            case 'C':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

166
                compTypeId        = static_cast<ck::DataTypeEnum>(std::atoi(optarg));
167
168
169
170
171
172
                compType_assigned = true;
                break;
            case 'W':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

173
                outTypeId        = static_cast<ck::DataTypeEnum>(std::atoi(optarg));
174
175
176
177
178
179
                outType_assigned = true;
                break;
            case 'N':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

180
                nanOpt = std::atoi(optarg);
181
182
183
184
185
                break;
            case 'I':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

186
                indicesOpt = std::atoi(optarg);
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
                break;
            case 'S':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

                scales = getTypeValuesFromString<float>(optarg);

                if(scales.size() != 2)
                    throw std::runtime_error("Invalid option format!");
                break;
            case 'v':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

                do_verification = static_cast<bool>(std::atoi(optarg));
                break;
            case 'o':
                if(!optarg)
                    throw std::runtime_error("Invalid option format!");

                do_dumpout = static_cast<bool>(std::atoi(optarg));
                break;
            case '?':
                if(std::string(long_options[option_index].name) == "half")
                    use_half = true;
                else if(std::string(long_options[option_index].name) == "double")
                    use_double = true;
214
215
216
217
                else if(std::string(long_options[option_index].name) == "int8")
                    use_int8 = true;
                else if(std::string(long_options[option_index].name) == "bf16")
                    use_bf16 = true;
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
                else if(std::string(long_options[option_index].name) == "help")
                {
                    show_usage(argv[0]);
                    return (-1);
                };
                break;

            default:
                show_usage(argv[0]);
                std::cerr << "Invalid cmd-line options!" << std::endl;
                return (-1);
            };
        };

        if(optind + 2 > argc)
            throw std::runtime_error("Invalid cmd-line arguments, more argumetns are needed!");

        init_method = std::atoi(argv[optind++]);
236
        time_kernel = static_cast<bool>(std::atoi(argv[optind]));
237
238
239
240
241
242
243

        if(scales.empty())
        {
            scales.push_back(1.0f);
            scales.push_back(0.0f);
        };

244
245
        if(reduceOp == ReduceTensorOp::MIN || reduceOp == ReduceTensorOp::MAX ||
           reduceOp == ReduceTensorOp::AMAX)
246
247
248
249
250
251
252
253
254
255
256
257
258
259
        {
            // for indexable operations, no need to assign compType and outType, just let them be
            // same as inType
            compType_assigned = false;
            outType_assigned  = false;
        };

        return (0);
    };

}; // end of class AppArgs

int profile_reduce(int argc, char* argv[])
{
260
261
    using ck::DataTypeEnum;
    using ck::profiler::profile_reduce_impl;
262

263
    ReduceProfilerArgs args;
264
265
266
267
268
269

    if(args.processArgs(argc, argv) < 0)
        return (-1);

    int rank = args.inLengths.size();

Qianfeng's avatar
Qianfeng committed
270
    check_reduce_dims(rank, args.reduceDims);
271

272
    if(args.reduceOp == ReduceTensorOp::MUL || args.reduceOp == ReduceTensorOp::NORM1)
273
274
275
276
277
        throw std::runtime_error("MUL and NORM1 are not supported by composable kernel!");

    if(args.use_half)
    {
        if(!args.compType_assigned)
278
            args.compTypeId = DataTypeEnum::Half;
279

Chao Liu's avatar
Chao Liu committed
280
        if(args.outType_assigned &&
281
282
           (args.outTypeId != DataTypeEnum::Half && args.outTypeId != DataTypeEnum::Float))
            args.outTypeId = DataTypeEnum::Float;
283
284

        if(!args.outType_assigned)
285
            args.outTypeId = DataTypeEnum::Half;
286

287
        if(args.compTypeId == DataTypeEnum::Half)
288
        {
289
290
291
292
293
294
295
296
297
298
299
300
            profile_reduce_impl<ck::half_t, ck::half_t, ck::half_t>(
                args.do_verification,
                args.init_method,
                args.do_dumpout,
                args.time_kernel,
                args.inLengths,
                args.reduceDims,
                args.reduceOp,
                static_cast<bool>(args.nanOpt),
                static_cast<bool>(args.indicesOpt),
                args.scales[0],
                args.scales[1]);
301
        }
302
        else if(args.compTypeId == DataTypeEnum::Float)
303
304
305
306
        {
            profile_reduce_impl<ck::half_t, float, ck::half_t>(args.do_verification,
                                                               args.init_method,
                                                               args.do_dumpout,
JD's avatar
JD committed
307
                                                               args.time_kernel,
308
                                                               args.inLengths,
Qianfeng's avatar
Qianfeng committed
309
                                                               args.reduceDims,
310
                                                               args.reduceOp,
311
312
                                                               static_cast<bool>(args.nanOpt),
                                                               static_cast<bool>(args.indicesOpt),
313
314
315
316
317
318
319
320
321
322
323
                                                               args.scales[0],
                                                               args.scales[1]);
        }
        else
            throw std::runtime_error("Invalid compType assignment!");
    }
    else if(args.use_double)
    {
        profile_reduce_impl<double, double, double>(args.do_verification,
                                                    args.init_method,
                                                    args.do_dumpout,
JD's avatar
JD committed
324
                                                    args.time_kernel,
325
                                                    args.inLengths,
Qianfeng's avatar
Qianfeng committed
326
                                                    args.reduceDims,
327
                                                    args.reduceOp,
328
329
                                                    static_cast<bool>(args.nanOpt),
                                                    static_cast<bool>(args.indicesOpt),
330
331
332
                                                    args.scales[0],
                                                    args.scales[1]);
    }
333
334
335
    else if(args.use_int8)
    {
        if(!args.compType_assigned)
336
            args.compTypeId = DataTypeEnum::Int8;
337

Chao Liu's avatar
Chao Liu committed
338
        if(args.outType_assigned &&
339
340
           (args.outTypeId != DataTypeEnum::Int8 && args.outTypeId != DataTypeEnum::Int32))
            args.outTypeId = DataTypeEnum::Int32;
341
342

        if(!args.outType_assigned)
343
            args.outTypeId = DataTypeEnum::Int8;
344

345
        if(args.compTypeId == DataTypeEnum::Int8)
346
347
348
349
        {
            profile_reduce_impl<int8_t, int8_t, int8_t>(args.do_verification,
                                                        args.init_method,
                                                        args.do_dumpout,
JD's avatar
JD committed
350
                                                        args.time_kernel,
351
352
353
                                                        args.inLengths,
                                                        args.reduceDims,
                                                        args.reduceOp,
354
355
                                                        static_cast<bool>(args.nanOpt),
                                                        static_cast<bool>(args.indicesOpt),
356
357
358
                                                        args.scales[0],
                                                        args.scales[1]);
        }
359
        else if(args.compTypeId == DataTypeEnum::Int32)
360
361
362
363
        {
            profile_reduce_impl<int8_t, int32_t, int8_t>(args.do_verification,
                                                         args.init_method,
                                                         args.do_dumpout,
JD's avatar
JD committed
364
                                                         args.time_kernel,
365
366
367
                                                         args.inLengths,
                                                         args.reduceDims,
                                                         args.reduceOp,
368
369
                                                         static_cast<bool>(args.nanOpt),
                                                         static_cast<bool>(args.indicesOpt),
370
371
372
373
374
375
376
377
                                                         args.scales[0],
                                                         args.scales[1]);
        }
        else
            throw std::runtime_error("Invalid compType assignment!");
    }
    else if(args.use_bf16)
    {
378
        if(args.outType_assigned &&
379
380
           (args.outTypeId != DataTypeEnum::BFloat16 && args.outTypeId != DataTypeEnum::Float))
            args.outTypeId = DataTypeEnum::Float;
381
382

        if(!args.outType_assigned)
383
            args.outTypeId = DataTypeEnum::BFloat16;
384
385
386
387

        profile_reduce_impl<ck::bhalf_t, float, ck::bhalf_t>(args.do_verification,
                                                             args.init_method,
                                                             args.do_dumpout,
JD's avatar
JD committed
388
                                                             args.time_kernel,
389
390
391
                                                             args.inLengths,
                                                             args.reduceDims,
                                                             args.reduceOp,
392
393
                                                             static_cast<bool>(args.nanOpt),
                                                             static_cast<bool>(args.indicesOpt),
394
395
396
                                                             args.scales[0],
                                                             args.scales[1]);
    }
397
398
    else
    {
399
        if(args.compTypeId == DataTypeEnum::Float)
400
401
402
403
        {
            profile_reduce_impl<float, float, float>(args.do_verification,
                                                     args.init_method,
                                                     args.do_dumpout,
JD's avatar
JD committed
404
                                                     args.time_kernel,
405
                                                     args.inLengths,
Qianfeng's avatar
Qianfeng committed
406
                                                     args.reduceDims,
407
                                                     args.reduceOp,
408
409
                                                     static_cast<bool>(args.nanOpt),
                                                     static_cast<bool>(args.indicesOpt),
410
411
412
                                                     args.scales[0],
                                                     args.scales[1]);
        }
413
        else if(args.compTypeId == DataTypeEnum::Double)
414
415
416
417
        {
            profile_reduce_impl<float, double, float>(args.do_verification,
                                                      args.init_method,
                                                      args.do_dumpout,
JD's avatar
JD committed
418
                                                      args.time_kernel,
419
                                                      args.inLengths,
Qianfeng's avatar
Qianfeng committed
420
                                                      args.reduceDims,
421
                                                      args.reduceOp,
422
423
                                                      static_cast<bool>(args.nanOpt),
                                                      static_cast<bool>(args.indicesOpt),
424
425
426
427
428
429
430
431
432
                                                      args.scales[0],
                                                      args.scales[1]);
        }
        else
            throw std::runtime_error("Invalid compType assignment!");
    };

    return (0);
};
433
434

// Hooks profile_reduce into the profiler under the operation name "reduce" (second argument
// "Reduce" is presumably the human-readable description).
// NOTE(review): the macro comes from profiler_operation_registry.hpp; confirm its exact
// registration semantics there.
REGISTER_PROFILER_OPERATION("reduce", "Reduce", profile_reduce);