"vscode:/vscode.git/clone" did not exist on "e0b6ce021595c933b17f99853600762be1a1704f"
quantization.cpp 42.4 KB
Newer Older
Shucai Xiao's avatar
Shucai Xiao committed
1
2
3
4
5
#include <iostream>
#include <iterator>
#include <numeric>
#include <vector>
#include <migraphx/literal.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/ref/target.hpp>
#include <migraphx/verify.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/onnx.hpp>
#include "test.hpp"
#include <migraphx/half.hpp>

kahmed10's avatar
kahmed10 committed
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// Builds a clip of `input` to the range [min, max] using add_literal/add_instruction on `p`.
// The scalar bounds are added as literals (max first, then min), each is
// multibroadcast to the lens of `input`, and the resulting op::clip
// instruction is returned.
migraphx::instruction_ref
create_clip_op(migraphx::program& p, float max, float min, migraphx::instruction_ref input)
{
    const auto dims = input->get_shape().lens();

    // The literals are created in the order max, min; the broadcasts follow in
    // the same order so the resulting instruction sequence is unchanged.
    auto upper_literal = p.add_literal(max);
    auto lower_literal = p.add_literal(min);

    auto upper_bound = p.add_instruction(migraphx::op::multibroadcast{dims}, upper_literal);
    auto lower_bound = p.add_instruction(migraphx::op::multibroadcast{dims}, lower_literal);

    // op::clip takes its operands as (input, lower bound, upper bound).
    return p.add_instruction(migraphx::op::clip{}, input, lower_bound, upper_bound);
}

// Same as the four-argument create_clip_op, except that the two multibroadcast
// instructions and the clip itself are placed with insert_instruction at
// `insert_loc`. The bound literals are still created with add_literal
// (max first, then min).
migraphx::instruction_ref create_clip_op(migraphx::instruction_ref insert_loc,
                                         migraphx::program& p,
                                         float max,
                                         float min,
                                         migraphx::instruction_ref input)
{
    const auto dims = input->get_shape().lens();

    auto upper_literal = p.add_literal(max);
    auto lower_literal = p.add_literal(min);

    auto upper_bound =
        p.insert_instruction(insert_loc, migraphx::op::multibroadcast{dims}, upper_literal);
    auto lower_bound =
        p.insert_instruction(insert_loc, migraphx::op::multibroadcast{dims}, lower_literal);

    // op::clip takes its operands as (input, lower bound, upper bound).
    return p.insert_instruction(insert_loc, migraphx::op::clip{}, input, lower_bound, upper_bound);
}

Shucai Xiao's avatar
Shucai Xiao committed
42
43
// Checks that quantize_fp16 turns a float add of two parameters into the
// hand-built reference program (convert each parameter, add, convert the sum
// back to float). Covered: default op list and an explicit {"add"} list, each
// with and without an explicit return instruction.
TEST_CASE(param_add)
{
    // Original program: x + y on float {2, 3} parameters, optionally returned.
    auto create_program_float = [](bool add_return = false) {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
        auto p1  = p.add_parameter("x", s);
        auto p2  = p.add_parameter("y", s);
        auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
        if(add_return)
        {
            p.add_return({sum});
        }

        return p;
    };

    // Expected program after quantization. Each convert is inserted directly
    // after its parameter. NOTE(review): op::convert{} relies on the operator's
    // default target type, presumably half_type -- confirm against op::convert.
    auto create_program_half = [](bool add_return = false) {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
        auto p1  = p.add_parameter("x", s);
        auto hp1 = p.insert_instruction(std::next(p1), migraphx::op::convert{}, p1);
        auto p2  = p.add_parameter("y", s);
        auto hp2 = p.insert_instruction(std::next(p2), migraphx::op::convert{}, p2);
        auto hs  = p.add_instruction(migraphx::op::add{}, hp1, hp2);
        auto res = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);
        if(add_return)
        {
            p.add_return({res});
        }

        return p;
    };

    // Default op list, no explicit return.
    {
        auto p1 = create_program_float();
        auto p2 = create_program_half();

        migraphx::quantize_fp16(p1);
        EXPECT(p1 == p2);
    }

    // Only "add" requested, no explicit return.
    {
        auto p1 = create_program_float();
        auto p2 = create_program_half();

        migraphx::quantize_fp16(p1, {"add"});
        EXPECT(p1 == p2);
    }

    // Default op list, with explicit return.
    {
        auto p1 = create_program_float(true);
        auto p2 = create_program_half(true);

        migraphx::quantize_fp16(p1);
        EXPECT(p1 == p2);
    }

    // Only "add" requested, with explicit return.
    {
        auto p1 = create_program_float(true);
        auto p2 = create_program_half(true);

        migraphx::quantize_fp16(p1, {"add"});
        EXPECT(p1 == p2);
    }
}

// Checks quantize_fp16 on the chain (x + y) - y + x when only "add", only
// "sub", or the default op list is requested, comparing against hand-built
// reference programs.
TEST_CASE(param_add_sub)
{
    // Original float program: ((x + y) - y) + x.
    auto create_program_float = [] {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
        auto p1   = p.add_parameter("x", s);
        auto p2   = p.add_parameter("y", s);
        auto sum  = p.add_instruction(migraphx::op::add{}, p1, p2);
        auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
        p.add_instruction(migraphx::op::add{}, diff, p1);

        return p;
    };

    // Expected result when only "add" is quantized: both adds take half
    // inputs, while the sub stays in float between two converts.
    auto create_program_half_add = [] {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
        auto p1  = p.add_parameter("x", s);
        auto hp1 = p.insert_instruction(
            std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
        auto p2  = p.add_parameter("y", s);
        auto hp2 = p.insert_instruction(
            std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
        auto hsum  = p.add_instruction(migraphx::op::add{}, hp1, hp2);
        auto sum   = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hsum);
        auto diff  = p.add_instruction(migraphx::op::sub{}, sum, p2);
        auto hdiff = p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, diff);
        auto res   = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
        p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, res);

        return p;
    };

    // Expected result when only "sub" is quantized: the adds stay in float and
    // only the sub operates on half inputs.
    auto create_program_half_sub = [] {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
        auto p1  = p.add_parameter("x", s);
        auto p2  = p.add_parameter("y", s);
        auto hp2 = p.insert_instruction(
            std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
        auto sum   = p.add_instruction(migraphx::op::add{}, p1, p2);
        auto hsum  = p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, sum);
        auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
        auto diff  = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hdiff);
        p.add_instruction(migraphx::op::add{}, diff, p1);

        return p;
    };

    // Expected result for the default op list: the whole chain runs in half
    // and a single convert restores float at the end.
    auto create_program_half_all = [] {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
        auto p1  = p.add_parameter("x", s);
        auto hp1 = p.insert_instruction(
            std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
        auto p2  = p.add_parameter("y", s);
        auto hp2 = p.insert_instruction(
            std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
        auto hsum  = p.add_instruction(migraphx::op::add{}, hp1, hp2);
        auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
        auto hres  = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
        p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hres);

        return p;
    };

    {
        auto p1 = create_program_float();
        auto p2 = create_program_half_add();

        migraphx::quantize_fp16(p1, {"add"});
        EXPECT(p1 == p2);
    }

    {
        auto p1 = create_program_float();
        auto p2 = create_program_half_sub();

        migraphx::quantize_fp16(p1, {"sub"});
        EXPECT(p1 == p2);
    }

    {
        auto p1 = create_program_float();
        auto p2 = create_program_half_all();

        migraphx::quantize_fp16(p1);
        // Dead code elimination is run on the quantized program before it is
        // compared with the reference.
        migraphx::run_passes(p1, {migraphx::dead_code_elimination{}});

        EXPECT(p1 == p2);
    }
}

// Checks quantize_fp16 on an add of two literals: after constant propagation
// and dead code elimination on both sides, the quantized float program must
// equal the program built directly from half literals.
TEST_CASE(literal_add)
{
    // Float program: add of two identical {2, 3} literals holding 1..6.
    auto create_program_float = [] {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
        std::vector<float> data(2 * 3);
        std::iota(data.begin(), data.end(), 1.0f);
        auto l1 = p.add_literal(migraphx::literal(s, data));
        auto l2 = p.add_literal(migraphx::literal(s, data));
        p.add_instruction(migraphx::op::add{}, l1, l2);

        return p;
    };

    // Reference program: the same literals stored as half, added in half and
    // converted back to float.
    auto create_program_half = [] {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::half_type, {2, 3}};
        std::vector<migraphx::half> data(2 * 3);
        std::iota(data.begin(), data.end(), 1.0f);
        auto l1 = p.add_literal(migraphx::literal(s, data));
        auto l2 = p.add_literal(migraphx::literal(s, data));
        auto hs = p.add_instruction(migraphx::op::add{}, l1, l2);
        p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);

        return p;
    };

    {
        auto p1 = create_program_float();
        auto p2 = create_program_half();

        migraphx::quantize_fp16(p1, {"all"});
        migraphx::run_passes(p1,
                             {migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
        migraphx::run_passes(p2,
                             {migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});

        EXPECT(p1 == p2);
    }

    {
        auto p1 = create_program_float();
        auto p2 = create_program_half();

        migraphx::quantize_fp16(p1, {"add"});
        migraphx::run_passes(p1,
                             {migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
        migraphx::run_passes(p2,
                             {migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
        EXPECT(p1 == p2);
    }
}

255
256
// Checks that capture_arguments, asked to instrument "dot" and "convolution",
// produces the same program as one built by hand with op::capture wrapped
// around every dot operand.
TEST_CASE(op_capture)
{
    // No-op callback handed to op::capture in the reference program; it
    // captures nothing, so no capture default is needed.
    auto test_func = [](std::size_t ins_index, const std::vector<migraphx::argument>& args) {
        (void)ins_index;
        (void)args;
    };

    // Input program: pa = x + y, ps = dot(pa, b, c), then dot(pa, ps).
    auto create_program_float = [] {
        migraphx::program p;
        migraphx::shape s1{migraphx::shape::float_type, {3, 3}};
        migraphx::shape s2{migraphx::shape::float_type, {3, 6}};

        auto p1 = p.add_parameter("x", s1);
        auto p2 = p.add_parameter("y", s1);
        auto pb = p.add_parameter("b", s2);
        auto pc = p.add_parameter("c", s2);
        auto pa = p.add_instruction(migraphx::op::add{}, p1, p2);
        auto ps = p.add_instruction(migraphx::op::dot{}, pa, pb, pc);
        p.add_instruction(migraphx::op::dot{}, pa, ps);

        return p;
    };

    // Reference program: capture ops with indices 0..3 wrap pa, b, c and ps.
    // The captures for b and c are inserted directly after their parameters;
    // the others are appended in program order.
    auto create_program_op = [&] {
        migraphx::program p;
        migraphx::shape s1{migraphx::shape::float_type, {3, 3}};
        migraphx::shape s2{migraphx::shape::float_type, {3, 6}};

        auto p1  = p.add_parameter("x", s1);
        auto p2  = p.add_parameter("y", s1);
        auto pb  = p.add_parameter("b", s2);
        auto pc  = p.add_parameter("c", s2);
        auto pa  = p.add_instruction(migraphx::op::add{}, p1, p2);
        auto opb = p.insert_instruction(std::next(pb), migraphx::op::capture{1, test_func}, pb);
        auto opc = p.insert_instruction(std::next(pc), migraphx::op::capture{2, test_func}, pc);
        auto opa = p.add_instruction(migraphx::op::capture{0, test_func}, pa);
        auto ps  = p.add_instruction(migraphx::op::dot{}, opa, opb, opc);
        auto ops = p.add_instruction(migraphx::op::capture{3, test_func}, ps);
        p.add_instruction(migraphx::op::dot{}, opa, ops);

        return p;
    };

    {
        auto p             = create_program_float();
        auto op_capture_p  = create_program_op();
        migraphx::target t = migraphx::ref::target{};
        migraphx::capture_arguments(p, t, {"dot", "convolution"});
        EXPECT(p == op_capture_p);
    }
}

307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
// Checks quantize_int8_impl on a float dot{2.0f, 1.5f} with three operands.
// The expected program quantizes a and b to int8 (scale 0.1, round, clip to
// [-128, 127]), runs quant_dot{1, 0}, and applies alpha and beta * c in float.
TEST_CASE(dot_float)
{
    // Input program: dot with alpha 2.0 and beta 1.5 over a, b, c.
    auto create_program = [] {
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
        migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);
        auto pc = p.add_parameter("c", sc);

        p.add_instruction(migraphx::op::dot{2.0f, 1.5f}, pa, pb, pc);

        return p;
    };

    // Hand-built expected program after int8 quantization.
    auto create_int8_quantized_prog = [] {
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
        migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);
        auto pc = p.add_parameter("c", sc);
        // quantize parameter a to int8 type, multiply the scale
        std::vector<float> vfa(sa.elements(), 0.1f);
        auto fa = p.add_literal(migraphx::literal(sa, vfa));
        auto ma = p.add_instruction(migraphx::op::mul{}, fa, pa);
        auto ra = p.add_instruction(migraphx::op::round{}, ma);
        auto ca = create_clip_op(p, 127.0f, -128.0f, ra);
        auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);

        // quantize parameter b to int8 type; its instructions are inserted
        // directly after parameter b
        auto insert_loc = std::next(pb);
        std::vector<float> vfb(sb.elements(), 0.1f);
        auto fb = p.add_literal(migraphx::literal(sb, vfb));
        auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, pb);
        auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
        auto cb = create_clip_op(insert_loc, p, 127.0f, -128.0f, rb);
        auto qb =
            p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);

        // int8 dot, then alpha (literal 200.0) and beta * c (literal 1.5) in float
        auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qb);
        auto fdot = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
        std::vector<float> v_alpha(fdot->get_shape().elements(), 200.0f);
        auto new_alpha = p.add_literal(migraphx::literal(fdot->get_shape(), v_alpha));
        auto alpha_ab  = p.add_instruction(migraphx::op::mul{}, new_alpha, fdot);
        std::vector<float> v_beta(pc->get_shape().elements(), 1.5f);
        auto beta   = p.add_literal(migraphx::literal(pc->get_shape(), v_beta));
        auto beta_c = p.add_instruction(migraphx::op::mul{}, beta, pc);
        p.add_instruction(migraphx::op::add{}, alpha_ab, beta_c);

        return p;
    };

    auto p = create_program();
    // One pair of quantization parameters per dot operand (a, b, c).
    const std::vector<std::pair<float, float>> quant_params{
        {0.1f, 0.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
    migraphx::quantize_int8_impl(p, quant_params, {"dot"});
    migraphx::run_passes(p, {migraphx::dead_code_elimination{}});

    auto qp = create_int8_quantized_prog();

    EXPECT(p == qp);
}

// Checks quantize_int8_impl on a two-operand double dot{2.0f, 1.5f}. The
// expected program converts a and b to float, quantizes them to int8, runs
// quant_dot{1, 0}, scales the float result and converts it back to double.
TEST_CASE(dot_double_2args)
{
    // Input program: double dot over a and b only (no c operand).
    auto create_program = [] {
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::double_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::double_type, {16, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);

        p.add_instruction(migraphx::op::dot{2.0f, 1.5f}, pa, pb);

        return p;
    };

    // Hand-built expected program after int8 quantization.
    auto create_int8_quantized_prog = [] {
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::double_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::double_type, {16, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);
        // quantize parameter a to int8 type, multiply the scale
        std::vector<float> vfa(sa.elements(), 0.1f);
        auto fpa = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
        auto fa  = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vfa));
        auto ma  = p.add_instruction(migraphx::op::mul{}, fa, fpa);
        auto ra  = p.add_instruction(migraphx::op::round{}, ma);
        auto ca  = create_clip_op(p, 127.0f, -128.0f, ra);
        auto qa  = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);

        // quantize parameter b to int8 type; its instructions are inserted
        // directly after parameter b
        auto insert_loc = std::next(pb);
        auto fpb        = p.insert_instruction(
            insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pb);
        std::vector<float> vfb(sb.elements(), 0.1f);
        auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
        auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, fpb);
        auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
        auto cb = create_clip_op(insert_loc, p, 127.0f, -128.0f, rb);
        auto qb =
            p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);

        // int8 dot, scale by the literal 200.0 in float, convert back to double
        auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qb);
        auto fdot = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
        std::vector<float> v_alpha(fdot->get_shape().elements(), 200.0f);
        auto new_alpha = p.add_literal(migraphx::literal(fdot->get_shape(), v_alpha));
        auto alpha_ab  = p.add_instruction(migraphx::op::mul{}, new_alpha, fdot);
        p.add_instruction(migraphx::op::convert{migraphx::shape::double_type}, alpha_ab);

        return p;
    };

    auto p = create_program();
    // One pair of quantization parameters per dot operand (a, b).
    const std::vector<std::pair<float, float>> quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
    migraphx::quantize_int8_impl(p, quant_params, {"dot"});
    auto qp = create_int8_quantized_prog();

    EXPECT(p == qp);
}

// Checks quantize_int8_impl on a float dot{20.0f, 50.5f}. The expected program
// keeps alpha and beta inside the integer op: a is scaled, shifted by 1.0 and
// quantized, b is scaled and quantized, c is converted to int32, and a single
// quant_dot{2000, 51} is followed by a convert back to float.
TEST_CASE(dot_large_alpha_beta_float)
{
    // Input program: dot with alpha 20.0 and beta 50.5 over a, b, c.
    auto create_program = [] {
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
        migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);
        auto pc = p.add_parameter("c", sc);

        p.add_instruction(migraphx::op::dot{20.0f, 50.5f}, pa, pb, pc);

        return p;
    };

    // Hand-built expected program after int8 quantization.
    auto create_int8_quantized_prog = [] {
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
        migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);
        auto pc = p.add_parameter("c", sc);
        // quantize parameter a to int8 type, multiply the scale
        std::vector<float> vfa(sa.elements(), 0.1f);
        auto fa = p.add_literal(migraphx::literal(sa, vfa));
        auto ma = p.add_instruction(migraphx::op::mul{}, fa, pa);
        // add the shift
        std::vector<float> vsa(sa.elements(), 1.0f);
        auto sfta = p.add_literal(migraphx::literal(sa, vsa));
        auto msa  = p.add_instruction(migraphx::op::add{}, sfta, ma);
        auto ra   = p.add_instruction(migraphx::op::round{}, msa);
        auto ca   = create_clip_op(p, 127.0f, -128.0f, ra);
        auto qa   = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);

        // quantize parameter b to int8 type; its instructions are inserted
        // directly after parameter b
        auto insert_loc = std::next(pb);
        std::vector<float> vfb(sb.elements(), 0.1f);
        auto fb = p.add_literal(migraphx::literal(sb, vfb));
        auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, pb);
        auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
        auto cb = create_clip_op(insert_loc, p, 127.0f, -128.0f, rb);
        auto qb =
            p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);

        // quantize parameter c to int32 type
        auto qc = p.insert_instruction(
            std::next(pc), migraphx::op::convert{migraphx::shape::int32_type}, pc);

        auto qdot = p.add_instruction(migraphx::op::quant_dot{2000, 51}, qa, qb, qc);
        p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);

        return p;
    };

    auto p = create_program();
    // One pair of quantization parameters per dot operand (a, b, c).
    const std::vector<std::pair<float, float>> quant_params{
        {0.1f, 1.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
    migraphx::quantize_int8_impl(p, quant_params, {"dot"});
    auto qp = create_int8_quantized_prog();

    EXPECT(p == qp);
}

// Checks quantize_int8_impl on an int32 dot{20.0f, 50.0f}. The expected
// program converts a and b to float, quantizes them to int8 (a also gets a
// shift of 1.0), and feeds them together with the untouched int32 c into
// quant_dot{2000, 50}.
TEST_CASE(dot_large_alpha_beta_int32)
{
    // Input program: int32 dot with alpha 20.0 and beta 50.0 over a, b, c.
    auto create_program = [] {
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
        migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);
        auto pc = p.add_parameter("c", sc);

        p.add_instruction(migraphx::op::dot{20.0f, 50.0f}, pa, pb, pc);

        return p;
    };

    // Hand-built expected program after int8 quantization.
    auto create_int8_quantized_prog = [] {
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
        migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);
        auto pc = p.add_parameter("c", sc);
        // quantize parameter a to int8 type, multiply the scale
        std::vector<float> vfa(sa.elements(), 0.1f);
        auto fa = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vfa));
        auto conv_a = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
        auto ma     = p.add_instruction(migraphx::op::mul{}, fa, conv_a);

        // add the shift
        std::vector<float> vsa(sa.elements(), 1.0f);
        auto sfta = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vsa));
        auto msa  = p.add_instruction(migraphx::op::add{}, sfta, ma);
        auto ra   = p.add_instruction(migraphx::op::round{}, msa);
        auto ca   = create_clip_op(p, 127.0f, -128.0f, ra);
        auto qa   = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);

        // quantize parameter b to int8 type; its instructions are inserted
        // directly after parameter b
        auto insert_loc = std::next(pb);
        std::vector<float> vfb(sb.elements(), 0.1f);
        auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
        auto conv_b = p.insert_instruction(
            insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pb);
        auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, conv_b);
        auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
        auto cb = create_clip_op(insert_loc, p, 127.0f, -128.0f, rb);
        auto qb =
            p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);

        // c is already int32, so it is passed to quant_dot unchanged
        p.add_instruction(migraphx::op::quant_dot{2000, 50}, qa, qb, pc);

        return p;
    };

    auto p = create_program();
    // One pair of quantization parameters per dot operand (a, b, c).
    const std::vector<std::pair<float, float>> quant_params{
        {0.1f, 1.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
    migraphx::quantize_int8_impl(p, quant_params, {"dot"});
    auto qp = create_int8_quantized_prog();

    EXPECT(p == qp);
}

562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
// Checks quantize_int8_impl on an int32 dot{20.0f, 50.0f} whose two operands
// are the same parameter. The expected program quantizes that parameter once
// and uses the single int8 result for both quant_dot inputs.
TEST_CASE(dot_int32_one_arg)
{
    // Input program: dot(a, a) on a {16, 16} int32 parameter.
    auto create_program = [] {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::int32_type, {16, 16}};
        auto pa = p.add_parameter("a", s);

        p.add_instruction(migraphx::op::dot{20.0f, 50.0f}, pa, pa);

        return p;
    };

    // Hand-built expected program after int8 quantization. No scale multiply
    // appears here; only the shift is added before rounding.
    auto create_int8_quantized_prog = [] {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::int32_type, {16, 16}};
        auto pa = p.add_parameter("a", s);

        // add the shift
        auto fpa = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
        std::vector<float> vsa(s.elements(), 1.0f);
        auto sfta = p.add_literal(migraphx::literal({migraphx::shape::float_type, s.lens()}, vsa));
        auto msa  = p.add_instruction(migraphx::op::add{}, sfta, fpa);
        auto ra   = p.add_instruction(migraphx::op::round{}, msa);
        auto ca   = create_clip_op(p, 127.0f, -128.0f, ra);
        auto qa   = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);

        // int8 dot of qa with itself, scale by the literal 20.0 in float,
        // then convert back to int32
        auto q_dot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qa);
        auto f_dot = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, q_dot);
        std::vector<float> v_alpha(f_dot->get_shape().elements(), 20.0f);
        auto new_alpha = p.add_literal(migraphx::literal{f_dot->get_shape(), v_alpha});
        auto alpha_ab  = p.add_instruction(migraphx::op::mul{}, new_alpha, f_dot);
        p.add_instruction(migraphx::op::convert{migraphx::shape::int32_type}, alpha_ab);

        return p;
    };

    auto p = create_program();
    // A single pair of quantization parameters: the dot has one distinct operand.
    const std::vector<std::pair<float, float>> quant_params{{1.0f, 1.0f}};
    migraphx::quantize_int8_impl(p, quant_params, {"dot"});
    auto qp = create_int8_quantized_prog();

    EXPECT(p == qp);
}

606
607
TEST_CASE(dot_int32)
{
608
    auto create_program = [](bool add_return = false) {
609
610
611
612
613
614
615
616
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
        migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);
        auto pc = p.add_parameter("c", sc);

617
618
619
620
621
        auto res = p.add_instruction(migraphx::op::dot{2.0f, 5.5f}, pa, pb, pc);
        if(add_return)
        {
            p.add_return({res});
        }
622
623
624
625

        return p;
    };

626
    auto create_int8_quantized_prog = [](bool add_return = false) {
627
628
629
630
631
632
633
634
635
636
637
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
        migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);
        auto pc = p.add_parameter("c", sc);
        // quantize parameter a to int8 type, multiply the scale
        std::vector<float> vfa(sa.elements(), 0.1f);
        auto fa = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vfa));
        auto conv_a = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
Shucai Xiao's avatar
Shucai Xiao committed
638
        auto ma     = p.add_instruction(migraphx::op::mul{}, fa, conv_a);
639
640
641
642
643
644

        // add the shift
        std::vector<float> vsa(sa.elements(), 1.0f);
        auto sfta = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vsa));
        auto msa  = p.add_instruction(migraphx::op::add{}, sfta, ma);
        auto ra   = p.add_instruction(migraphx::op::round{}, msa);
kahmed10's avatar
kahmed10 committed
645
        auto ca   = create_clip_op(p, 127.0f, -128.0f, ra);
646
647
648
649
650
651
        auto qa   = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);

        // quantize parameter b to int8 type
        auto insert_loc = std::next(pb);
        std::vector<float> vfb(sb.elements(), 0.1f);
        auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
Shucai Xiao's avatar
Shucai Xiao committed
652
653
        auto conv_b = p.insert_instruction(
            insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pb);
654
655
        auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, conv_b);
        auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
kahmed10's avatar
kahmed10 committed
656
        auto cb = create_clip_op(insert_loc, p, 127.0f, -128.0f, rb);
657
658
659
660
        auto qb =
            p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);

        auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qb);
Shucai Xiao's avatar
Shucai Xiao committed
661
        auto fr   = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
662
663
        std::vector<float> v_alpha(fr->get_shape().elements(), 20.0f);
        auto new_alpha = p.add_literal(migraphx::literal(fr->get_shape(), v_alpha));
Shucai Xiao's avatar
Shucai Xiao committed
664
665
        auto alpha_ab  = p.add_instruction(migraphx::op::mul{}, new_alpha, fr);
        auto fc        = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pc);
666
        std::vector<float> v_beta(fc->get_shape().elements(), 5.5f);
Shucai Xiao's avatar
Shucai Xiao committed
667
        auto beta   = p.add_literal(migraphx::literal(fc->get_shape(), v_beta));
668
        auto beta_c = p.add_instruction(migraphx::op::mul{}, beta, fc);
Shucai Xiao's avatar
Shucai Xiao committed
669
        auto f_res  = p.add_instruction(migraphx::op::add{}, alpha_ab, beta_c);
670
671
672
673
674
        auto res    = p.add_instruction(migraphx::op::convert{migraphx::shape::int32_type}, f_res);
        if(add_return)
        {
            p.add_return({res});
        }
675
676
677
678
679

        return p;
    };

    // Quantize the "dot" instructions of the reference program and require the
    // result to equal the hand-built quantized program.
    // NOTE(review): each pair looks like (scale, shift) for one dot input,
    // judging by the 0.1f / 1.0f literals in the expected program - confirm.
    auto p = create_program();
    const std::vector<std::pair<float, float>> quant_params{
        {0.1f, 1.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
    migraphx::quantize_int8_impl(p, quant_params, {"dot"});
    auto qp = create_int8_quantized_prog();
    EXPECT(p == qp);

    // Same comparison for programs built with add_return enabled.
    auto p_ret = create_program(true);
    migraphx::quantize_int8_impl(p_ret, quant_params, {"dot"});
    auto qp_ret = create_int8_quantized_prog(true);
    EXPECT(p_ret == qp_ret);
}

692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
TEST_CASE(dot_float_convert)
{
    auto create_program = [] {
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::int8_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);

        auto fpa = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
        p.add_instruction(migraphx::op::dot{2.0f, 5.5f}, fpa, pb);

        return p;
    };

    auto create_int8_quantized_prog = [] {
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::int8_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);

        // quantize parameter b to int8 type
        auto insert_loc = std::next(pb);
        std::vector<float> vfb(sb.elements(), 0.1f);
        auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
        auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, pb);
        auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
kahmed10's avatar
kahmed10 committed
720
        auto cb = create_clip_op(insert_loc, p, 127.0f, -128.0f, rb);
721
722
723
724
725
726
727
728
729
730
731
732
733
        auto qb =
            p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);

        auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, pa, qb);
        auto fr   = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
        std::vector<float> v_alpha(fr->get_shape().elements(), 10.0f);
        auto new_alpha = p.add_literal(migraphx::literal(fr->get_shape(), v_alpha));
        p.add_instruction(migraphx::op::mul{}, new_alpha, fr);

        return p;
    };

    auto p = create_program();
Shucai Xiao's avatar
Shucai Xiao committed
734
    const std::vector<std::pair<float, float>>& quant_params{{0.1f, 1.0f}, {0.1f, 0.0f}};
Shucai Xiao's avatar
Shucai Xiao committed
735
    migraphx::quantize_int8_impl(p, quant_params, {"dot"});
736
737
738
739
740
741
    migraphx::run_passes(p, {migraphx::dead_code_elimination{}});
    auto qp = create_int8_quantized_prog();

    EXPECT(p == qp);
}

742
743
TEST_CASE(conv_float)
{
Shucai Xiao's avatar
Shucai Xiao committed
744
    auto create_program = [] {
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
        migraphx::program p;
        auto input =
            p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3}});
        auto weights =
            p.add_parameter("w", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3}});
        p.add_instruction(migraphx::op::convolution{}, input, weights);

        return p;
    };

    auto create_int8_quantized_prog = [] {
        migraphx::program p;
        migraphx::shape sx{migraphx::shape::float_type, {4, 3, 3, 3}};
        migraphx::shape sw{migraphx::shape::float_type, {4, 3, 3, 3}};
        auto px = p.add_parameter("x", sx);
        auto pw = p.add_parameter("w", sw);
        // quantize parameter a to int8 type, multiply the scale
        std::vector<float> vfx(sx.elements(), 0.1f);
Shucai Xiao's avatar
Shucai Xiao committed
763
764
765
        auto fx = p.add_literal(migraphx::literal(sx, vfx));
        auto mx = p.add_instruction(migraphx::op::mul{}, fx, px);
        auto rx = p.add_instruction(migraphx::op::round{}, mx);
kahmed10's avatar
kahmed10 committed
766
        auto cx = create_clip_op(p, 127.0f, -128.0f, rx);
Shucai Xiao's avatar
Shucai Xiao committed
767
        auto qx = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, cx);
768
769
770
771
772
773
774

        // quantize parameter b to int8 type
        auto insert_loc = std::next(pw);
        std::vector<float> vfw(sw.elements(), 0.1f);
        auto fw = p.add_literal(migraphx::literal(sw, vfw));
        auto mw = p.insert_instruction(insert_loc, migraphx::op::mul{}, fw, pw);
        auto rw = p.insert_instruction(insert_loc, migraphx::op::round{}, mw);
kahmed10's avatar
kahmed10 committed
775
        auto cw = create_clip_op(insert_loc, p, 127.0f, -128.0f, rw);
776
777
778
779
780
781
782
783
784
785
786
787
788
789
        auto qw =
            p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cw);

        auto q_conv = p.add_instruction(migraphx::op::quant_convolution{}, qx, qw);
        auto f_conv = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, q_conv);
        std::vector<float> v_adj(f_conv->get_shape().elements(), 100.0f);
        auto adj = p.add_literal(migraphx::literal(f_conv->get_shape(), v_adj));
        p.add_instruction(migraphx::op::mul{}, adj, f_conv);

        return p;
    };

    auto p = create_program();
    const std::vector<std::pair<float, float>>& quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
Shucai Xiao's avatar
Shucai Xiao committed
790
    migraphx::quantize_int8_impl(p, quant_params, {"convolution"});
791
792
793
794
795
796
797
    auto qp = create_int8_quantized_prog();

    EXPECT(p == qp);
}

// Checks quantize_int8_impl on a convolution with int32 input and weights:
// the quantized program must equal the hand-built quant_convolution program.
TEST_CASE(conv_int32)
{
    // Reference program: convolution of two int32 {4, 3, 3, 3} parameters.
    auto create_program = [] {
        migraphx::program p;
        auto input =
            p.add_parameter("x", migraphx::shape{migraphx::shape::int32_type, {4, 3, 3, 3}});
        auto weights =
            p.add_parameter("w", migraphx::shape{migraphx::shape::int32_type, {4, 3, 3, 3}});
        p.add_instruction(migraphx::op::convolution{}, input, weights);

        return p;
    };

    // Expected program; instruction and literal order is significant for the
    // equality check below.
    auto create_int8_quantized_prog = [] {
        migraphx::program p;
        migraphx::shape sx{migraphx::shape::int32_type, {4, 3, 3, 3}};
        migraphx::shape sw{migraphx::shape::int32_type, {4, 3, 3, 3}};
        auto px = p.add_parameter("x", sx);
        auto pw = p.add_parameter("w", sw);
        // quantize parameter x to int8 type: convert to float, multiply the scale
        auto fpx = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, px);
        std::vector<float> vfx(sx.elements(), 0.1f);
        auto fx = p.add_literal(migraphx::literal(fpx->get_shape(), vfx));
        auto mx = p.add_instruction(migraphx::op::mul{}, fx, fpx);
        auto rx = p.add_instruction(migraphx::op::round{}, mx);
        auto cx = create_clip_op(p, 127.0f, -128.0f, rx);
        auto qx = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, cx);

        // quantize parameter w to int8 type; inserted right after parameter w
        auto insert_loc = std::next(pw);
        auto fpw        = p.insert_instruction(
            insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pw);
        std::vector<float> vfw(sw.elements(), 0.1f);
        auto fw = p.add_literal(migraphx::literal(fpw->get_shape(), vfw));
        auto mw = p.insert_instruction(insert_loc, migraphx::op::mul{}, fw, fpw);
        auto rw = p.insert_instruction(insert_loc, migraphx::op::round{}, mw);
        auto cw = create_clip_op(insert_loc, p, 127.0f, -128.0f, rw);
        auto qw =
            p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cw);

        // The adjustment literal takes q_conv's own shape (no convert to float
        // here) and is passed as the second mul operand.
        auto q_conv = p.add_instruction(migraphx::op::quant_convolution{}, qx, qw);
        std::vector<float> v_adj(q_conv->get_shape().elements(), 100.0f);
        auto adj = p.add_literal(migraphx::literal(q_conv->get_shape(), v_adj));
        p.add_instruction(migraphx::op::mul{}, q_conv, adj);

        return p;
    };

    auto p = create_program();
    const std::vector<std::pair<float, float>> quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
    migraphx::quantize_int8_impl(p, quant_params, {"convolution"});
    auto qp = create_int8_quantized_prog();

    EXPECT(p == qp);
}

// Checks quantize_int8_impl on a convolution with half input and weights:
// the quantized program must equal the hand-built quant_convolution program,
// whose final result is converted back to half.
TEST_CASE(conv_half)
{
    // Reference program: convolution of two half {4, 3, 3, 3} parameters.
    auto create_program = [] {
        migraphx::program p;
        auto input =
            p.add_parameter("x", migraphx::shape{migraphx::shape::half_type, {4, 3, 3, 3}});
        auto weights =
            p.add_parameter("w", migraphx::shape{migraphx::shape::half_type, {4, 3, 3, 3}});
        p.add_instruction(migraphx::op::convolution{}, input, weights);

        return p;
    };

    // Expected program; instruction and literal order is significant for the
    // equality check below.
    auto create_int8_quantized_prog = [] {
        migraphx::program p;
        migraphx::shape sx{migraphx::shape::half_type, {4, 3, 3, 3}};
        migraphx::shape sw{migraphx::shape::half_type, {4, 3, 3, 3}};
        auto px = p.add_parameter("x", sx);
        auto pw = p.add_parameter("w", sw);
        // quantize parameter x to int8 type: convert to float, multiply the scale
        auto fpx = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, px);
        std::vector<float> vfx(sx.elements(), 0.1f);
        auto fx = p.add_literal(migraphx::literal(fpx->get_shape(), vfx));
        auto mx = p.add_instruction(migraphx::op::mul{}, fx, fpx);
        auto rx = p.add_instruction(migraphx::op::round{}, mx);
        auto cx = create_clip_op(p, 127.0f, -128.0f, rx);
        auto qx = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, cx);

        // quantize parameter w to int8 type; inserted right after parameter w
        auto insert_loc = std::next(pw);
        auto fpw        = p.insert_instruction(
            insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pw);
        std::vector<float> vfw(sw.elements(), 0.1f);
        auto fw = p.add_literal(migraphx::literal(fpw->get_shape(), vfw));
        auto mw = p.insert_instruction(insert_loc, migraphx::op::mul{}, fw, fpw);
        auto rw = p.insert_instruction(insert_loc, migraphx::op::round{}, mw);
        auto cw = create_clip_op(insert_loc, p, 127.0f, -128.0f, rw);
        auto qw =
            p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cw);

        // int8 convolution, converted to float, multiplied by a literal filled
        // with 100.0f, then converted back to half
        auto q_conv = p.add_instruction(migraphx::op::quant_convolution{}, qx, qw);
        auto f_conv = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, q_conv);
        std::vector<float> v_adj(f_conv->get_shape().elements(), 100.0f);
        auto adj   = p.add_literal(migraphx::literal(f_conv->get_shape(), v_adj));
        auto f_res = p.add_instruction(migraphx::op::mul{}, adj, f_conv);
        p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, f_res);

        return p;
    };

    auto p = create_program();
    const std::vector<std::pair<float, float>> quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
    migraphx::quantize_int8_impl(p, quant_params, {"convolution"});
    auto qp = create_int8_quantized_prog();

    EXPECT(p == qp);
}

910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
TEST_CASE(target_copy)
{
    auto run_prog = [](migraphx::program p,
                       const migraphx::target& t,
                       migraphx::program::parameter_map& m_in,
                       std::vector<float>& res) {
        p.compile(t);
        migraphx::program::parameter_map m;
        for(auto&& x : p.get_parameter_shapes())
        {
            if(m_in.count(x.first) > 0)
            {
                m[x.first] = t.copy_to(m_in[x.first]);
            }
            else
            {
                m[x.first] = t.allocate(x.second);
            }
        }

930
        auto result = t.copy_from(p.eval(m).back());
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
        result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
    };

    auto create_program = [] {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::float_type, {3, 3}};
        auto p1 = p.add_parameter("x", s);
        auto p2 = p.add_parameter("y", s);
        p.add_instruction(migraphx::op::add{}, p1, p2);

        return p;
    };

    {
        auto p = create_program();
        migraphx::program::parameter_map m;
        migraphx::shape s{migraphx::shape::float_type, {3, 3}};
        m["x"] = migraphx::generate_argument(s);
949
950
951
        std::vector<float> ref_result;
        migraphx::target ref_t = migraphx::ref::target{};
        run_prog(p, ref_t, m, ref_result);
952
953

        std::vector<float> orig_result;
954
        run_prog(p, ref_t, m, orig_result);
955

956
        EXPECT(migraphx::verify_range(ref_result, orig_result));
957
958
959
    }
}

960
TEST_CASE(int8_quantization_dot)
961
962
963
964
965
966
{
    auto run_prog = [](migraphx::program p,
                       const migraphx::target& t,
                       migraphx::program::parameter_map& m_in,
                       std::vector<float>& res,
                       bool b_quantize = false) {
Shucai Xiao's avatar
Shucai Xiao committed
967
        if(b_quantize)
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
        {
            std::vector<migraphx::program::parameter_map> cali_data;
            cali_data.push_back(m_in);
            migraphx::quantize_int8(p, t, cali_data);
        }
        p.compile(t);
        migraphx::program::parameter_map m;
        for(auto&& x : p.get_parameter_shapes())
        {
            if(m_in.count(x.first) > 0)
            {
                m[x.first] = t.copy_to(m_in[x.first]);
            }
            else
            {
                m[x.first] = t.allocate(x.second);
            }
        }

987
        auto result = t.copy_from(p.eval(m).back());
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
        result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
    };

    auto create_program = [] {
        migraphx::program p;
        migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
        migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
        migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
        auto pa = p.add_parameter("a", sa);
        auto pb = p.add_parameter("b", sb);
        auto pc = p.add_parameter("c", sc);
        p.add_instruction(migraphx::op::dot{}, pa, pb, pc);

        return p;
    };

    {
        auto p = create_program();
        migraphx::program::parameter_map m;
        migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
        migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
        m["a"] = migraphx::generate_argument(sa);
        m["c"] = migraphx::generate_argument(sc);
        std::vector<float> quant_result;
1012
1013
        migraphx::target ref_t = migraphx::ref::target{};
        run_prog(p, ref_t, m, quant_result, true);
1014
1015

        std::vector<float> no_quant_result;
1016
        run_prog(p, ref_t, m, no_quant_result);
1017
1018
1019
1020
1021

        EXPECT(migraphx::verify_range(quant_result, no_quant_result));
    }
}

1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
TEST_CASE(int8_quantization_conv)
{
    auto run_prog = [](migraphx::program p,
                       const migraphx::target& t,
                       std::vector<float>& res,
                       bool b_quantize = false) {
        if(b_quantize)
        {
            std::vector<migraphx::program::parameter_map> cali_data;
            migraphx::quantize_int8(p, t, cali_data);
        }
        p.compile(t);
        migraphx::program::parameter_map m;

1036
        auto result = t.copy_from(p.eval(m).back());
1037
1038
1039
1040
1041
1042
1043
1044
        result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
    };

    auto create_program = [] {
        migraphx::program p;
        migraphx::shape sx{migraphx::shape::float_type, {4, 2, 2, 2}};
        migraphx::shape sw{migraphx::shape::float_type, {4, 2, 2, 2}};
        std::vector<float> v(sx.elements(), 0.5f);
Shucai Xiao's avatar
Shucai Xiao committed
1045
1046
        auto input   = p.add_literal(migraphx::literal(sx, v));
        auto weights = p.add_literal(migraphx::literal(sw, v));
1047
1048
1049
1050
1051
1052
1053
1054
        p.add_instruction(migraphx::op::convolution{}, input, weights);

        return p;
    };

    {
        auto p = create_program();
        std::vector<float> quant_result;
1055
1056
        migraphx::target ref_t = migraphx::ref::target{};
        run_prog(p, ref_t, quant_result, true);
1057
1058

        std::vector<float> no_quant_result;
1059
        run_prog(p, ref_t, no_quant_result);
1060
1061
1062
1063
1064

        EXPECT(migraphx::verify_range(quant_result, no_quant_result));
    }
}

Shucai Xiao's avatar
Shucai Xiao committed
1065
int main(int argc, const char* argv[]) { test::run(argc, argv); }