schedule.cpp 21.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
Paul's avatar
Paul committed
24
25
26
27
#include <migraphx/schedule.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
28
#include <migraphx/iterator.hpp>
Paul's avatar
Paul committed
29
#include <migraphx/dfor.hpp>
30
#include <migraphx/par_for.hpp>
Paul's avatar
Paul committed
31
32
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
Paul Fultz II's avatar
Paul Fultz II committed
33
#include <migraphx/dom_info.hpp>
Paul's avatar
Paul committed
34
35
#include <unordered_map>
#include <unordered_set>
36
#include <queue>
37
38
#include <thread>
#include <mutex>
39
40
#include <migraphx/make_op.hpp>

Paul's avatar
Paul committed
41
42
#include <set>
#include <deque>
43
#include <chrono>
Paul Fultz II's avatar
Paul Fultz II committed
44
#include <iomanip>
Paul's avatar
Paul committed
45
46
47
48

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

49
50
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_SCHEDULE)

Paul's avatar
Paul committed
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
auto get_inputs()
{
    return [](auto i) { return i->inputs(); };
}

auto get_outputs()
{
    return [](auto i) { return i->outputs(); };
}

struct stream_info
{
    std::unordered_map<instruction_ref, std::size_t> ins2stream;
    std::unordered_map<instruction_ref, std::size_t> weights;
    std::unordered_map<instruction_ref, std::size_t> iweights;
Shucai Xiao's avatar
Shucai Xiao committed
66
67
    ins_dep_map mod_implicit_deps;

68
    void calc_implicit_deps(const module& m) { mod_implicit_deps = m.calc_implicit_deps(); }
Paul's avatar
Paul committed
69
70
71
72
73
74
75
76
77
78

    void accumulate_weights(instruction_ref last, const schedule_model& model)
    {
        fix<std::size_t>([&](auto self, auto ins) -> std::size_t {
            if(not contains(weights, ins))
            {
                std::size_t weight = 0;
                auto&& op          = ins->get_operator();
                if(not is_context_free(op) and op.name()[0] != '@')
                    weight = model.weight(op);
79
80
81
                // This will ensure a stream will be assigned to return
                if(op.name() == "@return")
                    weight = 1;
Paul's avatar
Paul committed
82
                iweights[ins] = weight;
Shucai Xiao's avatar
Shucai Xiao committed
83
84
85
86
87
88
89
90
91
92
93
                auto inputs   = ins->inputs();
                if(contains(mod_implicit_deps, ins))
                {
                    const auto& impl_deps = mod_implicit_deps.at(ins);
                    inputs.insert(inputs.end(), impl_deps.begin(), impl_deps.end());
                }

                weights[ins] = std::accumulate(
                    inputs.begin(), inputs.end(), weight, [&](std::size_t w, instruction_ref i) {
                        return w + self(i);
                    });
Paul's avatar
Paul committed
94
95
96
97
98
            }
            return weights[ins];
        })(last);
    }

99
100
101
102
103
104
105
106
107
108
109
    template <class Compare>
    void sort_args_by_weight(std::vector<instruction_ref>& args, Compare compare) const
    {
        if(args.size() < 2)
            return;
        std::sort(args.begin(), args.end(), by(compare, [this](auto x) {
                      return std::make_tuple(
                          this->weights.at(x), x->inputs().size(), std::addressof(*x));
                  }));
    }

Paul's avatar
Paul committed
110
111
112
113
114
115
116
    std::vector<instruction_ref>::iterator sort_args(std::vector<instruction_ref>& args)
    {
        if(args.size() < 2)
        {
            return args.end();
        }

Paul Fultz II's avatar
Paul Fultz II committed
117
        const std::size_t min_partition_threshold = 2;
118
        sort_args_by_weight(args, std::greater<>{});
Paul's avatar
Paul committed
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

        auto it = std::lower_bound(std::next(args.begin()),
                                   args.end(),
                                   min_partition_threshold,
                                   [&](auto i, std::size_t w) { return this->weights[i] > w; });
        assert(it == args.end() or this->weights[*it] <= min_partition_threshold);
        assert(it == args.end() or std::prev(it) == args.begin() or
               this->weights[*std::prev(it)] > min_partition_threshold);
        return it;
    }

    struct partition
    {
        std::size_t weight = 0;
        std::vector<instruction_ref> instructions{};

        void add(instruction_ref ins, std::size_t w)
        {
            weight += w;
            instructions.push_back(ins);
        }
    };

142
    std::size_t assign_streams(module& m, std::size_t n)
Paul's avatar
Paul committed
143
    {
144
        assert(n > 0);
Paul's avatar
Paul committed
145
146
147
148
        partition critical;
        std::unordered_map<instruction_ref, std::deque<partition>> partitions;
        partitions.reserve(weights.size());
        fix([&](auto self, auto ins, auto& part) {
149
150
            assert(not is_end(ins, m.end()));
            if(not m.has_instruction(ins))
Shucai Xiao's avatar
Shucai Xiao committed
151
                return;
152
153
            if(contains(partitions, ins))
                return;
Shucai Xiao's avatar
Shucai Xiao committed
154

Paul's avatar
Paul committed
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
            // Add an entry so we know the instruction was visited
            partitions[ins];
            part.add(ins, this->iweights[ins]);

            auto args         = ins->inputs();
            auto threshold_it = this->sort_args(args);

            if(not args.empty())
            {
                assert(threshold_it != args.begin());
                self(args.front(), part);
                for(auto i : range(std::next(args.begin()), threshold_it))
                {
                    partitions[ins].emplace_back();
                    self(i, partitions[ins].back());
                }
                for(auto i : range(threshold_it, args.end()))
                {
                    self(i, part);
                }
            }
            // Sort instructions
177
178
            m.move_instruction(ins, m.end());
        })(std::prev(m.end()), critical);
Paul's avatar
Paul committed
179
180
181

        // Set the critical partition to stream 0
        set_stream(critical, 0);
182
183
184
185
186
187
188
189
190
        if(n == 1)
        {
            // Assign streams for the other partitions
            for(auto&& ins_part : partitions)
                for(auto&& part : ins_part.second)
                    set_stream(part, 0);
            return 1;
        }
        else
Paul's avatar
Paul committed
191
        {
192
193
194
            std::vector<std::size_t> streams(n - 1);
            // Assign streams for the other partitions
            for(auto&& ins_part : partitions)
Paul's avatar
Paul committed
195
            {
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
                std::sort(ins_part.second.begin(),
                          ins_part.second.end(),
                          by(std::greater<>{}, [](auto&& x) {
                              return std::make_tuple(x.weight, x.instructions.size());
                          }));
                for(auto&& part : ins_part.second)
                {
                    auto stream =
                        std::min_element(streams.begin(), streams.end()) - streams.begin();
                    set_stream(part, stream + 1);
                    streams[stream] += part.weight;
                }
            }
            return 1 + std::count_if(streams.begin(), streams.end(), [](auto x) { return x > 0; });
        }
    }

    using weight_ins = std::pair<std::size_t, instruction_ref>;
    struct compare_weight_ins
    {
        bool operator()(const weight_ins& x, const weight_ins& y) const
        {
            return std::make_pair(x.first, std::addressof(*x.second)) <
                   std::make_pair(y.first, std::addressof(*y.second));
        }
    };

223
    void sort(module& m, std::size_t)
224
225
226
    {
        std::set<weight_ins, compare_weight_ins> children;
        std::unordered_map<instruction_ref, std::size_t> visited;
227
        auto last      = std::prev(m.end());
228
        auto mw        = this->weights.at(last);
229
        auto nw        = mw / (m.size() + 1);
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
        auto add_child = [&](auto ins) {
            auto x  = 1 + (mw - this->weights.at(ins)) / (nw + 1);
            auto w  = x * this->iweights.at(ins);
            auto& v = visited[ins];
            auto it = children.find(std::make_pair(v * w, ins));
            if(it == children.end())
            {
                v++;
                children.insert(std::make_pair(v * w, ins));
            }
        };
        add_child(last);

        while(not children.empty())
        {
            // Pop the first element
            auto top = children.begin()->second;
            children.erase(children.begin());
248
            m.move_instruction(top, m.begin());
249
250
            for(auto ins : top->inputs())
            {
251
                if(not m.has_instruction(ins))
Shucai Xiao's avatar
Shucai Xiao committed
252
                    continue;
253
                add_child(ins);
Paul's avatar
Paul committed
254
            }
Shucai Xiao's avatar
Shucai Xiao committed
255
256
257
258
259

            if(contains(mod_implicit_deps, top))
            {
                for(auto ins : mod_implicit_deps.at(top))
                {
260
                    assert(m.has_instruction(ins));
Shucai Xiao's avatar
Shucai Xiao committed
261
262
263
                    add_child(ins);
                }
            }
Paul's avatar
Paul committed
264
        }
265
266
267

        // move dangling parameter to the front so as not be removed
        auto ins = std::next(last);
268
        while(ins != m.end())
269
270
271
272
        {
            auto next = std::next(ins);
            if(ins->name() == "@param")
            {
273
                m.move_instruction(ins, m.begin());
274
275
276
            }
            ins = next;
        }
Paul's avatar
Paul committed
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
    }

    void set_stream(const partition& p, std::size_t n)
    {
        for(auto ins : p.instructions)
            if(iweights[ins] > 0)
                set_stream(ins, n);
    }

    void set_stream(instruction_ref ins, std::size_t n)
    {
        assert(iweights[ins] > 0);
        ins2stream[ins] = n;
    }

    std::size_t get_stream(instruction_ref ins) const { return ins2stream.at(ins); }

    bool has_stream(instruction_ref ins) const { return contains(ins2stream, ins); }

    template <class F>
    bool different(F f, std::size_t stream) const
    {
        bool result = false;
        f([&](auto s) {
            if(s != stream)
            {
                result = true;
                return false;
            }
            // cppcheck-suppress uselessAssignmentArg
            stream = s;
            return true;
        });
        return result;
    }

    template <class F>
    bool different(F f) const
    {
        bool result = false;
        f([&](auto s) {
            result = this->different(f, s);
            return false;
        });
        return result;
    }

    template <class Selector>
    auto get_streams_from(instruction_ref start, Selector select) const
    {
        return [=](auto f) {
            return fix<bool>([&](auto self, auto ins) {
329
                return all_of(select(ins), [&](auto i) {
Charlie Lin's avatar
Charlie Lin committed
330
                    if(has_stream(i))
331
                        return f(this->get_stream(i));
Charlie Lin's avatar
Charlie Lin committed
332
333
                    else
                        return self(i);
334
                });
Paul's avatar
Paul committed
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
            })(start);
        };
    }

    std::unordered_set<std::size_t> get_streams(instruction_ref ins) const
    {
        if(has_stream(ins))
            return {get_stream(ins)};
        std::unordered_set<std::size_t> result;
        get_streams_from(ins, get_inputs())([&](auto s) {
            result.insert(s);
            return true;
        });
        return result;
    }

    template <class... Ts>
    bool is_merge_point(instruction_ref ins, Ts... xs) const
    {
        return different(get_streams_from(ins, get_inputs()), xs...);
    }

    template <class... Ts>
    bool is_split_point(instruction_ref ins, Ts... xs) const
    {
        return different(get_streams_from(ins, get_outputs()), xs...);
    }

    std::vector<instruction_ref> get_recorded_instructions(instruction_ref start)
    {
        std::vector<instruction_ref> result;
        std::unordered_map<std::size_t, instruction_ref> m;
        fix([&](auto self, auto ins) {
            for(auto i : ins->inputs())
            {
                if(iweights.at(i) == 0)
                {
                    self(i);
                    continue;
                }
                auto stream = this->get_stream(i);
                if(not contains(m, stream))
                    m[stream] = i;
                else
                    m[stream] = std::min(m[stream], i, by(std::less<>{}, [&](auto x) {
                                             return std::distance(x, start);
                                         }));
            }
        })(start);
        std::transform(
            m.begin(), m.end(), std::back_inserter(result), [](auto&& p) { return p.second; });
        return result;
    }

    std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>>
390
    find_concurrent_instructions(module& m) const
Paul's avatar
Paul committed
391
392
393
    {
        std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>> result;
        std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>> merge_from;
394
395
396
397
        dominator_info di = compute_dominator(m);
        result.reserve(m.size());
        merge_from.reserve(m.size());
        for(auto ins : reverse_iterator_for(m))
Paul's avatar
Paul committed
398
399
400
        {
            for(auto&& arg : ins->outputs())
            {
401
                if(not m.has_instruction(arg))
Shucai Xiao's avatar
Shucai Xiao committed
402
                    continue;
Paul's avatar
Paul committed
403
404
405
406
407
                if(is_merge_point(arg))
                    merge_from[ins].insert(arg);
                merge_from[ins].insert(merge_from[arg].begin(), merge_from[arg].end());
            }

Paul Fultz II's avatar
Paul Fultz II committed
408
409
410
411
412
            if(is_split_point(ins))
            {
                erase_if(merge_from[ins],
                         [&](auto merge) { return di.strictly_dominate(ins, merge); });
            }
Paul's avatar
Paul committed
413

Paul Fultz II's avatar
Paul Fultz II committed
414
            auto streams = this->get_streams(ins);
Paul's avatar
Paul committed
415
            // Collect concur instructions for each merge point.
416
            for(const auto& merge : merge_from[ins])
Paul's avatar
Paul committed
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
            {
                for(auto stream : streams)
                {
                    if(result[merge].size() <= stream)
                        result[merge].resize(stream + 1);
                    auto&& r = result[merge][stream];
                    r.push_back(ins);
                    // Copy inputs if they dont have a stream(and are not a builtin and context
                    // free). Inputs without a stream can have a implicit dependency
                    std::copy_if(ins->inputs().begin(),
                                 ins->inputs().end(),
                                 std::back_inserter(r),
                                 [&](auto x) {
                                     return not this->has_stream(x) and
                                            not is_context_free(x->get_operator()) and
                                            x->name().front() != '@';
                                 });
                }
            }
        }
        return result;
    }

    std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>>
441
    get_conflicts(module& m)
Paul's avatar
Paul committed
442
    {
Paul Fultz II's avatar
Paul Fultz II committed
443

444
445
446
        using conflict_table_type =
            std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>>;
        conflict_table_type conflict_table;
447
        auto concur_ins = this->find_concurrent_instructions(m);
448

Paul Fultz II's avatar
Paul Fultz II committed
449
450
451
        // Compute an index for each instruction
        std::unordered_map<instruction_ref, std::size_t> ins2index;
        std::size_t index_total = 0;
452
        for(auto ins : iterator_for(m))
Paul Fultz II's avatar
Paul Fultz II committed
453
454
            ins2index[ins] = index_total++;

455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
        std::vector<conflict_table_type> thread_conflict_tables(
            std::thread::hardware_concurrency());
        std::vector<instruction_ref> index_to_ins;
        index_to_ins.reserve(concur_ins.size());
        std::transform(concur_ins.begin(),
                       concur_ins.end(),
                       std::back_inserter(index_to_ins),
                       [](auto&& it) { return it.first; });

        par_for(concur_ins.size(), [&](auto ins_index, auto tid) {
            auto merge_first = index_to_ins[ins_index];
            assert(concur_ins.count(merge_first) > 0);
            auto& merge_second = concur_ins.at(merge_first);

            // ensure there are enough elements for different threads
            assert(tid < thread_conflict_tables.size());
            auto& thrd_table = thread_conflict_tables.at(tid);

            std::unordered_set<instruction_ref> checked_ins_set;
            auto range_i = range(merge_second.begin(), std::prev(merge_second.end()));
            for(auto it_i : iterator_for(range_i))
            {
                std::unordered_set<instruction_ref> ins1_set;
                std::copy_if(it_i->begin(),
                             it_i->end(),
                             std::inserter(ins1_set, ins1_set.end()),
                             [&](auto i) { return not contains(checked_ins_set, i); });
                checked_ins_set.insert(ins1_set.begin(), ins1_set.end());

                auto range_j = range(std::next(it_i), merge_second.end());
                std::unordered_set<instruction_ref> ins2_set;
                for(auto it_j : iterator_for(range_j))
                {
                    std::copy_if(it_j->begin(),
                                 it_j->end(),
                                 std::inserter(ins2_set, ins2_set.end()),
                                 [&](auto i) { return not contains(checked_ins_set, i); });
                }

                for(auto ins1 : ins1_set)
Paul's avatar
Paul committed
495
                {
Paul Fultz II's avatar
Paul Fultz II committed
496
                    auto p1 = ins2index.at(ins1);
497
                    for(auto ins2 : ins2_set)
Paul's avatar
Paul committed
498
499
500
                    {
                        if(ins1 == ins2)
                            continue;
Paul Fultz II's avatar
Paul Fultz II committed
501
502
                        auto p2 = ins2index.at(ins2);
                        if(p2 > p1)
503
                            thrd_table[ins2].insert(ins1);
Paul's avatar
Paul committed
504
                        else
505
                            thrd_table[ins1].insert(ins2);
Paul's avatar
Paul committed
506
507
                    }
                }
508
509
510
511
512
513
514
515
516
517
            }
        });

        // merge thread_conflict_tables together
        for(auto& tbl : thread_conflict_tables)
        {
            for(auto& it : tbl)
            {
                conflict_table[it.first].insert(it.second.begin(), it.second.end());
            }
Paul's avatar
Paul committed
518
        }
519
520

        // Remove instructions from the conflict table of an ealier instruction
Paul's avatar
Paul committed
521
522
523
524
525
526
527
        for(auto&& ip : conflict_table)
        {
            auto ins1 = ip.first;
            for(auto ins2 : ip.second)
                if(contains(conflict_table[ins2], ins1))
                    conflict_table[ins2].erase(ins1);
        }
528

Paul's avatar
Paul committed
529
530
531
532
        return conflict_table;
    }
};

533
void schedule::apply(module& m) const
Paul's avatar
Paul committed
534
{
Paul's avatar
Paul committed
535
    if(not enable)
Paul's avatar
Paul committed
536
        return;
Shucai Xiao's avatar
Shucai Xiao committed
537

Paul's avatar
Paul committed
538
    stream_info si;
539
540
    si.calc_implicit_deps(m);
    auto last = std::prev(m.end());
Paul's avatar
Paul committed
541
    si.accumulate_weights(last, model);
542
543
    auto nstreams = si.assign_streams(m, model.concurrency());
    si.sort(m, model.concurrency());
Paul's avatar
Paul committed
544

545
    if(enabled(MIGRAPHX_TRACE_COMPILE{}) or enabled(MIGRAPHX_TRACE_SCHEDULE{}))
Paul's avatar
Paul committed
546
    {
547
        m.annotate(std::cout, [&](auto ins) {
548
549
550
            if(ins->name() == "@param" and not contains(si.weights, ins))
                return;

Paul's avatar
Paul committed
551
552
553
554
555
556
557
558
559
560
561
562
563
564
            std::cout << ":";
            std::cout << " weight=" << si.weights.at(ins);
            std::cout << " input={";
            si.get_streams_from(ins, get_inputs())([&](auto s) {
                std::cout << s << ",";
                return true;
            });
            std::cout << "}";
            if(si.has_stream(ins))
                std::cout << " stream=" << si.get_stream(ins);
        });
        std::cout << std::endl;
    }

565
566
567
568
    // No concurrency
    if(nstreams < 2)
        return;

Paul's avatar
Paul committed
569
570
571
572
573
    // Schedule instructions
    std::size_t wait_id = 0;
    std::unordered_map<instruction_ref, std::size_t> ins2wait;
    std::unordered_map<std::size_t, std::unordered_set<std::size_t>> waited_for;
    std::unordered_map<instruction_ref, std::unordered_set<std::size_t>> ins2waited;
574
575
576
    ins2wait.reserve(m.size());
    ins2waited.reserve(m.size());
    for(auto ins : iterator_for(m))
Paul's avatar
Paul committed
577
578
579
580
581
582
583
584
    {
        // Only schedule instructions that have a stream
        if(not si.has_stream(ins))
            continue;
        assert(si.weights[ins] > 0);
        // Schedule instruction on the stream
        auto stream = si.get_stream(ins);
        assert(stream < model.concurrency());
585
        model.sched(m, ins, stream);
Paul's avatar
Paul committed
586
587
588
589
590
        // Insert wait instructions
        if(si.is_merge_point(ins, stream))
        {
            for(auto i : si.get_recorded_instructions(ins))
            {
591
                if(not si.has_stream(i) or si.get_stream(i) == stream)
Paul's avatar
Paul committed
592
                    continue;
593

Paul's avatar
Paul committed
594
595
596
597
                // Create a new event if it hasn't been recorded
                if(not contains(ins2wait, i))
                {
                    ins2wait[i] = wait_id;
598
                    model.record(m, i, wait_id);
Paul's avatar
Paul committed
599
600
601
602
603
604
                    wait_id++;
                }
                auto w = ins2wait.at(i);
                // If we already waited for the event on this stream then dont
                // insert another wait event
                if(not contains(waited_for[stream], w))
605
                    model.wait(m, ins, w);
Paul's avatar
Paul committed
606
607
608
609
610
611
612
613
614
615
616
617
618
619
                // Store the event as waited
                waited_for[stream].insert(w);
                // Store all wait events that have been waited on prior to the recorded instruction
                waited_for[stream].insert(ins2waited[i].begin(), ins2waited[i].end());
            }
        }
        // Store wait events that have already been waited on
        if(si.is_split_point(ins, stream))
        {
            ins2waited[ins] = waited_for[stream];
        }
    }

    // Add memory conflicts
620
    auto conflict_table = si.get_conflicts(m);
Paul's avatar
Paul committed
621
622
623
624
625
626
627
    for(auto&& ip : conflict_table)
    {
        if(ip.second.empty())
            continue;
        std::vector<instruction_ref> args;
        args.push_back(ip.first);
        args.insert(args.end(), ip.second.begin(), ip.second.end());
628
        m.insert_instruction(std::next(ip.first), make_op("identity"), args);
Paul's avatar
Paul committed
629
630
631
632
633
    }
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx