schedule.cpp 13.4 KB
Newer Older
Paul's avatar
Paul committed
1
2
3
#include <migraphx/schedule.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
Paul's avatar
Paul committed
4
#include <migraphx/operators.hpp>
Paul's avatar
Paul committed
5
#include <migraphx/iterator_for.hpp>
Paul's avatar
Paul committed
6
#include <migraphx/dfor.hpp>
Paul's avatar
Paul committed
7
8
9
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <unordered_map>
Paul's avatar
Paul committed
10
#include <unordered_set>
11
#include <set>
Paul's avatar
Paul committed
12
#include <deque>
Paul's avatar
Paul committed
13
14
15
16

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

Paul's avatar
Paul committed
17
18
19
20
21
22
23
24
25
26
auto get_inputs()
{
    return [](auto i) { return i->inputs(); };
}

auto get_outputs()
{
    return [](auto i) { return i->outputs(); };
}

27
28
29
struct stream_info
{
    std::unordered_map<instruction_ref, std::size_t> ins2stream;
Paul's avatar
Paul committed
30
    std::unordered_map<instruction_ref, std::size_t> weights;
31
    std::unordered_map<instruction_ref, std::size_t> iweights;
Paul's avatar
Paul committed
32
33
34
35

    void accumulate_weights(instruction_ref last, const schedule_model& model)
    {
        fix<std::size_t>([&](auto self, auto ins) -> std::size_t {
Paul's avatar
Paul committed
36
            if(not contains(weights, ins))
Paul's avatar
Paul committed
37
            {
Paul's avatar
Paul committed
38
                std::size_t weight = 0;
Paul's avatar
Paul committed
39
                auto&& op          = ins->get_operator();
Paul's avatar
Paul committed
40
41
                if(not is_context_free(op) and op.name()[0] != '@')
                    weight = model.weight(op);
42
                iweights[ins] = weight;
Paul's avatar
Paul committed
43
44
45
                weights[ins] =
                    std::accumulate(ins->inputs().begin(),
                                    ins->inputs().end(),
Paul's avatar
Paul committed
46
                                    weight,
Paul's avatar
Paul committed
47
48
49
50
51
52
                                    [&](std::size_t w, instruction_ref i) { return w + self(i); });
            }
            return weights[ins];
        })(last);
    }

Paul's avatar
Paul committed
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
    std::vector<instruction_ref>::iterator sort_args(std::vector<instruction_ref>& args)
    {
        const std::size_t min_partition_threshold = 2;
        auto compare = by(std::less<>{}, [&](auto x) {
                      return std::make_tuple(this->weights[x], x->inputs().size());
                  });
        if (args.size() < 2)
        {
            return args.end();
        }
        else if (args.size() == 2)
        {
            auto w1 = this->weights[args[0]];
            auto w2 = this->weights[args[1]];
            if (std::make_tuple(w1, args[0]->inputs().size()) > std::make_tuple(w2, args[1]->inputs().size()))
            {
                std::swap(args[0], args[1]);
                std::swap(w1, w2);
            }
            if (w1 > min_partition_threshold)
                return args.begin();
            if (w2 > min_partition_threshold)
                return args.begin()+1;
            return args.end();
        }

        std::sort(args.begin(), args.end(), compare);

        return std::upper_bound(args.begin(), args.end(), min_partition_threshold, [&](std::size_t w, auto i) {
            return w < this->weights[i];
        });
    }

Paul's avatar
Paul committed
86
    struct partition
Paul's avatar
Paul committed
87
    {
Paul's avatar
Paul committed
88
89
90
91
        std::size_t weight = 0;
        std::vector<instruction_ref> instructions{};

        void add(instruction_ref ins, std::size_t w)
Paul's avatar
Paul committed
92
        {
Paul's avatar
Paul committed
93
94
95
96
97
98
99
100
101
            weight += w;
            instructions.push_back(ins);
        }
    };

    void assign_streams(program& p, std::size_t n)
    {
        partition critical;
        std::unordered_map<instruction_ref, std::deque<partition>> partitions;
Paul's avatar
Paul committed
102
        partitions.reserve(weights.size());
Paul's avatar
Paul committed
103
        fix([&](auto self, auto ins, auto& part) {
Paul's avatar
Paul committed
104
            if (contains(partitions, ins))
Paul's avatar
Paul committed
105
                return;
Paul's avatar
Paul committed
106
            partitions[ins];
Paul's avatar
Paul committed
107
108
            part.add(ins, this->iweights[ins]);

Paul's avatar
Paul committed
109
110
111
            auto args = ins->inputs();
            auto threshold_it = sort_args(args);
            for(auto i:range(args.begin(), threshold_it))
Paul's avatar
Paul committed
112
            {
Paul's avatar
Paul committed
113
114
115
116
117
                self(i, part);
            }
            for(auto i:range(threshold_it, args.end()))
            {
                if (i == args.back())
Paul's avatar
Paul committed
118
                {
Paul's avatar
Paul committed
119
                    self(i, part);
Paul's avatar
Paul committed
120
                }
Paul's avatar
Paul committed
121
122
123
124
125
126
                else
                {
                    partitions[ins].emplace_back();
                    self(i, partitions[ins].back());
                }
            }
Paul's avatar
Paul committed
127
128
            // Sort instructions
            p.move_instruction(ins, p.end());
Paul's avatar
Paul committed
129
130
131
132
        })(std::prev(p.end()), critical);

        // Set the critical partition to stream 0
        set_stream(critical, 0);
Paul's avatar
Paul committed
133
        std::vector<std::size_t> streams(n - 1);
Paul's avatar
Paul committed
134
        // Assign streams for the other partitions
Paul's avatar
Paul committed
135
        for(auto&& ins_part : partitions)
Paul's avatar
Paul committed
136
        {
Paul's avatar
Paul committed
137
138
139
140
141
            std::sort(
                ins_part.second.begin(), ins_part.second.end(), by(std::greater<>{}, [](auto&& x) {
                    return std::make_tuple(x.weight, x.instructions.size());
                }));
            for(auto&& part : ins_part.second)
Paul's avatar
Paul committed
142
143
            {
                auto stream = std::min_element(streams.begin(), streams.end()) - streams.begin();
Paul's avatar
Paul committed
144
                set_stream(part, stream + 1);
Paul's avatar
Paul committed
145
146
                streams[stream] += part.weight;
            }
Paul's avatar
Paul committed
147
148
        }
    }
149

Paul's avatar
Paul committed
150
151
    void set_stream(const partition& p, std::size_t n)
    {
Paul's avatar
Paul committed
152
153
        for(auto ins : p.instructions)
            if(iweights[ins] > 0)
Paul's avatar
Paul committed
154
155
156
                set_stream(ins, n);
    }

Paul's avatar
Paul committed
157
    void set_stream(instruction_ref ins, std::size_t n)
158
    {
Paul's avatar
Paul committed
159
160
        assert(iweights[ins] > 0);
        ins2stream[ins] = n;
161
    }
162

Paul's avatar
Paul committed
163
    std::size_t get_stream(instruction_ref ins) const { return ins2stream.at(ins); }
164

Paul's avatar
Paul committed
165
    bool has_stream(instruction_ref ins) const { return contains(ins2stream, ins); }
166

Paul's avatar
Paul committed
167
    template <class F>
Paul's avatar
Paul committed
168
    bool different(F f, std::size_t stream) const
Paul's avatar
Paul committed
169
    {
Paul's avatar
Paul committed
170
        bool result = false;
Paul's avatar
Paul committed
171
        f([&](auto s) {
Paul's avatar
Paul committed
172
            if(s != stream)
Paul's avatar
Paul committed
173
            {
Paul's avatar
Paul committed
174
175
                result = true;
                return false;
Paul's avatar
Paul committed
176
            }
Paul's avatar
Paul committed
177
            // cppcheck-suppress uselessAssignmentArg
Paul's avatar
Paul committed
178
179
180
            stream = s;
            return true;
        });
Paul's avatar
Paul committed
181
182
        return result;
    }
183

Paul's avatar
Paul committed
184
185
186
187
188
189
190
191
192
193
194
    template <class F>
    bool different(F f) const
    {
        bool result = false;
        f([&](auto s) {
            result = different(f, s);
            return false;
        });
        return result;
    }

Paul's avatar
Paul committed
195
    template <class Selector>
Paul's avatar
Paul committed
196
    auto get_streams_from(instruction_ref start, Selector select) const
Paul's avatar
Paul committed
197
198
199
200
201
    {
        return [=](auto f) {
            return fix<bool>([&](auto self, auto ins) {
                for(auto i : select(ins))
                {
202
                    if(iweights.at(i) == 0)
Paul's avatar
Paul committed
203
                    {
Paul's avatar
Paul committed
204
                        if(not self(i))
Paul's avatar
Paul committed
205
206
207
208
                            return false;
                    }
                    else
                    {
Paul's avatar
Paul committed
209
                        if(not f(get_stream(i)))
Paul's avatar
Paul committed
210
211
212
213
214
215
216
217
                            return false;
                    }
                }
                return true;
            })(start);
        };
    }

Paul's avatar
Paul committed
218
    std::unordered_set<std::size_t> get_streams(instruction_ref ins) const
Paul's avatar
Paul committed
219
    {
Paul's avatar
Paul committed
220
        if(has_stream(ins))
Paul's avatar
Paul committed
221
222
223
224
225
226
227
228
229
            return {get_stream(ins)};
        std::unordered_set<std::size_t> result;
        get_streams_from(ins, get_inputs())([&](auto s) {
            result.insert(s);
            return true;
        });
        return result;
    }

Paul's avatar
Paul committed
230
231
232
    template <class... Ts>
    bool is_merge_point(instruction_ref ins, Ts... xs) const
    {
Paul's avatar
Paul committed
233
        return different(get_streams_from(ins, get_inputs()), xs...);
Paul's avatar
Paul committed
234
    }
Paul's avatar
Paul committed
235

Paul's avatar
Paul committed
236
237
238
    template <class... Ts>
    bool is_split_point(instruction_ref ins, Ts... xs) const
    {
Paul's avatar
Paul committed
239
        return different(get_streams_from(ins, get_outputs()), xs...);
Paul's avatar
Paul committed
240
    }
241

Paul's avatar
Paul committed
242
243
244
245
246
247
248
249
250
251
252
253
254
    std::vector<instruction_ref> get_recorded_instructions(instruction_ref start)
    {
        std::vector<instruction_ref> result;
        std::unordered_map<std::size_t, instruction_ref> m;
        fix([&](auto self, auto ins) {
            for(auto i : ins->inputs())
            {
                if(iweights.at(i) == 0)
                {
                    self(i);
                    continue;
                }
                auto stream = get_stream(i);
Paul's avatar
Paul committed
255
                if(not contains(m, stream))
Paul's avatar
Paul committed
256
257
                    m[stream] = i;
                else
Paul's avatar
Paul committed
258
259
260
                    m[stream] = std::min(m[stream], i, by(std::less<>{}, [&](auto x) {
                                             return std::distance(x, start);
                                         }));
Paul's avatar
Paul committed
261
262
            }
        })(start);
Paul's avatar
Paul committed
263
264
        std::transform(
            m.begin(), m.end(), std::back_inserter(result), [](auto&& p) { return p.second; });
Paul's avatar
Paul committed
265
266
267
        return result;
    }

Paul's avatar
Paul committed
268
269
    std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>>
    find_concurrent_instructions(program& p)
Paul's avatar
Paul committed
270
    {
Paul's avatar
Paul committed
271
        std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>> result;
Paul's avatar
Paul committed
272
        std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>> merge_from;
Paul's avatar
Paul committed
273
274
        result.reserve(p.size());
        merge_from.reserve(p.size());
Paul's avatar
Paul committed
275
        for(auto ins : reverse_iterator_for(p))
Paul's avatar
Paul committed
276
        {
Paul's avatar
Paul committed
277
            for(auto&& arg : ins->outputs())
Paul's avatar
Paul committed
278
            {
Paul's avatar
Paul committed
279
280
281
                if(is_merge_point(arg))
                    merge_from[ins].insert(arg);
                merge_from[ins].insert(merge_from[arg].begin(), merge_from[arg].end());
Paul's avatar
Paul committed
282
283
            }

Paul's avatar
Paul committed
284
285
286
287
            auto streams = get_streams(ins);

            // Collect concur instructions for each merge point.
            for(auto& merge : merge_from[ins])
Paul's avatar
Paul committed
288
            {
Paul's avatar
Paul committed
289
                for(auto stream : streams)
Paul's avatar
Paul committed
290
291
292
                {
                    if(result[merge].size() <= stream)
                        result[merge].resize(stream + 1);
Paul's avatar
Paul committed
293
294
                    auto&& r = result[merge][stream];
                    r.push_back(ins);
Paul's avatar
Paul committed
295
                    // Copy inputs if they dont have a stream(and are not a builtin and context
Paul's avatar
Paul committed
296
                    // free). Inputs without a stream can have a implicit dependency
Paul's avatar
Paul committed
297
298
299
300
301
302
303
304
                    std::copy_if(ins->inputs().begin(),
                                 ins->inputs().end(),
                                 std::back_inserter(r),
                                 [&](auto x) {
                                     return not this->has_stream(x) and
                                            not is_context_free(x->get_operator()) and
                                            x->name().front() != '@';
                                 });
Paul's avatar
Paul committed
305
                }
Paul's avatar
Paul committed
306
307
            }
        }
Paul's avatar
Paul committed
308
        return result;
Paul's avatar
Paul committed
309
    }
310
311
};

Paul's avatar
Paul committed
312
313
void schedule::apply(program& p) const
{
314
    stream_info si;
Paul's avatar
Paul committed
315
316
317
    auto last = std::prev(p.end());
    si.accumulate_weights(last, model);
    si.assign_streams(p, model.concurrency());
318

Paul's avatar
Paul committed
319
320
321
322
323
    if(enabled(MIGRAPHX_TRACE_COMPILE{}))
    {
        p.annotate(std::cout, [&](auto ins) {
            std::cout << ":";
            std::cout << " weight=" << si.weights.at(ins);
Paul's avatar
Paul committed
324
            std::cout << " input={";
Paul's avatar
Paul committed
325
            si.get_streams_from(ins, get_inputs())([&](auto s) {
Paul's avatar
Paul committed
326
327
328
329
                std::cout << s << ",";
                return true;
            });
            std::cout << "}";
Paul's avatar
Paul committed
330
            if(si.has_stream(ins))
Paul's avatar
Paul committed
331
332
333
334
335
                std::cout << " stream=" << si.get_stream(ins);
        });
        std::cout << std::endl;
    }

336
    // Schedule instructions
Paul's avatar
Paul committed
337
    std::size_t wait_id = 0;
Paul's avatar
Paul committed
338
    std::unordered_map<instruction_ref, std::size_t> ins2wait;
Paul's avatar
Paul committed
339
340
    std::unordered_map<std::size_t, std::unordered_set<std::size_t>> waited_for;
    std::unordered_map<instruction_ref, std::unordered_set<std::size_t>> ins2waited;
Paul's avatar
Paul committed
341
342
    ins2wait.reserve(p.size());
    ins2waited.reserve(p.size());
Paul's avatar
Paul committed
343
    for(auto ins : iterator_for(p))
344
    {
Paul's avatar
Paul committed
345
        // Only schedule instructions that have a stream
Paul's avatar
Paul committed
346
        if(not si.has_stream(ins))
Paul's avatar
Paul committed
347
            continue;
348
        assert(si.weights[ins] > 0);
Paul's avatar
Paul committed
349
        // Schedule instruction on the stream
Paul's avatar
Paul committed
350
        auto stream = si.get_stream(ins);
Paul's avatar
Paul committed
351
        assert(stream < model.concurrency());
Paul's avatar
Paul committed
352
353
        model.sched(p, ins, stream);
        // Insert wait instructions
Paul's avatar
Paul committed
354
        if(si.is_merge_point(ins, stream))
Paul's avatar
Paul committed
355
        {
Paul's avatar
Paul committed
356
            for(auto i : si.get_recorded_instructions(ins))
Paul's avatar
Paul committed
357
            {
Paul's avatar
Paul committed
358
                if(not si.has_stream(i))
Paul's avatar
Paul committed
359
                    continue;
Paul's avatar
Paul committed
360
361
                auto istream = si.get_stream(i);
                if(stream == istream)
Paul's avatar
Paul committed
362
363
                    continue;
                // Create a new event if it hasn't been recorded
Paul's avatar
Paul committed
364
                if(not contains(ins2wait, i))
Paul's avatar
Paul committed
365
366
367
368
369
                {
                    ins2wait[i] = wait_id;
                    model.record(p, i, wait_id);
                    wait_id++;
                }
Paul's avatar
Paul committed
370
371
372
                auto w = ins2wait.at(i);
                // If we already waited for the event on this stream then dont
                // insert another wait event
Paul's avatar
Paul committed
373
                if(not contains(waited_for[stream], w))
Paul's avatar
Paul committed
374
375
376
377
378
                    model.wait(p, ins, w);
                // Store the event as waited
                waited_for[stream].insert(w);
                // Store all wait events that have been waited on prior to the recorded instruction
                waited_for[stream].insert(ins2waited[i].begin(), ins2waited[i].end());
Paul's avatar
Paul committed
379
            }
Paul's avatar
Paul committed
380
        }
Paul's avatar
Paul committed
381
382
383
384
385
        // Store wait events that have already been waited on
        if(si.is_split_point(ins, stream))
        {
            ins2waited[ins] = waited_for[stream];
        }
386
    }
Paul's avatar
Paul committed
387

Paul's avatar
Paul committed
388
389
    // Add memory conflicts
    auto concur_ins = si.find_concurrent_instructions(p);
Paul's avatar
Paul committed
390
    for(auto&& merge : concur_ins)
Paul's avatar
Paul committed
391
    {
Paul's avatar
Paul committed
392
        dfor(merge.second.size(), merge.second.size())([&](auto i, auto j) {
Paul's avatar
Paul committed
393
            if(i == j)
Paul's avatar
Paul committed
394
                return;
Paul's avatar
Paul committed
395
396
397
398
            if (merge.second[i].empty())
                return;
            if (merge.second[j].empty())
                return;
Paul's avatar
Paul committed
399
            for(auto ins1 : merge.second[i])
Paul's avatar
Paul committed
400
            {
Paul's avatar
Paul committed
401
                auto args = merge.second[j];
Paul's avatar
Paul committed
402
                args.insert(args.begin(), ins1);
Paul's avatar
Paul committed
403
                p.insert_instruction(merge.first, op::identity{}, args);
Paul's avatar
Paul committed
404
405
406
            }
        });
    }
Paul's avatar
Paul committed
407
408
409
410
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx