schedule.cpp 11.6 KB
Newer Older
Paul's avatar
Paul committed
1
2
3
#include <migraphx/schedule.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
Paul's avatar
Paul committed
4
#include <migraphx/operators.hpp>
Paul's avatar
Paul committed
5
#include <migraphx/iterator_for.hpp>
Paul's avatar
Paul committed
6
#include <migraphx/dfor.hpp>
Paul's avatar
Paul committed
7
8
9
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <unordered_map>
Paul's avatar
Paul committed
10
#include <unordered_set>
11
#include <set>
Paul's avatar
Paul committed
12
#include <deque>
Paul's avatar
Paul committed
13
14
15
16

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

Paul's avatar
Paul committed
17
18
19
20
21
22
23
24
25
26
auto get_inputs()
{
    return [](auto i) { return i->inputs(); };
}

auto get_outputs()
{
    return [](auto i) { return i->outputs(); };
}

27
28
29
struct stream_info
{
    std::unordered_map<instruction_ref, std::size_t> ins2stream;
Paul's avatar
Paul committed
30
    std::unordered_map<instruction_ref, std::size_t> weights;
31
    std::unordered_map<instruction_ref, std::size_t> iweights;
Paul's avatar
Paul committed
32
33
34
35
36
37

    void accumulate_weights(instruction_ref last, const schedule_model& model)
    {
        fix<std::size_t>([&](auto self, auto ins) -> std::size_t {
            if(weights.count(ins) == 0)
            {
Paul's avatar
Paul committed
38
                std::size_t weight = 0;
Paul's avatar
Paul committed
39
                auto&& op          = ins->get_operator();
Paul's avatar
Paul committed
40
41
                if(not is_context_free(op) and op.name()[0] != '@')
                    weight = model.weight(op);
42
                iweights[ins] = weight;
Paul's avatar
Paul committed
43
44
45
                weights[ins] =
                    std::accumulate(ins->inputs().begin(),
                                    ins->inputs().end(),
Paul's avatar
Paul committed
46
                                    weight,
Paul's avatar
Paul committed
47
48
49
50
51
52
                                    [&](std::size_t w, instruction_ref i) { return w + self(i); });
            }
            return weights[ins];
        })(last);
    }

Paul's avatar
Paul committed
53
    struct partition
Paul's avatar
Paul committed
54
    {
Paul's avatar
Paul committed
55
56
57
58
        std::size_t weight = 0;
        std::vector<instruction_ref> instructions{};

        void add(instruction_ref ins, std::size_t w)
Paul's avatar
Paul committed
59
        {
Paul's avatar
Paul committed
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
            weight += w;
            instructions.push_back(ins);
        }
    };

    void assign_streams(program& p, std::size_t n)
    {
        const std::size_t min_partition_threshold = 2;
        partition critical;
        std::unordered_map<instruction_ref, std::deque<partition>> partitions;
        fix([&](auto self, auto ins, auto& part) {
            // If weight is zero then stop
            if(this->weights[ins] == 0)
                return;
            part.add(ins, this->iweights[ins]);

Paul's avatar
Paul committed
76
77
78
            auto max_it = std::max_element(ins->inputs().begin(),
                                           ins->inputs().end(),
                                           by(std::less<>{}, index_of(this->weights)));
Paul's avatar
Paul committed
79
80
81
            for(auto i : ins->inputs())
            {
                const auto weight = this->weights[i];
Paul's avatar
Paul committed
82
                if(i == *max_it or weight <= min_partition_threshold)
Paul's avatar
Paul committed
83
                {
Paul's avatar
Paul committed
84
                    self(i, part);
Paul's avatar
Paul committed
85
                }
Paul's avatar
Paul committed
86
87
88
89
90
91
92
93
94
95
                else
                {
                    partitions[ins].emplace_back();
                    self(i, partitions[ins].back());
                }
            }
        })(std::prev(p.end()), critical);

        // Set the critical partition to stream 0
        set_stream(critical, 0);
Paul's avatar
Paul committed
96
        std::vector<std::size_t> streams(n - 1);
Paul's avatar
Paul committed
97
        // Assign streams for the other partitions
Paul's avatar
Paul committed
98
        for(auto&& ins_part : partitions)
Paul's avatar
Paul committed
99
        {
Paul's avatar
Paul committed
100
101
102
103
104
            std::sort(
                ins_part.second.begin(), ins_part.second.end(), by(std::greater<>{}, [](auto&& x) {
                    return std::make_tuple(x.weight, x.instructions.size());
                }));
            for(auto&& part : ins_part.second)
Paul's avatar
Paul committed
105
106
            {
                auto stream = std::min_element(streams.begin(), streams.end()) - streams.begin();
Paul's avatar
Paul committed
107
                set_stream(part, stream + 1);
Paul's avatar
Paul committed
108
109
                streams[stream] += part.weight;
            }
Paul's avatar
Paul committed
110
111
        }
    }
112

Paul's avatar
Paul committed
113
114
    void set_stream(const partition& p, std::size_t n)
    {
Paul's avatar
Paul committed
115
116
        for(auto ins : p.instructions)
            if(iweights[ins] > 0)
Paul's avatar
Paul committed
117
118
119
                set_stream(ins, n);
    }

Paul's avatar
Paul committed
120
    void set_stream(instruction_ref ins, std::size_t n)
121
    {
Paul's avatar
Paul committed
122
123
        assert(iweights[ins] > 0);
        ins2stream[ins] = n;
124
    }
125

Paul's avatar
Paul committed
126
    std::size_t get_stream(instruction_ref ins) const { return ins2stream.at(ins); }
127

Paul's avatar
Paul committed
128
    bool has_stream(instruction_ref ins) const { return ins2stream.count(ins) > 0; }
129

Paul's avatar
Paul committed
130
    bool different(const std::vector<std::size_t>& v) const
131
    {
Paul's avatar
Paul committed
132
        if(v.size() < 2)
133
            return false;
Paul's avatar
Paul committed
134
        return not std::all_of(v.begin(), v.end(), [&](std::size_t x) { return x == v.front(); });
135
136
    }

Paul's avatar
Paul committed
137
    template <class F>
Paul's avatar
Paul committed
138
    bool different(F f, std::size_t stream) const
Paul's avatar
Paul committed
139
    {
Paul's avatar
Paul committed
140
        bool result = false;
Paul's avatar
Paul committed
141
        f([&](auto s) {
Paul's avatar
Paul committed
142
            if(s != stream)
Paul's avatar
Paul committed
143
            {
Paul's avatar
Paul committed
144
145
                result = true;
                return false;
Paul's avatar
Paul committed
146
            }
Paul's avatar
Paul committed
147
148
149
            stream = s;
            return true;
        });
Paul's avatar
Paul committed
150
151
        return result;
    }
152

Paul's avatar
Paul committed
153
154
155
156
157
158
159
160
161
162
163
    template <class F>
    bool different(F f) const
    {
        bool result = false;
        f([&](auto s) {
            result = different(f, s);
            return false;
        });
        return result;
    }

Paul's avatar
Paul committed
164
165
166
167
168
169
170
    template <class Selector>
    auto get_streams(instruction_ref start, Selector select) const
    {
        return [=](auto f) {
            return fix<bool>([&](auto self, auto ins) {
                for(auto i : select(ins))
                {
171
                    if(iweights.at(i) == 0)
Paul's avatar
Paul committed
172
                    {
Paul's avatar
Paul committed
173
                        if(not self(i))
Paul's avatar
Paul committed
174
175
176
177
                            return false;
                    }
                    else
                    {
Paul's avatar
Paul committed
178
                        if(not f(get_stream(i)))
Paul's avatar
Paul committed
179
180
181
182
183
184
185
186
                            return false;
                    }
                }
                return true;
            })(start);
        };
    }

Paul's avatar
Paul committed
187
188
189
190
191
    template <class... Ts>
    bool is_merge_point(instruction_ref ins, Ts... xs) const
    {
        return different(get_streams(ins, get_inputs()), xs...);
    }
Paul's avatar
Paul committed
192

Paul's avatar
Paul committed
193
194
195
196
197
    template <class... Ts>
    bool is_split_point(instruction_ref ins, Ts... xs) const
    {
        return different(get_streams(ins, get_outputs()), xs...);
    }
198

Paul's avatar
Paul committed
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
    std::vector<instruction_ref> get_recorded_instructions(instruction_ref start)
    {
        std::vector<instruction_ref> result;
        std::unordered_map<std::size_t, instruction_ref> m;
        fix([&](auto self, auto ins) {
            for(auto i : ins->inputs())
            {
                if(iweights.at(i) == 0)
                {
                    self(i);
                    continue;
                }
                auto stream = get_stream(i);
                if (m.count(stream) == 0)
                    m[stream] = i;
                else
                    m[stream] = std::min(m[stream], i, by(std::less<>{}, [&](auto x) { return std::distance(x, start); }));
            }
        })(start);
        std::transform(m.begin(), m.end(), std::back_inserter(result), [](auto&& p) { return p.second; });
        return result;
    }

222
223
    std::vector<std::size_t> wait_for(instruction_ref ins) const
    {
Paul's avatar
Paul committed
224
        std::vector<std::size_t> result;
Paul's avatar
Paul committed
225
        get_streams(ins, get_inputs())([&](auto s) {
Paul's avatar
Paul committed
226
227
228
229
            result.push_back(s);
            return true;
        });
        // Remove duplicates
Paul's avatar
Paul committed
230
231
        std::sort(result.begin(), result.end());
        result.erase(std::unique(result.begin(), result.end()), result.end());
Paul's avatar
Paul committed
232
        // Remove the merged stream
Paul's avatar
Paul committed
233
        auto it = std::find(result.begin(), result.end(), get_stream(ins));
Paul's avatar
Paul committed
234
        if(it != result.end())
Paul's avatar
Paul committed
235
            result.erase(it);
Paul's avatar
Paul committed
236
        return result;
237
    }
Paul's avatar
Paul committed
238

Paul's avatar
Paul committed
239
240
    std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>>
    find_concurrent_instructions(program& p)
Paul's avatar
Paul committed
241
    {
Paul's avatar
Paul committed
242
        std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>> result;
Paul's avatar
Paul committed
243
244
245
        std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>> split_from;
        for(auto ins : iterator_for(p))
        {
246
            if(iweights[ins] == 0)
Paul's avatar
Paul committed
247
248
249
                continue;
            for(auto&& arg : ins->inputs())
            {
Paul's avatar
Paul committed
250
                if(is_split_point(arg))
Paul's avatar
Paul committed
251
252
253
254
                    split_from[ins].insert(arg);
                split_from[ins].insert(split_from[arg].begin(), split_from[arg].end());
            }

Paul's avatar
Paul committed
255
            auto stream = get_stream(ins);
Paul's avatar
Paul committed
256
257
258
259
260
261
262
263
264
265
            // if (is_merge_point(ins))
            // {
            //     // post-dominator kills split point.
            //     for(auto& split : split_from[ins])
            //     {
            //         if(strictly_post_dominates(ins, split))
            //             split_from[ins].erase(split);
            //     }
            // }

Paul's avatar
Paul committed
266
267
268
            // Collect concur instructions for each split point.
            for(auto& split : split_from[ins])
            {
Paul's avatar
Paul committed
269
270
                if(result[split].size() <= stream)
                    result[split].resize(stream + 1);
Paul's avatar
Paul committed
271
                result[split][stream].push_back(ins);
Paul's avatar
Paul committed
272
273
            }
        }
Paul's avatar
Paul committed
274
        return result;
Paul's avatar
Paul committed
275
    }
276
277
};

Paul's avatar
Paul committed
278
279
void schedule::apply(program& p) const
{
280
    stream_info si;
Paul's avatar
Paul committed
281
282
283
    auto last = std::prev(p.end());
    si.accumulate_weights(last, model);
    si.assign_streams(p, model.concurrency());
284

Paul's avatar
Paul committed
285
286
    // Topo sort
    fix([&](auto self, auto ins) {
Paul's avatar
Paul committed
287
        auto args = ins->inputs();
Paul's avatar
Paul committed
288
        std::sort(args.begin(), args.end(), by(std::less<>{}, [&](auto x) {
Paul's avatar
Paul committed
289
290
                      return std::make_tuple(si.weights[x], x->inputs().size());
                  }));
Paul's avatar
Paul committed
291
        for(auto i : args)
292
        {
Paul's avatar
Paul committed
293
294
            p.move_instruction(i, p.begin());
            self(i);
295
        }
Paul's avatar
Paul committed
296
    })(last);
Paul's avatar
Paul committed
297

Paul's avatar
Paul committed
298
299
300
301
302
    if(enabled(MIGRAPHX_TRACE_COMPILE{}))
    {
        p.annotate(std::cout, [&](auto ins) {
            std::cout << ":";
            std::cout << " weight=" << si.weights.at(ins);
Paul's avatar
Paul committed
303
304
305
306
307
308
            std::cout << " input={";
            si.get_streams(ins, get_inputs())([&](auto s) {
                std::cout << s << ",";
                return true;
            });
            std::cout << "}";
Paul's avatar
Paul committed
309
            if(si.has_stream(ins))
Paul's avatar
Paul committed
310
311
312
313
314
                std::cout << " stream=" << si.get_stream(ins);
        });
        std::cout << std::endl;
    }

315
    // Schedule instructions
Paul's avatar
Paul committed
316
317
    std::unordered_map<instruction_ref, std::size_t> ins2wait;
    std::size_t wait_id = 0;
Paul's avatar
Paul committed
318
    for(auto ins : iterator_for(p))
319
    {
Paul's avatar
Paul committed
320
        // Only schedule instructions that have a stream
Paul's avatar
Paul committed
321
        if(not si.has_stream(ins))
Paul's avatar
Paul committed
322
            continue;
323
        assert(si.weights[ins] > 0);
Paul's avatar
Paul committed
324
        // Schedule instruction on the stream
Paul's avatar
Paul committed
325
        auto stream = si.get_stream(ins);
Paul's avatar
Paul committed
326
        assert(stream < model.concurrency());
Paul's avatar
Paul committed
327
328
        model.sched(p, ins, stream);
        // Insert wait instructions
Paul's avatar
Paul committed
329
        if(si.is_merge_point(ins, stream))
Paul's avatar
Paul committed
330
        {
Paul's avatar
Paul committed
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
            for(auto i:si.get_recorded_instructions(ins))
            {
                if (not si.has_stream(i))
                    continue;
                if (stream == si.get_stream(i))
                    continue;
                // Create a new event if it hasn't been recorded
                if(ins2wait.count(i) == 0)
                {
                    ins2wait[i] = wait_id;
                    model.record(p, i, wait_id);
                    wait_id++;
                }
                model.wait(p, ins, ins2wait.at(i));
            }
Paul's avatar
Paul committed
346
        }
347
    }
Paul's avatar
Paul committed
348

Paul's avatar
Paul committed
349
350
    // Add memory conflicts
    auto concur_ins = si.find_concurrent_instructions(p);
Paul's avatar
Paul committed
351
    for(auto&& split : concur_ins)
Paul's avatar
Paul committed
352
353
    {
        dfor(split.second.size(), split.second.size())([&](auto i, auto j) {
Paul's avatar
Paul committed
354
            if(i == j)
Paul's avatar
Paul committed
355
                return;
Paul's avatar
Paul committed
356
357
            for(auto ins1 : split.second[i])
            {
Paul's avatar
Paul committed
358
                auto args = split.second[j];
Paul's avatar
Paul committed
359
                args.insert(args.begin(), ins1);
Paul's avatar
Paul committed
360
361
362
363
364

                auto point = std::max_element(args.begin(), args.end(), [&](auto x, auto y) {
                    return std::distance(split.first, x) < std::distance(split.first, y);
                });
                p.insert_instruction(std::next(*point), op::identity{}, args);
Paul's avatar
Paul committed
365
366
367
            }
        });
    }
Paul's avatar
Paul committed
368
369
370
371
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx