schedule.cpp 8.4 KB
Newer Older
Paul's avatar
Paul committed
1
2
3
#include <migraphx/schedule.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
Paul's avatar
Paul committed
4
#include <migraphx/operators.hpp>
Paul's avatar
Paul committed
5
#include <migraphx/iterator_for.hpp>
Paul's avatar
Paul committed
6
#include <migraphx/dfor.hpp>
Paul's avatar
Paul committed
7
8
9
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <unordered_map>
Paul's avatar
Paul committed
10
#include <unordered_set>
11
#include <set>
Paul's avatar
Paul committed
12
13
14
15

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

Paul's avatar
Paul committed
16
17
18
19
20
bool stream_free(instruction_ref ins)
{
    return is_context_free(ins->get_operator()) or ins->get_operator().name().front() == '@';
}

21
22
23
struct stream_info
{
    std::unordered_map<instruction_ref, std::size_t> ins2stream;
Paul's avatar
Paul committed
24
25
26
27
28
29
30
    std::unordered_map<instruction_ref, std::size_t> weights;

    void accumulate_weights(instruction_ref last, const schedule_model& model)
    {
        fix<std::size_t>([&](auto self, auto ins) -> std::size_t {
            if(weights.count(ins) == 0)
            {
Paul's avatar
Paul committed
31
32
33
34
                std::size_t weight = 0;
                auto&& op = ins->get_operator();
                if(not is_context_free(op) and op.name()[0] != '@')
                    weight = model.weight(op);
Paul's avatar
Paul committed
35
36
37
                weights[ins] =
                    std::accumulate(ins->inputs().begin(),
                                    ins->inputs().end(),
Paul's avatar
Paul committed
38
                                    weight,
Paul's avatar
Paul committed
39
40
41
42
43
44
45
46
47
48
49
50
                                    [&](std::size_t w, instruction_ref i) { return w + self(i); });
            }
            return weights[ins];
        })(last);
    }

    void assign_streams(program& p, std::size_t streams)
    {
        const std::size_t min_partition_threshold = 2;
        for(std::size_t stream = 0; stream < streams; stream++)
        {
            fix([&](auto self, auto ins) {
Paul's avatar
Paul committed
51
                // If weight is zero then stop
Paul's avatar
Paul committed
52
                if(weights[ins] == 0)
Paul's avatar
Paul committed
53
54
                    return;
                // Only assign streams if not already assigned
Paul's avatar
Paul committed
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
                if(not has_stream(ins))
                    set_stream(ins, stream);
                instruction_ref child = p.end();
                std::size_t w         = 0;
                for(auto i : ins->inputs())
                {
                    const auto weight = weights[i];
                    // Skip instruction that already have stream assignment or too low of weights
                    if(has_stream(i) or weight <= min_partition_threshold)
                    {
                        self(i);
                    }
                    // Accumulate the max weight
                    else if(weight > w)
                    {
                        child = i;
                        w     = weight;
                    }
                }
                if(child != p.end())
                    self(child);
            })(std::prev(p.end()));
        }
        // Assign remaining instructions
        for(auto ins : iterator_for(p))
        {
            if(has_stream(ins))
                continue;
Paul's avatar
Paul committed
83
            if(weights[ins] == 0)
Paul's avatar
Paul committed
84
                continue;
Paul's avatar
Paul committed
85
86
87
            set_stream(ins, streams - 1);
        }
    }
88

Paul's avatar
Paul committed
89
    void set_stream(instruction_ref ins, std::size_t n) { ins2stream[ins] = n; }
90

Paul's avatar
Paul committed
91
    std::size_t get_stream(instruction_ref ins) const { return ins2stream.at(ins); }
92

Paul's avatar
Paul committed
93
    bool has_stream(instruction_ref ins) const { return ins2stream.count(ins) > 0; }
94

Paul's avatar
Paul committed
95
    bool different(const std::vector<std::size_t>& v) const
96
    {
Paul's avatar
Paul committed
97
        if(v.size() < 2)
98
            return false;
Paul's avatar
Paul committed
99
        return not std::all_of(v.begin(), v.end(), [&](std::size_t x) { return x == v.front(); });
100
101
    }

Paul's avatar
Paul committed
102
103
    template<class F>
    bool different(F f) const
Paul's avatar
Paul committed
104
    {
Paul's avatar
Paul committed
105
106
107
108
109
        bool first = true;
        std::size_t stream = 0;
        bool result = false;
        f([&](auto s) {
            if (not first and s != stream) 
Paul's avatar
Paul committed
110
            {
Paul's avatar
Paul committed
111
112
                result = true;
                return false;
Paul's avatar
Paul committed
113
            }
Paul's avatar
Paul committed
114
115
116
117
            stream = s;
            first = false;
            return true;
        });
Paul's avatar
Paul committed
118
119
        return result;
    }
120

Paul's avatar
Paul committed
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
    template <class Selector>
    auto get_streams(instruction_ref start, Selector select) const
    {
        return [=](auto f) {
            return fix<bool>([&](auto self, auto ins) {
                for(auto i : select(ins))
                {
                    if(weights.at(i) == 0)
                    {
                        if (not self(i))
                            return false;
                    }
                    else
                    {
                        if (not f(get_stream(i)))
                            return false;
                    }
                }
                return true;
            })(start);
        };
    }

    auto get_input_streams(instruction_ref ins) const
Paul's avatar
Paul committed
145
    {
Paul's avatar
Paul committed
146
        return get_streams(ins, [](auto i) { return i->inputs(); });
Paul's avatar
Paul committed
147
148
    }

Paul's avatar
Paul committed
149
    auto get_output_streams(instruction_ref ins) const
Paul's avatar
Paul committed
150
    {
Paul's avatar
Paul committed
151
        return get_streams(ins, [](auto i) { return i->outputs(); });
Paul's avatar
Paul committed
152
153
    }

Paul's avatar
Paul committed
154
    bool is_merge_point(instruction_ref ins) const { return different(get_input_streams(ins)); }
Paul's avatar
Paul committed
155

Paul's avatar
Paul committed
156
    bool is_split_point(instruction_ref ins) const { return different(get_output_streams(ins)); }
157
158
159

    std::vector<std::size_t> wait_for(instruction_ref ins) const
    {
Paul's avatar
Paul committed
160
161
162
163
164
165
        std::vector<std::size_t> result;
        get_input_streams(ins)([&](auto s) {
            result.push_back(s);
            return true;
        });
        // Remove duplicates
Paul's avatar
Paul committed
166
167
        std::sort(result.begin(), result.end());
        result.erase(std::unique(result.begin(), result.end()), result.end());
Paul's avatar
Paul committed
168
169
        // Remove the merged stream
        result.erase(std::find(result.begin(), result.end(), get_stream(ins)));
Paul's avatar
Paul committed
170
        return result;
171
    }
Paul's avatar
Paul committed
172

Paul's avatar
Paul committed
173
    std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>> find_concurrent_instructions(program& p)
Paul's avatar
Paul committed
174
    {
Paul's avatar
Paul committed
175
        std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>> result;
Paul's avatar
Paul committed
176
177
178
        std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>> split_from;
        for(auto ins : iterator_for(p))
        {
Paul's avatar
Paul committed
179
            if(weights[ins] == 0)
Paul's avatar
Paul committed
180
181
182
                continue;
            for(auto&& arg : ins->inputs())
            {
Paul's avatar
Paul committed
183
                if(is_split_point(arg))
Paul's avatar
Paul committed
184
185
186
187
                    split_from[ins].insert(arg);
                split_from[ins].insert(split_from[arg].begin(), split_from[arg].end());
            }

Paul's avatar
Paul committed
188
189
190
191
192
193
194
195
196
197
            // if (is_merge_point(ins))
            // {
            //     // post-dominator kills split point.
            //     for(auto& split : split_from[ins])
            //     {
            //         if(strictly_post_dominates(ins, split))
            //             split_from[ins].erase(split);
            //     }
            // }

Paul's avatar
Paul committed
198
199
200
            // Collect concur instructions for each split point.
            for(auto& split : split_from[ins])
            {
Paul's avatar
Paul committed
201
202
203
204
                auto stream = get_stream(ins);
                if (result[split].size() <= stream)
                    result[split].resize(stream+1);
                result[split][stream].push_back(ins);
Paul's avatar
Paul committed
205
206
            }
        }
Paul's avatar
Paul committed
207
        return result;
Paul's avatar
Paul committed
208
    }
209
210
};

Paul's avatar
Paul committed
211
212
void schedule::apply(program& p) const
{
213
    stream_info si;
Paul's avatar
Paul committed
214
215
216
    auto last = std::prev(p.end());
    si.accumulate_weights(last, model);
    si.assign_streams(p, model.concurrency());
217

Paul's avatar
Paul committed
218
219
    // Topo sort
    fix([&](auto self, auto ins) {
Paul's avatar
Paul committed
220
        for(auto i : ins->inputs())
Paul's avatar
Paul committed
221
            p.move_instruction(i, p.begin());
Paul's avatar
Paul committed
222
        for(auto i : ins->inputs())
Paul's avatar
Paul committed
223
224
            self(i);
    })(last);
Paul's avatar
Paul committed
225

226
    // Schedule instructions
Paul's avatar
Paul committed
227
    for(auto ins : iterator_for(p))
228
    {
Paul's avatar
Paul committed
229
        // Only schedule instructions that have a stream
Paul's avatar
Paul committed
230
        if(not si.has_stream(ins))
Paul's avatar
Paul committed
231
            continue;
Paul's avatar
Paul committed
232
        if(si.is_merge_point(ins))
233
            model.wait(p, ins, si.get_stream(ins), si.wait_for(ins));
Paul's avatar
Paul committed
234
235
        else
            model.schedule_instruction(p, ins, si.get_stream(ins));
236
    }
Paul's avatar
Paul committed
237

Paul's avatar
Paul committed
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
    // Add memory conflicts
    auto concur_ins = si.find_concurrent_instructions(p);
    for(auto&& split:concur_ins)
    {
        dfor(split.second.size(), split.second.size())([&](auto i, auto j) {
            if (i == j)
                return;
            for(auto ins1:split.second[i]) 
            {   
                auto idx1 = std::distance(split.first, ins1);
                for(auto ins2:split.second[j])
                {
                    if (ins1 == ins2)
                        continue;
                    auto idx2 = std::distance(split.first, ins2);
                    auto point = idx1 > idx2 ? ins1 : ins2;
                    p.insert_instruction(std::next(point), op::identity{}, ins1, ins2);
                }
            }
        });
    }
Paul's avatar
Paul committed
259
260
261
262
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx