Commit da29ed61 authored by Paul's avatar Paul
Browse files

Add some comments

parent acbb2e88
...@@ -81,17 +81,20 @@ struct compile_plan ...@@ -81,17 +81,20 @@ struct compile_plan
context* ctx; context* ctx;
operation preop; operation preop;
instruction_ref ins; instruction_ref ins;
optional<tuning_config> config = nullopt; optional<tuning_config> config = nullopt;
std::vector<compiled_result> results = {}; std::vector<compiled_result> results = {};
void update_config() { config = get_tuning_config(*ctx, ins, preop); } void update_config()
template <class Vector> {
config = get_tuning_config(*ctx, ins, preop);
}
template<class Vector>
void add_compiles(Vector& compiles) void add_compiles(Vector& compiles)
{ {
if(config.has_value()) if (config.has_value())
{ {
const auto& solutions = config.value().solutions; const auto& solutions = config.value().solutions;
results.resize(solutions.size()); results.resize(solutions.size());
for(auto i : range(solutions.size())) for(auto i:range(solutions.size()))
{ {
auto solution = solutions[i]; auto solution = solutions[i];
compiles.emplace_back([=] { compiles.emplace_back([=] {
...@@ -131,6 +134,7 @@ void par_compile(std::size_t n, F f) ...@@ -131,6 +134,7 @@ void par_compile(std::size_t n, F f)
void compile_ops::apply(module& m) const void compile_ops::apply(module& m) const
{ {
std::vector<compile_plan> cps; std::vector<compile_plan> cps;
// Find all precompile ops
for(auto ins : iterator_for(m)) for(auto ins : iterator_for(m))
{ {
if(ins->name() != "gpu::precompile_op") if(ins->name() != "gpu::precompile_op")
...@@ -138,15 +142,20 @@ void compile_ops::apply(module& m) const ...@@ -138,15 +142,20 @@ void compile_ops::apply(module& m) const
operation preop = any_cast<precompile_op>(ins->get_operator()).op; operation preop = any_cast<precompile_op>(ins->get_operator()).op;
cps.push_back({ctx, preop, ins}); cps.push_back({ctx, preop, ins});
} }
par_compile(cps.size(), [&](auto i) { cps[i].update_config(); }); // Get the tuning configs for all ops
par_compile(cps.size(), [&](auto i) {
cps[i].update_config();
});
// Compile everything in parallel
std::vector<std::function<void()>> compiles; std::vector<std::function<void()>> compiles;
for(auto& cp : cps) for(auto& cp:cps)
{ {
cp.add_compiles(compiles); cp.add_compiles(compiles);
} }
par_compile(compiles.size(), [&](auto i) { compiles[i](); }); par_compile(compiles.size(), [&](auto i) { compiles[i](); });
for(const auto& cp : cps) // Replace and/or benchmark
for(const auto& cp:cps)
{ {
cp.replace(m); cp.replace(m);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment