Unverified commit add6fb3b authored by kahmed10, committed by GitHub

Create allocate op and replace_allocate pass (#1183)



* add allocate op header

* formatting

* add replace_allocate pass

* formatting

* move output param to remove_allocate pass

* formatting

* fix bugs in replace_allocate pass

* formatting

* fix verify if tests

* formatting

* move if op logic

* formatting

* cleanup lowering

* cleanup lowering

* formatting

* fix tidy

* formatting

* fix tidy

* add cpu allocate check

* formatting

* change cpu allocate in pass

* formatting

* add some tests for replace_allocate pass

* formatting

* pass by ref

* fix run_pass

* formatting

* update variable name for module

* update dce to use contains() and fix tidy

* formatting

* update cppcheck

* add if test

* formatting

* add if test

* rename var to mod_output_names

* formatting

* remove conditional

* update allocate op and tests

* formatting

* update replace_allocate tests

* update create_output_names() and conditional in replace_allocate

* formatting

* remove extra variable in replace_allocate

* update tools script for allocation_model
Co-authored-by: Umang Yadav <29876643+umangyadav@users.noreply.github.com>
Co-authored-by: Chris Austen <causten@users.noreply.github.com>
Co-authored-by: Paul Fultz II <pfultz2@yahoo.com>
parent f5980619
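
In short: lowering now emits a target-agnostic "allocate" instruction, and the new replace_allocate pass specializes it afterwards, either into the allocation model's op or, when the model reports needs_out_params() and offload copy is disabled, into a module output parameter. A minimal sketch of driving the pass on a module, mirroring the GPU test updates further down in this diff:

#include <migraphx/module.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/gpu/allocation_model.hpp>

// Specialize generic "allocate" instructions for the GPU target.
// gpu_allocation_model reports needs_out_params() == true, so allocations
// aliased to the module outputs become output parameters; the remaining
// ones become the model's allocation op.
void specialize_allocations(migraphx::module& m)
{
    migraphx::run_passes(m,
                         {migraphx::replace_allocate{migraphx::gpu::gpu_allocation_model{}},
                          migraphx::dead_code_elimination{}});
}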
@@ -54,6 +54,7 @@ add_library(migraphx
     reduce_dims.cpp
     register_op.cpp
     register_target.cpp
+    replace_allocate.cpp
     simplify_qdq.cpp
     rewrite_batchnorm.cpp
     rewrite_pooling.cpp
@@ -80,6 +81,7 @@ register_migraphx_ops(
     acosh
     acos
     add
+    allocate
     argmax
     argmin
     asinh
@@ -4,6 +4,7 @@
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/functional.hpp>
 #include <migraphx/ranges.hpp>
+#include <migraphx/stringutils.hpp>
 #include <unordered_set>

 namespace migraphx {
@@ -24,9 +25,10 @@ void dead_code_elimination::apply(module& m) const
         // Skip the last instruction
         if(i == last)
             break;
-        // Skip instruction with empty shape as output unless its a builtin or undefined or identity
+        // Skip instruction with empty shape as output unless it's a builtin, undefined, identity,
+        // or allocate
         if(i->get_shape().elements() == 0 and i->name().front() != '@' and
-           i->name() != "undefined" and i->name() != "identity")
+           not contains({"undefined", "identity", "allocate"}, i->name()))
             continue;
         assert(std::distance(m.begin(), i) <= std::distance(m.begin(), last));
         std::unordered_set<instruction_ref> visited;
@@ -28,6 +28,8 @@ struct allocation_model
     operation allocate(const shape& s) const;
     /// Create a preallocated operator for the given shape
     operation preallocate(const shape& s, const std::string& id) const;
+    /// Check if outputs are to be inserted
+    bool needs_out_params() const;
 };

 #else
@@ -45,6 +47,8 @@ struct allocation_model
     operation allocate(const shape& s) const;
     //
     operation preallocate(const shape& s, std::string id) const;
+    //
+    bool needs_out_params() const;
 };

 #else
@@ -136,6 +140,12 @@ struct allocation_model
         return (*this).private_detail_te_get_handle().preallocate(s, std::move(id));
     }

+    bool needs_out_params() const
+    {
+        assert((*this).private_detail_te_handle_mem_var);
+        return (*this).private_detail_te_get_handle().needs_out_params();
+    }
+
     friend bool is_shared(const allocation_model& private_detail_x,
                           const allocation_model& private_detail_y)
     {
@@ -154,6 +164,7 @@ struct allocation_model
         virtual std::string copy() const = 0;
         virtual operation allocate(const shape& s) const = 0;
         virtual operation preallocate(const shape& s, std::string id) const = 0;
+        virtual bool needs_out_params() const = 0;
     };

     template <typename PrivateDetailTypeErasedT>
@@ -200,6 +211,12 @@ struct allocation_model
             return private_detail_te_value.preallocate(s, std::move(id));
         }

+        bool needs_out_params() const override
+        {
+            return private_detail_te_value.needs_out_params();
+        }
+
         PrivateDetailTypeErasedT private_detail_te_value;
     };
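
Because allocation_model is type-erased, any target can supply its own model, and the interface now also requires needs_out_params(). A minimal conforming model as a sketch, patterned on the test models near the end of this diff; "my::allocate" is a hypothetical op name used only for illustration and would have to be a registered operator for make_op to succeed:

#include <string>
#include <migraphx/make_op.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/serialize.hpp>

struct my_allocation_model
{
    std::string name() const { return "my::allocate"; } // hypothetical op name
    std::string copy() const { return {}; }             // no copy op in this sketch
    migraphx::operation allocate(const migraphx::shape& s) const
    {
        // Same serialized-shape attribute encoding used throughout this diff
        return migraphx::make_op(name(), {{"shape", migraphx::to_value(s)}});
    }
    migraphx::operation preallocate(const migraphx::shape&, const std::string&) const
    {
        return {};
    }
    // false: keep explicit allocations; true: let replace_allocate turn
    // output allocations into module output parameters
    bool needs_out_params() const { return false; }
};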
#ifndef MIGRAPHX_GUARD_OPERATORS_ALLOCATE_HPP
#define MIGRAPHX_GUARD_OPERATORS_ALLOCATE_HPP
#include <array>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
#include <migraphx/shape.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct allocate
{
shape s{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.s, "shape"));
}
std::string name() const { return "allocate"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
migraphx::check_shapes{inputs, *this}.has(0);
return s;
}
argument compute(const shape& output_shape, const std::vector<argument>&) const
{
return {output_shape};
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
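
The op carries its output shape as an attribute, accepts no inputs, and computes an uninitialized buffer of that shape. A small usage sketch of the definitions above:

#include <cassert>
#include <migraphx/make_op.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/serialize.hpp>

void allocate_op_sketch()
{
    migraphx::shape s{migraphx::shape::float_type, {5}};
    // The shape is passed as a serialized attribute and reflected into op::allocate::s
    auto op = migraphx::make_op("allocate", {{"shape", migraphx::to_value(s)}});
    assert(op.compute_shape({}) == s); // has(0): the op accepts no inputs
}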
#ifndef MIGRAPHX_GUARD_RTGLIB_REPLACE_ALLOCATE_HPP
#define MIGRAPHX_GUARD_RTGLIB_REPLACE_ALLOCATE_HPP
#include <migraphx/config.hpp>
#include <migraphx/allocation_model.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;
struct replace_allocate
{
allocation_model model;
bool offload_copy = false;
std::string name() const { return "replace_allocate"; }
void apply(module& m) const;
};
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#include <migraphx/replace_allocate.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/op/allocate.hpp>
#include <map>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
std::unordered_map<instruction_ref, std::string> create_output_names(const module& mod)
{
std::unordered_map<instruction_ref, std::string> mod_output_names{};
auto last = std::prev(mod.end());
if(last->name() == "@return")
{
const auto& prog_outputs = last->inputs();
std::vector<instruction_ref> outputs_alias(prog_outputs.size());
std::transform(prog_outputs.begin(),
prog_outputs.end(),
outputs_alias.begin(),
[](const auto& i) { return instruction::get_output_alias(i); });
std::size_t index = 0;
for(auto ins : outputs_alias)
{
mod_output_names[ins] = mod.name() + ":#output_" + std::to_string(index++);
}
}
else
{
auto ins = instruction::get_output_alias(last);
mod_output_names[ins] = "output";
}
return mod_output_names;
}
void insert_submod_allocations(instruction_ref ins, module& mod, const allocation_model& model)
{
std::vector<instruction_ref> inputs = ins->inputs();
std::vector<module_ref> mod_args = ins->module_inputs();
std::map<std::string, shape> name_shapes;
for(const auto& smod : mod_args)
{
auto ps = smod->get_parameter_shapes();
name_shapes.insert(ps.begin(), ps.end());
}
for(auto& pn : name_shapes)
{
const auto& s = pn.second;
instruction_ref output{};
output = mod.insert_instruction(ins, model.allocate(s));
inputs.push_back(output);
}
mod.replace_instruction(ins, ins->get_operator(), inputs, mod_args);
}
void replace_allocate::apply(module& m) const
{
auto mod_output_names = create_output_names(m);
bool main_offload_copy = m.name() == "main" ? this->offload_copy : false;
for(auto ins : iterator_for(m))
{
auto op = ins->get_operator();
auto op_name = op.name();
// check if allocations from submodules need to be inserted
// for now, only the "if" operator is affected
if(op_name == "if")
{
insert_submod_allocations(ins, m, model);
continue;
}
if(op_name != "allocate")
continue;
auto s = ins->get_shape();
if(not main_offload_copy and model.needs_out_params() and contains(mod_output_names, ins))
{
auto out_param = m.add_parameter(mod_output_names[ins], s);
m.replace_instruction(ins, out_param);
continue;
}
m.replace_instruction(
ins,
m.insert_instruction(ins,
make_op(model.name(), migraphx::value{{"shape", to_value(s)}})));
}
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
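
Illustrative before/after for the output-parameter path (a hand-written IR sketch, not exact printer output; some_op stands in for any op writing into its allocated input): with a model where needs_out_params() is true and offload copy disabled, an allocation aliased to the module result becomes a parameter, so results are written directly into the caller's buffer.

// Before the pass (main module):
//   x = @param:x             -> float_type, {5}
//   a = allocate[shape=...]  -> float_type, {5}
//   r = some_op(x, a)        -> float_type, {5}   // last instruction
//
// After replace_allocate (needs_out_params() == true, offload_copy == false):
//   x      = @param:x        -> float_type, {5}
//   output = @param:output   -> float_type, {5}   // new out parameter
//   r      = some_op(x, output)
//
// With an @return of several outputs, the parameters are instead named
// "main:#output_0", "main:#output_1", ... (see create_output_names above).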
@@ -15,6 +15,7 @@ struct cpu_allocation_model
     std::string copy() const;
     operation allocate(const shape& s) const;
     operation preallocate(const shape& s, const std::string& id) const;
+    bool needs_out_params() const { return false; }
 };

 } // namespace cpu
@@ -291,30 +291,8 @@ struct cpu_apply
 {
     module* modl;
     std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
-    std::unordered_map<instruction_ref, std::string> prog_output_names{};
     instruction_ref last{};

-    void create_output_names()
-    {
-        this->last = instruction::get_output_alias(std::prev(modl->end()));
-        if(this->last->name() == "@return")
-        {
-            const auto& prog_outputs = last->inputs();
-            std::vector<instruction_ref> outputs_alias(prog_outputs.size());
-
-            std::transform(prog_outputs.begin(),
-                           prog_outputs.end(),
-                           outputs_alias.begin(),
-                           [](const auto& i) { return instruction::get_output_alias(i); });
-
-            std::size_t index = 0;
-            for(auto ins : outputs_alias)
-            {
-                prog_output_names[ins] = modl->name() + ":#output_" + std::to_string(index++);
-            }
-        }
-    }
-
     void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true)
     {
         apply_map.emplace(op_name, [=](instruction_ref ins) {
@@ -360,7 +338,6 @@ struct cpu_apply
     void init()
     {
-        create_output_names();
         extend_dnnl_algos("dnnl::binary",
                           {
                               {"add", "binary_add"},
@@ -490,7 +467,7 @@ struct cpu_apply
     instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
     {
-        return modl->insert_instruction(ins, make_op("cpu::allocate", {{"shape", to_value(s)}}));
+        return modl->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}}));
     }
 };
@@ -13,6 +13,7 @@
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/propagate_constant.hpp>
 #include <migraphx/register_target.hpp>
+#include <migraphx/replace_allocate.hpp>
 #include <migraphx/rewrite_batchnorm.hpp>
 #include <migraphx/rewrite_pooling.hpp>
 #include <migraphx/rewrite_quantization.hpp>
@@ -70,6 +71,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         lowering{},
         eliminate_contiguous{"dnnl::reorder"},
         dead_code_elimination{},
+        replace_allocate{cpu_allocation_model{}},
+        dead_code_elimination{},
         adjust_allocation{cpu_allocation_model{}},
         dead_code_elimination{},
         fuse_ops{&ctx},
@@ -16,6 +16,7 @@ struct gpu_allocation_model
     std::string copy() const;
     operation allocate(const shape& s) const;
     operation preallocate(const shape& s, const std::string& id) const;
+    bool needs_out_params() const { return true; }
 };

 } // namespace gpu
@@ -58,7 +58,6 @@ struct miopen_apply
     const lowering* pass = nullptr;
     std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
     instruction_ref last{};
-    std::unordered_map<instruction_ref, std::string> prog_output_names{};
     bool offload_copy = false;
     bool int8_x4_format = true;
     bool compute_fp32 = false;
@@ -77,27 +76,6 @@
         (void)i;
     }

-    void create_output_names()
-    {
-        this->last = instruction::get_output_alias(std::prev(mod->end()));
-        if(this->last->name() == "@return")
-        {
-            const auto& prog_outputs = last->inputs();
-            std::vector<instruction_ref> outputs_alias(prog_outputs.size());
-
-            std::transform(prog_outputs.begin(),
-                           prog_outputs.end(),
-                           outputs_alias.begin(),
-                           [](const auto& i) { return instruction::get_output_alias(i); });
-
-            std::size_t index = 0;
-            for(auto ins : outputs_alias)
-            {
-                prog_output_names[ins] = mod->name() + ":#output_" + std::to_string(index++);
-            }
-        }
-    }
-
     const std::unordered_set<std::string>& get_rocblas_fp32_archs()
     {
         static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
@@ -120,7 +98,6 @@
 #endif
         offload_copy = (mod->name() == "main") ? pass->offload_copy : false;
-        create_output_names();

         add_generic_op("acos");
         add_generic_op("acosh");
@@ -201,7 +178,7 @@
         add_quant_convolution_op();
     }

-    void copy_params()
+    void copy_params() const
     {
         if(not offload_copy)
             return;
@@ -261,7 +238,7 @@
         copy_params();
     }

-    instruction_ref insert_precompile_op(instruction_ref ins)
+    instruction_ref insert_precompile_op(instruction_ref ins) const
     {
         auto output = insert_allocation(ins, ins->get_shape());
         std::vector<instruction_ref> refs = ins->inputs();
@@ -274,28 +251,9 @@
                                         ins->module_inputs());
     }

-    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
+    instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
     {
-        // Instruction's output is an input of the ret instruction
-        if(offload_copy)
-        {
-            auto result = mod->insert_instruction(
-                ins, make_op("hip::allocate", {{"shape", to_value(s)}, {"tag", std::move(tag)}}));
-            return result;
-        }
-
-        auto ins_alias = instruction::get_output_alias(ins);
-        if(last->name() == "@return" and tag.empty() and prog_output_names.count(ins_alias) > 0)
-        {
-            return mod->add_parameter(prog_output_names[ins_alias], s);
-        }
-        else if(ins == last and tag.empty())
-        {
-            return mod->add_parameter("output", s);
-        }
-
-        return mod->insert_instruction(
-            ins, make_op("hip::allocate", {{"shape", to_value(s)}, {"tag", std::move(tag)}}));
+        return mod->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}}));
     }

     void add_convolution_op()
@@ -306,7 +264,7 @@
             auto conv = miopen_convolution{op, make_conv(op)};
             auto ws = conv.find(get_context(), ins->get_shape(), to_shapes(ins->inputs()));

-            auto workspace = insert_allocation(ins, ws, "workspace");
+            auto workspace = insert_allocation(ins, ws);
             auto output = insert_allocation(ins, ins->get_shape());

             return mod->replace_instruction(
@@ -322,7 +280,7 @@
             auto conv = miopen_deconvolution{op, make_deconv(op)};
             auto ws = conv.compile(get_context(), ins->get_shape(), to_shapes(ins->inputs()));

-            auto workspace = insert_allocation(ins, ws, "workspace");
+            auto workspace = insert_allocation(ins, ws);
             auto output = insert_allocation(ins, ins->get_shape());

             return mod->replace_instruction(
@@ -383,7 +341,7 @@
             }
             auto args = ins->inputs();

-            auto workspace = insert_allocation(ins, ws, "workspace");
+            auto workspace = insert_allocation(ins, ws);
             auto output = insert_allocation(ins, ins->get_shape());

             return mod->replace_instruction(ins, conv, args[0], args[1], workspace, output);
@@ -480,33 +438,7 @@
             auto sync_cond = mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_cond);
             inputs.front() = sync_cond;

-            std::vector<module_ref> mod_args = ins->module_inputs();
-            std::map<std::string, shape> name_shapes;
-            for(const auto& smod : mod_args)
-            {
-                auto ps = smod->get_parameter_shapes();
-                name_shapes.insert(ps.begin(), ps.end());
-            }
-            bool ins_output_allocated = false;
-            for(auto& pn : name_shapes)
-            {
-                const auto& s = pn.second;
-                instruction_ref output{};
-                if(s == ins->get_shape() and not ins_output_allocated)
-                {
-                    output = insert_allocation(ins, s);
-                    ins_output_allocated = true;
-                }
-                else
-                {
-                    output = mod->insert_instruction(
-                        ins, make_op("hip::allocate", {{"shape", to_value(s)}}));
-                }
-                inputs.push_back(output);
-            }
-            return mod->replace_instruction(ins, ins->get_operator(), inputs, mod_args);
+            return mod->replace_instruction(ins, ins->get_operator(), inputs, ins->module_inputs());
         });
     }

@@ -525,20 +457,17 @@
             inputs.at(0) = synced_max_iter;
             inputs.at(1) = cpu_cond;
             auto copy_inputs = inputs;
-            std::transform(
-                copy_inputs.begin(), copy_inputs.end(), std::back_inserter(inputs), [&](auto in) {
-                    return mod->insert_instruction(
-                        ins, make_op("hip::allocate", {{"shape", to_value(in->get_shape())}}));
-                });
+            std::transform(copy_inputs.begin(),
+                           copy_inputs.end(),
+                           std::back_inserter(inputs),
+                           [&](auto in) { return insert_allocation(ins, in->get_shape()); });

             auto mod_args = ins->module_inputs();
             auto output = insert_allocation(ins, ins->get_shape());

             const auto* sub_mod = mod_args.front();
-            auto cond_out = mod->insert_instruction(
-                ins,
-                make_op("hip::allocate",
-                        {{"shape", to_value(sub_mod->get_output_shapes().front())}}));
+            auto cond_out = insert_allocation(ins, sub_mod->get_output_shapes().front());

             // add cond and mod outputs to the argument list
             inputs.push_back(cond_out);
             inputs.push_back(output);
@@ -17,6 +17,7 @@
 #include <migraphx/preallocate_param.hpp>
 #include <migraphx/propagate_constant.hpp>
 #include <migraphx/register_target.hpp>
+#include <migraphx/replace_allocate.hpp>
 #include <migraphx/rewrite_batchnorm.hpp>
 #include <migraphx/rewrite_pooling.hpp>
 #include <migraphx/rewrite_quantization.hpp>
@@ -109,6 +110,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         lowering{&ctx, options.offload_copy},
         eliminate_contiguous{"gpu::contiguous"},
         dead_code_elimination{},
+        replace_allocate{gpu_allocation_model{}, options.offload_copy},
+        dead_code_elimination{},
         eliminate_concat{concat_gpu_optimization{}},
         dead_code_elimination{},
         pack_int8_args{},
@@ -6,6 +6,7 @@
 #include <migraphx/auto_contiguous.hpp>
 #include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/eliminate_contiguous.hpp>
+#include <migraphx/replace_allocate.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/op/add.hpp>
@@ -20,12 +21,15 @@
 void run_lowering(migraphx::program& p, bool offload_copy = false)
 {
     auto ctx = migraphx::gpu::context{};
-    migraphx::run_passes(*p.get_main_module(),
-                         {migraphx::auto_contiguous{},
-                          migraphx::gpu::lowering{&ctx, offload_copy},
-                          migraphx::dead_code_elimination{},
-                          migraphx::eliminate_contiguous{"gpu::contiguous"},
-                          migraphx::dead_code_elimination{}});
+    migraphx::run_passes(
+        *p.get_main_module(),
+        {migraphx::auto_contiguous{},
+         migraphx::gpu::lowering{&ctx, offload_copy},
+         migraphx::dead_code_elimination{},
+         migraphx::eliminate_contiguous{"gpu::contiguous"},
+         migraphx::dead_code_elimination{},
+         migraphx::replace_allocate{migraphx::gpu::gpu_allocation_model{}, offload_copy},
+         migraphx::dead_code_elimination{}});
 }

 TEST_CASE(tanh_shape)
@@ -2,13 +2,14 @@
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/lowering.hpp>
 #include <migraphx/gpu/target.hpp>
+#include <migraphx/gpu/allocation_model.hpp>
 #include <migraphx/apply_alpha_beta.hpp>
 #include <migraphx/adjust_allocation.hpp>
 #include <migraphx/gpu/pack_int8_args.hpp>
 #include <migraphx/gpu/rocblas.hpp>
 #include <migraphx/auto_contiguous.hpp>
 #include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/eliminate_contiguous.hpp>
+#include <migraphx/replace_allocate.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/pass_manager.hpp>
@@ -22,6 +23,8 @@ void run_passes(migraphx::module& m)
         {migraphx::auto_contiguous{},
          migraphx::gpu::lowering{&ctx, false},
          migraphx::dead_code_elimination{},
+         migraphx::replace_allocate{migraphx::gpu::gpu_allocation_model{}},
+         migraphx::dead_code_elimination{},
          migraphx::gpu::pack_int8_args{},
          migraphx::dead_code_elimination{}});
 }
#include <migraphx/allocation_model.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/register_op.hpp>
#include <basic_ops.hpp>
#include <test.hpp>
struct allocate_no_out : migraphx::auto_register_op<allocate_no_out>
{
migraphx::shape s{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::pack(f(self.s, "shape"));
}
std::string name() const { return "allocate_no_out"; }
migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
{
migraphx::check_shapes{inputs, *this}.has(0);
return s;
}
migraphx::argument compute(migraphx::context&,
const migraphx::shape& output_shape,
const std::vector<migraphx::argument>&) const
{
return {output_shape};
}
};
struct allocate_with_out : migraphx::auto_register_op<allocate_with_out>
{
migraphx::shape s{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::pack(f(self.s, "shape"));
}
std::string name() const { return "allocate_with_out"; }
migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
{
migraphx::check_shapes{inputs, *this}.has(0);
return s;
}
migraphx::argument compute(migraphx::context&,
const migraphx::shape& output_shape,
const std::vector<migraphx::argument>&) const
{
return {output_shape};
}
};
// allocation model that has no out params
struct allocation_no_out_model
{
std::string name() const { return "allocate_no_out"; }
migraphx::operation allocate(const migraphx::shape& s) const
{
return migraphx::make_op(name(), {{"shape", to_value(s)}});
}
migraphx::operation preallocate(const migraphx::shape&, const std::string&) const { return {}; }
std::string copy() const { return {}; }
bool needs_out_params() const { return false; }
};
// allocation model with out params
struct allocation_with_out_model
{
std::string name() const { return "allocate_with_out"; }
migraphx::operation allocate(const migraphx::shape& s) const
{
return migraphx::make_op(name(), {{"shape", to_value(s)}});
}
migraphx::operation preallocate(const migraphx::shape&, const std::string&) const { return {}; }
std::string copy() const { return {}; }
bool needs_out_params() const { return true; }
};
void run_pass(migraphx::module& m, migraphx::allocation_model model, bool offload_copy = false)
{
migraphx::run_passes(m,
{migraphx::replace_allocate{std::move(model), offload_copy},
migraphx::dead_code_elimination{}});
}
void run_pass(migraphx::program& p, migraphx::allocation_model model, bool offload_copy = false)
{
migraphx::run_passes(p,
{migraphx::replace_allocate{std::move(model), offload_copy},
migraphx::dead_code_elimination{}});
}
migraphx::module create_simple_program()
{
migraphx::module m;
migraphx::shape s{migraphx::shape::float_type, {5}};
auto x = m.add_parameter("x", s);
auto y = m.add_parameter("y", s);
auto alloc =
m.add_instruction(migraphx::make_op("allocate", {{"shape", migraphx::to_value(s)}}));
m.add_instruction(pass_op{}, alloc, x, y);
return m;
}
TEST_CASE(allocate_no_out)
{
migraphx::module m = create_simple_program();
run_pass(m, allocation_no_out_model{});
EXPECT(std::any_of(m.begin(), m.end(), [](const migraphx::instruction& ins) {
return migraphx::contains(ins.name(), "allocate_no_out");
}));
}
TEST_CASE(allocate_with_out_param)
{
migraphx::module m = create_simple_program();
run_pass(m, allocation_with_out_model{});
EXPECT(std::none_of(m.begin(), m.end(), [](const migraphx::instruction& ins) {
return migraphx::contains(ins.name(), "allocate");
}));
}
TEST_CASE(allocate_with_out_return)
{
migraphx::module m = create_simple_program();
m.add_return({std::prev(m.end())});
run_pass(m, allocation_with_out_model{});
EXPECT(std::none_of(m.begin(), m.end(), [](const migraphx::instruction& ins) {
return migraphx::contains(ins.name(), "allocate");
}));
}
TEST_CASE(allocate_with_out_no_params)
{
migraphx::module m;
migraphx::shape s{migraphx::shape::float_type, {5}};
auto x = m.add_parameter("x", s);
auto y = m.add_parameter("y", s);
auto z = m.add_parameter("z", s);
auto alloc =
m.add_instruction(migraphx::make_op("allocate", {{"shape", migraphx::to_value(s)}}));
auto pass1 = m.add_instruction(pass_op{}, alloc, x, y);
auto alloc2 =
m.add_instruction(migraphx::make_op("allocate", {{"shape", migraphx::to_value(s)}}));
m.add_instruction(pass_op{}, alloc2, z, pass1);
run_pass(m, allocation_with_out_model{});
EXPECT(std::any_of(m.begin(), m.end(), [](const migraphx::instruction& ins) {
return migraphx::contains(ins.name(), "allocate_with_out");
}));
}
TEST_CASE(if_allocate)
{
migraphx::program p;
auto* mm = p.get_main_module();
migraphx::shape cond_s{migraphx::shape::bool_type};
auto cond = mm->add_parameter("cond", cond_s);
migraphx::shape s{migraphx::shape::float_type, {5}};
auto x = mm->add_parameter("x", s);
auto y = mm->add_parameter("y", s);
auto* then_mod = p.create_module("If_0_if");
auto alloc = then_mod->add_instruction(
migraphx::make_op("allocate", {{"shape", migraphx::to_value(s)}}));
auto a1 = then_mod->add_instruction(pass_op{}, alloc, x);
then_mod->add_return({a1});
auto* else_mod = p.create_module("If_0_else");
auto alloc1 = else_mod->add_instruction(
migraphx::make_op("allocate", {{"shape", migraphx::to_value(s)}}));
auto a2 = else_mod->add_instruction(pass_op{}, alloc1, y);
else_mod->add_return({a2});
mm->add_instruction(migraphx::make_op("if"), {cond}, {then_mod, else_mod});
run_pass(p, allocation_with_out_model{});
EXPECT(std::any_of(mm->begin(), mm->end(), [](const migraphx::instruction& ins) {
return migraphx::contains(ins.name(), "allocate_with_out");
}));
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
@@ -28,6 +28,8 @@ struct allocation_model
     operation allocate(const shape& s) const;
     /// Create a preallocated operator for the given shape
     operation preallocate(const shape& s, const std::string& id) const;
+    /// Check if outputs are to be inserted
+    bool needs_out_params() const;
 };

 #else
@@ -37,7 +39,8 @@ interface('allocation_model',
     virtual('name', returns='std::string', const=True),
     virtual('copy', returns='std::string', const=True),
     virtual('allocate', s='const shape&', returns='operation', const=True),
-    virtual('preallocate', s='const shape&', id='std::string', returns='operation', const=True)
+    virtual('preallocate', s='const shape&', id='std::string', returns='operation', const=True),
+    virtual('needs_out_params', returns='bool', const=True)
 )
 %>