Commit ad16770f authored by Paul
Browse files

Merge branch 'jit-layernorm' into bert-opt2

parents c06d254a b3955af4
......@@ -37,7 +37,7 @@ def rocmtestnode(Map conf) {
stage("checkout ${variant}") {
checkout scm
}
gitStatusWrapper(credentialsId: '7126e5fe-eb51-4576-b52b-9aaf1de8f0fd', gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'AMDMIGraphX') {
gitStatusWrapper(credentialsId: "${env.status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'AMDMIGraphX') {
pre()
stage("image ${variant}") {
try {
......
......@@ -54,6 +54,7 @@ add_library(migraphx
reduce_dims.cpp
register_op.cpp
register_target.cpp
replace_allocate.cpp
simplify_qdq.cpp
rewrite_batchnorm.cpp
rewrite_pooling.cpp
......@@ -80,6 +81,7 @@ register_migraphx_ops(
acosh
acos
add
allocate
argmax
argmin
asinh
......
......@@ -4,6 +4,7 @@
#include <migraphx/iterator_for.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/stringutils.hpp>
#include <unordered_set>
namespace migraphx {
......@@ -24,9 +25,10 @@ void dead_code_elimination::apply(module& m) const
// Skip the last instruction
if(i == last)
break;
// Skip instruction with empty shape as output unless its a builtin or undefined or identity
// Skip instruction with empty shape as output unless its a builtin, undefined, identity, or
// allocate
if(i->get_shape().elements() == 0 and i->name().front() != '@' and
i->name() != "undefined" and i->name() != "identity")
not contains({"undefined", "identity", "allocate"}, i->name()))
continue;
assert(std::distance(m.begin(), i) <= std::distance(m.begin(), last));
std::unordered_set<instruction_ref> visited;
......
......@@ -28,6 +28,8 @@ struct allocation_model
operation allocate(const shape& s) const;
/// Create a preallocated operator for the given shape
operation preallocate(const shape& s, const std::string& id) const;
/// Check if outputs are to be inserted
bool needs_out_params() const;
};
#else
......@@ -45,6 +47,8 @@ struct allocation_model
operation allocate(const shape& s) const;
//
operation preallocate(const shape& s, std::string id) const;
//
bool needs_out_params() const;
};
#else
......@@ -136,6 +140,12 @@ struct allocation_model
return (*this).private_detail_te_get_handle().preallocate(s, std::move(id));
}
bool needs_out_params() const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().needs_out_params();
}
friend bool is_shared(const allocation_model& private_detail_x,
const allocation_model& private_detail_y)
{
......@@ -154,6 +164,7 @@ struct allocation_model
virtual std::string copy() const = 0;
virtual operation allocate(const shape& s) const = 0;
virtual operation preallocate(const shape& s, std::string id) const = 0;
virtual bool needs_out_params() const = 0;
};
template <typename PrivateDetailTypeErasedT>
......@@ -200,6 +211,12 @@ struct allocation_model
return private_detail_te_value.preallocate(s, std::move(id));
}
bool needs_out_params() const override
{
return private_detail_te_value.needs_out_params();
}
PrivateDetailTypeErasedT private_detail_te_value;
};
......
#ifndef MIGRAPHX_GUARD_OPERATORS_ALLOCATE_HPP
#define MIGRAPHX_GUARD_OPERATORS_ALLOCATE_HPP
#include <array>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
#include <migraphx/shape.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct allocate
{
    // Shape of the buffer this instruction produces.
    shape s{};

    /// Expose the shape member for serialization/reflection.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.s, "shape"));
    }

    std::string name() const { return "allocate"; }

    /// Takes no inputs; the output shape is the stored shape `s`.
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        migraphx::check_shapes{inputs, *this}.has(0);
        return s;
    }

    /// Allocate an uninitialized argument of the requested shape.
    argument compute(const shape& output_shape, const std::vector<argument>&) const
    {
        return argument{output_shape};
    }
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_REPLACE_ALLOCATE_HPP
#define MIGRAPHX_GUARD_RTGLIB_REPLACE_ALLOCATE_HPP
#include <migraphx/config.hpp>
#include <migraphx/allocation_model.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;
/// Compiler pass that rewrites generic "allocate" instructions into
/// target-specific allocations. When the allocation model reports that
/// outputs should be passed in as parameters, module-output allocations
/// are replaced with parameters instead.
struct replace_allocate
{
    // Target-specific allocation model used to create the allocations.
    allocation_model model;
    // When true (main module only), keep explicit allocations for outputs
    // instead of turning them into output parameters.
    bool offload_copy = false;
    std::string name() const { return "replace_allocate"; }
    void apply(module& m) const;
};
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -81,6 +81,7 @@ bool instruction::valid(instruction_ref start, bool check_order) const
bool ret = self != i->outputs().end();
if(check_order)
{
// check arguments for this instruction before this instruction
ret = ret and (std::distance(start, i) < std::distance(start, *self));
}
return ret;
......
......@@ -510,9 +510,8 @@ instruction_ref module::validate() const
return std::find_if(
impl->instructions.begin(), impl->instructions.end(), [&](const instruction& i) {
auto inputs = i.inputs();
bool check_order = std::all_of(inputs.begin(), inputs.end(), [&](auto in) {
return contains(impl->instructions, *in);
});
bool check_order = std::all_of(
inputs.begin(), inputs.end(), [&](auto in) { return has_instruction(in); });
return !i.valid(impl->instructions.begin(), check_order);
});
}
......
#include <migraphx/replace_allocate.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/op/allocate.hpp>
#include <map>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
// Map each module output instruction (via its output alias) to a unique
// parameter name. With an explicit @return, names follow the pattern
// "<module>:#output_<n>"; otherwise the last instruction's alias is
// simply named "output".
std::unordered_map<instruction_ref, std::string> create_output_names(const module& mod)
{
    std::unordered_map<instruction_ref, std::string> result;
    auto last = std::prev(mod.end());
    if(last->name() == "@return")
    {
        std::size_t index = 0;
        for(const auto& out : last->inputs())
        {
            auto alias    = instruction::get_output_alias(out);
            result[alias] = mod.name() + ":#output_" + std::to_string(index++);
        }
    }
    else
    {
        result[instruction::get_output_alias(last)] = "output";
    }
    return result;
}
// For a control-flow instruction with submodules, append one allocation
// per submodule parameter so submodule outputs can be written in place,
// then re-emit the instruction with the extended input list.
void insert_submod_allocations(instruction_ref ins, module& mod, const allocation_model& model)
{
    std::vector<module_ref> mod_args = ins->module_inputs();
    // std::map keeps parameter names sorted, so allocation order is
    // deterministic across runs.
    std::map<std::string, shape> param_shapes;
    for(const auto& smod : mod_args)
    {
        auto ps = smod->get_parameter_shapes();
        param_shapes.insert(ps.begin(), ps.end());
    }
    auto new_inputs = ins->inputs();
    for(const auto& pn : param_shapes)
    {
        new_inputs.push_back(mod.insert_instruction(ins, model.allocate(pn.second)));
    }
    mod.replace_instruction(ins, ins->get_operator(), new_inputs, mod_args);
}
void replace_allocate::apply(module& m) const
{
    const auto mod_output_names = create_output_names(m);
    // Offload copy only ever applies to the top-level "main" module.
    const bool main_offload_copy = (m.name() == "main") and this->offload_copy;
    for(auto ins : iterator_for(m))
    {
        const auto op_name = ins->get_operator().name();
        // Allocations from submodules may need to be inserted; for now
        // only the "if" operator is affected.
        if(op_name == "if")
        {
            insert_submod_allocations(ins, m, model);
        }
        else if(op_name == "allocate")
        {
            auto s = ins->get_shape();
            // Module outputs become parameters when the target expects
            // outputs passed in (and offload copy is not in effect).
            if(not main_offload_copy and model.needs_out_params() and
               contains(mod_output_names, ins))
            {
                m.replace_instruction(ins, m.add_parameter(mod_output_names.at(ins), s));
            }
            else
            {
                auto alloc = m.insert_instruction(
                    ins, make_op(model.name(), migraphx::value{{"shape", to_value(s)}}));
                m.replace_instruction(ins, alloc);
            }
        }
    }
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -15,6 +15,7 @@ struct cpu_allocation_model
std::string copy() const;
operation allocate(const shape& s) const;
operation preallocate(const shape& s, const std::string& id) const;
bool needs_out_params() const { return false; }
};
} // namespace cpu
......
......@@ -291,30 +291,8 @@ struct cpu_apply
{
module* modl;
std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
std::unordered_map<instruction_ref, std::string> prog_output_names{};
instruction_ref last{};
void create_output_names()
{
this->last = instruction::get_output_alias(std::prev(modl->end()));
if(this->last->name() == "@return")
{
const auto& prog_outputs = last->inputs();
std::vector<instruction_ref> outputs_alias(prog_outputs.size());
std::transform(prog_outputs.begin(),
prog_outputs.end(),
outputs_alias.begin(),
[](const auto& i) { return instruction::get_output_alias(i); });
std::size_t index = 0;
for(auto ins : outputs_alias)
{
prog_output_names[ins] = modl->name() + ":#output_" + std::to_string(index++);
}
}
}
void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true)
{
apply_map.emplace(op_name, [=](instruction_ref ins) {
......@@ -360,7 +338,6 @@ struct cpu_apply
void init()
{
create_output_names();
extend_dnnl_algos("dnnl::binary",
{
{"add", "binary_add"},
......@@ -490,7 +467,7 @@ struct cpu_apply
instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
{
return modl->insert_instruction(ins, make_op("cpu::allocate", {{"shape", to_value(s)}}));
return modl->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}}));
}
};
......
......@@ -13,6 +13,7 @@
#include <migraphx/memory_coloring.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
......@@ -70,6 +71,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
lowering{},
eliminate_contiguous{"dnnl::reorder"},
dead_code_elimination{},
replace_allocate{cpu_allocation_model{}},
dead_code_elimination{},
adjust_allocation{cpu_allocation_model{}},
dead_code_elimination{},
fuse_ops{&ctx},
......
......@@ -16,6 +16,7 @@ struct gpu_allocation_model
std::string copy() const;
operation allocate(const shape& s) const;
operation preallocate(const shape& s, const std::string& id) const;
bool needs_out_params() const { return true; }
};
} // namespace gpu
......
......@@ -11,9 +11,10 @@
#include <sstream>
#ifdef HAS_FIND_MODE_API
extern "C" miopenStatus_t miopenHiddenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc,
int findMode);
#ifdef MIGRAPHX_HAS_FIND_MODE_API
extern "C" miopenStatus_t
miopenHiddenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc, // NOLINT
int findMode); // NOLINT
#endif
namespace migraphx {
......@@ -104,7 +105,7 @@ inline convolution_descriptor make_conv(const T& op)
c.get(), padding.size(), padding.data(), stride.data(), dilation.data(), c_mode);
if(op.group > 1)
miopenSetConvolutionGroupCount(c.get(), op.group);
#ifdef HAS_FIND_MODE_API
#ifdef MIGRAPHX_HAS_FIND_MODE_API
miopenHiddenSetConvolutionFindMode(c.get(), 1); // Normal mode
#endif
return c;
......
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/cpp_generator.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
using namespace migraphx::gpu::gen; // NOLINT
// HIP kernel source template compiled at runtime; ${transformers} and
// ${axis} are substituted via interpolate_string before compilation.
// NOTE: the raw-string contents are compiled device code — do not edit
// for formatting.
static const char* const layernorm_kernel = R"__migraphx__(
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/layernorm.hpp>
#include <migraphx/kernels/vectorize.hpp>
#include <args.hpp>
namespace migraphx {
extern "C" {
__global__ void layernorm_kernel(void* input_p, void* output_p)
{
transform_args(make_tensors(), rotate_last(), ${transformers})(input_p, output_p)([](auto... xs) {
layernorm<${axis}>(op::id{}, xs...);
});
}
}
} // namespace migraphx
)__migraphx__";
// Compiles the layernorm / gpu::prelayernorm ops into a HIP code object
// by interpolating the kernel template above with the reduction axis and
// vectorization transformers.
struct layernorm_compiler : compiler<layernorm_compiler>
{
    std::vector<std::string> names() const { return {"layernorm", "gpu::prelayernorm"}; }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        // TODO: Use reduce_dims
        // Reduction is performed over the last axis of the first input.
        auto axis = inputs.front().lens().size() - 1;
        auto faxis = find_fast_axis({inputs.front()});
        vectorize vec{};
        // Vectorize if the axis is a reduction axis
        if(inputs.back().lens()[faxis] == 1)
        {
            vec = vectorize::elements(faxis, inputs);
        }
        // Elements reduced per output element, adjusted for vector width;
        // nelements is the number of independent reductions to launch.
        auto relements = inputs[0].lens()[axis] / vec.size;
        auto nelements = inputs.back().elements() / relements;
        auto block_size = compute_block_size(relements, 256);
        hip_compile_options options;
        options.set_launch_params(
            v, compute_global_for(ctx, nelements * block_size, 256), block_size);
        options.output = inputs.back();
        options.inputs = inputs;
        options.kernel_name = "layernorm_kernel";
        // Substitute the axis and vectorization transformers into the
        // kernel source template.
        auto src = interpolate_string(
            layernorm_kernel,
            {{"transformers", make_transformer_args(vec)}, {"axis", to_string(axis)}});
        return compile_hip_code_object(src, options);
    }

    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
    {
        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
    }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
#define MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
#include <migraphx/kernels/reduce.hpp>
#include <migraphx/kernels/ops.hpp>
#include <migraphx/kernels/print.hpp>
namespace migraphx {
// Block-wise layer normalization over axis Axis: normalizes the input to
// zero mean and (approximately) unit variance along the reduction axis,
// then applies `compute` to the normalized value together with any extra
// inputs to produce the output.
template <index_int Axis, class F, class Output, class Input, class... Inputs>
__device__ void layernorm(F compute, Output output, Input input, Inputs... inputs)
{
    using reduce_output = reduce::with_axis<Input, Axis>;
    // Number of elements along the reduction axis (compile-time).
    constexpr auto relements =
        get_shape_c<Input>{}.elements() / get_shape_c<reduce_output>{}.elements();
    MIGRAPHX_ASSERT(relements > 0);
    reduce::block::run<reduce_output>([&](auto, auto r) {
        using value_type = typename Input::type;
        // Helper: mean of f(x) over the reduction axis.
        auto mean = [&](auto f) {
            return r.reduce(op::sum{}, 0, [&](auto x) { return f(x) / value_type{relements}; })(
                input);
        };
        // mean(x)
        auto mean_x = mean(op::id{});
        // mean(m ^ 2) where m = x - mean(x), i.e. the variance
        auto mean_m2 = mean([&](auto x) {
            auto m = x - mean_x;
            return m * m;
        });
        r.inner([&](auto& y, auto x, auto... xs) {
            auto m = x - mean_x;
            // m * rsqrt(mean(m ^ 2) + 1e-12); the epsilon guards against
            // division by zero for constant inputs
            y = compute(m * rsqrt(mean_m2 + value_type{1e-12}), xs...);
        })(output, input, inputs...);
    });
}
} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
......@@ -58,7 +58,6 @@ struct miopen_apply
const lowering* pass = nullptr;
std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
instruction_ref last{};
std::unordered_map<instruction_ref, std::string> prog_output_names{};
bool offload_copy = false;
bool int8_x4_format = true;
bool compute_fp32 = false;
......@@ -77,27 +76,6 @@ struct miopen_apply
(void)i;
}
void create_output_names()
{
this->last = instruction::get_output_alias(std::prev(mod->end()));
if(this->last->name() == "@return")
{
const auto& prog_outputs = last->inputs();
std::vector<instruction_ref> outputs_alias(prog_outputs.size());
std::transform(prog_outputs.begin(),
prog_outputs.end(),
outputs_alias.begin(),
[](const auto& i) { return instruction::get_output_alias(i); });
std::size_t index = 0;
for(auto ins : outputs_alias)
{
prog_output_names[ins] = mod->name() + ":#output_" + std::to_string(index++);
}
}
}
const std::unordered_set<std::string>& get_rocblas_fp32_archs()
{
static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
......@@ -120,7 +98,6 @@ struct miopen_apply
#endif
offload_copy = (mod->name() == "main") ? pass->offload_copy : false;
create_output_names();
add_generic_op("acos");
add_generic_op("acosh");
......@@ -200,7 +177,7 @@ struct miopen_apply
add_quant_convolution_op();
}
void copy_params()
void copy_params() const
{
if(not offload_copy)
return;
......@@ -260,7 +237,7 @@ struct miopen_apply
copy_params();
}
instruction_ref insert_precompile_op(instruction_ref ins)
instruction_ref insert_precompile_op(instruction_ref ins) const
{
auto output = insert_allocation(ins, ins->get_shape());
std::vector<instruction_ref> refs = ins->inputs();
......@@ -273,28 +250,9 @@ struct miopen_apply
ins->module_inputs());
}
instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
{
// Instruction's output is an input of the ret instruction
if(offload_copy)
{
auto result = mod->insert_instruction(
ins, make_op("hip::allocate", {{"shape", to_value(s)}, {"tag", std::move(tag)}}));
return result;
}
auto ins_alias = instruction::get_output_alias(ins);
if(last->name() == "@return" and tag.empty() and prog_output_names.count(ins_alias) > 0)
{
return mod->add_parameter(prog_output_names[ins_alias], s);
}
else if(ins == last and tag.empty())
{
return mod->add_parameter("output", s);
}
return mod->insert_instruction(
ins, make_op("hip::allocate", {{"shape", to_value(s)}, {"tag", std::move(tag)}}));
return mod->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}}));
}
void add_convolution_op()
......@@ -305,7 +263,7 @@ struct miopen_apply
auto conv = miopen_convolution{op, make_conv(op)};
auto ws = conv.find(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
auto workspace = insert_allocation(ins, ws, "workspace");
auto workspace = insert_allocation(ins, ws);
auto output = insert_allocation(ins, ins->get_shape());
return mod->replace_instruction(
......@@ -321,7 +279,7 @@ struct miopen_apply
auto conv = miopen_deconvolution{op, make_deconv(op)};
auto ws = conv.compile(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
auto workspace = insert_allocation(ins, ws, "workspace");
auto workspace = insert_allocation(ins, ws);
auto output = insert_allocation(ins, ins->get_shape());
return mod->replace_instruction(
......@@ -334,27 +292,9 @@ struct miopen_apply
{
apply_map.emplace(name, [=](instruction_ref ins) {
std::vector<instruction_ref> refs = ins->inputs();
if(refs.size() == 2)
{
auto output = insert_allocation(ins, ins->get_shape());
refs.push_back(output);
}
else
{
auto c_alias = instruction::get_output_alias(refs.back());
if(ins == last or refs.back()->outputs().size() > 1 or c_alias->inputs().empty())
{
auto output = insert_allocation(ins, ins->get_shape());
auto copy_out =
mod->insert_instruction(ins, make_op("hip::copy"), refs.back(), output);
refs.back() = copy_out;
refs.push_back(copy_out);
}
else
{
refs.push_back(refs.back());
}
}
assert(refs.size() == 2);
auto output = insert_allocation(ins, ins->get_shape());
refs.push_back(output);
return mod->replace_instruction(
ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format, compute_fp32}, refs);
});
......@@ -382,7 +322,7 @@ struct miopen_apply
}
auto args = ins->inputs();
auto workspace = insert_allocation(ins, ws, "workspace");
auto workspace = insert_allocation(ins, ws);
auto output = insert_allocation(ins, ins->get_shape());
return mod->replace_instruction(ins, conv, args[0], args[1], workspace, output);
......@@ -479,33 +419,7 @@ struct miopen_apply
auto sync_cond = mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_cond);
inputs.front() = sync_cond;
std::vector<module_ref> mod_args = ins->module_inputs();
std::map<std::string, shape> name_shapes;
for(const auto& smod : mod_args)
{
auto ps = smod->get_parameter_shapes();
name_shapes.insert(ps.begin(), ps.end());
}
bool ins_output_allocated = false;
for(auto& pn : name_shapes)
{
const auto& s = pn.second;
instruction_ref output{};
if(s == ins->get_shape() and not ins_output_allocated)
{
output = insert_allocation(ins, s);
ins_output_allocated = true;
}
else
{
output = mod->insert_instruction(
ins, make_op("hip::allocate", {{"shape", to_value(s)}}));
}
inputs.push_back(output);
}
return mod->replace_instruction(ins, ins->get_operator(), inputs, mod_args);
return mod->replace_instruction(ins, ins->get_operator(), inputs, ins->module_inputs());
});
}
......@@ -524,20 +438,17 @@ struct miopen_apply
inputs.at(0) = synced_max_iter;
inputs.at(1) = cpu_cond;
auto copy_inputs = inputs;
std::transform(
copy_inputs.begin(), copy_inputs.end(), std::back_inserter(inputs), [&](auto in) {
return mod->insert_instruction(
ins, make_op("hip::allocate", {{"shape", to_value(in->get_shape())}}));
});
std::transform(copy_inputs.begin(),
copy_inputs.end(),
std::back_inserter(inputs),
[&](auto in) { return insert_allocation(ins, in->get_shape()); });
auto mod_args = ins->module_inputs();
auto output = insert_allocation(ins, ins->get_shape());
const auto* sub_mod = mod_args.front();
auto cond_out = mod->insert_instruction(
ins,
make_op("hip::allocate",
{{"shape", to_value(sub_mod->get_output_shapes().front())}}));
auto cond_out = insert_allocation(ins, sub_mod->get_output_shapes().front());
// add cond and mod outputs to the argument list
inputs.push_back(cond_out);
inputs.push_back(output);
......
#include <migraphx/gpu/prefuse_ops.hpp>
#include <migraphx/match/layernorm.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace {
// Placeholder op inserted by prefuse_ops to mark a fused layernorm; it
// is lowered later by the gpu layernorm compiler.
struct layernorm
{
    std::string name() const { return "gpu::prelayernorm"; }

    // Output shape matches the single input, converted to a standard
    // (packed, non-broadcast) layout when the input is broadcasted.
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(1);
        auto s = inputs.front();
        if(s.scalar())
            return s;
        if(s.broadcasted())
            return shape{s.type(), s.lens()};
        return s.with_lens(s.lens());
    }
};
MIGRAPHX_REGISTER_OP(layernorm);
// Matcher that replaces a matched layernorm subgraph with the
// prelayernorm placeholder op, keeping only the "x" input.
struct find_layernorm
{
    auto matcher() const { return match::layernorm(); }

    void apply(module& m, const match::matcher_result& r) const
    {
        auto matched = r.result;
        auto x       = r.instructions["x"];
        m.replace_instruction(matched, layernorm{}, x);
    }
};
struct find_gpulayernorm
{
auto matcher() const { return match::layernorm(); }
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
......@@ -30,7 +67,7 @@ struct find_layernorm
}
};
struct find_triaddlayernorm
struct find_gputriaddlayernorm
{
auto matcher() const
{
......@@ -68,7 +105,8 @@ struct find_triaddlayernorm
void prefuse_ops::apply(module& m) const
{
match::find_matches(m, find_triaddlayernorm{}, find_layernorm{});
match::find_matches(m, find_layernorm{});
// match::find_matches(m, find_gputriaddlayernorm{}, find_gpulayernorm{});
}
} // namespace gpu
......
......@@ -17,6 +17,7 @@
#include <migraphx/preallocate_param.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
......@@ -109,6 +110,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
lowering{&ctx, options.offload_copy},
eliminate_contiguous{"gpu::contiguous"},
dead_code_elimination{},
replace_allocate{gpu_allocation_model{}, options.offload_copy},
dead_code_elimination{},
eliminate_concat{concat_gpu_optimization{}},
dead_code_elimination{},
pack_int8_args{},
......
......@@ -33,6 +33,16 @@ struct tf_parser
instruction_ref add_broadcastable_binary_op(const std::string& op_name,
instruction_ref arg0,
instruction_ref arg1) const;
instruction_ref add_common_op(const std::string& op_name,
std::vector<instruction_ref> inputs) const;
    // Convenience overload: forwards a variadic list of instruction refs
    // to the vector-based add_common_op above.
    template <class... Ts>
    instruction_ref add_common_op(const std::string& op_name, Ts... xs) const
    {
        return add_common_op(op_name, {xs...});
    }
instruction_ref add_instruction(const operation& op,
const std::vector<instruction_ref>& args) const;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment