manual merge

d2549384 · Khalique · 67048d04 · ab6cd9d3 · d2549384 · d2549384
Commit d2549384 authored Feb 01, 2019 by Khalique
20 changed files
--- a/src/targets/gpu/include/migraph/gpu/lowering.hpp
+++ b/src/targets/gpu/include/migraph/gpu/lowering.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
-#define MIGRAPH_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
+#define MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP

-#include <migraph/program.hpp>
-#include <migraph/config.hpp>
-#include <migraph/gpu/context.hpp>
+#include <migraphx/program.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/context.hpp>

-namespace migraph {
-inline namespace MIGRAPH_INLINE_NS {
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 struct lowering
@@ -17,7 +17,7 @@ struct lowering
 };

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
-} // namespace migraph
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraphx/gpu/max.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/max.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_MAX_HPP
+#define MIGRAPHX_GUARD_RTGLIB_MAX_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/max.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_max : binary_device<hip_max, device::max> // binary_device instantiated with the device::max kernel
+{
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/min.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/min.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_MIN_HPP
+#define MIGRAPHX_GUARD_RTGLIB_MIN_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/min.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_min : binary_device<hip_min, device::min> // binary_device instantiated with the device::min kernel
+{
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraph/gpu/miopen.hpp
+++ b/src/targets/gpu/include/migraph/gpu/miopen.hpp
-#ifndef MIGRAPH_GUARD_MIGRAPHLIB_MIOPEN_HPP
-#define MIGRAPH_GUARD_MIGRAPHLIB_MIOPEN_HPP
+#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_HPP
+#define MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_HPP

-#include <migraph/manage_ptr.hpp>
-#include <migraph/operators.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/operators.hpp>
 #include <miopen/miopen.h>
-#include <migraph/config.hpp>
+#include <migraphx/config.hpp>

-namespace migraph {
-inline namespace MIGRAPH_INLINE_NS {
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

-using miopen_handle          = MIGRAPH_MANAGE_PTR(miopenHandle_t, miopenDestroy);
-using tensor_descriptor      = MIGRAPH_MANAGE_PTR(miopenTensorDescriptor_t,
-                                             miopenDestroyTensorDescriptor);
-using convolution_descriptor = MIGRAPH_MANAGE_PTR(miopenConvolutionDescriptor_t,
-                                                  miopenDestroyConvolutionDescriptor);
-using pooling_descriptor     = MIGRAPH_MANAGE_PTR(miopenPoolingDescriptor_t,
-                                              miopenDestroyPoolingDescriptor);
-using activation_descriptor  = MIGRAPH_MANAGE_PTR(miopenActivationDescriptor_t,
-                                                 miopenDestroyActivationDescriptor);
-using fusion_plan_descriptor = MIGRAPH_MANAGE_PTR(miopenFusionPlanDescriptor_t,
-                                                  miopenDestroyFusionPlan);
-using fused_operator_args    = MIGRAPH_MANAGE_PTR(miopenOperatorArgs_t, miopenDestroyOperatorArgs);
+using miopen_handle          = MIGRAPHX_MANAGE_PTR(miopenHandle_t, miopenDestroy);
+using tensor_descriptor      = MIGRAPHX_MANAGE_PTR(miopenTensorDescriptor_t,
+                                              miopenDestroyTensorDescriptor);
+using convolution_descriptor = MIGRAPHX_MANAGE_PTR(miopenConvolutionDescriptor_t,
+                                                   miopenDestroyConvolutionDescriptor);
+using pooling_descriptor     = MIGRAPHX_MANAGE_PTR(miopenPoolingDescriptor_t,
+                                               miopenDestroyPoolingDescriptor);
+using activation_descriptor  = MIGRAPHX_MANAGE_PTR(miopenActivationDescriptor_t,
+                                                  miopenDestroyActivationDescriptor);
+using fusion_plan_descriptor = MIGRAPHX_MANAGE_PTR(miopenFusionPlanDescriptor_t,
+                                                   miopenDestroyFusionPlan);
+using fused_operator_args    = MIGRAPHX_MANAGE_PTR(miopenOperatorArgs_t, miopenDestroyOperatorArgs);
+
 using lrn_descriptor = MIGRAPH_MANAGE_PTR(miopenLRNDescriptor_t, miopenDestroyLRNDescriptor);

 template <class Result, class F, class... Ts>
@@ -31,11 +32,11 @@ Result make_obj(F f, Ts... xs)
    auto status                = f(&x, xs...);
    Result r{x};
    if(status != miopenStatusSuccess)
-        MIGRAPH_THROW("MIOpen call failed");
+        MIGRAPHX_THROW("MIOpen call failed");
    return r;
 }

-inline tensor_descriptor make_tensor(const migraph::shape& s)
+inline tensor_descriptor make_tensor(const migraphx::shape& s)
 {
    auto t = make_obj<tensor_descriptor>(&miopenCreateTensorDescriptor);
    // Convert to ints
@@ -47,26 +48,31 @@ inline tensor_descriptor make_tensor(const migraph::shape& s)
    else if(s.type() == shape::half_type)
        d = miopenHalf;
    else
-        MIGRAPH_THROW("Unsupported type");
+        MIGRAPHX_THROW("Unsupported type");
    miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data());
    return t;
 }

-inline convolution_descriptor make_conv(const migraph::op::convolution& op)
+inline convolution_descriptor make_conv(const migraphx::op::convolution& op)
 {
    auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
+    miopenConvolutionMode_t c_mode = miopenConvolution;
+    if(op.group > 1)
+        c_mode = miopenGroupConv;
    miopenInitConvolutionDescriptor(c.get(),
-                                    miopenConvolution,
+                                    c_mode,
                                    op.padding[0],
                                    op.padding[1],
                                    op.stride[0],
                                    op.stride[1],
                                    op.dilation[0],
                                    op.dilation[1]);
+    if(op.group > 1)
+        miopenSetConvolutionGroupCount(c.get(), op.group);
    return c;
 }

-inline pooling_descriptor make_pooling(const migraph::op::pooling& op)
+inline pooling_descriptor make_pooling(const migraphx::op::pooling& op)
 {
    miopenPoolingMode_t mode;
    if(op.mode == "max")
@@ -99,6 +105,29 @@ inline activation_descriptor make_relu()
    return ad;
 }

+inline activation_descriptor make_sigmoid() // activation descriptor in miopenActivationLOGISTIC mode
+{
+    auto desc = make_obj<activation_descriptor>(&miopenCreateActivationDescriptor);
+    miopenSetActivationDescriptor(desc.get(), miopenActivationLOGISTIC, 0, 0, 0); // status not checked
+    return desc;
+}
+
+inline activation_descriptor make_tanh() // activation descriptor in miopenActivationTANH mode
+{
+    auto desc = make_obj<activation_descriptor>(&miopenCreateActivationDescriptor);
+    // The ONNX tanh operator applies no extra scaling, so the first two
+    // coefficients (alpha and beta) are both passed as 1.
+    miopenSetActivationDescriptor(desc.get(), miopenActivationTANH, 1, 1, 0); // status not checked
+    return desc;
+}
+
+inline activation_descriptor make_abs() // activation descriptor in miopenActivationABS mode
+{
+    auto desc = make_obj<activation_descriptor>(&miopenCreateActivationDescriptor);
+    miopenSetActivationDescriptor(desc.get(), miopenActivationABS, 0, 0, 0); // status not checked
+    return desc;
+}
+
 inline activation_descriptor make_leaky_relu(double alpha)
 {
    auto ad = make_obj<activation_descriptor>(&miopenCreateActivationDescriptor);
@@ -106,6 +135,13 @@ inline activation_descriptor make_leaky_relu(double alpha)
    return ad;
 }

+inline activation_descriptor make_elu(double alpha) // activation descriptor in miopenActivationELU mode
+{
+    auto desc = make_obj<activation_descriptor>(&miopenCreateActivationDescriptor);
+    miopenSetActivationDescriptor(desc.get(), miopenActivationELU, alpha, 0, 0); // alpha forwarded; status not checked
+    return desc;
+}
+
 inline fusion_plan_descriptor make_fusion_plan(const shape& input)
 {
    auto t = make_tensor(input);
@@ -125,7 +161,7 @@ inline fused_operator_args make_fused_args()
 }

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
-} // namespace migraph
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraphx/gpu/mul.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/mul.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_MUL_HPP
+#define MIGRAPHX_GUARD_RTGLIB_MUL_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/mul.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_mul : binary_device<hip_mul, device::mul> // binary_device instantiated with the device::mul kernel
+{
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/oper.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/oper.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_UNARY_HPP
+#define MIGRAPHX_GUARD_RTGLIB_UNARY_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/type_name.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/config.hpp>
+#include <utility>
+#include <iostream>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+// CRTP base that derives an operator name from the type name of Derived.
+//
+// name() looks for "::gpu::" in get_type_name<Derived>():
+//   - not found                      -> "unknown"
+//   - found, and "hip_" occurs later -> "gpu::" + the text following "hip_"
+//   - found, no "hip_" afterwards    -> the type name starting at "gpu::"
+template <class Derived>
+struct oper
+{
+    std::string name() const
+    {
+        const std::string gpu_ns     = "::gpu::";
+        const std::string hip_prefix = "hip_";
+        const std::string& type_name = get_type_name<Derived>();
+
+        const auto ns_pos = type_name.find(gpu_ns);
+        if(ns_pos == std::string::npos)
+            return "unknown";
+
+        // Only look for the "hip_" prefix after the namespace qualifier.
+        const auto hip_pos = type_name.find(hip_prefix, ns_pos + gpu_ns.length());
+        if(hip_pos == std::string::npos)
+            return type_name.substr(ns_pos + 2); // skip the leading "::"
+
+        return "gpu::" + type_name.substr(hip_pos + hip_prefix.length());
+    }
+};
+
+// Operator base that forwards to the device function F taking two arguments.
+//
+// compute() calls F(stream, args[1], args[0]) and returns args[1];
+// output_alias() reports the index of the last shape. Presumably the last
+// argument is the preallocated output buffer -- confirm against the kernels.
+template <class Derived, void (*F)(hipStream_t, const argument&, const argument&)>
+struct unary_device : oper<Derived>
+{
+    // Checks the input count with check_shapes::has(2) and returns the first shape.
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        check_shapes{inputs, *this}.has(2);
+        return inputs.at(0);
+    }
+
+    // Runs F on the context's stream; the returned argument is args[1].
+    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
+    {
+        const argument& result = args[1];
+        F(ctx.get_stream().get(), result, args[0]);
+        return result;
+    }
+
+    // Index of the last entry in shapes.
+    int output_alias(const std::vector<shape>& shapes) const
+    {
+        return static_cast<int>(shapes.size() - 1);
+    }
+};
+
+// Operator base that forwards to the device function F taking three arguments.
+//
+// compute() calls F(stream, args[2], args[1], args[0]) and returns args[2];
+// output_alias() reports the index of the last shape. Presumably the last
+// argument is the preallocated output buffer -- confirm against the kernels.
+template <class Derived, void (*F)(hipStream_t, const argument&, const argument&, const argument&)>
+struct binary_device : oper<Derived>
+{
+    // Checks the input count with check_shapes::has(3) and returns the first shape.
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        check_shapes{inputs, *this}.has(3);
+        return inputs.at(0);
+    }
+
+    // Runs F on the context's stream; the returned argument is args[2].
+    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
+    {
+        const argument& result = args[2];
+        F(ctx.get_stream().get(), result, args[1], args[0]);
+        return result;
+    }
+
+    // Index of the last entry in shapes.
+    int output_alias(const std::vector<shape>& shapes) const
+    {
+        return static_cast<int>(shapes.size() - 1);
+    }
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/pad.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/pad.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_PAD_HPP
+#define MIGRAPHX_GUARD_RTGLIB_PAD_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/pad.hpp>
+#include <migraphx/gpu/device/add.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_pad // GPU operator named "gpu::pad" that carries an op::pad
+{
+    op::pad op; // the pad operator this GPU operator carries
+
+    std::string name() const { return "gpu::pad"; }
+    shape compute_shape(std::vector<shape> inputs) const; // declared only; note inputs is taken by value here
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; // declared only
+    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; } // index of the last shape
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraph/gpu/pooling.hpp
+++ b/src/targets/gpu/include/migraph/gpu/pooling.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_POOLING_HPP
-#define MIGRAPH_GUARD_RTGLIB_POOLING_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_POOLING_HPP
+#define MIGRAPHX_GUARD_RTGLIB_POOLING_HPP

-#include <migraph/gpu/lowering.hpp>
-#include <migraph/manage_ptr.hpp>
-#include <migraph/instruction.hpp>
-#include <migraph/operators.hpp>
-#include <migraph/generate.hpp>
-#include <migraph/shape_for_each.hpp>
-#include <migraph/config.hpp>
-#include <migraph/gpu/miopen.hpp>
-#include <migraph/gpu/hip.hpp>
-#include <migraph/dfor.hpp>
-#include <migraph/gpu/device/contiguous.hpp>
-#include <migraph/gpu/device/add.hpp>
-#include <migraph/iterator_for.hpp>
-#include <migraph/gpu/rocblas.hpp>
-#include <migraph/gpu/context.hpp>
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/add.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
 #include <utility>

-namespace migraph {
-inline namespace MIGRAPH_INLINE_NS {
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 struct miopen_pooling
@@ -35,7 +35,7 @@ struct miopen_pooling
 };

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
-} // namespace migraph
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraph/gpu/relu.hpp
+++ b/src/targets/gpu/include/migraph/gpu/relu.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_RELU_HPP
-#define MIGRAPH_GUARD_RTGLIB_RELU_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_RELU_HPP
+#define MIGRAPHX_GUARD_RTGLIB_RELU_HPP

-#include <migraph/gpu/lowering.hpp>
-#include <migraph/manage_ptr.hpp>
-#include <migraph/instruction.hpp>
-#include <migraph/operators.hpp>
-#include <migraph/generate.hpp>
-#include <migraph/shape_for_each.hpp>
-#include <migraph/config.hpp>
-#include <migraph/gpu/miopen.hpp>
-#include <migraph/gpu/hip.hpp>
-#include <migraph/dfor.hpp>
-#include <migraph/gpu/device/contiguous.hpp>
-#include <migraph/gpu/device/add.hpp>
-#include <migraph/iterator_for.hpp>
-#include <migraph/gpu/rocblas.hpp>
-#include <migraph/gpu/context.hpp>
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/add.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
 #include <utility>

-namespace migraph {
-inline namespace MIGRAPH_INLINE_NS {
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 struct miopen_relu
@@ -33,7 +33,7 @@ struct miopen_relu
 };

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
-} // namespace migraph
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraph/gpu/rocblas.hpp
+++ b/src/targets/gpu/include/migraph/gpu/rocblas.hpp
-#ifndef MIGRAPH_GUARD_MIGRAPHLIB_ROCBLAS_HPP
-#define MIGRAPH_GUARD_MIGRAPHLIB_ROCBLAS_HPP
+#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
+#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP

-#include <migraph/manage_ptr.hpp>
-#include <migraph/operators.hpp>
-#include <migraph/config.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/config.hpp>
 #include <rocblas.h>

-namespace migraph {
-inline namespace MIGRAPH_INLINE_NS {
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

-using rocblas_handle_ptr = MIGRAPH_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle);
+using rocblas_handle_ptr = MIGRAPHX_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle);

 rocblas_handle_ptr create_rocblas_handle_ptr();
 rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s);

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
-} // namespace migraph
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraphx/gpu/sigmoid.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/sigmoid.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_SIGMOID_HPP
+#define MIGRAPHX_GUARD_RTGLIB_SIGMOID_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/add.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct miopen_sigmoid // GPU operator named "gpu::sigmoid"
+{
+    shared<activation_descriptor> ad; // activation descriptor; presumably the one built by make_sigmoid() -- confirm in lowering
+    std::string name() const { return "gpu::sigmoid"; }
+    shape compute_shape(const std::vector<shape>& inputs) const; // declared only
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; // declared only
+    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; } // index of the last shape
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/sin.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/sin.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_SIN_HPP
+#define MIGRAPHX_GUARD_RTGLIB_SIN_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/sin.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/config.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_sin : unary_device<hip_sin, device::sin> // unary_device instantiated with the device::sin kernel
+{
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/sinh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/sinh.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_SINH_HPP
+#define MIGRAPHX_GUARD_RTGLIB_SINH_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/sinh.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/config.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_sinh : unary_device<hip_sinh, device::sinh> // unary_device instantiated with the device::sinh kernel
+{
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraph/gpu/softmax.hpp
+++ b/src/targets/gpu/include/migraph/gpu/softmax.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_SOFTMAX_HPP
-#define MIGRAPH_GUARD_RTGLIB_SOFTMAX_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_SOFTMAX_HPP
+#define MIGRAPHX_GUARD_RTGLIB_SOFTMAX_HPP

-#include <migraph/gpu/lowering.hpp>
-#include <migraph/manage_ptr.hpp>
-#include <migraph/instruction.hpp>
-#include <migraph/operators.hpp>
-#include <migraph/generate.hpp>
-#include <migraph/shape_for_each.hpp>
-#include <migraph/config.hpp>
-#include <migraph/gpu/miopen.hpp>
-#include <migraph/gpu/hip.hpp>
-#include <migraph/dfor.hpp>
-#include <migraph/gpu/device/contiguous.hpp>
-#include <migraph/gpu/device/add.hpp>
-#include <migraph/iterator_for.hpp>
-#include <migraph/gpu/rocblas.hpp>
-#include <migraph/gpu/context.hpp>
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/add.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
 #include <utility>

-namespace migraph {
-inline namespace MIGRAPH_INLINE_NS {
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 struct miopen_softmax
@@ -33,7 +33,7 @@ struct miopen_softmax
 };

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
-} // namespace migraph
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraphx/gpu/tan.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/tan.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_TAN_HPP
+#define MIGRAPHX_GUARD_RTGLIB_TAN_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/tan.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/config.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_tan : unary_device<hip_tan, device::tan> // unary_device instantiated with the device::tan kernel
+{
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/tanh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/tanh.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_TANH_HPP
+#define MIGRAPHX_GUARD_RTGLIB_TANH_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/add.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct miopen_tanh // GPU operator named "gpu::tanh"
+{
+    shared<activation_descriptor> ad; // activation descriptor; presumably the one built by make_tanh() -- confirm in lowering
+    std::string name() const { return "gpu::tanh"; }
+    shape compute_shape(const std::vector<shape>& inputs) const; // declared only
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; // declared only
+    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; } // index of the last shape
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/target.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/target.hpp
+#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_TARGET_HPP
+#define MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_TARGET_HPP
+
+#include <migraphx/program.hpp>
+#include <migraphx/config.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct target // GPU target interface; all three members are declared here and defined elsewhere
+{
+    std::string name() const; // target name
+    std::vector<pass> get_passes(migraphx::context& gctx) const; // passes for this target, given a context
+    migraphx::context get_context() const; // returns a context by value
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraph/gpu/write_literals.hpp
+++ b/src/targets/gpu/include/migraph/gpu/write_literals.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_MIOPEN_WRITE_LITERALS_HPP
-#define MIGRAPH_GUARD_RTGLIB_MIOPEN_WRITE_LITERALS_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_WRITE_LITERALS_HPP
+#define MIGRAPHX_GUARD_RTGLIB_MIOPEN_WRITE_LITERALS_HPP

-#include <migraph/program.hpp>
-#include <migraph/gpu/context.hpp>
+#include <migraphx/program.hpp>
+#include <migraphx/gpu/context.hpp>

-namespace migraph {
-inline namespace MIGRAPH_INLINE_NS {
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {

 namespace gpu {

@@ -18,7 +18,7 @@ struct write_literals
 };

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
-} // namespace migraph
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx

 #endif
--- a/src/targets/gpu/leaky_relu.cpp
+++ b/src/targets/gpu/leaky_relu.cpp
-#include <migraph/gpu/leaky_relu.hpp>
-#include <migraph/operators.hpp>
-#include <migraph/manage_ptr.hpp>
-#include <migraph/gpu/miopen.hpp>
+#include <migraphx/gpu/leaky_relu.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/gpu/miopen.hpp>
 #include <utility>

-namespace migraph {
-inline namespace MIGRAPH_INLINE_NS {
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 shape miopen_leaky_relu::compute_shape(const std::vector<shape>& inputs) const
@@ -18,7 +18,8 @@ argument miopen_leaky_relu::compute(context& ctx,
                                    const shape& output_shape,
                                    const std::vector<argument>& args) const
 {
-    float alpha = 1, beta = 0;
+    float alpha = 1;
+    float beta  = 0;
    auto x_desc = make_tensor(args[0].get_shape());
    auto y_desc = make_tensor(output_shape);
    miopenActivationForward(ctx.get_stream().get_miopen(),
@@ -34,5 +35,5 @@ argument miopen_leaky_relu::compute(context& ctx,
 }

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
-} // namespace migraph
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/lowering.cpp
+++ b/src/targets/gpu/lowering.cpp
 #include <rocblas.h>
-#include <migraph/gpu/lowering.hpp>
-#include <migraph/manage_ptr.hpp>
-#include <migraph/instruction.hpp>
-#include <migraph/operators.hpp>
-#include <migraph/generate.hpp>
-#include <migraph/shape_for_each.hpp>
-#include <migraph/gpu/miopen.hpp>
-#include <migraph/gpu/hip.hpp>
-#include <migraph/dfor.hpp>
-#include <migraph/gpu/device/contiguous.hpp>
-#include <migraph/gpu/device/add.hpp>
-#include <migraph/iterator_for.hpp>
-#include <migraph/gpu/rocblas.hpp>
-#include <migraph/gpu/context.hpp>
-#include <migraph/gpu/convolution.hpp>
-#include <migraph/gpu/contiguous.hpp>
-#include <migraph/gpu/lrn.hpp>
-#include <migraph/gpu/relu.hpp>
-#include <migraph/gpu/leaky_relu.hpp>
-#include <migraph/gpu/softmax.hpp>
-#include <migraph/gpu/add.hpp>
-#include <migraph/gpu/mul.hpp>
-#include <migraph/gpu/batchnorm.hpp>
-#include <migraph/gpu/pooling.hpp>
-#include <migraph/gpu/gemm.hpp>
-#include <migraph/gpu/concat.hpp>
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/add.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/gpu/convolution.hpp>
+#include <migraphx/gpu/contiguous.hpp>
+#include <migraphx/gpu/relu.hpp>
+#include <migraphx/gpu/sigmoid.hpp>
+#include <migraphx/gpu/abs.hpp>
+#include <migraphx/gpu/leaky_relu.hpp>
+#include <migraphx/gpu/elu.hpp>
+#include <migraphx/gpu/softmax.hpp>
+#include <migraphx/gpu/add.hpp>
+#include <migraphx/gpu/exp.hpp>
+#include <migraphx/gpu/log.hpp>
+#include <migraphx/gpu/sin.hpp>
+#include <migraphx/gpu/cos.hpp>
+#include <migraphx/gpu/tan.hpp>
+#include <migraphx/gpu/sinh.hpp>
+#include <migraphx/gpu/cosh.hpp>
+#include <migraphx/gpu/tanh.hpp>
+#include <migraphx/gpu/asin.hpp>
+#include <migraphx/gpu/acos.hpp>
+#include <migraphx/gpu/atan.hpp>
+#include <migraphx/gpu/mul.hpp>
+#include <migraphx/gpu/max.hpp>
+#include <migraphx/gpu/min.hpp>
+#include <migraphx/gpu/batchnorm.hpp>
+#include <migraphx/gpu/pooling.hpp>
+#include <migraphx/gpu/gemm.hpp>
+#include <migraphx/gpu/concat.hpp>
+#include <migraphx/gpu/pad.hpp>
+#include <migraphx/gpu/gather.hpp>
+#include <migraphx/gpu/lrn.hpp>
 #include <utility>
+#include <functional>
+#include <algorithm>

-namespace migraph {
-inline namespace MIGRAPH_INLINE_NS {
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 struct miopen_apply
 {
    program* prog = nullptr;
    context ctx{};
+    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};

    void check_shape(shape x, instruction_ref i)
    {
@@ -43,58 +64,53 @@ struct miopen_apply
        (void)i;
    }

+    void init() // Fills apply_map with one lowering handler per supported operator name.
+    {
+        add_miopen_simple_op<miopen_relu>("relu", make_relu); // simple ops: the factory is called with no arguments
+        add_miopen_simple_op<miopen_sigmoid>("sigmoid", make_sigmoid);
+        add_miopen_simple_op<miopen_abs>("abs", make_abs);
+        add_miopen_simple_op<miopen_tanh>("tanh", make_tanh);
+
+        add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu); // the factory is called with op.alpha
+        add_miopen_extend_op<miopen_elu, op::elu>("elu", make_elu);
+
+        add_generic_op<hip_add>("add"); // generic ops: the replacement is a default-constructed T{}
+        add_generic_op<hip_exp>("exp");
+        add_generic_op<hip_log>("log");
+        add_generic_op<hip_sin>("sin");
+        add_generic_op<hip_cos>("cos");
+        add_generic_op<hip_tan>("tan");
+        add_generic_op<hip_sinh>("sinh");
+        add_generic_op<hip_cosh>("cosh");
+        add_generic_op<hip_asin>("asin");
+        add_generic_op<hip_acos>("acos");
+        add_generic_op<hip_atan>("atan");
+        add_generic_op<hip_mul>("mul");
+        add_generic_op<hip_max>("max");
+        add_generic_op<hip_min>("min");
+
+        add_extend_op<miopen_gemm, op::dot>("dot"); // extend ops: the replacement is T{op}, built from the original operator
+        add_extend_op<miopen_contiguous, op::contiguous>("contiguous");
+        add_extend_op<hip_concat, op::concat>("concat");
+        add_extend_op<miopen_softmax, op::softmax>("softmax");
+        add_extend_op<miopen_lrn, op::lrn>("lrn");
+        add_extend_op<hip_gather, op::gather>("gather");
+        add_extend_op<hip_pad, op::pad>("pad");
+
+        add_convolution_op(); // operators whose handlers are written out by hand
+        add_pooling_op();
+        add_batch_norm_inference_op();
+    }
+
    void apply()
    {
+        init(); // fill apply_map before walking the program
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
            auto s = it->get_shape();
-            if(it->name() == "convolution")
-            {
-                check_shape(s, apply_convolution(it));
-            }
-            else if(it->name() == "relu")
-            {
-                check_shape(s, apply_relu(it));
-            }
-            else if(it->name() == "leaky_relu")
-            {
-                check_shape(s, apply_leaky_relu(it));
-            }
-            else if(it->name() == "pooling")
-            {
-                check_shape(s, apply_pooling(it));
-            }
-            else if(it->name() == "lrn")
-            {
-                check_shape(s, apply_lrn(it));
-            }
-            else if(it->name() == "add")
-            {
-                check_shape(s, apply_add(it));
-            }
-            else if(it->name() == "mul")
-            {
-                check_shape(s, apply_mul(it));
-            }
-            else if(it->name() == "dot")
-            {
-                check_shape(s, apply_gemm(it));
-            }
-            else if(it->name() == "contiguous")
+            if(apply_map.count(it->name()) > 0) // instructions without a registered handler are left as they are
            {
-                check_shape(s, apply_contiguous(it));
-            }
-            else if(it->name() == "concat")
-            {
-                check_shape(s, apply_concat(it));
-            }
-            else if(it->name() == "batch_norm_inference")
-            {
-                check_shape(s, apply_batch_norm_inference(it));
-            }
-            else if(it->name() == "softmax")
-            {
-                check_shape(s, apply_softmax(it));
+                check_shape(s, apply_map.at(it->name())(it)); // run the handler; check_shape gets the pre-lowering shape and the handler's result
            }
        }
    }
@@ -113,127 +129,118 @@ struct miopen_apply
        }
    }

-    instruction_ref apply_convolution(instruction_ref ins)
+    void add_convolution_op() // Registers the "convolution" lowering handler in apply_map.
    {
-        auto&& op = any_cast<op::convolution>(ins->get_operator());
+        apply_map.emplace("convolution", [=](instruction_ref ins) { // [=] also captures this: the body uses the members prog and ctx
+            auto&& op = any_cast<op::convolution>(ins->get_operator());

-        auto conv = miopen_convolution{op, make_conv(op)};
-        auto ws   = conv.compile(ctx, ins->get_shape(), ins->inputs());
+            auto conv = miopen_convolution{op, make_conv(op)};
+            auto ws   = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs())); // ws is passed on below as the shape of the "workspace" allocation

-        auto workspace = insert_allocation(ins, ws, "workspace");
-        auto output    = insert_allocation(ins, ins->get_shape());
+            auto workspace = insert_allocation(ins, ws, "workspace");
+            auto output    = insert_allocation(ins, ins->get_shape());

-        return prog->replace_instruction(
-            ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
+            return prog->replace_instruction(
+                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output); // operands: the two original inputs, then workspace, then output
+        });
    }

-    instruction_ref apply_pooling(instruction_ref ins)
+    void add_pooling_op() // Registers the "pooling" lowering handler in apply_map.
    {
-        auto&& op   = any_cast<op::pooling>(ins->get_operator());
-        auto pd     = make_pooling(op);
-        auto output = insert_allocation(ins, ins->get_shape());
-
-        return prog->replace_instruction(
-            ins, miopen_pooling{op, std::move(pd)}, ins->inputs().at(0), output);
+        apply_map.emplace("pooling", [=](instruction_ref ins) { // [=] also captures this: the body uses the member prog
+            auto&& op   = any_cast<op::pooling>(ins->get_operator());
+            auto pd     = make_pooling(op); // moved into the miopen_pooling replacement below
+            auto output = insert_allocation(ins, ins->get_shape());
+
+            return prog->replace_instruction(
+                ins, miopen_pooling{op, std::move(pd)}, ins->inputs().at(0), output); // operands: first input, then the output buffer
+        });
    }

-    instruction_ref apply_lrn(instruction_ref ins)
+    void apply_lrn(instruction_ref ins) // Registers a hand-written "lrn" handler in apply_map; the outer ins parameter is not used.
    {
-        auto&& op   = any_cast<op::lrn>(ins->get_operator());
-        auto ldesc  = make_lrn(op);
-        auto output = insert_allocation(ins, ins->get_shape());
-        return prog->replace_instruction(
-            ins, miopen_lrn{std::move(ldesc)}, ins->inputs().at(0), output);
+        apply_map.emplace("lrn", [=](instruction_ref ins) { // NOTE(review): init() already registers "lrn" via add_extend_op, and emplace keeps an existing key
+            auto&& op   = any_cast<op::lrn>(ins->get_operator());
+            auto ldesc  = make_lrn(op); // moved into the miopen_lrn replacement below
+            auto output = insert_allocation(ins, ins->get_shape());
+            return prog->replace_instruction(
+                ins, miopen_lrn{std::move(ldesc)}, ins->inputs().at(0), output); // operands: first input, then the output buffer
+        });
    }

-    instruction_ref apply_relu(instruction_ref ins)
+    template <class T> // T: replacement operator type; it is default-constructed as T{}
+    void add_generic_op(std::string name) // Registers a handler for name that swaps the instruction for T{}.
    {
-        auto ad = make_relu();
+        apply_map.emplace(name, [=](instruction_ref ins) { // [=] also captures this: the body uses the member prog
+            auto output                       = insert_allocation(ins, ins->get_shape());
+            std::vector<instruction_ref> refs = ins->inputs(); // copy of all original inputs, whatever their number
+            refs.push_back(output); // the output buffer goes last

-        auto output = insert_allocation(ins, ins->get_shape());
-        return prog->replace_instruction(
-            ins, miopen_relu{std::move(ad)}, ins->inputs().at(0), output);
+            return prog->replace_instruction(ins, T{}, refs);
+        });
    }

-    instruction_ref apply_leaky_relu(instruction_ref ins)
+    template <class T, class Op> // T: replacement operator type, constructed as T{op}; Op: type the original operator is any_cast to
+    void add_extend_op(std::string name) // Registers a handler for name that swaps the instruction for T{op}.
    {
-        auto&& op = any_cast<op::leaky_relu>(ins->get_operator());
-        auto ad   = make_leaky_relu(op.alpha);
-
-        auto output = insert_allocation(ins, ins->get_shape());
-        return prog->replace_instruction(
-            ins, miopen_leaky_relu{std::move(ad)}, ins->inputs().at(0), output);
+        apply_map.emplace(name, [=](instruction_ref ins) { // [=] also captures this: the body uses the member prog
+            auto&& op                         = any_cast<Op>(ins->get_operator());
+            auto output                       = insert_allocation(ins, ins->get_shape());
+            std::vector<instruction_ref> refs = ins->inputs(); // copy of all original inputs, whatever their number
+            refs.push_back(output); // the output buffer goes last
+
+            return prog->replace_instruction(ins, T{op}, refs);
+        });
    }

-    instruction_ref apply_softmax(instruction_ref ins)
+    template <class T, class Op, class F> // T: replacement type; Op: original operator type (must expose alpha); F: callable taking op.alpha
+    void add_miopen_extend_op(std::string name, F f) // Registers a handler for name that builds T from f(op.alpha).
    {
-        auto&& op   = any_cast<op::softmax>(ins->get_operator());
-        auto output = insert_allocation(ins, ins->get_shape());
-        return prog->replace_instruction(ins, miopen_softmax{op}, ins->inputs().at(0), output);
-    }
+        apply_map.emplace(name, [=](instruction_ref ins) { // [=] copies f into the handler and also captures this (prog is used)
+            auto&& op = any_cast<Op>(ins->get_operator());
+            auto ad   = f(op.alpha); // moved into T below

-    instruction_ref apply_add(instruction_ref ins)
-    {
-        auto output = insert_allocation(ins, ins->get_shape());
-        return prog->replace_instruction(
-            ins, hip_add{}, ins->inputs().at(0), ins->inputs().at(1), output);
-    }
-
-    instruction_ref apply_mul(instruction_ref ins)
-    {
-        auto output = insert_allocation(ins, ins->get_shape());
-        return prog->replace_instruction(
-            ins, hip_mul{}, ins->inputs().at(0), ins->inputs().at(1), output);
-    }
-
-    instruction_ref apply_gemm(instruction_ref ins)
-    {
-        auto&& op   = any_cast<op::dot>(ins->get_operator());
-        auto output = insert_allocation(ins, ins->get_shape());
-        return prog->replace_instruction(
-            ins, miopen_gemm{op}, ins->inputs().at(0), ins->inputs().at(1), output);
-    }
-
-    instruction_ref apply_contiguous(instruction_ref ins)
-    {
-        auto&& op   = any_cast<op::contiguous>(ins->get_operator());
-        auto output = insert_allocation(ins, ins->get_shape());
-        return prog->replace_instruction(ins, miopen_contiguous{op}, ins->inputs().at(0), output);
+            auto output = insert_allocation(ins, ins->get_shape());
+            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output); // operands: first input, then the output buffer
+        });
    }

-    instruction_ref apply_concat(instruction_ref ins)
+    template <class T, class F> // T: replacement operator type; F: callable invoked with no arguments
+    void add_miopen_simple_op(std::string name, F f) // Registers a handler for name that builds T from f().
    {
-        auto&& op                         = any_cast<op::concat>(ins->get_operator());
-        auto output                       = insert_allocation(ins, ins->get_shape());
-        std::vector<instruction_ref> refs = ins->inputs();
-        refs.push_back(output);
-        return prog->replace_instruction(ins, hip_concat{op}, refs);
+        apply_map.emplace(name, [=](instruction_ref ins) { // [=] copies f into the handler and also captures this (prog is used)
+            auto ad     = f(); // called on every handler invocation; the result is moved into T below
+            auto output = insert_allocation(ins, ins->get_shape());
+            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output); // operands: first input, then the output buffer
+        });
    }

-    instruction_ref apply_batch_norm_inference(instruction_ref ins)
+    void add_batch_norm_inference_op() // Registers the "batch_norm_inference" lowering handler in apply_map.
    {
-        auto&& op       = any_cast<op::batch_norm_inference>(ins->get_operator());
-        auto output     = insert_allocation(ins, ins->get_shape());
-        shape old_shape = ins->inputs().at(1)->get_shape();
-        std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
-        auto reshape_op = op::reshape{new_shape};
-        std::vector<instruction_ref> reshapes;
-        std::transform(ins->inputs().begin() + 1,
-                       ins->inputs().end(),
-                       std::back_inserter(reshapes),
-                       [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
-        return prog->replace_instruction(ins,
-                                         miopen_batch_norm_inference{op},
-                                         ins->inputs().at(0),
-                                         reshapes[0],
-                                         reshapes[1],
-                                         reshapes[2],
-                                         reshapes[3],
-                                         output);
+        apply_map.emplace("batch_norm_inference", [=](instruction_ref ins) { // [=] also captures this: the body uses the member prog
+            auto&& op       = any_cast<op::batch_norm_inference>(ins->get_operator());
+            auto output     = insert_allocation(ins, ins->get_shape());
+            shape old_shape = ins->inputs().at(1)->get_shape(); // shape of the second input; its element count sizes the reshape below
+            std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1}; // target dims {1, N, 1, 1}
+            auto reshape_op = op::reshape{new_shape};
+            std::vector<instruction_ref> reshapes;
+            std::transform(ins->inputs().begin() + 1, // every input after the first gets a reshape inserted before ins
+                           ins->inputs().end(),
+                           std::back_inserter(reshapes),
+                           [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
+            return prog->replace_instruction(ins,
+                                             miopen_batch_norm_inference{op},
+                                             ins->inputs().at(0),
+                                             reshapes[0], // NOTE(review): reshapes[0..3] are indexed unchecked, so ins presumably has at least five inputs; confirm
+                                             reshapes[1],
+                                             reshapes[2],
+                                             reshapes[3],
+                                             output);
+        });
    }
 };

 void lowering::apply(program& p) const { miopen_apply{&p, ctx}.apply(); }
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
-} // namespace migraph
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx