Commit bc5d7f75 authored by Paul

Merge from develop

parents 47c0854d a5b0afa0
#ifndef MIGRAPHX_GUARD_RTGLIB_SOFTMAX_HPP
#define MIGRAPHX_GUARD_RTGLIB_SOFTMAX_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct miopen_softmax
@@ -33,7 +33,7 @@ struct miopen_softmax
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_SUB_HPP
#define MIGRAPHX_GUARD_RTGLIB_SUB_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/sub.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct hip_sub : binary_device<hip_sub, device::sub>
{
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
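
A rough sketch of the pattern behind these empty structs (oper.hpp is not part of this diff, so the names and signatures below are illustrative stand-ins, not the real migraphx API): the unary_device/binary_device bases appear to use CRTP over a device function, so each concrete op only has to name the function that fills its output.

// Toy model with plain CPU buffers in place of the GPU argument types.
#include <cstddef>
#include <vector>

using buf = std::vector<float>;

namespace device {
// stand-in for migraphx::gpu::device::sub
inline void sub(buf& out, const buf& a, const buf& b)
{
    for(std::size_t i = 0; i < out.size(); ++i)
        out[i] = a[i] - b[i];
}
} // namespace device

template <class Derived, void (*F)(buf&, const buf&, const buf&)>
struct toy_binary_device
{
    // args = {lhs, rhs, out}: the last argument is the preallocated output
    // buffer, which is filled and returned, matching the hip_* convention.
    buf& compute(std::vector<buf>& args) const
    {
        F(args[2], args[0], args[1]);
        return args[2];
    }
};

struct toy_sub : toy_binary_device<toy_sub, device::sub>
{
};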
#ifndef MIGRAPHX_GUARD_RTGLIB_TAN_HPP
#define MIGRAPHX_GUARD_RTGLIB_TAN_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/tan.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct hip_tan : unary_device<hip_tan, device::tan>
{
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_TANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_TANH_HPP
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct miopen_tanh
{
    shared<activation_descriptor> ad;
    std::string name() const { return "gpu::tanh"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
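    // The last shape in the argument list is the preallocated output buffer,
    // so report it as the alias of this op's result.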
    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_TARGET_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_TARGET_HPP
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct target
{
    std::string name() const;
    std::vector<pass> get_passes(migraphx::context& gctx) const;
    migraphx::context get_context() const;
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_WRITE_LITERALS_HPP
#define MIGRAPHX_GUARD_RTGLIB_MIOPEN_WRITE_LITERALS_HPP
#include <migraphx/program.hpp>
#include <migraphx/gpu/context.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

@@ -18,7 +18,7 @@ struct write_literals
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#include <migraphx/gpu/leaky_relu.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_leaky_relu::compute_shape(const std::vector<shape>& inputs) const
@@ -18,7 +18,8 @@ argument miopen_leaky_relu::compute(context& ctx,
                                    const shape& output_shape,
                                    const std::vector<argument>& args) const
{
    float alpha = 1;
    float beta  = 0;
    auto x_desc = make_tensor(args[0].get_shape());
    auto y_desc = make_tensor(output_shape);
    miopenActivationForward(ctx.get_stream().get_miopen(),
@@ -34,5 +35,5 @@ argument miopen_leaky_relu::compute(context& ctx,
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <rocblas.h>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/contiguous.hpp>
#include <migraphx/gpu/relu.hpp>
#include <migraphx/gpu/sigmoid.hpp>
#include <migraphx/gpu/abs.hpp>
#include <migraphx/gpu/leaky_relu.hpp>
#include <migraphx/gpu/elu.hpp>
#include <migraphx/gpu/softmax.hpp>
#include <migraphx/gpu/add.hpp>
#include <migraphx/gpu/sub.hpp>
#include <migraphx/gpu/exp.hpp>
#include <migraphx/gpu/log.hpp>
#include <migraphx/gpu/sin.hpp>
#include <migraphx/gpu/cos.hpp>
#include <migraphx/gpu/tan.hpp>
#include <migraphx/gpu/sinh.hpp>
#include <migraphx/gpu/cosh.hpp>
#include <migraphx/gpu/tanh.hpp>
#include <migraphx/gpu/asin.hpp>
#include <migraphx/gpu/acos.hpp>
#include <migraphx/gpu/atan.hpp>
#include <migraphx/gpu/mul.hpp>
#include <migraphx/gpu/max.hpp>
#include <migraphx/gpu/min.hpp>
#include <migraphx/gpu/batchnorm.hpp>
#include <migraphx/gpu/pooling.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/concat.hpp>
#include <migraphx/gpu/pad.hpp>
#include <migraphx/gpu/gather.hpp>
#include <migraphx/gpu/lrn.hpp>
#include <utility>
#include <functional>
#include <algorithm>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct miopen_apply
{
    program* prog = nullptr;
    context ctx{};
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
    instruction_ref last{};

    void check_shape(shape x, instruction_ref i)
    {
@@ -42,61 +66,62 @@ struct miopen_apply
        (void)i;
    }
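    // init() registers one handler per operator name in apply_map, and
    // apply() dispatches through a single lookup instead of the previous
    // hard-coded else-if chain. `last` caches the output alias of the final
    // instruction so insert_allocation can tell when to emit the "output"
    // parameter instead of a scratch allocation.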
    void init()
    {
        this->last = instruction::get_output_alias(std::prev(prog->end()));

        add_miopen_simple_op<miopen_relu>("relu", make_relu);
        add_miopen_simple_op<miopen_sigmoid>("sigmoid", make_sigmoid);
        add_miopen_simple_op<miopen_abs>("abs", make_abs);
        add_miopen_simple_op<miopen_tanh>("tanh", make_tanh);

        add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu);
        add_miopen_extend_op<miopen_elu, op::elu>("elu", make_elu);

        add_generic_op<hip_add>("add");
        add_generic_op<hip_sub>("sub");
        add_generic_op<hip_exp>("exp");
        add_generic_op<hip_log>("log");
        add_generic_op<hip_sin>("sin");
        add_generic_op<hip_cos>("cos");
        add_generic_op<hip_tan>("tan");
        add_generic_op<hip_sinh>("sinh");
        add_generic_op<hip_cosh>("cosh");
        add_generic_op<hip_asin>("asin");
        add_generic_op<hip_acos>("acos");
        add_generic_op<hip_atan>("atan");
        add_generic_op<hip_mul>("mul");
        add_generic_op<hip_max>("max");
        add_generic_op<hip_min>("min");

        add_extend_op<miopen_gemm, op::dot>("dot");
        add_extend_op<miopen_contiguous, op::contiguous>("contiguous");
        add_extend_op<hip_concat, op::concat>("concat");
        add_extend_op<miopen_softmax, op::softmax>("softmax");
        add_extend_op<hip_gather, op::gather>("gather");
        add_extend_op<hip_pad, op::pad>("pad");

        add_lrn_op();
        add_convolution_op();
        add_pooling_op();
        add_batch_norm_inference_op();
    }
    void apply()
    {
        init();
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
            auto s = it->get_shape();
            if(apply_map.count(it->name()) > 0)
            {
                check_shape(s, apply_map.at(it->name())(it));
            }
        }
    }
    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
    {
        if(ins == last and tag.empty())
        {
            return prog->add_parameter("output", s);
        }
@@ -107,118 +132,118 @@ struct miopen_apply
        }
    }
    void add_convolution_op()
    {
        apply_map.emplace("convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::convolution>(ins->get_operator());

            auto conv = miopen_convolution{op, make_conv(op)};
            auto ws   = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));

            auto workspace = insert_allocation(ins, ws, "workspace");
            auto output    = insert_allocation(ins, ins->get_shape());

            return prog->replace_instruction(
                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
        });
    }

    void add_pooling_op()
    {
        apply_map.emplace("pooling", [=](instruction_ref ins) {
            auto&& op   = any_cast<op::pooling>(ins->get_operator());
            auto pd     = make_pooling(op);
            auto output = insert_allocation(ins, ins->get_shape());

            return prog->replace_instruction(
                ins, miopen_pooling{op, std::move(pd)}, ins->inputs().at(0), output);
        });
    }

    void add_lrn_op()
    {
        apply_map.emplace("lrn", [=](instruction_ref ins) {
            auto&& op   = any_cast<op::lrn>(ins->get_operator());
            auto ldesc  = make_lrn(op);
            auto output = insert_allocation(ins, ins->get_shape());
            return prog->replace_instruction(
                ins, miopen_lrn{std::move(ldesc)}, ins->inputs().at(0), output);
        });
    }

    template <class T>
    void add_generic_op(std::string name)
    {
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto output = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
            return prog->replace_instruction(ins, T{}, refs);
        });
    }

    template <class T, class Op>
    void add_extend_op(std::string name)
    {
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto&& op   = any_cast<Op>(ins->get_operator());
            auto output = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
            return prog->replace_instruction(ins, T{op}, refs);
        });
    }

    template <class T, class Op, class F>
    void add_miopen_extend_op(std::string name, F f)
    {
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto&& op = any_cast<Op>(ins->get_operator());
            auto ad   = f(op.alpha);

            auto output = insert_allocation(ins, ins->get_shape());
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
        });
    }

    template <class T, class F>
    void add_miopen_simple_op(std::string name, F f)
    {
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto ad     = f();
            auto output = insert_allocation(ins, ins->get_shape());
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
        });
    }

    void add_batch_norm_inference_op()
    {
        apply_map.emplace("batch_norm_inference", [=](instruction_ref ins) {
            auto&& op       = any_cast<op::batch_norm_inference>(ins->get_operator());
            auto output     = insert_allocation(ins, ins->get_shape());
            shape old_shape = ins->inputs().at(1)->get_shape();
            std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
            auto reshape_op = op::reshape{new_shape};
            std::vector<instruction_ref> reshapes;
            std::transform(ins->inputs().begin() + 1,
                           ins->inputs().end(),
                           std::back_inserter(reshapes),
                           [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
            return prog->replace_instruction(ins,
                                             miopen_batch_norm_inference{op},
                                             ins->inputs().at(0),
                                             reshapes[0],
                                             reshapes[1],
                                             reshapes[2],
                                             reshapes[3],
                                             output);
        });
    }
};

void lowering::apply(program& p) const { miopen_apply{&p, ctx}.apply(); }

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
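
The dispatch refactor above, reduced to a standalone sketch (toy types, not the MIGraphX API): init() registers one handler per operator name, and apply() replaces the old else-if chain with a single map lookup.

#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

int main()
{
    // name -> lowering handler, as apply_map maps names to
    // instruction_ref -> instruction_ref lambdas above
    std::unordered_map<std::string, std::function<void()>> apply_map;

    // init(): one registration per operator name
    apply_map.emplace("add", [] { std::cout << "lower add\n"; });
    apply_map.emplace("convolution", [] { std::cout << "lower convolution\n"; });

    // apply(): instructions with a registered handler are rewritten,
    // everything else is left untouched
    for(const std::string name : {"add", "convolution", "@literal"})
    {
        if(apply_map.count(name) > 0)
            apply_map.at(name)();
    }
}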
#include <migraphx/gpu/lrn.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_lrn::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(2).not_broadcasted();
    return inputs.at(1);
}

argument miopen_lrn::compute(context& ctx,
                             const shape& output_shape,
                             const std::vector<argument>& args) const
{
    float alpha = 1;
    float beta  = 0;
    auto x_desc = make_tensor(args[0].get_shape());
    auto y_desc = make_tensor(output_shape);
    miopenLRNForward(ctx.get_stream().get_miopen(),
                     ldesc.get(),
                     &alpha,
                     x_desc.get(),
                     args[0].implicit(),
                     &beta,
                     y_desc.get(),
                     args[1].implicit(),
                     false,
                     nullptr);
    return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/mul.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape hip_mul::compute_shape(const std::vector<shape>& inputs) const
{
    // check_shapes{inputs, *this}.has(3).standard();
    check_shapes{inputs, *this}.has(3);
    return inputs.at(0);
}

argument hip_mul::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    device::mul(ctx.get_stream().get(), args[2], args[0], args[1]);
    return args[2];
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/pad.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/device/pad.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
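// hip_pad receives its inputs plus a preallocated output buffer as the last
// argument: compute_shape drops that trailing shape before delegating to the
// operator, and compute hands args.back() to device::pad as the destination.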
shape hip_pad::compute_shape(std::vector<shape> inputs) const
{
    inputs.pop_back();
    return op.compute_shape(inputs);
}

argument hip_pad::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    return device::pad(ctx.get_stream().get(), args.back(), args.front(), op.value, op.pads);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/pooling.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_pooling::compute_shape(const std::vector<shape>& inputs) const
@@ -20,7 +20,8 @@ argument miopen_pooling::compute(context& ctx,
    auto x_desc = make_tensor(args[0].get_shape());
    auto y_desc = make_tensor(output_shape);

    float alpha = 1;
    float beta  = 0;

    miopenPoolingForward(ctx.get_stream().get_miopen(),
                         pd.get(),
@@ -38,5 +39,5 @@ argument miopen_pooling::compute(context& ctx,
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/relu.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_relu::compute_shape(const std::vector<shape>& inputs) const
@@ -18,7 +18,8 @@ argument miopen_relu::compute(context& ctx,
                              const shape& output_shape,
                              const std::vector<argument>& args) const
{
    float alpha = 1;
    float beta  = 0;
    auto x_desc = make_tensor(args[0].get_shape());
    auto y_desc = make_tensor(output_shape);
    miopenActivationForward(ctx.get_stream().get_miopen(),
@@ -34,5 +35,5 @@ argument miopen_relu::compute(context& ctx,
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/rocblas.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

rocblas_handle_ptr create_rocblas_handle_ptr()
@@ -19,5 +19,5 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/sigmoid.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_sigmoid::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(2).not_broadcasted();
    return inputs.at(1);
}

argument miopen_sigmoid::compute(context& ctx,
                                 const shape& output_shape,
                                 const std::vector<argument>& args) const
{
    float alpha = 1;
    float beta  = 0;
    auto x_desc = make_tensor(args[0].get_shape());
    auto y_desc = make_tensor(output_shape);
    miopenActivationForward(ctx.get_stream().get_miopen(),
                            ad.get(),
                            &alpha,
                            x_desc.get(),
                            args[0].implicit(),
                            &beta,
                            y_desc.get(),
                            args[1].implicit());
    return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/softmax.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_softmax::compute_shape(const std::vector<shape>& inputs) const
@@ -18,7 +18,8 @@ argument miopen_softmax::compute(context& ctx,
                                 const shape& output_shape,
                                 const std::vector<argument>& args) const
{
    float alpha = 1;
    float beta  = 0;
    auto x_desc = make_tensor(args[0].get_shape());
    auto y_desc = make_tensor(output_shape);
    miopenSoftmaxForward(ctx.get_stream().get_miopen(),
@@ -33,5 +34,5 @@ argument miopen_softmax::compute(context& ctx,
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/tanh.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_tanh::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(2).not_broadcasted();
    return inputs.at(1);
}

argument miopen_tanh::compute(context& ctx,
                              const shape& output_shape,
                              const std::vector<argument>& args) const
{
    float alpha = 1;
    float beta  = 0;
    auto x_desc = make_tensor(args[0].get_shape());
    auto y_desc = make_tensor(output_shape);
    miopenActivationForward(ctx.get_stream().get_miopen(),
                            ad.get(),
                            &alpha,
                            x_desc.get(),
                            args[0].implicit(),
                            &beta,
                            y_desc.get(),
                            args[1].implicit());
    return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/memory_coloring.hpp>
#include <migraphx/gpu/write_literals.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/eliminate_workspace.hpp>
#include <migraphx/eliminate_allocation.hpp>
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/check_context.hpp>
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/constant_propagate.hpp>
#include <migraphx/eliminate_contiguous.hpp>
#include <migraphx/common_subexpression_elimination.hpp>
#include <migraphx/fwd_conv_batchnorm_rewrite.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/eliminate_concat.hpp>
#include <migraphx/gpu/concat_gpu_opt.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

std::vector<pass> target::get_passes(migraphx::context& gctx) const
{
    auto& ctx = any_cast<context>(gctx);
    // clang-format off
@@ -31,14 +32,16 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const
        dead_code_elimination{},
        fwd_conv_batchnorm_rewrite{},
        dead_code_elimination{},
        rewrite_rnn{},
        dead_code_elimination{},
        //common_subexpression_elimination{},
        //dead_code_elimination{},
        simplify_algebra{},
        dead_code_elimination{},
        constant_propagate{},
        dead_code_elimination{},
        auto_contiguous{},
        //simplify_reshapes{},
        dead_code_elimination{},
        lowering{ctx},
        eliminate_concat{concat_gpu_optimization{}},
@@ -59,7 +62,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const

std::string target::name() const { return "miopen"; }

migraphx::context target::get_context() const { return context{}; }

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
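
For orientation, a reduced sketch of what get_passes amounts to (toy types; the real pass interface is not shown in this diff): the target returns an ordered list of program rewrites, and the compiler runs them in sequence, which is why dead_code_elimination is interleaved after each rewriting pass above.

#include <functional>
#include <string>
#include <vector>

struct toy_program
{
    std::vector<std::string> instructions; // stand-in for the instruction stream
};

using toy_pass = std::function<void(toy_program&)>;

// Apply each pass over the program in the order the target returned them.
inline void run_passes(toy_program& p, const std::vector<toy_pass>& passes)
{
    for(const auto& pass : passes)
        pass(p);
}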
#include <migraphx/gpu/write_literals.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/env.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_COPY_LITERALS)

struct hip_load_literal
{
@@ -33,7 +33,7 @@ void write_literals::apply(program& p) const
    {
        if(ins->name() == "@literal")
        {
            if(enabled(MIGRAPHX_COPY_LITERALS{}))
            {
                literal l = ins->get_literal();
                auto pre  = p.add_literal(l);
@@ -52,5 +52,5 @@ void write_literals::apply(program& p) const
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -10,12 +10,12 @@ set(CTEST_PARALLEL_LEVEL ${N} CACHE STRING "CTest parallel level")
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -j ${CTEST_PARALLEL_LEVEL} -C ${CMAKE_CFG_INTDIR} --timeout 1500)
add_custom_target(tests)

find_program(MIGRAPHX_GDB gdb)

if(MIGRAPHX_GDB)
    set(MIGRAPHX_TEST_GDB On CACHE BOOL "")
else()
    set(MIGRAPHX_TEST_GDB Off CACHE BOOL "")
endif()

set(SKIP_TESTS)
@@ -34,8 +34,8 @@ function(add_test_command NAME EXE)
        %1 ${ARGN}")
        add_test(NAME ${NAME} COMMAND ${WINE_CMD} cmd /c "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd" $<TARGET_FILE:${EXE}>)
    else()
        if(MIGRAPHX_TEST_GDB)
            # add_test(NAME ${NAME} COMMAND ${MIGRAPHX_GDB}
            #     --batch
            #     --return-child-result
            #     -ex "set disable-randomization off"
@@ -54,7 +54,7 @@ function(add_test_command NAME EXE)
            if(EXISTS ${TEST_DIR}/core)
                set(\$ENV{UBSAN_OPTIONS} print_stacktrace=1)
                set(\$ENV{ASAN_OPTIONS} print_stacktrace=1)
                execute_process(COMMAND ${MIGRAPHX_GDB} $<TARGET_FILE:${EXE}> ${TEST_DIR}/core -batch -ex bt)
            endif()
            message(FATAL_ERROR \"Test failed\")
        endif()
@@ -83,7 +83,7 @@ function(add_test_executable TEST_NAME)
    add_dependencies(tests ${TEST_NAME})
    add_dependencies(check ${TEST_NAME})
    set_tests_properties(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED")
    target_link_libraries(${TEST_NAME} migraphx migraphx_cpu migraphx_onnx)
    target_include_directories(${TEST_NAME} PUBLIC include)
endfunction(add_test_executable)
@@ -95,7 +95,7 @@ foreach(TEST ${TESTS})
    rocm_clang_tidy_check(test_${BASE_NAME})
endforeach()

if(MIGRAPHX_ENABLE_GPU)
    # gpu tests
    file(GLOB GPU_TESTS gpu/*.cpp)
@@ -107,19 +107,24 @@ if(MIGRAPHX_ENABLE_GPU)
        COST 10
        RESOURCE_LOCK gpu
    )
    target_link_libraries(test_gpu_${BASE_NAME} migraphx_gpu)
    endforeach()
endif()

# Onnx test
set(TEST_ONNX_DIR ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
add_executable(test_onnx ${TEST_ONNX_DIR}/onnx_test.cpp)
rocm_clang_tidy_check(test_onnx)
target_link_libraries(test_onnx migraphx_onnx)
target_include_directories(test_onnx PUBLIC include)
add_test(NAME test_onnx COMMAND $<TARGET_FILE:test_onnx> WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
add_dependencies(tests test_onnx)
add_dependencies(check test_onnx)

if(MIGRAPHX_ENABLE_PYTHON)
    add_subdirectory(py)
endif()

function(test_header NAME HEADER)
    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/header-main-include-${NAME}.cpp
@@ -142,14 +147,14 @@ function(test_headers PREFIX)
        string(MAKE_C_IDENTIFIER ${HEADER_REL} TEST_NAME)
        get_filename_component(BASE_NAME ${HEADER} NAME_WE)
        test_header(header_${TEST_NAME} ${PREFIX}/${BASE_NAME}.hpp)
        if(MIGRAPHX_ENABLE_GPU)
            target_link_libraries(header_${TEST_NAME} migraphx_gpu)
        endif()
    endforeach()
endfunction()

test_headers(migraphx ${CMAKE_SOURCE_DIR}/src/include/migraphx/*.hpp)
test_headers(migraphx/cpu ${CMAKE_SOURCE_DIR}/src/targets/cpu/include/migraphx/cpu/*.hpp)
if(MIGRAPHX_ENABLE_GPU)
    test_headers(migraphx/gpu ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp)
endif()