Merge branch 'develop' into mem_color_ordering_fix

a5c1c7f6 · Paul Fultz II · GitHub · 462a4920 · d516b099 · a5c1c7f6
Unverified Commit a5c1c7f6 authored Feb 10, 2019 by Paul Fultz II Committed by GitHub Feb 10, 2019
20 changed files
--- a/src/targets/gpu/fuse_ops.cpp
+++ b/src/targets/gpu/fuse_ops.cpp
@@ -6,7 +6,7 @@
 #include <migraphx/instruction.hpp>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 struct fusion
@@ -38,7 +38,7 @@ struct fusion
        op_t result;
        auto status = miopenFusionPlanGetOp(fp.get(), i, &result);
        if(status != miopenStatusSuccess)
-            MIGRAPH_THROW("Failed retrieving operator at " + std::to_string(i));
+            MIGRAPHX_THROW("Failed retrieving operator at " + std::to_string(i));
        return result;
    }

@@ -51,7 +51,7 @@ struct fusion
        auto t      = keep_alive(make_tensor(b));
        auto status = miopenCreateOpBiasForward(fp.get(), &result, t.get());
        if(status != miopenStatusSuccess)
-            MIGRAPH_THROW("Creating operator failed");
+            MIGRAPHX_THROW("Creating operator failed");
        return result;
    }

@@ -60,7 +60,7 @@ struct fusion
        op_t result;
        auto status = miopenCreateOpActivationForward(fp.get(), &result, miopenActivationRELU);
        if(status != miopenStatusSuccess)
-            MIGRAPH_THROW("Creating operator failed");
+            MIGRAPHX_THROW("Creating operator failed");
        return result;
    }

@@ -71,7 +71,7 @@ struct fusion
        auto t      = keep_alive(make_tensor(weights));
        auto status = miopenCreateOpConvForward(fp.get(), &result, cd.get(), t.get());
        if(status != miopenStatusSuccess)
-            MIGRAPH_THROW("Creating operator failed");
+            MIGRAPHX_THROW("Creating operator failed");
        return result;
    }

@@ -91,7 +91,7 @@ struct fusion
    {
        auto status = miopenCompileFusionPlan(ctx.get_stream().get_miopen(), fp.get());
        if(status != miopenStatusSuccess)
-            MIGRAPH_THROW("Compiling fusion plan failed");
+            MIGRAPHX_THROW("Compiling fusion plan failed");
    }

    argument execute(context& ctx,
@@ -109,12 +109,12 @@ struct fusion
                                              y.implicit(),
                                              fargs.get());
        if(status != miopenStatusSuccess)
-            MIGRAPH_THROW("Failed to execute fusion plan");
+            MIGRAPHX_THROW("Failed to execute fusion plan");
        return y;
    }
 };

-MIGRAPH_PRED_MATCHER(bias_shape, instruction_ref ins)
+MIGRAPHX_PRED_MATCHER(bias_shape, instruction_ref ins)
 {
    auto&& s = ins->get_shape();
    return s.broadcasted() and s.strides().size() == 4 and s.strides()[0] == 0 and
@@ -128,7 +128,7 @@ std::array<T, sizeof...(Ts) + 1> make_array(T x, Ts... xs)
    return {std::move(x), std::move(static_cast<T>(xs))...};
 }

-MIGRAPH_PRED_MATCHER(fusable_conv, instruction_ref ins)
+MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
 {
    if(ins->name() != "gpu::convolution")
        return false;
@@ -137,6 +137,8 @@ MIGRAPH_PRED_MATCHER(fusable_conv, instruction_ref ins)
    auto wei = ins->inputs().at(1)->get_shape();
    assert(wei.lens().size() == 4);
    auto conv = any_cast<miopen_convolution>(ins->get_operator());
+    if(conv.op.group > 1)
+        return false;
    if(wei.lens()[1] > 512 and conv.algo != miopenConvolutionFwdAlgoWinograd)
        return false;
    auto op = conv.op;
@@ -265,17 +267,15 @@ struct miopen_conv_bias
    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
    {
        auto fargs  = make_fused_args();
-        float alpha = 1, beta = 0;
+        float alpha = 1;
+        float beta  = 0;
        miopenSetOpArgsConvForward(fargs.get(), conv, &alpha, &beta, args[1].implicit());
        miopenSetOpArgsBiasForward(fargs.get(), bias, &alpha, &beta, args[3].implicit());
        return f.execute(ctx, fargs, args[0], args[4]);
    }

-    shape compile(context& ctx)
-    {
-        f.compile(ctx);
-        return f.get_workspace(ctx);
-    }
+    void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
+    shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
 };

@@ -308,18 +308,15 @@ struct miopen_conv_bias_relu
    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
    {
        auto fargs  = make_fused_args();
-        float alpha = 1, beta = 0;
+        float alpha = 1;
+        float beta  = 0;
        miopenSetOpArgsConvForward(fargs.get(), conv, &alpha, &beta, args[1].implicit());
        miopenSetOpArgsBiasForward(fargs.get(), bias, &alpha, &beta, args[3].implicit());
        miopenSetOpArgsActivForward(fargs.get(), relu, &alpha, &beta, 0, 0, 0);
        return f.execute(ctx, fargs, args[0], args[4]);
    }
-
-    shape compile(context& ctx)
-    {
-        f.compile(ctx);
-        return f.get_workspace(ctx);
-    }
+    void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
+    shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
 };

@@ -346,8 +343,8 @@ void apply_conv_bias(context& ctx, program& p, match::matcher_result r)

    Op cb{conv_op, input_ins->get_shape(), weights_ins->get_shape(), bias_ins->get_shape()};
    // TODO: Insert ws allocation
-    auto ws = cb.compile(ctx);
-
+    auto ws = cb.get_workspace(ctx);
+    (void)ws;
    p.replace_instruction(ins, cb, input_ins, weights_ins, old_ws_ins, bias_ins, alloc_ins);
 }

@@ -389,5 +386,5 @@ void fuse_ops::apply(program& p) const
 }

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
--- a/src/targets/gpu/gather.cpp
+++ b/src/targets/gpu/gather.cpp
+#include <migraphx/gpu/gather.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/device/concat.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+shape hip_gather::compute_shape(std::vector<shape> inputs) const // inputs taken by value so the local copy can be trimmed
+{
+    inputs.pop_back(); // drop the last input shape; presumably the output allocation — TODO confirm against the lowering
+    return op.compute_shape(inputs); // result shape comes from the member op, given the remaining inputs
+}
+
+argument hip_gather::compute(context& ctx, // ctx supplies the stream handed to the device routine
+                             const shape& output_shape,
+                             const std::vector<argument>& args) const
+{ // NOTE(review): this file includes gpu/device/concat.hpp but no gpu/device/gather.hpp — confirm device::gather is declared via gpu/gather.hpp
+    return device::gather(ctx.get_stream().get(), output_shape, args, op.axis); // forwards stream, shape, all args and op.axis unchanged
+}
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/gemm.cpp
+++ b/src/targets/gpu/gemm.cpp
@@ -5,7 +5,7 @@
 #include <utility>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 template <class... Ts>
@@ -29,7 +29,7 @@ void generic_rocblas_gemm(shape::as<half>, Ts&&... xs)
 template <class T, class... Ts>
 void generic_rocblas_gemm(shape::as<T>, Ts&&...)
 {
-    MIGRAPH_THROW("Type unsupported by rocblas");
+    MIGRAPHX_THROW("Type unsupported by rocblas");
 }

 template <class T>
@@ -107,9 +107,10 @@ argument miopen_gemm::compute(context& ctx,
                             ldc);

    });
+
    return args[2];
 }

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
--- a/src/targets/gpu/hip.cpp
+++ b/src/targets/gpu/hip.cpp
@@ -7,32 +7,33 @@
 #include <vector>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

-using hip_ptr = MIGRAPH_MANAGE_PTR(void, hipFree);
+using hip_ptr = MIGRAPHX_MANAGE_PTR(void, hipFree);

 std::string hip_error(int error) { return hipGetErrorString(static_cast<hipError_t>(error)); }

 std::size_t get_available_gpu_memory()
 {
-    size_t free, total;
+    size_t free;
+    size_t total;
    auto status = hipMemGetInfo(&free, &total);
    if(status != hipSuccess)
-        MIGRAPH_THROW("Failed getting available memory: " + hip_error(status));
+        MIGRAPHX_THROW("Failed getting available memory: " + hip_error(status));
    return free;
 }

 hip_ptr allocate_gpu(std::size_t sz, bool host = false)
 {
    if(sz > get_available_gpu_memory())
-        MIGRAPH_THROW("Memory not available to allocate buffer: " + std::to_string(sz));
+        MIGRAPHX_THROW("Memory not available to allocate buffer: " + std::to_string(sz));
    void* result;
    auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz);
    if(status != hipSuccess)
    {
        if(host)
-            MIGRAPH_THROW("Gpu allocation failed: " + hip_error(status));
+            MIGRAPHX_THROW("Gpu allocation failed: " + hip_error(status));
        else
            allocate_gpu(sz, true);
    }
@@ -45,7 +46,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
    std::vector<T> result(sz);
    auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
    if(status != hipSuccess)
-        MIGRAPH_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT
+        MIGRAPHX_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT
    return result;
 }

@@ -54,7 +55,7 @@ hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
    auto result = allocate_gpu(sz, host);
    auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
    if(status != hipSuccess)
-        MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status));
+        MIGRAPHX_THROW("Copy to gpu failed: " + hip_error(status));
    return result;
 }

@@ -72,13 +73,13 @@ argument allocate_gpu(const shape& s, bool host)
    return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
 }

-argument to_gpu(argument arg, bool host)
+argument to_gpu(const argument& arg, bool host)
 {
    auto p = share(write_to_gpu(arg.data(), arg.get_shape().bytes(), host));
    return {arg.get_shape(), [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
 }

-argument from_gpu(argument arg)
+argument from_gpu(const argument& arg)
 {
    argument result;
    arg.visit([&](auto x) {
@@ -93,22 +94,22 @@ void set_device(std::size_t id)
 {
    auto status = hipSetDevice(id);
    if(status != hipSuccess)
-        MIGRAPH_THROW("Error setting device");
+        MIGRAPHX_THROW("Error setting device");
 }

 void gpu_sync() { hipDeviceSynchronize(); }

-void copy_to_gpu(argument src, argument dst)
+void copy_to_gpu(const argument& src, const argument& dst)
 {
    std::size_t src_size = src.get_shape().bytes();
    std::size_t dst_size = dst.get_shape().bytes();
    if(src_size > dst_size)
-        MIGRAPH_THROW("Not enough memory available in destination to do copy");
+        MIGRAPHX_THROW("Not enough memory available in destination to do copy");
    auto status = hipMemcpy(dst.data(), src.data(), src_size, hipMemcpyHostToDevice);
    if(status != hipSuccess)
-        MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status));
+        MIGRAPHX_THROW("Copy to gpu failed: " + hip_error(status));
 }

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
--- a/src/targets/gpu/include/migraphx/gpu/abs.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/abs.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_ABS_HPP
+#define MIGRAPHX_GUARD_RTGLIB_ABS_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/add.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct miopen_abs // operator type that reports itself as "gpu::abs"
+{
+    shared<activation_descriptor> ad; // activation descriptor held by this op; how it is configured is not shown here
+    std::string name() const { return "gpu::abs"; }
+    shape compute_shape(const std::vector<shape>& inputs) const; // declared only; defined outside this header
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; // declared only; defined outside this header
+    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; } // index of the last entry in shapes
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/acos.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/acos.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
+#define MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/acos.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/config.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_acos : unary_device<hip_acos, device::acos> // adds no members: everything is inherited from unary_device
+{ // unary_device is instantiated with this type and the device::acos function
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/add.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/add.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_ADD_HPP
-#define MIGRAPH_GUARD_RTGLIB_ADD_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_ADD_HPP
+#define MIGRAPHX_GUARD_RTGLIB_ADD_HPP

 #include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
 #include <migraphx/manage_ptr.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/operators.hpp>
@@ -19,28 +20,15 @@
 #include <utility>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

-struct hip_add
+struct hip_add : binary_device<hip_add, device::add>
 {
-    std::string name() const { return "gpu::add"; }
-    shape compute_shape(const std::vector<shape>& inputs) const;
-    argument compute(context&, const shape&, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
-};
-
-struct miopen_add
-{
-    std::string name() const { return "gpu::add"; }
-    shape compute_shape(const std::vector<shape>& inputs) const;
-    argument
-    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
 };

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraphx/gpu/asin.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/asin.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
+#define MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/asin.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/config.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_asin : unary_device<hip_asin, device::asin> // adds no members: everything is inherited from unary_device
+{ // unary_device is instantiated with this type and the device::asin function
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/atan.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/atan.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
+#define MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/atan.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/config.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_atan : unary_device<hip_atan, device::atan> // adds no members: everything is inherited from unary_device
+{ // unary_device is instantiated with this type and the device::atan function
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_BATCHNORM_HPP
-#define MIGRAPH_GUARD_RTGLIB_BATCHNORM_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_BATCHNORM_HPP
+#define MIGRAPHX_GUARD_RTGLIB_BATCHNORM_HPP

 #include <migraphx/gpu/lowering.hpp>
 #include <migraphx/manage_ptr.hpp>
@@ -19,7 +19,7 @@
 #include <utility>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 struct miopen_batch_norm_inference
@@ -33,7 +33,7 @@ struct miopen_batch_norm_inference
 };

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraphx/gpu/concat.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/concat.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_CONCAT_HPP
-#define MIGRAPH_GUARD_RTGLIB_CONCAT_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_CONCAT_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CONCAT_HPP

 #include <migraphx/gpu/lowering.hpp>
 #include <migraphx/manage_ptr.hpp>
@@ -19,7 +19,7 @@
 #include <utility>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 struct hip_concat
@@ -34,7 +34,7 @@ struct hip_concat
 };

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP
-#define MIGRAPH_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP

 #include <migraphx/gpu/concat.hpp>


--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_CONTEXT_HPP
-#define MIGRAPH_GUARD_RTGLIB_CONTEXT_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP

 #include <migraphx/gpu/miopen.hpp>
 #include <migraphx/gpu/rocblas.hpp>
@@ -8,10 +8,10 @@
 #include <migraphx/config.hpp>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

-MIGRAPH_DECLARE_ENV_VAR(MIGRAPH_DISABLE_NULL_STREAM)
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_NULL_STREAM)

 struct hip_device
 {
@@ -21,7 +21,7 @@ struct hip_device

    struct stream
    {
-        using hip_stream_ptr = MIGRAPH_MANAGE_PTR(hipStream_t, hipStreamDestroy);
+        using hip_stream_ptr = MIGRAPHX_MANAGE_PTR(hipStream_t, hipStreamDestroy);

        stream() {}

@@ -34,13 +34,13 @@ struct hip_device
            hipStream_t result = nullptr;
            auto status        = hipStreamCreate(&result);
            if(status != hipSuccess)
-                MIGRAPH_THROW("Failed to allocate stream");
+                MIGRAPHX_THROW("Failed to allocate stream");
            return hip_stream_ptr{result};
        }

        hipStream_t get()
        {
-            if(enabled(MIGRAPH_DISABLE_NULL_STREAM{}))
+            if(enabled(MIGRAPHX_DISABLE_NULL_STREAM{}))
            {
                setup();
                if(s == nullptr)
@@ -53,7 +53,7 @@ struct hip_device

        auto create_miopen_handle()
        {
-            if(enabled(MIGRAPH_DISABLE_NULL_STREAM{}))
+            if(enabled(MIGRAPHX_DISABLE_NULL_STREAM{}))
                return make_obj<miopen_handle>(&miopenCreateWithStream, get());
            else
                return make_obj<miopen_handle>(&miopenCreate);
@@ -116,7 +116,7 @@ struct context
    std::shared_ptr<hip_device> current_device;
 };
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraphx/gpu/contiguous.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/contiguous.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_CONTIGUOUS_HPP
-#define MIGRAPH_GUARD_RTGLIB_CONTIGUOUS_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_CONTIGUOUS_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CONTIGUOUS_HPP

 #include <migraphx/gpu/lowering.hpp>
 #include <migraphx/manage_ptr.hpp>
@@ -19,7 +19,7 @@
 #include <utility>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 struct miopen_contiguous
@@ -32,7 +32,7 @@ struct miopen_contiguous
 };

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraphx/gpu/convolution.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/convolution.hpp
-#ifndef MIGRAPH_GUARD_RTGLIB_CONVOLUTION_HPP
-#define MIGRAPH_GUARD_RTGLIB_CONVOLUTION_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP

 #include <migraphx/gpu/lowering.hpp>
 #include <migraphx/manage_ptr.hpp>
@@ -19,7 +19,7 @@
 #include <utility>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 struct miopen_convolution
@@ -27,6 +27,7 @@ struct miopen_convolution
    op::convolution op;
    shared<convolution_descriptor> cd;
    miopenConvFwdAlgorithm_t algo{};
+    miopenHandle_t handle = nullptr;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
@@ -39,12 +40,13 @@ struct miopen_convolution
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    shape compile(context& ctx, const shape& output_shape, std::vector<instruction_ref> inputs);
+    shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
+    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
 };

 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraphx/gpu/cos.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/cos.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_COS_HPP
+#define MIGRAPHX_GUARD_RTGLIB_COS_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/cos.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/config.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_cos : unary_device<hip_cos, device::cos> // adds no members: everything is inherited from unary_device
+{ // unary_device is instantiated with this type and the device::cos function
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/cosh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/cosh.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_COSH_HPP
+#define MIGRAPHX_GUARD_RTGLIB_COSH_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/cosh.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/config.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct hip_cosh : unary_device<hip_cosh, device::cosh> // adds no members: everything is inherited from unary_device
+{ // unary_device is instantiated with this type and the device::cosh function
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/device/acos.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/acos.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOS_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOS_HPP
+
+#include <migraphx/argument.hpp>
+#include <migraphx/config.hpp>
+#include <hip/hip_runtime_api.h>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+
+void acos(hipStream_t stream, const argument& result, const argument& arg); // declaration only; presumably writes acos of arg into result on the given stream — confirm in the device implementation
+
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/device/add.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/add.hpp

-#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP
-#define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_HPP

 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

@@ -21,7 +21,7 @@ void add(hipStream_t stream,

 } // namespace device
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
--- a/src/targets/gpu/include/migraphx/gpu/device/add_relu.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/add_relu.hpp

-#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
-#define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP

 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
-inline namespace MIGRAPH_INLINE_NS {
+inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

@@ -24,7 +24,7 @@ void add_relu(hipStream_t stream,

 } // namespace device
 } // namespace gpu
-} // namespace MIGRAPH_INLINE_NS
+} // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif