Commit d2549384 authored by Khalique

manual merge

parents 67048d04 ab6cd9d3
@@ -10,20 +10,34 @@ if(NOT TARGET MIOpen)
message(SEND_ERROR "Cant find miopen")
endif()
add_library(migraphx_device
device/add.cpp
device/max.cpp
device/min.cpp
device/exp.cpp
device/log.cpp
device/sin.cpp
device/cos.cpp
device/tan.cpp
device/sinh.cpp
device/cosh.cpp
device/asin.cpp
device/acos.cpp
device/atan.cpp
device/add_relu.cpp
device/contiguous.cpp
device/mul.cpp
device/concat.cpp
device/pad.cpp
device/gather.cpp
)
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_clang_tidy_check(migraphx_device)
target_link_libraries(migraphx_device migraphx hip::device -Wno-invalid-command-line-argument -amdgpu-target=gfx803 -amdgpu-target=gfx900 -amdgpu-target=gfx906)
target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
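# Note (editorial assumption, not part of the commit): the extra flags on migraphx_device
# appear to pass GPU ISA targets (gfx803/gfx900/gfx906) straight through to the HIP device
# compiler, with -Wno-invalid-command-line-argument presumably silencing host-side warnings
# about those device-only options.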
add_library(migraphx_gpu
eliminate_workspace.cpp
fuse_ops.cpp
hip.cpp
@@ -37,20 +51,24 @@ add_library(migraph_gpu
concat.cpp
relu.cpp
leaky_relu.cpp
tanh.cpp
mul.cpp
batchnorm.cpp
write_literals.cpp
rocblas.cpp
sigmoid.cpp
abs.cpp
elu.cpp
pad.cpp
gather.cpp
lrn.cpp
)
set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
rocm_clang_tidy_check(migraphx_gpu)
target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
target_link_libraries(migraphx_gpu PRIVATE migraphx_device)
rocm_install_targets(
TARGETS migraphx_gpu migraphx_device
INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/include
)
#include <migraphx/gpu/abs.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_abs::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_abs::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
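// Note: 'ad' is the MIOpen activation descriptor owned by miopen_abs (its setup
// is outside this hunk); the forward call writes the element-wise result into
// the preallocated output buffer passed in as args[1].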
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/add.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/config.hpp>
#include <migraph/gpu/miopen.hpp>
#include <utility>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace gpu {
shape hip_add::compute_shape(const std::vector<shape>& inputs) const
{
// check_shapes{inputs, *this}.has(3).standard();
check_shapes{inputs, *this}.has(3);
return inputs.at(0);
}
argument hip_add::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
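// args[0] and args[1] are the inputs; args[2] is the preallocated output buffer,
// which device::add fills on the given stream and which is then returned.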
device::add(ctx.get_stream().get(), args[2], args[0], args[1]);
return args[2];
}
shape miopen_add::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(3).not_broadcasted();
return inputs.at(0);
}
argument miopen_add::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
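// miopenOpTensor computes C = op(alpha1*A, alpha2*B) + beta*C, so with both
// alphas at 1 and beta at 0 this is a plain element-wise add into args[2].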
float alpha = 1, beta = 0;
auto a_desc = make_tensor(args[0].get_shape());
auto b_desc = make_tensor(args[1].get_shape());
auto c_desc = make_tensor(output_shape);
miopenOpTensor(ctx.get_stream().get_miopen(),
miopenTensorOpAdd,
&alpha,
a_desc.get(),
args[0].implicit(),
&alpha,
b_desc.get(),
args[1].implicit(),
&beta,
c_desc.get(),
args[2].implicit());
return args[2];
}
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
#include <migraphx/gpu/batchnorm.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_batch_norm_inference::compute_shape(const std::vector<shape>& inputs) const
@@ -22,7 +22,8 @@ argument miopen_batch_norm_inference::compute(context& ctx,
auto y_desc = make_tensor(output_shape);
auto bn_desc = make_tensor(args[3].get_shape());
float alpha = 1.0;
float beta = 0.0f;
miopenBatchNormalizationForwardInference(ctx.get_stream().get_miopen(),
miopenBatchNormMode_t(op.bn_mode),
@@ -43,5 +44,5 @@ argument miopen_batch_norm_inference::compute(context& ctx,
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/concat.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/device/concat.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_concat::compute_shape(std::vector<shape> inputs) const
@@ -24,5 +24,5 @@ argument hip_concat::compute(context& ctx,
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/contiguous.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_contiguous::compute_shape(const std::vector<shape>& inputs) const
@@ -25,5 +25,5 @@ argument miopen_contiguous::compute(context& ctx,
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_convolution::compute_shape(const std::vector<shape>& inputs) const
@@ -21,7 +21,8 @@ argument miopen_convolution::compute(context& ctx,
auto w_desc = make_tensor(args[1].get_shape());
auto y_desc = make_tensor(output_shape);
float alpha = 1;
float beta = 0;
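// The convolution algorithm selected during compile()/finalize() is reused here;
// alpha = 1 and beta = 0 mean the output buffer is overwritten rather than
// accumulated into.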
miopenConvolutionForward(ctx.get_stream().get_miopen(),
&alpha,
x_desc.get(),
@@ -40,11 +41,11 @@ argument miopen_convolution::compute(context& ctx,
shape miopen_convolution::compile(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(inputs[0]);
auto w_desc = make_tensor(inputs[1]);
auto y_desc = make_tensor(output_shape);
std::size_t workspace_size = 0;
@@ -56,31 +57,44 @@ shape miopen_convolution::compile(context& ctx,
&workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]));
auto w = to_gpu(generate_argument(inputs[1]));
auto y = allocate_gpu(output_shape);
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
miopenConvAlgoPerf_t perf;
auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("Find convolution failed");
handle = ctx.get_stream().get_miopen();
algo = perf.fwd_algo;
return shape{shape::int8_type, {perf.memory}};
}
void miopen_convolution::finalize(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
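// compile() records the MIOpen handle it tuned against; if the current context
// still uses that handle, the previously found algorithm remains valid and the
// search can be skipped.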
if(handle == ctx.get_stream().get_miopen())
return;
// TODO: Check that workspace hasn't changed
compile(ctx, output_shape, std::move(inputs));
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void acos(hipStream_t stream, const argument& result, const argument& arg)
{
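// nary() builds the element-wise kernel launch; to_hip_type maps the tensor's
// element type to a type for which a device ::acos overload exists.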
nary(stream, result, arg)([](auto x) { return ::acos(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -22,5 +22,5 @@ void add(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -27,5 +27,5 @@ void add_relu(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/asin.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void asin(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::asin(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/atan.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void atan(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::atan(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/concat.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument concat(hipStream_t stream,
const migraphx::shape& output_shape,
std::vector<migraphx::argument> args,
std::vector<std::size_t> offsets)
{
for(std::size_t l = 0; l < args.size() - 1; l++)
@@ -34,5 +34,5 @@ argument concat(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -14,5 +14,5 @@ void contiguous(hipStream_t stream, argument result, argument arg)
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/cos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void cos(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::cos(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/cosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void cosh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::cosh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/exp.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void exp(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::exp(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/gather.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument gather(hipStream_t stream,
const migraphx::shape& output_shape,
std::vector<migraphx::argument> args,
int axis)
{
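// A negative axis counts from the last dimension (ONNX convention), so it is
// shifted into the valid [0, ndim) range before indexing.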
int axis_index = (axis < 0) ? (axis + output_shape.lens().size()) : axis;
visit_all(args.back(), args[0])([&](auto output, auto input) {
std::size_t nelements = output_shape.elements();
args[1].visit([&](auto indices) {
visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
const auto* indices_ptr = device_cast(indices.data());
auto* outptr = device_cast(output.data());
const auto* inptr = device_cast(input.data());
hip_tensor_descriptor<ndim> desc_input(input.get_shape());
hip_tensor_descriptor<ndim> desc_output(output.get_shape());
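// One thread per output element: convert the linear index to output
// coordinates, swap the coordinate on the gather axis for the looked-up
// index value, then read the corresponding input element.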
gs_launch(stream, nelements)([=](auto i) {
auto lens = desc_output.multi(i);
lens[axis_index] = indices_ptr[lens[axis_index]];
outptr[i] = inptr[desc_input.linear(lens)];
});
});
});
});
return args.back();
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#include <hip/hip_runtime.h>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -53,14 +53,14 @@ inline auto gs_launch(hipStream_t stream, std::size_t n, std::size_t local = 1024)
// Workaround hcc's broken tile_static macro
#ifdef tile_static
#undef tile_static
#define MIGRAPHX_DEVICE_SHARED __attribute__((tile_static))
#else
#define MIGRAPHX_DEVICE_SHARED __shared__
#endif
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -87,7 +87,7 @@ void trinary_broadcast_vec_impl(hipStream_t stream,
const std::size_t bdim_vec_len = bdim_len / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
{
@@ -144,7 +144,7 @@ void trinary_broadcast_impl(hipStream_t stream,
const std::size_t n = output.size();
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED type buffer[2048];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_len; i += nlocal)
{
@@ -192,7 +192,7 @@ void binary_broadcast_vec_impl(
const std::size_t bdim_vec_len = bdim_len / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
{
@@ -243,7 +243,7 @@ void binary_broadcast_impl(
const std::size_t n = output.size();
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED type buffer[2048];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_len; i += nlocal)
{
@@ -313,6 +313,12 @@ void nary_impl(hipStream_t stream, F f, argument result, Arguments... args)
nary_nonstandard_impl(stream, f, result, args...);
}
template <class F>
void nary_impl(hipStream_t stream, F f, argument result)
{
nary_standard_impl(stream, f, result);
}
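// With no input tensors there is nothing that could be non-standard or
// broadcast, so the nullary overload always takes the standard path.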
template <class... Arguments>
auto nary_nonstandard(hipStream_t stream, argument result, Arguments... args)
{
@@ -396,7 +402,7 @@ inline auto nary(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif