Merge branch 'develop' into simplify_1_mul_div_ops

66483df6 · Chris Austen · GitHub · 9310bff0 · 40118191 · 9310bff0
Unverified Commit 66483df6 authored Sep 27, 2022 by Chris Austen Committed by GitHub Sep 27, 2022
20 changed files
--- a/src/targets/gpu/device/tanh.cpp
+++ b/src/targets/gpu/device/tanh.cpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include <migraphx/gpu/device/tanh.hpp>
-#include <migraphx/gpu/device/nary.hpp>
-#include <migraphx/gpu/device/types.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-namespace device {
-
-// Device entry point for tanh: reads `arg`, writes `result`, using `stream`.
-// The three arguments are forwarded to the `nary` helper, which is then
-// invoked with the device lambda below (presumably once per element —
-// confirm against nary.hpp).
-void tanh(hipStream_t stream, const argument& result, const argument& arg)
-{
-    // The input value is passed through to_hip_type before calling the
-    // global-namespace ::tanh.
-    nary(stream, result, arg)([](auto x) __device__ { return ::tanh(to_hip_type(x)); });
-}
-
-} // namespace device
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
--- a/src/targets/gpu/device/unary_not.cpp
+++ b/src/targets/gpu/device/unary_not.cpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include <migraphx/gpu/device/unary_not.hpp>
-#include <migraphx/gpu/device/nary.hpp>
-#include <migraphx/type_traits.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-namespace device {
-
-// Device entry point for logical negation: reads `arg`, writes `result`,
-// using `stream`. The arguments are forwarded to the `nary` helper, which is
-// then invoked with the device lambda below (presumably once per element —
-// confirm against nary.hpp).
-void unary_not(hipStream_t stream, const argument& result, const argument& arg)
-{
-    // The lambda returns the logical negation of its input value.
-    nary(stream, result, arg)([](auto x) __device__ { return not x; });
-}
-
-} // namespace device
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
--- a/src/targets/gpu/device/where.cpp
+++ b/src/targets/gpu/device/where.cpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include <migraphx/gpu/device/where.hpp>
-#include <migraphx/gpu/device/tensor.hpp>
-#include <migraphx/gpu/device/types.hpp>
-#include <migraphx/gpu/device/launch.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-namespace device {
-
-// Returns a value-initialized object whose type is that of
-// `typename Shape::hip_index{}.size()`. The parameter is unnamed, so only the
-// static type of the argument matters; `where` uses the result inside an
-// `if constexpr` condition.
-template <class Shape>
-constexpr auto get_rank(const Shape&)
-{
-    return decltype(typename Shape::hip_index{}.size()){};
-}
-
-// Element selection: for every index writes `arg1`'s element when the
-// matching element of `arg0` is true, otherwise `arg2`'s element.
-//
-// stream - HIP stream passed to gs_launch
-// result - destination argument
-// arg0   - condition
-// arg1   - values taken where the condition is true
-// arg2   - values taken where the condition is false
-void where(hipStream_t stream,
-           const argument& result,
-           const argument& arg0,
-           const argument& arg1,
-           const argument& arg2)
-{
-    // result/arg1/arg2 are visited together; arg0 is visited on its own
-    // (presumably because the condition may have a different element type).
-    hip_visit_all(result, arg1, arg2)([&](auto output, auto x, auto y) {
-        hip_visit_all(arg0)([&](auto cond) {
-            // NOTE(review): there is no else branch, so when the ranks differ
-            // nothing is launched here and `result` is not written.
-            if constexpr(get_rank(cond.get_shape()) == get_rank(output.get_shape()))
-            {
-                // Launch size is the element count of arg1's shape.
-                gs_launch(stream, arg1.get_shape().elements())([=](auto idx) __device__ {
-                    // Convert the flat index to a multi-index of the output shape
-                    // and use it to index all four tensors.
-                    auto i    = output.get_shape().multi(idx);
-                    output[i] = cond[i] ? x[i] : y[i];
-                });
-            }
-        });
-    });
-}
-
-} // namespace device
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
--- a/src/targets/gpu/fuse_mlir.cpp
+++ b/src/targets/gpu/fuse_mlir.cpp
@@ -61,13 +61,25 @@ struct mlir_conv
 MIGRAPHX_REGISTER_OP(mlir_conv);

 namespace {
+
+// Predicate matcher: accepts an instruction only if it is named
+// "convolution" and the "group" entry of its operator value equals 1.
+MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins)
+{
+    if(ins->name() != "convolution")
+        return false;
+    // The attribute is read through the operator's generic value form.
+    value v    = ins->get_operator().to_value();
+    auto group = v.at("group").to<int>();
+    // Any group count other than 1 is rejected (presumably grouped
+    // convolutions are not handled by the MLIR path — TODO confirm).
+    if(group != 1)
+        return false;
+    return true;
+}
+
 struct find_conv_pointwise
 {
    // Find a convolution followed by a pointwise operation.
    auto matcher() const
    {
        auto convolution =
-            match::skip(match::name("contiguous"))(match::name("convolution").bind("convolution"));
+            match::skip(match::name("contiguous"))(is_mlir_conv().bind("convolution"));
        return match::name("pointwise")(match::any_of[match::inputs()](convolution.bind("x")));
    }


--- a/src/targets/gpu/fuse_ops.cpp
+++ b/src/targets/gpu/fuse_ops.cpp
@@ -29,21 +29,7 @@
 #include <migraphx/gpu/convolution.hpp>
 #include <migraphx/gpu/device_name.hpp>
 #include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/add.hpp>
-#include <migraphx/gpu/mul.hpp>
 #include <migraphx/gpu/gemm.hpp>
-#include <migraphx/gpu/device/layernorm.hpp>
-#include <migraphx/gpu/device/gelu.hpp>
-#include <migraphx/gpu/device/mul_add.hpp>
-#include <migraphx/gpu/device/add_clip.hpp>
-#include <migraphx/gpu/device/add_relu.hpp>
-#include <migraphx/gpu/device/add_sigmoid.hpp>
-#include <migraphx/gpu/device/add_tanh.hpp>
-#include <migraphx/gpu/device/mul_add_relu.hpp>
-#include <migraphx/gpu/device/add.hpp>
-#include <migraphx/match/layernorm.hpp>
-#include <migraphx/match/gelu_erf.hpp>
-#include <migraphx/match/gelu_tanh.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/register_op.hpp>
 #include <migraphx/array.hpp>
@@ -224,100 +210,6 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
           contains({{0, 0}, {1, 1}}, op.stride) and contains({{1, 1}}, op.dilation);
 }

-// Operator wrappers for the fused elementwise kernels. Each struct derives
-// from an N-ary device base (unary_/binary_/ternary_/quaternary_/quinary_device)
-// templated on the struct itself and on a pointer to a device:: function, and
-// is then registered with MIGRAPHX_REGISTER_OP.
-struct hip_triadd : ternary_device<hip_triadd, &device::add>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_triadd)
-
-struct hip_triadd_clip : quinary_device<hip_triadd_clip, &device::add_clip>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_triadd_clip)
-
-struct hip_add_clip : quaternary_device<hip_add_clip, &device::add_clip>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_add_clip)
-
-struct hip_triadd_relu : ternary_device<hip_triadd_relu, &device::add_relu>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_triadd_relu)
-
-struct hip_triadd_sigmoid : ternary_device<hip_triadd_sigmoid, &device::add_sigmoid>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_triadd_sigmoid)
-
-struct hip_triadd_tanh : ternary_device<hip_triadd_tanh, &device::add_tanh>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_triadd_tanh)
-
-struct hip_add_relu : binary_device<hip_add_relu, &device::add_relu>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_add_relu)
-
-// NOTE(review): the first template argument is hip_add_relu, not
-// hip_add_sigmoid, unlike every sibling in this list. This looks like a
-// copy-paste slip; confirm what binary_device derives from that parameter.
-struct hip_add_sigmoid : binary_device<hip_add_relu, &device::add_sigmoid>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_add_sigmoid)
-
-struct hip_add_tanh : binary_device<hip_add_tanh, &device::add_tanh>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_add_tanh)
-
-struct hip_layernorm : unary_device<hip_layernorm, &device::layernorm>
-{
-    // Empty finalize to skip dimension reduction
-    void finalize(context&, const shape&, const std::vector<shape>&) {}
-};
-MIGRAPHX_REGISTER_OP(hip_layernorm)
-
-struct hip_triadd_layernorm : ternary_device<hip_triadd_layernorm, &device::triadd_layernorm>
-{
-    // Validates the inputs with has(4).standard() and returns the shape of
-    // the first input as the output shape. (Presumably the four inputs are
-    // three operands plus the output buffer — TODO confirm.)
-    shape compute_shape(const std::vector<shape>& inputs) const
-    {
-        check_shapes{inputs, *this}.has(4).standard();
-        return inputs[0];
-    }
-    // Empty finalize to skip dimension reduction
-    void finalize(context&, const shape&, const std::vector<shape>&) {}
-};
-MIGRAPHX_REGISTER_OP(hip_triadd_layernorm)
-
-struct hip_gelu : unary_device<hip_gelu, &device::gelu>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_gelu)
-
-struct hip_add_gelu : binary_device<hip_add_gelu, &device::add_gelu>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_add_gelu)
-
-struct hip_gelu_new : unary_device<hip_gelu_new, &device::gelu_new>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_gelu_new)
-
-struct hip_add_gelu_new : binary_device<hip_add_gelu_new, &device::add_gelu_new>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_add_gelu_new)
-
-struct hip_mul_add : ternary_device<hip_mul_add, &device::mul_add>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_mul_add)
-
-struct hip_mul_add_relu : ternary_device<hip_mul_add_relu, &device::mul_add_relu>
-{
-};
-MIGRAPHX_REGISTER_OP(hip_mul_add_relu)
-
 void move_broadcasted_back(std::vector<instruction_ref>& args)
 {
    // Ensure the last arguments is the broadcasted one
@@ -341,256 +233,6 @@ void move_standard_front(std::vector<instruction_ref>& args)
 // Builds a name matcher for the operator name formed as "gpu::" + s.
 auto gpu_name(const std::string& s) { return match::name("gpu::" + s); }

 namespace {
-// Rewrites a matched layernorm pattern into a single hip_layernorm instruction.
-struct find_layernorm
-{
-    // The pattern comes from match::layernorm, instantiated with gpu_name.
-    auto matcher() const { return match::layernorm(&gpu_name); }
-
-    // Replaces the matched instruction with hip_layernorm applied to the
-    // instruction bound as "x" and the last input of the matched instruction.
-    // Returns without changing the module when either guard below fails.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto ins   = r.result;
-        auto x_ins = r.instructions["x"];
-        auto args  = ins->inputs();
-
-        // We don't fuse for non-standard layouts
-        if(not x_ins->get_shape().standard())
-            return;
-
-        // Length of the last dimension of x
-        auto relements = x_ins->get_shape().lens().back();
-
-        // Skip when that length exceeds 1024, or exceeds 256 without being a
-        // multiple of 4
-        if(relements > 1024 or (relements % 4 != 0 and relements > 256))
-            return;
-
-        m.replace_instruction(ins, hip_layernorm{}, x_ins, args.back());
-    }
-};
-
-// Fuses a gpu::triadd feeding a gpu::layernorm into hip_triadd_layernorm.
-struct find_triadd_layernorm
-{
-    // Matches gpu::layernorm whose argument 0 is a gpu::triadd that is used
-    // once and whose inputs all have standard shape.
-    auto matcher() const
-    {
-        return match::name("gpu::layernorm")(match::arg(0)(match::name("gpu::triadd")(
-            match::used_once(), match::all_of[match::inputs()](match::standard_shape()))));
-    }
-
-    // Replaces the layernorm with hip_triadd_layernorm, passing the triadd's
-    // own input list unchanged.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto ins    = r.result;
-        auto triadd = ins->inputs().front();
-        m.replace_instruction(ins, hip_triadd_layernorm{}, triadd->inputs());
-    }
-};
-
-// Rewrites a matched erf-based gelu pattern into a single hip_gelu instruction.
-struct find_gelu
-{
-    // The pattern comes from match::gelu_erf, instantiated with gpu_name.
-    auto matcher() const { return match::gelu_erf(&gpu_name); }
-
-    // Replaces the matched instruction with hip_gelu applied to the
-    // instruction bound as "x" and the last input of the matched instruction.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto ins   = r.result;
-        auto x_ins = r.instructions["x"];
-        auto args  = ins->inputs();
-
-        m.replace_instruction(ins, hip_gelu{}, x_ins, args.back());
-    }
-};
-
-// Fuses a gpu::add feeding a gpu::gelu into hip_add_gelu.
-struct find_add_gelu
-{
-    // Matches gpu::gelu whose argument 0 is a gpu::add (bound as "add").
-    auto matcher() const
-    {
-        return match::name("gpu::gelu")(match::arg(0)(match::name("gpu::add").bind("add")));
-    }
-
-    // Starts from the add's inputs, reorders them with move_standard_front
-    // and move_broadcasted_back, then replaces the gelu with hip_add_gelu.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto add_ins = r.instructions["add"];
-        auto ins     = r.result;
-        auto args    = add_ins->inputs();
-        move_standard_front(args);
-        move_broadcasted_back(args);
-
-        // The last argument is overwritten with the gelu's last input
-        // (presumably its output allocation — TODO confirm).
-        args.back() = ins->inputs().back();
-        m.replace_instruction(ins, hip_add_gelu{}, args);
-    }
-};
-
-// Rewrites a matched tanh-based gelu pattern into hip_gelu or hip_gelu_new.
-struct find_gelu_new
-{
-    // Selects the replacement operator in apply(); defaults to true.
-    bool fast_math = true;
-
-    // The pattern comes from match::gelu_tanh, instantiated with gpu_name.
-    auto matcher() const { return match::gelu_tanh(&gpu_name); }
-
-    // Replaces the matched instruction, using the instruction bound as "x"
-    // and the last input of the matched instruction as arguments.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto ins   = r.result;
-        auto x_ins = r.instructions["x"];
-        auto args  = ins->inputs();
-
-        // With fast_math set the hip_gelu operator is used in place of
-        // hip_gelu_new.
-        if(fast_math)
-            m.replace_instruction(ins, hip_gelu{}, x_ins, args.back());
-        else
-            m.replace_instruction(ins, hip_gelu_new{}, x_ins, args.back());
-    }
-};
-
-// Fuses a gpu::add feeding a gpu::gelu_new into hip_add_gelu_new.
-struct find_add_gelu_new
-{
-    // Matches gpu::gelu_new whose argument 0 is a gpu::add (bound as "add").
-    auto matcher() const
-    {
-        return match::name("gpu::gelu_new")(match::arg(0)(match::name("gpu::add").bind("add")));
-    }
-
-    // Starts from the add's inputs, reorders them with move_standard_front
-    // and move_broadcasted_back, then replaces the gelu_new with
-    // hip_add_gelu_new.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto add_ins = r.instructions["add"];
-        auto ins     = r.result;
-        auto args    = add_ins->inputs();
-        move_standard_front(args);
-        move_broadcasted_back(args);
-
-        // The last argument is overwritten with the gelu_new's last input
-        // (presumably its output allocation — TODO confirm).
-        args.back() = ins->inputs().back();
-        m.replace_instruction(ins, hip_add_gelu_new{}, args);
-    }
-};
-
-// Fuses a gpu::add or gpu::triadd feeding a clip into hip_add_clip or
-// hip_triadd_clip.
-struct find_add_clip
-{
-    // Matches gpu::clip or gpu::clipped_relu whose argument 0 (bound as
-    // "add") satisfies any of: named gpu::add, named gpu::triadd, or has at
-    // least one input with standard shape.
-    // NOTE(review): the third alternative lets "add" bind to an instruction
-    // that is neither add nor triadd; apply() then replaces nothing.
-    auto matcher() const
-    {
-        return match::name(std::unordered_set<std::string>{"gpu::clip", "gpu::clipped_relu"})(
-            match::arg(0)(match::any_of(match::name("gpu::add"),
-                                        match::name("gpu::triadd"),
-                                        match::any_of[match::inputs()](match::standard_shape()))
-                              .bind("add")));
-    }
-
-    // Builds the fused argument list from the add's inputs followed by the
-    // clip's inputs, then replaces the clip according to the add's name.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto add_ins  = r.instructions["add"];
-        auto ins      = r.result;
-        auto ins_args = ins->inputs();
-        auto add_args = add_ins->inputs();
-        move_standard_front(add_args);
-        move_broadcasted_back(add_args);
-
-        // Use the allocation from the clip operator: drop the add's last
-        // argument and append every clip input except the first one.
-        add_args.pop_back();
-        add_args.insert(add_args.end(), std::next(ins_args.begin()), ins_args.end());
-        if(add_ins->name() == "gpu::add")
-            m.replace_instruction(ins, hip_add_clip{}, add_args);
-        else if(add_ins->name() == "gpu::triadd")
-            m.replace_instruction(ins, hip_triadd_clip{}, add_args);
-    }
-};
-
-// Fuses a gpu::add or gpu::triadd feeding the unary operator `op_name` into
-// one of the two configured fused operators.
-struct find_add_unary
-{
-    // Name of the unary operator to match (e.g. "gpu::relu").
-    std::string op_name;
-    // Replacement used when the producer is gpu::add.
-    operation binary_add_op;
-    // Replacement used when the producer is gpu::triadd.
-    operation ternary_add_op;
-    // Matches `op_name` whose argument 0 (bound as "add") is used once and
-    // satisfies any of: named gpu::add, named gpu::triadd, named @literal, or
-    // has at least one input with standard shape.
-    // NOTE(review): the last two alternatives let "add" bind to an
-    // instruction that is neither add nor triadd; apply() then replaces
-    // nothing.
-    auto matcher() const
-    {
-        return match::name(op_name)(match::arg(0)(
-            match::used_once(),
-            match::any_of(match::name("gpu::add"),
-                          match::name("gpu::triadd"),
-                          match::any_of(match::name("@literal"),
-                                        match::any_of[match::inputs()](match::standard_shape())))
-                .bind("add")));
-    }
-
-    // Starts from the producer's inputs, reorders them, and replaces the
-    // unary instruction according to the producer's name.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto add_ins = r.instructions["add"];
-        auto ins     = r.result;
-        auto args    = add_ins->inputs();
-        move_standard_front(args);
-        move_broadcasted_back(args);
-
-        // Use the allocation from the matched unary operator (e.g. relu)
-        args.back() = ins->inputs().back();
-        if(add_ins->name() == "gpu::add")
-            m.replace_instruction(ins, binary_add_op, args);
-        else if(add_ins->name() == "gpu::triadd")
-            m.replace_instruction(ins, ternary_add_op, args);
-    }
-};
-
-// Fuses two chained gpu::add instructions into a single hip_triadd.
-struct find_triadd
-{
-    // Matches gpu::add where, in either order of arguments 0 and 1, one is a
-    // gpu::add used once (bound as "add") and the other (bound as "input") is
-    // a @literal or has at least one input with standard shape.
-    auto matcher() const
-    {
-        return match::name("gpu::add")(match::either_arg(0, 1)(
-            match::name("gpu::add")(match::used_once()).bind("add"),
-            match::any(match::any_of(match::name("@literal"),
-                                     match::any_of[match::inputs()](match::standard_shape())))
-                .bind("input")));
-    }
-
-    // Combines "input" with the inner add's inputs and replaces the outer add
-    // with hip_triadd.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto add_ins   = r.instructions["add"];
-        auto input_ins = r.instructions["input"];
-        auto ins       = r.result;
-        auto args      = add_ins->inputs();
-
-        // Give up when more than two of the inner add's inputs are broadcasted
-        auto is_broadcasted = [](auto arg) { return arg->get_shape().broadcasted(); };
-        if(std::count_if(args.begin(), args.end(), is_broadcasted) > 2)
-            return;
-        args.insert(args.begin(), input_ins);
-        move_standard_front(args);
-        move_broadcasted_back(args);
-
-        // The last argument is overwritten with the outer add's last input
-        // (presumably its output allocation — TODO confirm).
-        args.back() = ins->inputs().back();
-        m.replace_instruction(ins, hip_triadd{}, args);
-    }
-};
-
-// Fuses a gpu::mul feeding a gpu::add into a single hip_mul_add.
-struct find_mul_add
-{
-    // Matches gpu::add where, in either order of arguments 0 and 1, one is a
-    // gpu::mul used once (bound as "mul") and the other is bound as "b".
-    auto matcher() const
-    {
-        return match::name("gpu::add")(match::either_arg(0, 1)(
-            match::name("gpu::mul")(match::used_once()).bind("mul"), match::any().bind("b")));
-    }
-
-    // Builds the fused argument list from the mul's inputs plus "b" and
-    // replaces the add with hip_mul_add.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto mul_ins = r.instructions["mul"];
-        auto b_ins   = r.instructions["b"];
-        auto ins     = r.result;
-        auto args    = mul_ins->inputs();
-        assert(mul_ins != b_ins);
-
-        move_standard_front(args);
-        move_broadcasted_back(args);
-        // "b" is inserted just before the last argument
-        args.insert(std::prev(args.end()), b_ins);
-
-        // The last argument is overwritten with the add's last input
-        // (presumably its output allocation — TODO confirm).
-        args.back() = ins->inputs().back();
-        m.replace_instruction(ins, hip_mul_add{}, args);
-    }
-};
-
-// Fuses a gpu::mul_add feeding a gpu::relu into a single hip_mul_add_relu.
-struct find_mul_add_relu
-{
-    // Matches gpu::relu whose argument 0 is a gpu::mul_add used once (bound
-    // as "mul_add").
-    auto matcher() const
-    {
-        return match::name("gpu::relu")(
-            match::arg(0)(match::name("gpu::mul_add")(match::used_once()).bind("mul_add")));
-    }
-
-    // Reuses the mul_add's inputs and replaces the relu with hip_mul_add_relu.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto mul_add_ins = r.instructions["mul_add"];
-        auto ins         = r.result;
-        auto args        = mul_add_ins->inputs();
-
-        // Use the allocation from the relu operator
-        args.back() = ins->inputs().back();
-        m.replace_instruction(ins, hip_mul_add_relu{}, args);
-    }
-};
-
 struct miopen_fusion
 {
    struct fuse_op_data
@@ -907,46 +549,6 @@ struct find_conv_pointwise
    }
 };

-// Folds a gpu::add of a gemm result and another operand "c" into the gemm
-// itself by setting the gemm's beta to 1.
-struct find_gemm_add
-{
-    // Matches gpu::add whose inputs all have standard shape and where, in
-    // either order of arguments 0 and 1, one is used once (bound as "c") and
-    // the other is a gpu::gemm with three arguments (bound as "gemm").
-    auto matcher() const
-    {
-        return match::name("gpu::add")(
-            match::all_of[match::inputs()](match::standard_shape()),
-            match::either_arg(0, 1)(match::used_once().bind("c"),
-                                    match::name("gpu::gemm")(match::nargs(3)).bind("gemm")));
-    }
-
-    // Replaces the add with a copy of the gemm operator (beta = 1) whose
-    // inputs are the gemm's inputs minus the last one, followed by the "c"
-    // operand (or a copy of it) twice.
-    void apply(module& m, const match::matcher_result& r) const
-    {
-        auto ins      = r.result;
-        auto gemm_ins = r.instructions["gemm"];
-        auto c_ins    = r.instructions["c"];
-
-        auto gemm = any_cast<rocblas_gemm<op::dot>>(gemm_ins->get_operator());
-
-        // Already fused gemm
-        if(not float_equal(gemm.beta, 0))
-            return;
-
-        // Drop the gemm's last input; it is replaced by the entries pushed below
-        auto inputs = gemm_ins->inputs();
-        inputs.pop_back();
-
-        auto copy_ins = c_ins;
-
-        // Insert copy
-        // The copy (hip_copy of c into the add's last input) is made when the
-        // add is at the module end, when c has more than one output, or when
-        // c has no inputs.
-        // NOTE(review): the matcher already requires "c" to be used once, so
-        // the outputs().size() > 1 test looks redundant — confirm.
-        if(ins == m.end() or c_ins->outputs().size() > 1 or c_ins->inputs().empty())
-        {
-            copy_ins = m.insert_instruction(ins, hip_copy{}, c_ins, ins->inputs().back());
-        }
-        // Pushed twice (presumably once as the C operand and once as the
-        // output buffer — TODO confirm).
-        inputs.push_back(copy_ins);
-        inputs.push_back(copy_ins);
-
-        gemm.beta = 1;
-        m.replace_instruction(ins, gemm, inputs);
-    }
-};
-
 struct find_gemm_pointwise
 {
    auto matcher() const
@@ -1185,26 +787,11 @@ struct find_layernorm_pointwise

 void fuse_ops::apply(module& m) const
 {
-    match::find_matches(m, find_contiguous_pointwise{}, find_gelu{}, find_gelu_new{fast_math});
+    match::find_matches(m, find_contiguous_pointwise{});
    run_passes(m, {dead_code_elimination{}});
-    match::find_matches(m, find_triadd{});
-    match::find_matches(m,
-                        find_layernorm{},
-                        find_conv_pointwise{ctx},
-                        find_conv_bias_relu{ctx},
-                        find_conv_bias{ctx},
-                        find_add_gelu{},
-                        find_add_gelu_new{},
-                        find_mul_add{},
-                        find_mul_add_relu{},
-                        find_add_unary{"gpu::relu", hip_add_relu{}, hip_triadd_relu{}},
-                        find_add_unary{"gpu::sigmoid", hip_add_sigmoid{}, hip_triadd_sigmoid{}},
-                        find_add_unary{"gpu::tanh", hip_add_tanh{}, hip_triadd_tanh{}},
-                        find_add_clip{});
+    match::find_matches(m, find_conv_pointwise{ctx}, find_conv_bias_relu{ctx}, find_conv_bias{ctx});
    run_passes(m, {dead_code_elimination{}});
    match::find_matches(m,
-                        find_triadd_layernorm{},
-                        find_gemm_add{},
                        find_layernorm_pointwise{},
                        find_gemm_pointwise{},
                        find_contiguous_tranpose_gemm{},

--- a/src/targets/gpu/gemm_impl.cpp
+++ b/src/targets/gpu/gemm_impl.cpp
@@ -176,8 +176,13 @@ void gemm_impl(context& ctx,

        auto num_matrices = std::accumulate(
            out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies<std::size_t>());
-        if(num_matrices == 1)
+        if(num_matrices == 1 or (num_matrices > 1 and get_batch_stride(args[1]) == 0))
        {
+            // If the batch dimension of B is broadcasted, then we can
+            // multiply m by the batch_size and use rocblas_gemm_ex
+            // instead of rocblas_gemm_strided_batched_ex.
+            m *= num_matrices;
+
            // the rocblas_gemm API handles inputs and output matrices as
            // column-major format. When doing a C = A * B, we actually do
            // C^T = (B^T) * (A^T). That is the reason we input args[1] as

--- a/src/targets/gpu/include/migraphx/gpu/acos.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/acos.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
-#define MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/acos.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-// Operator wrapper that binds device::acos through the unary_device base.
-struct hip_acos : unary_device<hip_acos, device::acos>
-{
-};
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/acosh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/acosh.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_ACOSH_HPP
-#define MIGRAPHX_GUARD_RTGLIB_ACOSH_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/acosh.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-// Operator wrapper that binds device::acosh through the unary_device base.
-struct hip_acosh : unary_device<hip_acosh, device::acosh>
-{
-};
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/add.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/add.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_ADD_HPP
-#define MIGRAPHX_GUARD_RTGLIB_ADD_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/add.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-// Operator wrapper that binds device::add through the binary_device base.
-struct hip_add : binary_device<hip_add, device::add>
-{
-};
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/asin.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/asin.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
-#define MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/asin.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-// Operator wrapper that binds device::asin through the unary_device base.
-struct hip_asin : unary_device<hip_asin, device::asin>
-{
-};
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/asinh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/asinh.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_ASINH_HPP
-#define MIGRAPHX_GUARD_RTGLIB_ASINH_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/asinh.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-// Operator wrapper that binds device::asinh through the unary_device base.
-struct hip_asinh : unary_device<hip_asinh, device::asinh>
-{
-};
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/atan.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/atan.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
-#define MIGRAPHX_GUARD_RTGLIB_ATAN_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/atan.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-struct hip_atan : unary_device<hip_atan, device::atan>
-{
-};
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/atanh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/atanh.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_ATANH_HPP
-#define MIGRAPHX_GUARD_RTGLIB_ATANH_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/atanh.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-struct hip_atanh : unary_device<hip_atanh, device::atanh>
-{
-};
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/ceil.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/ceil.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_CEIL_HPP
-#define MIGRAPHX_GUARD_RTGLIB_CEIL_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/ceil.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-struct hip_ceil : unary_device<hip_ceil, device::ceil>
-{
-};
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/compile_gen.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/compile_gen.hpp
@@ -36,6 +36,9 @@ inline namespace MIGRAPHX_INLINE_NS {
 struct shape;

 namespace gpu {
+
+struct context;
+
 namespace gen {

 struct vectorize
@@ -43,6 +46,10 @@ struct vectorize
    std::size_t size = 1;
    std::size_t axis = 0;
    static vectorize elements(std::size_t axis, const std::vector<shape>& inputs);
+    static vectorize elements(context& ctx, std::size_t axis, const std::vector<shape>& inputs);
+    static vectorize elements(std::size_t axis,
+                              const std::vector<shape>& inputs,
+                              const std::vector<std::size_t>& sizes);
    std::string str() const;
 };
 struct preload

--- a/src/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp
@@ -24,8 +24,9 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP
 #define MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP

-#include <migraphx/gpu/concat.hpp>
+#include <migraphx/op/concat.hpp>
 #include <migraphx/operation.hpp>
+#include <migraphx/serialize.hpp>

 namespace migraphx {
 namespace gpu {
@@ -36,7 +37,8 @@ struct concat_gpu_optimization
    std::string allocate() const { return "hip::allocate"; }
    migraphx::op::concat get_concat(const migraphx::operation& op) const
    {
-        return migraphx::any_cast<migraphx::gpu::hip_concat>(op).op;
+        auto v = op.to_value();
+        return from_value<migraphx::op::concat>(v.at("op"));
    }
 };


--- a/src/targets/gpu/include/migraphx/gpu/cosh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/cosh.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_COSH_HPP
-#define MIGRAPHX_GUARD_RTGLIB_COSH_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/cosh.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-struct hip_cosh : unary_device<hip_cosh, device::cosh>
-{
-};
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/device/acos.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/acos.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOS_HPP
-#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOS_HPP
-
-#include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
-#include <hip/hip_runtime_api.h>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-namespace device {
-
-void acos(hipStream_t stream, const argument& result, const argument& arg);
-
-} // namespace device
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/device/acosh.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/acosh.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOSH_HPP
-#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ACOSH_HPP
-
-#include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
-#include <hip/hip_runtime_api.h>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-namespace device {
-
-void acosh(hipStream_t stream, const argument& result, const argument& arg);
-
-} // namespace device
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/device/add.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/add.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_HPP
-#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_HPP
-
-#include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
-#include <hip/hip_runtime_api.h>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-namespace device {
-
-void add(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2);
-
-void add(hipStream_t stream,
-         const argument& result,
-         const argument& arg1,
-         const argument& arg2,
-         const argument& arg3);
-
-} // namespace device
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif