Merge branch 'develop' into multinomial_parse_merge

264a7647 · Brian Pickrell · d99729f8 · 8e18544f · 264a7647 · 264a7647
Commit 264a7647 authored Jul 26, 2023 by Brian Pickrell
20 changed files
--- a/src/targets/gpu/include/migraphx/gpu/config.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/config.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef MIGRAPHX_GUARD_GPU_CONFIG_HPP
+#define MIGRAPHX_GUARD_GPU_CONFIG_HPP
+
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/export.h>
+
+#endif // MIGRAPHX_GUARD_GPU_CONFIG_HPP
--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -24,6 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
 #define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP

+#include <migraphx/gpu/export.h>
 #include <migraphx/context.hpp>
 #include <migraphx/gpu/miopen.hpp>
 #include <migraphx/gpu/rocblas.hpp>

--- a/src/targets/gpu/include/migraphx/gpu/contiguous.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/contiguous.hpp
@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(2);
-        if(inputs.front().standard())
-            return inputs.front();
        auto lens = inputs.at(0).lens();
        auto t    = inputs.at(0).type();
        return {t, lens};

--- a/src/targets/gpu/include/migraphx/gpu/convolution.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/convolution.hpp
@@ -31,7 +31,7 @@
 #include <migraphx/op/identity.hpp>
 #include <migraphx/op/convolution.hpp>
 #include <migraphx/op/quant_convolution.hpp>
-#include <migraphx/op/deconvolution.hpp>
+#include <migraphx/op/convolution_backwards.hpp>
 #include <unordered_map>
 #include <migraphx/reflect.hpp>
 #include <migraphx/gpu/context.hpp>
@@ -146,7 +146,8 @@ struct miopen_convolution

    void set_conv_descriptor()
    {
-        cd = (op.name() == "deconvolution") ? make_deconv(op) : make_conv(op);
+        cd =
+            (op.name() == "convolution_backwards") ? make_convolution_backwards(op) : make_conv(op);
    }

    value compile(migraphx::context& ctx, const shape& output, const std::vector<shape>& input)
@@ -159,10 +160,31 @@ struct miopen_convolution
    shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
    {
        shape workspace_shape{};
-        auto x_desc                = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
-        auto w_desc                = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
-        auto y_desc                = make_tensor(reshape_if_1d(output_shape));
+        auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
+        auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
+        auto y_desc = make_tensor(reshape_if_1d(output_shape));
+
+        auto* miopen_stream_handle = ctx.get_stream().get_miopen();
        std::size_t workspace_size = 0;
+        auto status                = miopenConvolutionForwardGetWorkSpaceSize(miopen_stream_handle,
+                                                               w_desc.get(),
+                                                               x_desc.get(),
+                                                               cd.get(),
+                                                               y_desc.get(),
+                                                               &workspace_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
+
+        workspace_shape = shape{shape::int8_type, {workspace_size}};
+
+        auto x_shape = inputs[0];
+        auto w_shape = inputs[1];
+        if(int8_x4_format)
+        {
+            x_shape = pack_int8_shape(x_shape);
+            w_shape = pack_int8_shape(w_shape);
+        }
+
 #ifdef MIGRAPHX_HAS_FIND_2_API
        {
            auto conv_problem = make_obj<miopen_problem>(
@@ -170,13 +192,34 @@ struct miopen_convolution

            set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
            set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
+            bool preallocate = false;
+#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
+            // Starting with ROCm 5.6, MIOpen provides APIs that accept pre-allocated buffers
+            preallocate = true;
+#endif
+            auto x = preallocate ? to_gpu(generate_argument(x_shape)) : inputs[0];
+            auto w = preallocate ? to_gpu(generate_argument(w_shape)) : inputs[1];
+            auto y = preallocate ? allocate_gpu(output_shape) : inputs[2];
+            auto workspace =
+                preallocate ? allocate_gpu(workspace_shape) : migraphx::argument(workspace_shape);
+
            set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);

-            auto* miopen_stream_handle = ctx.get_stream().get_miopen();
+            const miopenTensorArgument_t tensor_args[3] = {
+                {miopenTensorConvolutionX, nullptr, x.implicit()},
+                {miopenTensorConvolutionW, nullptr, w.implicit()},
+                {miopenTensorConvolutionY, nullptr, y.implicit()},
+            };
+
+            solution_ptr = find_solution(miopen_stream_handle,
+                                         3,
+                                         tensor_args,
+                                         workspace.implicit(),
+                                         workspace_size,
+                                         conv_problem.get(),
+                                         ctx.get_exhaustive_tune_flag());

-            solution_ptr = find_solution(
-                miopen_stream_handle, conv_problem.get(), ctx.get_exhaustive_tune_flag());
-            auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
+            status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
            if(status != miopenStatusSuccess)
                MIGRAPHX_THROW("MIOpen" + op.name() + " : failed to get solution's workspace size");

@@ -195,29 +238,10 @@ struct miopen_convolution
            return shape{shape::int8_type, {workspace_size}};
        }
 #else
-        auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
-                                                               w_desc.get(),
-                                                               x_desc.get(),
-                                                               cd.get(),
-                                                               y_desc.get(),
-                                                               &workspace_size);
-        if(status != miopenStatusSuccess)
-            MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
-
-        workspace_shape = shape{shape::int8_type, {workspace_size}};
-
-        auto x_shape = inputs[0];
-        auto w_shape = inputs[1];
-        if(int8_x4_format)
-        {
-            x_shape = pack_int8_shape(x_shape);
-            w_shape = pack_int8_shape(w_shape);
-        }
        auto x         = to_gpu(generate_argument(x_shape));
        auto w         = to_gpu(generate_argument(w_shape));
        auto y         = allocate_gpu(output_shape);
        auto workspace = allocate_gpu(workspace_shape);
-
        int algo_count = 1;
        miopenConvAlgoPerf_t perf;
        status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
@@ -337,6 +361,7 @@ struct miopen_convolution
        return {s.type(), lens, strides};
    }
 };
+
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/targets/gpu/include/migraphx/gpu/device/argmax.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/argmax.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMAX_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,7 +33,10 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void argmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);
+void MIGRAPHX_DEVICE_EXPORT argmax(hipStream_t stream,
+                                   const argument& result,
+                                   const argument& arg,
+                                   int64_t axis);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/argmin.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/argmin.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMIN_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,7 +33,10 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void argmin(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);
+void MIGRAPHX_DEVICE_EXPORT argmin(hipStream_t stream,
+                                   const argument& result,
+                                   const argument& arg,
+                                   int64_t axis);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/config.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/config.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_CONFIG_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DEVICE_CONFIG_HPP
+
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/export.h>
+
+#endif // MIGRAPHX_GUARD_RTGLIB_DEVICE_CONFIG_HPP
--- a/src/targets/gpu/include/migraphx/gpu/device/contiguous.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/contiguous.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_MIGRAPHLIB_KERNELS_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,7 +33,9 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void contiguous(hipStream_t stream, const argument& result, const argument& arg);
+void MIGRAPHX_DEVICE_EXPORT contiguous(hipStream_t stream,
+                                       const argument& result,
+                                       const argument& arg);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/fill.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/fill.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_FILL_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,7 +33,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void fill(hipStream_t stream, const argument& result, unsigned long val);
+void MIGRAPHX_DEVICE_EXPORT fill(hipStream_t stream, const argument& result, unsigned long val);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/gather.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/gather.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_GATHER_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument gather(hipStream_t stream, argument result, argument arg1, argument arg2, int64_t axis);
+argument MIGRAPHX_DEVICE_EXPORT
+gather(hipStream_t stream, argument result, argument arg1, argument arg2, int64_t axis);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,9 +33,13 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument& arg);
+void MIGRAPHX_DEVICE_EXPORT int8_gemm_pack_a(hipStream_t stream,
+                                             const argument& result,
+                                             const argument& arg);

-void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument& arg);
+void MIGRAPHX_DEVICE_EXPORT int8_gemm_pack_b(hipStream_t stream,
+                                             const argument& result,
+                                             const argument& arg);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/logsoftmax.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/logsoftmax.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_LOGSOFTMAX_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,7 +33,10 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);
+void MIGRAPHX_DEVICE_EXPORT logsoftmax(hipStream_t stream,
+                                       const argument& result,
+                                       const argument& arg,
+                                       int64_t axis);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/multinomial.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/multinomial.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_MULTINOMIAL_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,10 +33,10 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void multinomial(hipStream_t stream,
-                 const argument& result,
-                 const argument& arg0,
-                 const argument& arg1);
+void MIGRAPHX_DEVICE_EXPORT multinomial(hipStream_t stream,
+                                        const argument& result,
+                                        const argument& arg0,
+                                        const argument& arg1);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/nonzero.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/nonzero.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_NONZERO_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,7 +33,9 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument nonzero(hipStream_t stream, const argument& result, const argument& arg_data);
+argument MIGRAPHX_DEVICE_EXPORT nonzero(hipStream_t stream,
+                                        const argument& result,
+                                        const argument& arg_data);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/pad.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/pad.hpp
@@ -26,7 +26,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_PAD_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -34,11 +34,11 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument pad(hipStream_t stream,
-             argument result,
-             argument arg1,
-             float value,
-             std::vector<std::int64_t> pads);
+argument MIGRAPHX_DEVICE_EXPORT pad(hipStream_t stream,
+                                    argument result,
+                                    argument arg1,
+                                    float value,
+                                    std::vector<std::int64_t> pads);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/prefix_scan_sum.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/prefix_scan_sum.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_DEVICE_PREFIX_SCAN_SUM_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,12 +33,12 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void prefix_scan_sum(hipStream_t stream,
-                     const argument& result,
-                     const argument& arg,
-                     int32_t axis,
-                     bool exclusive,
-                     bool reverse);
+void MIGRAPHX_DEVICE_EXPORT prefix_scan_sum(hipStream_t stream,
+                                            const argument& result,
+                                            const argument& arg,
+                                            int32_t axis,
+                                            bool exclusive,
+                                            bool reverse);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/reverse.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/reverse.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_REVERSE_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,8 +33,10 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument
-reverse(hipStream_t stream, argument result, argument arg1, const std::vector<int64_t>& axes);
+argument MIGRAPHX_DEVICE_EXPORT reverse(hipStream_t stream,
+                                        argument result,
+                                        argument arg1,
+                                        const std::vector<int64_t>& axes);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/rnn_variable_seq_lens.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/rnn_variable_seq_lens.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_RNN_VARIABLE_SEQ_LENS_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,22 +33,22 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void rnn_var_sl_shift_sequence(hipStream_t stream,
-                               const argument& result,
-                               const argument& arg_hs,
-                               const argument& arg_sl);
+void MIGRAPHX_DEVICE_EXPORT rnn_var_sl_shift_sequence(hipStream_t stream,
+                                                      const argument& result,
+                                                      const argument& arg_hs,
+                                                      const argument& arg_sl);

-void rnn_var_sl_shift_output(hipStream_t stream,
-                             const argument& result,
-                             const argument& arg_hs,
-                             const argument& arg_sl,
-                             bool is_reverse);
+void MIGRAPHX_DEVICE_EXPORT rnn_var_sl_shift_output(hipStream_t stream,
+                                                    const argument& result,
+                                                    const argument& arg_hs,
+                                                    const argument& arg_sl,
+                                                    bool is_reverse);

-void rnn_var_sl_last_output(hipStream_t stream,
-                            const argument& result,
-                            const argument& arg_hs,
-                            const argument& arg_sl,
-                            bool is_reverse);
+void MIGRAPHX_DEVICE_EXPORT rnn_var_sl_last_output(hipStream_t stream,
+                                                   const argument& result,
+                                                   const argument& arg_hs,
+                                                   const argument& arg_sl,
+                                                   bool is_reverse);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/scatter.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/scatter.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_SCATTER_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,7 +33,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument scatter(
+argument MIGRAPHX_DEVICE_EXPORT scatter(
    hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis);

 } // namespace device

--- a/src/targets/gpu/include/migraphx/gpu/device/topk.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/topk.hpp
@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP

 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>

 namespace migraphx {
@@ -33,19 +33,19 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-argument topk_smallest(hipStream_t stream,
-                       const argument& val_res,
-                       const argument& ind_res,
-                       const argument& arg,
-                       int64_t k,
-                       int64_t axis);
+argument MIGRAPHX_DEVICE_EXPORT topk_smallest(hipStream_t stream,
+                                              const argument& val_res,
+                                              const argument& ind_res,
+                                              const argument& arg,
+                                              int64_t k,
+                                              int64_t axis);

-argument topk_largest(hipStream_t stream,
-                      const argument& val_res,
-                      const argument& ind_res,
-                      const argument& arg,
-                      int64_t k,
-                      int64_t axis);
+argument MIGRAPHX_DEVICE_EXPORT topk_largest(hipStream_t stream,
+                                             const argument& val_res,
+                                             const argument& ind_res,
+                                             const argument& arg,
+                                             int64_t k,
+                                             int64_t axis);

 } // namespace device
 } // namespace gpu