"sgl-kernel/vscode:/vscode.git/clone" did not exist on "81964328b7ed9f12bcc5ce171fe87795fd202de3"
Unverified Commit 40fbef9b authored by Ted Themistokleous's avatar Ted Themistokleous Committed by GitHub
Browse files

Merge branch 'develop' into threaded_nms

parents d164b151 aeb9f78c
......@@ -146,7 +146,11 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
gpu_sync();
std::vector<T> result(sz);
assert(not is_device_ptr(result.data()));
assert(is_device_ptr(x));
if(not is_device_ptr(x))
{
MIGRAPHX_THROW(
"read_from_gpu() requires Src buffer to be on the GPU, Copy from gpu failed\n");
}
auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
if(status != hipSuccess)
MIGRAPHX_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT
......
......@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_GPU_ALLOCATION_MODEL_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_GPU_ALLOCATION_MODEL_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/instruction_ref.hpp>
#include <string>
......@@ -33,7 +33,7 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct gpu_allocation_model
struct MIGRAPHX_GPU_EXPORT gpu_allocation_model
{
std::string name() const;
std::string copy() const;
......
......@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_ANALYZE_STREAMS_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_ANALYZE_STREAMS_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <migraphx/analyze_streams.hpp>
namespace migraphx {
......@@ -34,7 +34,7 @@ struct module;
namespace gpu {
std::vector<stream_race> analyze_streams(const module& m);
MIGRAPHX_GPU_EXPORT std::vector<stream_race> analyze_streams(const module& m);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -24,9 +24,10 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_COMPILE_HIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_COMPILE_HIP_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <migraphx/filesystem.hpp>
#include <migraphx/compile_src.hpp>
#include <migraphx/env.hpp>
#include <migraphx/functional.hpp>
#include <string>
#include <utility>
......@@ -36,6 +37,11 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
#ifdef MIGRAPHX_USE_HIPRTC
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_HIPRTC);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS);
#endif
struct hiprtc_src_file
{
hiprtc_src_file() = default;
......@@ -52,14 +58,13 @@ struct hiprtc_src_file
}
};
std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_src_file> srcs,
std::string params,
const std::string& arch);
MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>> compile_hip_src_with_hiprtc(
std::vector<hiprtc_src_file> srcs, std::string params, const std::string& arch);
std::vector<std::vector<char>>
MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>>
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch);
std::string enum_params(std::size_t count, std::string param);
MIGRAPHX_GPU_EXPORT std::string enum_params(std::size_t count, std::string param);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -24,8 +24,9 @@
#ifndef MIGRAPHX_GUARD_GPU_COMPILE_HIP_CODE_OBJECT_HPP
#define MIGRAPHX_GUARD_GPU_COMPILE_HIP_CODE_OBJECT_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/compile_src.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -39,9 +40,10 @@ struct hip_compile_options
std::size_t local;
std::vector<shape> inputs;
shape output;
std::string kernel_name = "kernel";
std::string params = "";
std::vector<shape> virtual_inputs = {};
std::string kernel_name = "kernel";
std::string params = "";
std::vector<shape> virtual_inputs = {};
std::vector<src_file> additional_src_files = {};
/**
* @brief Set the launch parameters but allow v to override the values
......@@ -64,14 +66,16 @@ struct hip_compile_options
};
/// Compute global for n elements, but max out on target-specific upper limit
std::function<std::size_t(std::size_t local)>
MIGRAPHX_GPU_EXPORT std::function<std::size_t(std::size_t local)>
compute_global_for(context& ctx, std::size_t n, std::size_t over = 1);
operation compile_hip_code_object(const std::string& content, hip_compile_options options);
MIGRAPHX_GPU_EXPORT operation compile_hip_code_object(const std::string& content,
hip_compile_options options);
std::size_t compute_block_size(std::size_t n, std::size_t max_block_size = 1024);
MIGRAPHX_GPU_EXPORT std::size_t compute_block_size(std::size_t n,
std::size_t max_block_size = 1024);
std::string generate_make_shape(const shape& s);
MIGRAPHX_GPU_EXPORT std::string generate_make_shape(const shape& s);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_GPU_COMPILE_OPS_HPP
#define MIGRAPHX_GUARD_GPU_COMPILE_OPS_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <string>
namespace migraphx {
......@@ -36,9 +36,10 @@ namespace gpu {
struct context;
struct compile_ops
struct MIGRAPHX_GPU_EXPORT compile_ops
{
context* ctx = nullptr;
context* ctx = nullptr;
bool exhaustive_tune = false;
std::string name() const { return "gpu::compile_ops"; }
void apply(module& m) const;
};
......
......@@ -24,12 +24,15 @@
#ifndef MIGRAPHX_GUARD_GPU_COMPILER_HPP
#define MIGRAPHX_GUARD_GPU_COMPILER_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <migraphx/auto_register.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/value.hpp>
#include <migraphx/module.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/optional.hpp>
#include <migraphx/rank.hpp>
#include <migraphx/gpu/tuning_config.hpp>
#include <functional>
namespace migraphx {
......@@ -38,17 +41,57 @@ namespace gpu {
struct context;
using compiler_replace = std::function<void(module& m, instruction_ref ins)>;
using compiler_compile = std::function<compiler_replace(context&, instruction_ref, operation)>;
struct compiler_replace
{
compiler_replace() = default;
compiler_replace(const operation& op) : code_object{op} {}
template <class F>
compiler_replace(const operation& op, F f)
: code_object{op},
replace_fn([=](const compiler_replace& cr, module& m, instruction_ref ins) {
f(m, ins, cr.code_object);
})
{
}
operation code_object = {};
std::function<void(const compiler_replace& cr, module& m, instruction_ref ins)> replace_fn =
nullptr;
void replace(module& m, instruction_ref ins) const
{
if(replace_fn)
replace_fn(*this, m, ins);
else
m.replace_instruction(ins, code_object, ins->inputs());
}
};
using compiler_compile =
std::function<compiler_replace(context&, instruction_ref, operation, const value&)>;
using compiler_compile_op =
std::function<operation(context&, const std::vector<shape>& inputs, const value&)>;
using compiler_tuning_config =
std::function<optional<tuning_config>(context&, instruction_ref, const operation&, bool)>;
void register_compiler(const std::string& name, compiler_compile c, compiler_compile_op cop);
MIGRAPHX_GPU_EXPORT void register_compiler(const std::string& name,
compiler_compile c,
compiler_compile_op cop,
compiler_tuning_config ctg);
bool has_compiler_for(const std::string& name);
compiler_replace compile(context& ctx, instruction_ref ins, const operation& op);
operation
compile_op(const std::string& name, context& ctx, const std::vector<shape>& inputs, const value& v);
MIGRAPHX_GPU_EXPORT bool has_compiler_for(const std::string& name);
MIGRAPHX_GPU_EXPORT compiler_replace compile(context& ctx,
instruction_ref ins,
const operation& op,
const value& solution);
MIGRAPHX_GPU_EXPORT operation compile_op(const std::string& name,
context& ctx,
const std::vector<shape>& inputs,
const value& v);
MIGRAPHX_GPU_EXPORT optional<tuning_config>
get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive);
template <class T>
void register_compiler()
......@@ -58,8 +101,11 @@ void register_compiler()
{
register_compiler(
name,
[=](auto&&... xs) { return c.compile(std::forward<decltype(xs)>(xs)...); },
[=](auto&&... xs) { return c.compile_op(std::forward<decltype(xs)>(xs)...); });
[=](auto&&... xs) {
return c.invoke_compile(rank<1>{}, std::forward<decltype(xs)>(xs)...);
},
[=](auto&&... xs) { return c.compile_op(std::forward<decltype(xs)>(xs)...); },
[=](auto&&... xs) { return c.get_tuning_config(std::forward<decltype(xs)>(xs)...); });
}
}
......@@ -78,12 +124,31 @@ using auto_register_compiler = auto_register<register_compiler_action, T>;
template <class Derived>
struct compiler : auto_register_compiler<Derived>
{
auto replace(const operation& op) const
const Derived& derived() const { return static_cast<const Derived&>(*this); }
optional<tuning_config>
get_tuning_config(context&, instruction_ref, const operation&, bool) const
{
return
[=](module& m, instruction_ref ins) { m.replace_instruction(ins, op, ins->inputs()); };
return nullopt;
}
operation compile_op(context&, const std::vector<shape>&, const value&) const { return {}; }
template <class D = Derived>
auto invoke_compile(
rank<1>, context& ctx, instruction_ref ins, operation op, const value& solution) const
-> decltype(std::declval<D>().compile(ctx, ins, std::move(op), solution))
{
return derived().compile(ctx, ins, std::move(op), solution);
}
template <class D = Derived>
auto invoke_compile(
rank<0>, context& ctx, instruction_ref ins, operation op, const value& solution) const
-> decltype(std::declval<D>().compile(ctx, ins, std::move(op)))
{
assert(solution.empty());
(void)solution;
return derived().compile(ctx, ins, std::move(op));
}
};
} // namespace gpu
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_CONFIG_HPP
#define MIGRAPHX_GUARD_GPU_CONFIG_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/export.h>
#endif // MIGRAPHX_GUARD_GPU_CONFIG_HPP
......@@ -24,6 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
#include <migraphx/gpu/export.h>
#include <migraphx/context.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/rocblas.hpp>
......@@ -170,7 +171,9 @@ struct hip_device
std::size_t stream_id() const { return current_stream; }
std::string get_device_name() const { return device_props.gcnArchName; }
std::string get_device_name() const { return get_arch_name(device_props); }
std::string get_gfx_name() const { return trim(split_string(get_device_name(), ':').front()); }
std::size_t get_device_major() const { return device_props.major; }
......
......@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2);
if(inputs.front().standard())
return inputs.front();
auto lens = inputs.at(0).lens();
auto t = inputs.at(0).type();
return {t, lens};
......
......@@ -31,7 +31,7 @@
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/convolution_backwards.hpp>
#include <unordered_map>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
......@@ -146,7 +146,8 @@ struct miopen_convolution
void set_conv_descriptor()
{
cd = (op.name() == "deconvolution") ? make_deconv(op) : make_conv(op);
cd =
(op.name() == "convolution_backwards") ? make_convolution_backwards(op) : make_conv(op);
}
value compile(migraphx::context& ctx, const shape& output, const std::vector<shape>& input)
......@@ -159,10 +160,31 @@ struct miopen_convolution
shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
std::size_t workspace_size = 0;
auto status = miopenConvolutionForwardGetWorkSpaceSize(miopen_stream_handle,
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x_shape = inputs[0];
auto w_shape = inputs[1];
if(int8_x4_format)
{
x_shape = pack_int8_shape(x_shape);
w_shape = pack_int8_shape(w_shape);
}
#ifdef MIGRAPHX_HAS_FIND_2_API
{
auto conv_problem = make_obj<miopen_problem>(
......@@ -170,13 +192,34 @@ struct miopen_convolution
set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
bool preallocate = false;
#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
// MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
preallocate = true;
#endif
auto x = preallocate ? to_gpu(generate_argument(x_shape)) : inputs[0];
auto w = preallocate ? to_gpu(generate_argument(w_shape)) : inputs[1];
auto y = preallocate ? allocate_gpu(output_shape) : inputs[2];
auto workspace =
preallocate ? allocate_gpu(workspace_shape) : migraphx::argument(workspace_shape);
set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
const miopenTensorArgument_t tensor_args[3] = {
{miopenTensorConvolutionX, nullptr, x.implicit()},
{miopenTensorConvolutionW, nullptr, w.implicit()},
{miopenTensorConvolutionY, nullptr, y.implicit()},
};
solution_ptr = find_solution(miopen_stream_handle,
3,
tensor_args,
workspace.implicit(),
workspace_size,
conv_problem.get(),
ctx.get_exhaustive_tune_flag());
solution_ptr = find_solution(
miopen_stream_handle, conv_problem.get(), ctx.get_exhaustive_tune_flag());
auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen" + op.name() + " : failed to get solution's workspace size");
......@@ -195,29 +238,10 @@ struct miopen_convolution
return shape{shape::int8_type, {workspace_size}};
}
#else
auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x_shape = inputs[0];
auto w_shape = inputs[1];
if(int8_x4_format)
{
x_shape = pack_int8_shape(x_shape);
w_shape = pack_int8_shape(w_shape);
}
auto x = to_gpu(generate_argument(x_shape));
auto w = to_gpu(generate_argument(w_shape));
auto y = allocate_gpu(output_shape);
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
miopenConvAlgoPerf_t perf;
status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
......@@ -337,6 +361,7 @@ struct miopen_convolution
return {s.type(), lens, strides};
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMAX_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,7 +33,10 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void argmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);
void MIGRAPHX_DEVICE_EXPORT argmax(hipStream_t stream,
const argument& result,
const argument& arg,
int64_t axis);
} // namespace device
} // namespace gpu
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMIN_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,7 +33,10 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void argmin(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);
void MIGRAPHX_DEVICE_EXPORT argmin(hipStream_t stream,
const argument& result,
const argument& arg,
int64_t axis);
} // namespace device
} // namespace gpu
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_CONFIG_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_CONFIG_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/export.h>
#endif
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_MIGRAPHLIB_KERNELS_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,7 +33,9 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void contiguous(hipStream_t stream, const argument& result, const argument& arg);
void MIGRAPHX_DEVICE_EXPORT contiguous(hipStream_t stream,
const argument& result,
const argument& arg);
} // namespace device
} // namespace gpu
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_FILL_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,7 +33,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void fill(hipStream_t stream, const argument& result, unsigned long val);
void MIGRAPHX_DEVICE_EXPORT fill(hipStream_t stream, const argument& result, unsigned long val);
} // namespace device
} // namespace gpu
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_GATHER_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument gather(hipStream_t stream, argument result, argument arg1, argument arg2, int64_t axis);
argument MIGRAPHX_DEVICE_EXPORT
gather(hipStream_t stream, argument result, argument arg1, argument arg2, int64_t axis);
} // namespace device
} // namespace gpu
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,9 +33,13 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument& arg);
void MIGRAPHX_DEVICE_EXPORT int8_gemm_pack_a(hipStream_t stream,
const argument& result,
const argument& arg);
void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument& arg);
void MIGRAPHX_DEVICE_EXPORT int8_gemm_pack_b(hipStream_t stream,
const argument& result,
const argument& arg);
} // namespace device
} // namespace gpu
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_LOGSOFTMAX_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,7 +33,10 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);
void MIGRAPHX_DEVICE_EXPORT logsoftmax(hipStream_t stream,
const argument& result,
const argument& arg,
int64_t axis);
} // namespace device
} // namespace gpu
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_MULTINOMIAL_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,10 +33,10 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void multinomial(hipStream_t stream,
const argument& result,
const argument& arg0,
const argument& arg1);
void MIGRAPHX_DEVICE_EXPORT multinomial(hipStream_t stream,
const argument& result,
const argument& arg0,
const argument& arg1);
} // namespace device
} // namespace gpu
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment