Unverified Commit 40fbef9b authored by Ted Themistokleous's avatar Ted Themistokleous Committed by GitHub
Browse files

Merge branch 'develop' into threaded_nms

parents d164b151 aeb9f78c
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_NONZERO_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,7 +33,9 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument nonzero(hipStream_t stream, const argument& result, const argument& arg_data);
argument MIGRAPHX_DEVICE_EXPORT nonzero(hipStream_t stream,
const argument& result,
const argument& arg_data);
} // namespace device
} // namespace gpu
......
......@@ -26,7 +26,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_PAD_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -34,11 +34,11 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument pad(hipStream_t stream,
argument result,
argument arg1,
float value,
std::vector<std::int64_t> pads);
argument MIGRAPHX_DEVICE_EXPORT pad(hipStream_t stream,
argument result,
argument arg1,
float value,
std::vector<std::int64_t> pads);
} // namespace device
} // namespace gpu
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_DEVICE_PREFIX_SCAN_SUM_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,12 +33,12 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void prefix_scan_sum(hipStream_t stream,
const argument& result,
const argument& arg,
int32_t axis,
bool exclusive,
bool reverse);
void MIGRAPHX_DEVICE_EXPORT prefix_scan_sum(hipStream_t stream,
const argument& result,
const argument& arg,
int32_t axis,
bool exclusive,
bool reverse);
} // namespace device
} // namespace gpu
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_REVERSE_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,8 +33,10 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument
reverse(hipStream_t stream, argument result, argument arg1, const std::vector<int64_t>& axes);
argument MIGRAPHX_DEVICE_EXPORT reverse(hipStream_t stream,
argument result,
argument arg1,
const std::vector<int64_t>& axes);
} // namespace device
} // namespace gpu
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_RNN_VARIABLE_SEQ_LENS_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,22 +33,22 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void rnn_var_sl_shift_sequence(hipStream_t stream,
const argument& result,
const argument& arg_hs,
const argument& arg_sl);
void MIGRAPHX_DEVICE_EXPORT rnn_var_sl_shift_sequence(hipStream_t stream,
const argument& result,
const argument& arg_hs,
const argument& arg_sl);
void rnn_var_sl_shift_output(hipStream_t stream,
const argument& result,
const argument& arg_hs,
const argument& arg_sl,
bool is_reverse);
void MIGRAPHX_DEVICE_EXPORT rnn_var_sl_shift_output(hipStream_t stream,
const argument& result,
const argument& arg_hs,
const argument& arg_sl,
bool is_reverse);
void rnn_var_sl_last_output(hipStream_t stream,
const argument& result,
const argument& arg_hs,
const argument& arg_sl,
bool is_reverse);
void MIGRAPHX_DEVICE_EXPORT rnn_var_sl_last_output(hipStream_t stream,
const argument& result,
const argument& arg_hs,
const argument& arg_sl,
bool is_reverse);
} // namespace device
} // namespace gpu
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_SCATTER_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,7 +33,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument scatter(
argument MIGRAPHX_DEVICE_EXPORT scatter(
hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis);
} // namespace device
......
......@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
......@@ -33,19 +33,19 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument topk_smallest(hipStream_t stream,
const argument& val_res,
const argument& ind_res,
const argument& arg,
int64_t k,
int64_t axis);
argument MIGRAPHX_DEVICE_EXPORT topk_smallest(hipStream_t stream,
const argument& val_res,
const argument& ind_res,
const argument& arg,
int64_t k,
int64_t axis);
argument topk_largest(hipStream_t stream,
const argument& val_res,
const argument& ind_res,
const argument& arg,
int64_t k,
int64_t axis);
argument MIGRAPHX_DEVICE_EXPORT topk_largest(hipStream_t stream,
const argument& val_res,
const argument& ind_res,
const argument& arg,
int64_t k,
int64_t axis);
} // namespace device
} // namespace gpu
......
......@@ -24,16 +24,20 @@
#ifndef MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
#define MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <string>
struct hipDeviceProp_t;
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
std::string get_device_name();
MIGRAPHX_GPU_EXPORT std::string get_arch_name(const hipDeviceProp_t& props);
MIGRAPHX_GPU_EXPORT std::string get_device_name();
int get_device_id();
MIGRAPHX_GPU_EXPORT int get_device_id();
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
#define MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module_pass_manager;
namespace gpu {
struct fuse_ck
{
context* ctx = nullptr;
std::string name() const { return "gpu::fuse_ck"; }
void apply(module_pass_manager& mpm) const;
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
......@@ -24,7 +24,6 @@
#ifndef MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
#define MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
......@@ -34,7 +33,9 @@ struct module_pass_manager;
namespace gpu {
struct fuse_mlir
MIGRAPHX_GPU_EXPORT bool mlir_enabled();
struct MIGRAPHX_GPU_EXPORT fuse_mlir
{
context* ctx = nullptr;
std::string name() const { return "gpu::fuse_mlir"; }
......
......@@ -24,11 +24,12 @@
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/dyn_output.hpp>
#include <utility>
namespace migraphx {
......@@ -37,26 +38,26 @@ namespace gpu {
struct context;
std::string hip_error(int error);
MIGRAPHX_GPU_EXPORT std::string hip_error(int error);
argument allocate_gpu(const shape& s, bool host = false);
MIGRAPHX_GPU_EXPORT argument allocate_gpu(const shape& s, bool host = false);
argument register_on_gpu(const argument& arg);
MIGRAPHX_GPU_EXPORT argument register_on_gpu(const argument& arg);
argument to_gpu(const argument& arg, bool host = false);
MIGRAPHX_GPU_EXPORT argument to_gpu(const argument& arg, bool host = false);
argument from_gpu(const argument& arg);
MIGRAPHX_GPU_EXPORT argument from_gpu(const argument& arg);
void set_device(std::size_t id);
MIGRAPHX_GPU_EXPORT void set_device(std::size_t id);
void gpu_sync();
void gpu_sync(const context& ctx);
MIGRAPHX_GPU_EXPORT void gpu_sync();
MIGRAPHX_GPU_EXPORT void gpu_sync(const context& ctx);
void gpu_copy(context& ctx, const argument& src, const argument& dst);
void copy_to_gpu(context& ctx, const argument& src, const argument& dst);
void copy_from_gpu(context& ctx, const argument& src, const argument& dst);
MIGRAPHX_GPU_EXPORT void gpu_copy(context& ctx, const argument& src, const argument& dst);
MIGRAPHX_GPU_EXPORT void copy_to_gpu(context& ctx, const argument& src, const argument& dst);
MIGRAPHX_GPU_EXPORT void copy_from_gpu(context& ctx, const argument& src, const argument& dst);
argument get_preallocation(context& ctx, const std::string& id);
MIGRAPHX_GPU_EXPORT argument get_preallocation(context& ctx, const std::string& id);
struct hip_allocate
{
......@@ -112,7 +113,7 @@ struct hip_copy_to_gpu
std::string name() const { return "hip::copy_to_gpu"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1, 2).same_type();
check_shapes{inputs, *this, true}.has(1, 2).same_type();
return inputs.at(0);
}
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
......@@ -121,6 +122,10 @@ struct hip_copy_to_gpu
if(args.size() == 1)
return input;
argument result = args[1].share();
if(result.get_shape().dynamic())
{
result = result.reshape(args[0].get_shape());
}
gpu_copy(ctx, input, result);
// Associate the input since it was registered with hip
return {result.get_shape(), [input, result]() mutable { return result.data(); }};
......@@ -138,19 +143,24 @@ struct hip_copy_from_gpu
std::string name() const { return "hip::copy_from_gpu"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1, 2).same_type();
check_shapes{inputs, *this, true}.has(1, 2).same_type();
return inputs.at(0);
}
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
compute(context& ctx, const dyn_output& dyn_out, const std::vector<argument>& args) const
{
if(args.size() == 1)
{
argument result = allocate_gpu(output_shape, true);
argument result = allocate_gpu(dyn_out.computed_shape, true);
gpu_copy(ctx, args[0], result);
return result;
}
copy_from_gpu(ctx, args[0], args[1]);
argument input = args[0].share();
if(input.get_shape().dynamic())
{
input = input.reshape(args[1].get_shape());
}
copy_from_gpu(ctx, input, args[1]);
return args[1];
}
std::ptrdiff_t output_alias(const std::vector<shape>& args) const
......@@ -177,7 +187,8 @@ struct hip_copy
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 1; }
};
void store_preallocated_param(context& ctx, const std::string& id, const argument& a);
MIGRAPHX_GPU_EXPORT void
store_preallocated_param(context& ctx, const std::string& id, const argument& a);
struct hip_allocate_memory
{
......
......@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP
#define MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <migraphx/gpu/pack_args.hpp>
#include <hip/hip_runtime_api.h>
#include <memory>
......@@ -37,7 +37,7 @@ namespace gpu {
struct kernel_impl;
struct kernel
struct MIGRAPHX_GPU_EXPORT kernel
{
kernel() = default;
kernel(const char* image, const std::string& name);
......
......@@ -24,13 +24,12 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
#define MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;
struct module_pass_manager;
namespace gpu {
......@@ -40,12 +39,12 @@ namespace gpu {
* * Maps instructions to their GPU-specific counterparts.
* * Inserts `allocate` instructions before GPU operators.
*/
struct lowering
struct MIGRAPHX_GPU_EXPORT lowering
{
context* ctx;
bool offload_copy;
std::string name() const { return "gpu::lowering"; }
void apply(module& m) const;
void apply(module_pass_manager& mpm) const;
};
} // namespace gpu
......
......@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
using miopen_problem = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem);
using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution);
inline miopen_solution
find_solution(miopenHandle_t handle, miopenProblem_t problem, bool tune = false)
inline miopen_solution find_solution(miopenHandle_t handle,
size_t num_inputs,
const miopenTensorArgument_t* tensor_args,
void* workspace,
size_t workspace_size,
miopenProblem_t problem,
bool tune = false)
{
miopenSolution_t solution;
size_t found = 0;
miopen_find_options fo = nullptr;
miopen_find_options fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
if(tune)
{
fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
miopenSetFindOptionTuning(fo.get(), 1);
}
auto status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
for(auto i : range(num_inputs))
{
auto status = miopenSetFindOptionPreallocatedTensor(
fo.get(), tensor_args[i].id, tensor_args[i].buffer);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen: failed to preallocate tensors for the find process");
}
auto status = miopenSetFindOptionPreallocatedWorkspace(fo.get(), workspace, workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen: failed to preallocate workspace for the find process");
#else
miopenStatus_t status;
(void)(num_inputs);
(void)(tensor_args);
(void)(workspace_size);
(void)(workspace);
#endif
status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
auto result = miopen_solution{solution};
if(status != miopenStatusSuccess or found == 0)
MIGRAPHX_THROW("MIOpen miopenFindSolutions failed");
MIGRAPHX_THROW("MIOpen: miopenFindSolutions failed");
return result;
}
......@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
}
template <class T>
inline convolution_descriptor make_deconv(const T& op)
inline convolution_descriptor make_convolution_backwards(const T& op)
{
auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
miopenConvolutionMode_t c_mode = miopenTranspose;
......
......@@ -26,23 +26,29 @@
#include <string>
#include <vector>
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/gpu/tuning_config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;
namespace gpu {
std::string dump_mlir(const module& m);
code_object_op
compile_mlir(const context& ctx, module m, const std::vector<instruction_ref>& inputs);
MIGRAPHX_GPU_EXPORT std::string dump_mlir(const module& m);
MIGRAPHX_GPU_EXPORT code_object_op compile_mlir(const context& ctx,
module m,
const std::vector<instruction_ref>& inputs,
const value& solution);
instruction_ref insert_mlir(module& m,
instruction_ref ins,
code_object_op co,
const std::vector<instruction_ref>& inputs);
MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m,
instruction_ref ins,
code_object_op co,
const std::vector<instruction_ref>& inputs);
MIGRAPHX_GPU_EXPORT tuning_config get_tuning_config_mlir(module m,
const std::vector<shape>& inputs);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP
#define MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <migraphx/requires.hpp>
#include <utility>
#include <vector>
......@@ -46,7 +46,7 @@ struct kernel_argument
void* data;
};
std::vector<char> pack_args(const std::vector<kernel_argument>& args);
MIGRAPHX_GPU_EXPORT std::vector<char> pack_args(const std::vector<kernel_argument>& args);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -25,7 +25,6 @@
#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
......@@ -33,7 +32,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct pack_int8_args
struct MIGRAPHX_GPU_EXPORT pack_int8_args
{
std::string name() const { return "gpu::pack_int8_args"; }
void apply(module& m) const;
......
......@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#include <migraphx/manage_ptr.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
#include <rocblas/rocblas.h>
namespace migraphx {
......@@ -38,9 +38,10 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s);
struct context;
bool get_compute_fp32_flag();
MIGRAPHX_GPU_EXPORT bool get_compute_fp32_flag();
MIGRAPHX_GPU_EXPORT bool get_int8_x4_format(context& ctx);
bool get_int8_x4_format(context& ctx);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
......@@ -26,13 +26,13 @@
#include <migraphx/program.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct target
struct MIGRAPHX_GPU_EXPORT target
{
std::string name() const;
std::vector<pass> get_passes(migraphx::context& gctx, const compile_options& options) const;
......
......@@ -31,12 +31,10 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
std::pair<double, double>
MIGRAPHX_GPU_EXPORT std::pair<double, double>
time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment