Commit 264a7647 authored by Brian Pickrell's avatar Brian Pickrell
Browse files

Merge branch 'develop' into multinomial_parse_merge

parents d99729f8 8e18544f
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP #ifndef MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
#define MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP #define MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
#include <string> #include <string>
struct hipDeviceProp_t; struct hipDeviceProp_t;
...@@ -33,11 +33,11 @@ namespace migraphx { ...@@ -33,11 +33,11 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
std::string get_arch_name(const hipDeviceProp_t& props); MIGRAPHX_GPU_EXPORT std::string get_arch_name(const hipDeviceProp_t& props);
std::string get_device_name(); MIGRAPHX_GPU_EXPORT std::string get_device_name();
int get_device_id(); MIGRAPHX_GPU_EXPORT int get_device_id();
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
#ifndef MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP #ifndef MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
#define MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP #define MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
namespace migraphx { namespace migraphx {
...@@ -34,9 +33,9 @@ struct module_pass_manager; ...@@ -34,9 +33,9 @@ struct module_pass_manager;
namespace gpu { namespace gpu {
bool mlir_enabled(); MIGRAPHX_GPU_EXPORT bool mlir_enabled();
struct fuse_mlir struct MIGRAPHX_GPU_EXPORT fuse_mlir
{ {
context* ctx = nullptr; context* ctx = nullptr;
std::string name() const { return "gpu::fuse_mlir"; } std::string name() const { return "gpu::fuse_mlir"; }
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP #ifndef MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP #define MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/literal.hpp> #include <migraphx/literal.hpp>
#include <migraphx/check_shapes.hpp> #include <migraphx/check_shapes.hpp>
...@@ -38,26 +38,26 @@ namespace gpu { ...@@ -38,26 +38,26 @@ namespace gpu {
struct context; struct context;
std::string hip_error(int error); MIGRAPHX_GPU_EXPORT std::string hip_error(int error);
argument allocate_gpu(const shape& s, bool host = false); MIGRAPHX_GPU_EXPORT argument allocate_gpu(const shape& s, bool host = false);
argument register_on_gpu(const argument& arg); MIGRAPHX_GPU_EXPORT argument register_on_gpu(const argument& arg);
argument to_gpu(const argument& arg, bool host = false); MIGRAPHX_GPU_EXPORT argument to_gpu(const argument& arg, bool host = false);
argument from_gpu(const argument& arg); MIGRAPHX_GPU_EXPORT argument from_gpu(const argument& arg);
void set_device(std::size_t id); MIGRAPHX_GPU_EXPORT void set_device(std::size_t id);
void gpu_sync(); MIGRAPHX_GPU_EXPORT void gpu_sync();
void gpu_sync(const context& ctx); MIGRAPHX_GPU_EXPORT void gpu_sync(const context& ctx);
void gpu_copy(context& ctx, const argument& src, const argument& dst); MIGRAPHX_GPU_EXPORT void gpu_copy(context& ctx, const argument& src, const argument& dst);
void copy_to_gpu(context& ctx, const argument& src, const argument& dst); MIGRAPHX_GPU_EXPORT void copy_to_gpu(context& ctx, const argument& src, const argument& dst);
void copy_from_gpu(context& ctx, const argument& src, const argument& dst); MIGRAPHX_GPU_EXPORT void copy_from_gpu(context& ctx, const argument& src, const argument& dst);
argument get_preallocation(context& ctx, const std::string& id); MIGRAPHX_GPU_EXPORT argument get_preallocation(context& ctx, const std::string& id);
struct hip_allocate struct hip_allocate
{ {
...@@ -187,7 +187,8 @@ struct hip_copy ...@@ -187,7 +187,8 @@ struct hip_copy
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 1; } std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 1; }
}; };
void store_preallocated_param(context& ctx, const std::string& id, const argument& a); MIGRAPHX_GPU_EXPORT void
store_preallocated_param(context& ctx, const std::string& id, const argument& a);
struct hip_allocate_memory struct hip_allocate_memory
{ {
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP
#define MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP #define MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
#include <migraphx/gpu/pack_args.hpp> #include <migraphx/gpu/pack_args.hpp>
#include <hip/hip_runtime_api.h> #include <hip/hip_runtime_api.h>
#include <memory> #include <memory>
...@@ -37,7 +37,7 @@ namespace gpu { ...@@ -37,7 +37,7 @@ namespace gpu {
struct kernel_impl; struct kernel_impl;
struct kernel struct MIGRAPHX_GPU_EXPORT kernel
{ {
kernel() = default; kernel() = default;
kernel(const char* image, const std::string& name); kernel(const char* image, const std::string& name);
......
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
#define MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP #define MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
namespace migraphx { namespace migraphx {
...@@ -40,7 +39,7 @@ namespace gpu { ...@@ -40,7 +39,7 @@ namespace gpu {
* * Maps instructions to their GPU-specific counterparts. * * Maps instructions to their GPU-specific counterparts.
* * Inserts `allocate` instructions before GPU operators. * * Inserts `allocate` instructions before GPU operators.
*/ */
struct lowering struct MIGRAPHX_GPU_EXPORT lowering
{ {
context* ctx; context* ctx;
bool offload_copy; bool offload_copy;
......
...@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr ...@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
using miopen_problem = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem); using miopen_problem = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem);
using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution); using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution);
inline miopen_solution inline miopen_solution find_solution(miopenHandle_t handle,
find_solution(miopenHandle_t handle, miopenProblem_t problem, bool tune = false) size_t num_inputs,
const miopenTensorArgument_t* tensor_args,
void* workspace,
size_t workspace_size,
miopenProblem_t problem,
bool tune = false)
{ {
miopenSolution_t solution; miopenSolution_t solution;
size_t found = 0; size_t found = 0;
miopen_find_options fo = nullptr; miopen_find_options fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
if(tune) if(tune)
{ {
fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
miopenSetFindOptionTuning(fo.get(), 1); miopenSetFindOptionTuning(fo.get(), 1);
} }
auto status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1); #ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
for(auto i : range(num_inputs))
{
auto status = miopenSetFindOptionPreallocatedTensor(
fo.get(), tensor_args[i].id, tensor_args[i].buffer);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen: failed to preallocate tensors for the find process");
}
auto status = miopenSetFindOptionPreallocatedWorkspace(fo.get(), workspace, workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen: failed to preallocate workspace for the find process");
#else
miopenStatus_t status;
(void)(num_inputs);
(void)(tensor_args);
(void)(workspace_size);
(void)(workspace);
#endif
status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
auto result = miopen_solution{solution}; auto result = miopen_solution{solution};
if(status != miopenStatusSuccess or found == 0) if(status != miopenStatusSuccess or found == 0)
MIGRAPHX_THROW("MIOpen miopenFindSolutions failed"); MIGRAPHX_THROW("MIOpen: miopenFindSolutions failed");
return result; return result;
} }
...@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op) ...@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
} }
template <class T> template <class T>
inline convolution_descriptor make_deconv(const T& op) inline convolution_descriptor make_convolution_backwards(const T& op)
{ {
auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor); auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
miopenConvolutionMode_t c_mode = miopenTranspose; miopenConvolutionMode_t c_mode = miopenTranspose;
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
#include <migraphx/gpu/code_object_op.hpp> #include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/instruction_ref.hpp> #include <migraphx/instruction_ref.hpp>
...@@ -35,14 +35,16 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -35,14 +35,16 @@ inline namespace MIGRAPHX_INLINE_NS {
struct module; struct module;
namespace gpu { namespace gpu {
std::string dump_mlir(const module& m); MIGRAPHX_GPU_EXPORT std::string dump_mlir(const module& m);
code_object_op
compile_mlir(const context& ctx, module m, const std::vector<instruction_ref>& inputs);
instruction_ref insert_mlir(module& m, MIGRAPHX_GPU_EXPORT code_object_op compile_mlir(const context& ctx,
instruction_ref ins, module m,
code_object_op co, const std::vector<instruction_ref>& inputs);
const std::vector<instruction_ref>& inputs);
MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m,
instruction_ref ins,
code_object_op co,
const std::vector<instruction_ref>& inputs);
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP
#define MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP #define MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
#include <migraphx/requires.hpp> #include <migraphx/requires.hpp>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -46,7 +46,7 @@ struct kernel_argument ...@@ -46,7 +46,7 @@ struct kernel_argument
void* data; void* data;
}; };
std::vector<char> pack_args(const std::vector<kernel_argument>& args); MIGRAPHX_GPU_EXPORT std::vector<char> pack_args(const std::vector<kernel_argument>& args);
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP #define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
namespace migraphx { namespace migraphx {
...@@ -33,7 +32,7 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -33,7 +32,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
struct pack_int8_args struct MIGRAPHX_GPU_EXPORT pack_int8_args
{ {
std::string name() const { return "gpu::pack_int8_args"; } std::string name() const { return "gpu::pack_int8_args"; }
void apply(module& m) const; void apply(module& m) const;
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP #ifndef MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP #define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#include <migraphx/manage_ptr.hpp> #include <migraphx/manage_ptr.hpp>
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
#include <rocblas/rocblas.h> #include <rocblas/rocblas.h>
namespace migraphx { namespace migraphx {
...@@ -38,9 +38,10 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s); ...@@ -38,9 +38,10 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s);
struct context; struct context;
bool get_compute_fp32_flag(); MIGRAPHX_GPU_EXPORT bool get_compute_fp32_flag();
MIGRAPHX_GPU_EXPORT bool get_int8_x4_format(context& ctx);
bool get_int8_x4_format(context& ctx);
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
...@@ -26,13 +26,13 @@ ...@@ -26,13 +26,13 @@
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/compile_options.hpp> #include <migraphx/compile_options.hpp>
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
struct target struct MIGRAPHX_GPU_EXPORT target
{ {
std::string name() const; std::string name() const;
std::vector<pass> get_passes(migraphx::context& gctx, const compile_options& options) const; std::vector<pass> get_passes(migraphx::context& gctx, const compile_options& options) const;
......
...@@ -32,7 +32,7 @@ namespace migraphx { ...@@ -32,7 +32,7 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
std::pair<double, double> MIGRAPHX_GPU_EXPORT std::pair<double, double>
time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100); time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
} // namespace gpu } // namespace gpu
......
...@@ -32,7 +32,7 @@ struct module; ...@@ -32,7 +32,7 @@ struct module;
namespace gpu { namespace gpu {
struct write_literals struct MIGRAPHX_GPU_EXPORT write_literals
{ {
context* ctx = nullptr; context* ctx = nullptr;
std::string name() const { return "gpu::write_literals"; } std::string name() const { return "gpu::write_literals"; }
......
...@@ -66,7 +66,7 @@ ${preamble} ...@@ -66,7 +66,7 @@ ${preamble}
extern "C" { extern "C" {
__global__ void ${kernel}(${params}) MIGRAPHX_GLOBAL void ${kernel}(${params})
{ {
transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) { transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) {
ck_gemm<${solution}, ${blocks_per_batch}>(xs...); ck_gemm<${solution}, ${blocks_per_batch}>(xs...);
......
...@@ -47,7 +47,7 @@ ${preamble} ...@@ -47,7 +47,7 @@ ${preamble}
extern "C" { extern "C" {
__global__ void ${kernel}(${params}) MIGRAPHX_GLOBAL void ${kernel}(${params})
{ {
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, ${concat_params}, auto... xs) { transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, ${concat_params}, auto... xs) {
concat<${axis}>(${concat_args})(${post}, y, xs...); concat<${axis}>(${concat_args})(${post}, y, xs...);
......
...@@ -44,7 +44,7 @@ namespace migraphx { ...@@ -44,7 +44,7 @@ namespace migraphx {
extern "C" { extern "C" {
__global__ void gather_kernel(void* in_data, void* in_indices, void* output) MIGRAPHX_GLOBAL void gather_kernel(void* in_data, void* in_indices, void* output)
{ {
make_tensors()(in_data, in_indices, output)([](auto&&... xs) { make_tensors()(in_data, in_indices, output)([](auto&&... xs) {
gather<${axis}>(xs...); gather<${axis}>(xs...);
......
...@@ -44,7 +44,7 @@ namespace migraphx { ...@@ -44,7 +44,7 @@ namespace migraphx {
extern "C" { extern "C" {
__global__ void gathernd_kernel(void* in_data, void* in_indices, void* output) MIGRAPHX_GLOBAL void gathernd_kernel(void* in_data, void* in_indices, void* output)
{ {
make_tensors()(in_data, in_indices, output)([](auto&&... xs) { make_tensors()(in_data, in_indices, output)([](auto&&... xs) {
auto settings = make_gathernd_settings(MIGRAPHX_MAKE_CONSTANT(int64_t{BATCH_DIMS})); auto settings = make_gathernd_settings(MIGRAPHX_MAKE_CONSTANT(int64_t{BATCH_DIMS}));
......
...@@ -48,7 +48,7 @@ namespace migraphx { ...@@ -48,7 +48,7 @@ namespace migraphx {
${preamble} ${preamble}
extern "C" { extern "C" {
__global__ void ${kernel}(${params}) MIGRAPHX_GLOBAL void ${kernel}(${params})
{ {
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto... xs) { transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto... xs) {
${layernorm}<${axis}>(${post}, ${eps}, xs...); ${layernorm}<${axis}>(${post}, ${eps}, xs...);
......
...@@ -44,7 +44,7 @@ static const char* const pointwise_kernel = R"__migraphx__( ...@@ -44,7 +44,7 @@ static const char* const pointwise_kernel = R"__migraphx__(
namespace migraphx { namespace migraphx {
extern "C" { extern "C" {
__global__ void pad_kernel(void* input_p, void* output_p) MIGRAPHX_GLOBAL void pad_kernel(void* input_p, void* output_p)
{ {
auto offsets = index_ints<${offsets}>{}; auto offsets = index_ints<${offsets}>{};
auto idx = make_index(); auto idx = make_index();
......
...@@ -44,7 +44,7 @@ namespace migraphx { ...@@ -44,7 +44,7 @@ namespace migraphx {
${preamble} ${preamble}
extern "C" { extern "C" {
__global__ void ${kernel}(${params}) MIGRAPHX_GLOBAL void ${kernel}(${params})
{ {
auto idx = make_index(); auto idx = make_index();
pointwise(idx, ${transformers})(${lambda}, ${args}); pointwise(idx, ${transformers})(${lambda}, ${args});
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment