"tasks/git@developer.sourcefind.cn:sugon_wxj/megatron-lm.git" did not exist on "49a38d5e0b930c63a9c735444b1cea861d12bc43"
Unverified Commit 23cb7917 authored by Brian Pickrell's avatar Brian Pickrell Committed by GitHub
Browse files

Merge branch 'develop' into blas_tuning

parents b5fcc0bc ea32ca70
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_SCATTER_HPP #define MIGRAPHX_GUARD_RTGLIB_DEVICE_SCATTER_HPP
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/config.hpp> #include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h> #include <hip/hip_runtime_api.h>
namespace migraphx { namespace migraphx {
...@@ -33,7 +33,7 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -33,7 +33,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace device { namespace device {
argument scatter( argument MIGRAPHX_DEVICE_EXPORT scatter(
hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis); hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis);
} // namespace device } // namespace device
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP #define MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/config.hpp> #include <migraphx/gpu/device/config.hpp>
#include <hip/hip_runtime_api.h> #include <hip/hip_runtime_api.h>
namespace migraphx { namespace migraphx {
...@@ -33,19 +33,19 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -33,19 +33,19 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace device { namespace device {
argument topk_smallest(hipStream_t stream, argument MIGRAPHX_DEVICE_EXPORT topk_smallest(hipStream_t stream,
const argument& val_res, const argument& val_res,
const argument& ind_res, const argument& ind_res,
const argument& arg, const argument& arg,
int64_t k, int64_t k,
int64_t axis); int64_t axis);
argument topk_largest(hipStream_t stream, argument MIGRAPHX_DEVICE_EXPORT topk_largest(hipStream_t stream,
const argument& val_res, const argument& val_res,
const argument& ind_res, const argument& ind_res,
const argument& arg, const argument& arg,
int64_t k, int64_t k,
int64_t axis); int64_t axis);
} // namespace device } // namespace device
} // namespace gpu } // namespace gpu
......
...@@ -24,16 +24,18 @@ ...@@ -24,16 +24,18 @@
#ifndef MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP #ifndef MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
#define MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP #define MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
#include <string> #include <string>
struct hipDeviceProp_t;
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
std::string get_device_name(); MIGRAPHX_GPU_EXPORT std::string get_device_name();
int get_device_id(); MIGRAPHX_GPU_EXPORT int get_device_id();
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
#define MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module_pass_manager;
namespace gpu {
// Pass object identified as "gpu::fuse_ck". Only the declaration lives here;
// `apply` is defined out of line.
struct fuse_ck
{
// GPU context made available to the pass; remains null unless the creator sets it.
context* ctx = nullptr;
// Name under which this pass reports itself.
std::string name() const { return "gpu::fuse_ck"; }
// Entry point invoked with the module pass manager the pass operates through.
void apply(module_pass_manager& mpm) const;
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
#ifndef MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP #ifndef MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
#define MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP #define MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
namespace migraphx { namespace migraphx {
...@@ -34,7 +33,9 @@ struct module_pass_manager; ...@@ -34,7 +33,9 @@ struct module_pass_manager;
namespace gpu { namespace gpu {
struct fuse_mlir MIGRAPHX_GPU_EXPORT bool mlir_enabled();
struct MIGRAPHX_GPU_EXPORT fuse_mlir
{ {
context* ctx = nullptr; context* ctx = nullptr;
std::string name() const { return "gpu::fuse_mlir"; } std::string name() const { return "gpu::fuse_mlir"; }
......
...@@ -24,11 +24,12 @@ ...@@ -24,11 +24,12 @@
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP #ifndef MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP #define MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/literal.hpp> #include <migraphx/literal.hpp>
#include <migraphx/check_shapes.hpp> #include <migraphx/check_shapes.hpp>
#include <migraphx/functional.hpp> #include <migraphx/functional.hpp>
#include <migraphx/dyn_output.hpp>
#include <utility> #include <utility>
namespace migraphx { namespace migraphx {
...@@ -37,26 +38,26 @@ namespace gpu { ...@@ -37,26 +38,26 @@ namespace gpu {
struct context; struct context;
std::string hip_error(int error); MIGRAPHX_GPU_EXPORT std::string hip_error(int error);
argument allocate_gpu(const shape& s, bool host = false); MIGRAPHX_GPU_EXPORT argument allocate_gpu(const shape& s, bool host = false);
argument register_on_gpu(const argument& arg); MIGRAPHX_GPU_EXPORT argument register_on_gpu(const argument& arg);
argument to_gpu(const argument& arg, bool host = false); MIGRAPHX_GPU_EXPORT argument to_gpu(const argument& arg, bool host = false);
argument from_gpu(const argument& arg); MIGRAPHX_GPU_EXPORT argument from_gpu(const argument& arg);
void set_device(std::size_t id); MIGRAPHX_GPU_EXPORT void set_device(std::size_t id);
void gpu_sync(); MIGRAPHX_GPU_EXPORT void gpu_sync();
void gpu_sync(const context& ctx); MIGRAPHX_GPU_EXPORT void gpu_sync(const context& ctx);
void gpu_copy(context& ctx, const argument& src, const argument& dst); MIGRAPHX_GPU_EXPORT void gpu_copy(context& ctx, const argument& src, const argument& dst);
void copy_to_gpu(context& ctx, const argument& src, const argument& dst); MIGRAPHX_GPU_EXPORT void copy_to_gpu(context& ctx, const argument& src, const argument& dst);
void copy_from_gpu(context& ctx, const argument& src, const argument& dst); MIGRAPHX_GPU_EXPORT void copy_from_gpu(context& ctx, const argument& src, const argument& dst);
argument get_preallocation(context& ctx, const std::string& id); MIGRAPHX_GPU_EXPORT argument get_preallocation(context& ctx, const std::string& id);
struct hip_allocate struct hip_allocate
{ {
...@@ -91,7 +92,7 @@ struct hip_sync_stream ...@@ -91,7 +92,7 @@ struct hip_sync_stream
return inputs.front(); return inputs.front();
} }
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const argument compute(const context& ctx, const shape&, const std::vector<argument>& args) const
{ {
gpu_sync(ctx); gpu_sync(ctx);
if(args.empty()) if(args.empty())
...@@ -112,7 +113,7 @@ struct hip_copy_to_gpu ...@@ -112,7 +113,7 @@ struct hip_copy_to_gpu
std::string name() const { return "hip::copy_to_gpu"; } std::string name() const { return "hip::copy_to_gpu"; }
shape compute_shape(std::vector<shape> inputs) const shape compute_shape(std::vector<shape> inputs) const
{ {
check_shapes{inputs, *this}.has(1, 2).same_type(); check_shapes{inputs, *this, true}.has(1, 2).same_type();
return inputs.at(0); return inputs.at(0);
} }
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
...@@ -121,6 +122,10 @@ struct hip_copy_to_gpu ...@@ -121,6 +122,10 @@ struct hip_copy_to_gpu
if(args.size() == 1) if(args.size() == 1)
return input; return input;
argument result = args[1].share(); argument result = args[1].share();
if(result.get_shape().dynamic())
{
result = result.reshape(args[0].get_shape());
}
gpu_copy(ctx, input, result); gpu_copy(ctx, input, result);
// Associate the input since it was registered with hip // Associate the input since it was registered with hip
return {result.get_shape(), [input, result]() mutable { return result.data(); }}; return {result.get_shape(), [input, result]() mutable { return result.data(); }};
...@@ -138,19 +143,24 @@ struct hip_copy_from_gpu ...@@ -138,19 +143,24 @@ struct hip_copy_from_gpu
std::string name() const { return "hip::copy_from_gpu"; } std::string name() const { return "hip::copy_from_gpu"; }
shape compute_shape(std::vector<shape> inputs) const shape compute_shape(std::vector<shape> inputs) const
{ {
check_shapes{inputs, *this}.has(1, 2).same_type(); check_shapes{inputs, *this, true}.has(1, 2).same_type();
return inputs.at(0); return inputs.at(0);
} }
argument argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const compute(context& ctx, const dyn_output& dyn_out, const std::vector<argument>& args) const
{ {
if(args.size() == 1) if(args.size() == 1)
{ {
argument result = allocate_gpu(output_shape, true); argument result = allocate_gpu(dyn_out.computed_shape, true);
gpu_copy(ctx, args[0], result); gpu_copy(ctx, args[0], result);
return result; return result;
} }
copy_from_gpu(ctx, args[0], args[1]); argument input = args[0].share();
if(input.get_shape().dynamic())
{
input = input.reshape(args[1].get_shape());
}
copy_from_gpu(ctx, input, args[1]);
return args[1]; return args[1];
} }
std::ptrdiff_t output_alias(const std::vector<shape>& args) const std::ptrdiff_t output_alias(const std::vector<shape>& args) const
...@@ -177,7 +187,8 @@ struct hip_copy ...@@ -177,7 +187,8 @@ struct hip_copy
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 1; } std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 1; }
}; };
void store_preallocated_param(context& ctx, const std::string& id, const argument& a); MIGRAPHX_GPU_EXPORT void
store_preallocated_param(context& ctx, const std::string& id, const argument& a);
struct hip_allocate_memory struct hip_allocate_memory
{ {
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP
#define MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP #define MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
#include <migraphx/gpu/pack_args.hpp> #include <migraphx/gpu/pack_args.hpp>
#include <hip/hip_runtime_api.h> #include <hip/hip_runtime_api.h>
#include <memory> #include <memory>
...@@ -37,7 +37,7 @@ namespace gpu { ...@@ -37,7 +37,7 @@ namespace gpu {
struct kernel_impl; struct kernel_impl;
struct kernel struct MIGRAPHX_GPU_EXPORT kernel
{ {
kernel() = default; kernel() = default;
kernel(const char* image, const std::string& name); kernel(const char* image, const std::string& name);
......
...@@ -24,13 +24,12 @@ ...@@ -24,13 +24,12 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
#define MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP #define MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
struct module; struct module_pass_manager;
namespace gpu { namespace gpu {
...@@ -40,12 +39,12 @@ namespace gpu { ...@@ -40,12 +39,12 @@ namespace gpu {
* * Maps instructions to their GPU-specific counterparts. * * Maps instructions to their GPU-specific counterparts.
* * Inserts `allocate` instructions before GPU operators. * * Inserts `allocate` instructions before GPU operators.
*/ */
struct lowering struct MIGRAPHX_GPU_EXPORT lowering
{ {
context* ctx; context* ctx;
bool offload_copy; bool offload_copy;
std::string name() const { return "gpu::lowering"; } std::string name() const { return "gpu::lowering"; }
void apply(module& m) const; void apply(module_pass_manager& mpm) const;
}; };
} // namespace gpu } // namespace gpu
......
...@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr ...@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
using miopen_problem = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem); using miopen_problem = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem);
using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution); using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution);
inline miopen_solution inline miopen_solution find_solution(miopenHandle_t handle,
find_solution(miopenHandle_t handle, miopenProblem_t problem, bool tune = false) size_t num_inputs,
const miopenTensorArgument_t* tensor_args,
void* workspace,
size_t workspace_size,
miopenProblem_t problem,
bool tune = false)
{ {
miopenSolution_t solution; miopenSolution_t solution;
size_t found = 0; size_t found = 0;
miopen_find_options fo = nullptr; miopen_find_options fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
if(tune) if(tune)
{ {
fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
miopenSetFindOptionTuning(fo.get(), 1); miopenSetFindOptionTuning(fo.get(), 1);
} }
auto status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1); #ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
for(auto i : range(num_inputs))
{
auto status = miopenSetFindOptionPreallocatedTensor(
fo.get(), tensor_args[i].id, tensor_args[i].buffer);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen: failed to preallocate tensors for the find process");
}
auto status = miopenSetFindOptionPreallocatedWorkspace(fo.get(), workspace, workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen: failed to preallocate workspace for the find process");
#else
miopenStatus_t status;
(void)(num_inputs);
(void)(tensor_args);
(void)(workspace_size);
(void)(workspace);
#endif
status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
auto result = miopen_solution{solution}; auto result = miopen_solution{solution};
if(status != miopenStatusSuccess or found == 0) if(status != miopenStatusSuccess or found == 0)
MIGRAPHX_THROW("MIOpen miopenFindSolutions failed"); MIGRAPHX_THROW("MIOpen: miopenFindSolutions failed");
return result; return result;
} }
...@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op) ...@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
} }
template <class T> template <class T>
inline convolution_descriptor make_deconv(const T& op) inline convolution_descriptor make_convolution_backwards(const T& op)
{ {
auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor); auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
miopenConvolutionMode_t c_mode = miopenTranspose; miopenConvolutionMode_t c_mode = miopenTranspose;
......
...@@ -26,23 +26,30 @@ ...@@ -26,23 +26,30 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
#include <migraphx/gpu/code_object_op.hpp> #include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/instruction_ref.hpp> #include <migraphx/instruction_ref.hpp>
#include <migraphx/gpu/tuning_config.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
struct module; struct module;
namespace gpu { namespace gpu {
std::string dump_mlir(const module& m); MIGRAPHX_GPU_EXPORT std::string dump_mlir(const module& m);
code_object_op MIGRAPHX_GPU_EXPORT code_object_op compile_mlir(const context& migraphx_ctx,
compile_mlir(const context& ctx, module m, const std::vector<instruction_ref>& inputs); module m,
const std::vector<instruction_ref>& inputs,
const value& solution);
instruction_ref insert_mlir(module& m, MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m,
instruction_ref ins, instruction_ref ins,
code_object_op co, code_object_op co,
const std::vector<instruction_ref>& inputs); const std::vector<instruction_ref>& inputs);
MIGRAPHX_GPU_EXPORT tuning_config get_tuning_config_mlir(const context& migraphx_ctx,
module m,
const std::vector<shape>& inputs);
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP
#define MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP #define MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
#include <migraphx/requires.hpp> #include <migraphx/requires.hpp>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -46,7 +46,7 @@ struct kernel_argument ...@@ -46,7 +46,7 @@ struct kernel_argument
void* data; void* data;
}; };
std::vector<char> pack_args(const std::vector<kernel_argument>& args); MIGRAPHX_GPU_EXPORT std::vector<char> pack_args(const std::vector<kernel_argument>& args);
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP #define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
namespace migraphx { namespace migraphx {
...@@ -33,7 +32,7 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -33,7 +32,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
struct pack_int8_args struct MIGRAPHX_GPU_EXPORT pack_int8_args
{ {
std::string name() const { return "gpu::pack_int8_args"; } std::string name() const { return "gpu::pack_int8_args"; }
void apply(module& m) const; void apply(module& m) const;
......
...@@ -39,9 +39,10 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s); ...@@ -39,9 +39,10 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s);
struct context; struct context;
bool get_compute_fp32_flag(); MIGRAPHX_GPU_EXPORT bool get_compute_fp32_flag();
MIGRAPHX_GPU_EXPORT bool get_int8_x4_format(context& ctx);
bool get_int8_x4_format(context& ctx);
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
...@@ -26,13 +26,13 @@ ...@@ -26,13 +26,13 @@
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/compile_options.hpp> #include <migraphx/compile_options.hpp>
#include <migraphx/config.hpp> #include <migraphx/gpu/config.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
struct target struct MIGRAPHX_GPU_EXPORT target
{ {
std::string name() const; std::string name() const;
std::vector<pass> get_passes(migraphx::context& gctx, const compile_options& options) const; std::vector<pass> get_passes(migraphx::context& gctx, const compile_options& options) const;
......
...@@ -31,12 +31,10 @@ ...@@ -31,12 +31,10 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace driver {
std::pair<double, double> MIGRAPHX_GPU_EXPORT std::pair<double, double>
time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100); time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
} // namespace driver
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
#define MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// Plain aggregate bundling one `problem` value with a list of `solutions` values.
// NOTE(review): presumably `solutions` holds the candidate solutions for `problem`
// -- confirm against the code that fills this struct.
struct tuning_config
{
value problem;
std::vector<value> solutions;
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
...@@ -32,7 +32,7 @@ struct module; ...@@ -32,7 +32,7 @@ struct module;
namespace gpu { namespace gpu {
struct write_literals struct MIGRAPHX_GPU_EXPORT write_literals
{ {
context* ctx = nullptr; context* ctx = nullptr;
std::string name() const { return "gpu::write_literals"; } std::string name() const { return "gpu::write_literals"; }
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <fstream>
#include <migraphx/filesystem.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/env.hpp>
#include <migraphx/file_buffer.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/module.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include "ck/host/device_gemm_multiple_d.hpp"
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
using namespace migraphx::gpu::gen; // NOLINT
// Environment variables declared for the CK GEMM compiler.
// - MIGRAPHX_CK_TUNING: read by get_tuning_for as the path of a JSON tuning file.
// - MIGRAPHX_CK_TUNING_VALUE: read by get_tuning_for through value_of() when no exact
//   tuning entry matches (presumably taking precedence over the computed default).
// - The remaining variables are not referenced by the helpers directly below;
//   NOTE(review): presumably they are consumed further down in this file.
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_LOG_CK_GEMM);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_TUNING);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_TUNING_VALUE);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_DEBUG);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TUNE_CK);
// Source template for the generated CK GEMM kernel. The `${...}` placeholders
// (include, preamble, kernel, params, args, solution, blocks_per_batch) are meant
// to be substituted before the text is compiled.
// NOTE(review): the substitution site is outside the visible part of this file.
// NOLINTNEXTLINE
static const char* const ck_gemm_kernel = R"__migraphx__(
#include <args.hpp>
#include <migraphx/kernels/ck_gemm.hpp>
#include <migraphx/kernels/pointwise.hpp>
#include <migraphx/kernels/ops.hpp>
#include <${include}>
namespace migraphx {
${preamble}
extern "C" {
MIGRAPHX_GLOBAL void ${kernel}(${params})
{
transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) {
ck_gemm<${solution}, ${blocks_per_batch}>(xs...);
});
}
}
} // namespace migraphx
)__migraphx__";
// Wrapper template that surrounds `${content}` with clang diagnostic push/pop
// pragmas ignoring "-Weverything". ck_disable_warnings() fills in `content`.
// NOLINTNEXTLINE
static const char* const disable_warning_pragma = R"__migraphx__(
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
${content}
#pragma clang diagnostic pop
)__migraphx__";
// Builds a string from the [p.first, p.second) range and embeds it in
// disable_warning_pragma, so the resulting text is compiled with warnings off.
template <class P>
static std::string ck_disable_warnings(P p)
{
    const std::string content{p.first, p.second};
    return interpolate_string(disable_warning_pragma, {{"content", content}});
}
static std::unordered_map<std::string, std::string> create_ck_header_strings()
{
std::unordered_map<std::string, std::string> result;
auto ck_headers = ck::host::GetHeaders();
std::transform(
ck_headers.begin(), ck_headers.end(), std::inserter(result, result.begin()), [&](auto&& p) {
return std::make_pair(p.first, ck_disable_warnings(p.second));
});
return result;
}
static std::vector<src_file> create_ck_headers()
{
static const auto& header_strings = create_ck_header_strings();
std::vector<src_file> srcs;
std::transform(
header_strings.begin(), header_strings.end(), std::back_inserter(srcs), [&](auto&& p) {
return src_file{fs::path{p.first},
{p.second.data(), p.second.data() + p.second.size()}};
});
return srcs;
}
// Accessor for the CK header list; the list is built on the first call and the
// same object is returned on every later call.
static const std::vector<src_file>& ck_headers()
{
    static const std::vector<src_file> cached = create_ck_headers();
    return cached;
}
// A shape counts as transposed when its last stride is anything other than 1.
static bool transposed_matrix(const shape& s)
{
    const auto innermost_stride = s.strides().back();
    return innermost_stride != 1;
}
// One tuning record: a list of shapes paired with the associated tuning index.
using tuning_entry = std::pair<std::vector<shape>, size_t>;
// Loads the tuning records stored as JSON in the file at path `s`.
// Returns an empty list when that path does not exist.
static std::vector<tuning_entry> read_tuning(const std::string& s)
{
    if(fs::exists(s))
    {
        const auto parsed = from_json_string(read_string(s));
        return from_value<std::vector<tuning_entry>>(parsed);
    }
    return {};
}
// Distance between two matrix shapes, used to find the closest tuning entry.
//
// Returns the Euclidean distance between the last two dimensions of `x` and `y`.
// Shapes that cannot be compared (different element type, different
// transposition, or fewer than two dimensions) yield
// std::numeric_limits<float>::max().
static float matrix_distance(const shape& x, const shape& y)
{
    constexpr auto incompatible = std::numeric_limits<float>::max();
    const auto& x_lens          = x.lens();
    const auto& y_lens          = y.lens();
    // The last two dimensions are read below; reject shapes that lack them
    // instead of walking past the end of the dimension list.
    if(x_lens.size() < 2 or y_lens.size() < 2)
        return incompatible;
    if(x.type() != y.type())
        return incompatible;
    if(transposed_matrix(x) != transposed_matrix(y))
        return incompatible;
    // Accumulate in double: the dimensions are unsigned, so an int accumulator
    // would be narrowed on every step and overflow for large differences.
    const auto sum_squared = std::inner_product(x_lens.rbegin(),
                                                x_lens.rbegin() + 2,
                                                y_lens.rbegin(),
                                                0.0,
                                                std::plus<>{},
                                                [](auto a, auto b) {
                                                    const auto diff = static_cast<double>(a) -
                                                                      static_cast<double>(b);
                                                    return diff * diff;
                                                });
    return static_cast<float>(std::sqrt(sum_squared));
}
static std::size_t get_tuning_for(const std::vector<shape>& inputs)
{
static auto tuning = read_tuning(string_value_of(MIGRAPHX_CK_TUNING{}, ""));
if(tuning.empty())
{
std::cout << "*********** Warning: No CK tuning! for config:" << std::endl;
std::cout << " " << inputs[0] << std::endl;
std::cout << " " << inputs[1] << std::endl;
std::cout << " " << inputs[2] << std::endl;
}
auto it = std::find_if(
tuning.begin(), tuning.end(), [&](const auto& p) { return p.first == inputs; });
if(it == tuning.end())
{
std::cout << "*********** Warning: CK tuning missing for config!" << std::endl;
std::cout << " " << inputs[0] << std::endl;
std::cout << " " << inputs[1] << std::endl;
std::cout << " " << inputs[2] << std::endl;
std::vector<std::pair<float, std::size_t>> w;
std::transform(tuning.begin(), tuning.end(), std::back_inserter(w), [&](const auto& p) {
if(inputs.size() < 3 or p.first.size() < 3)
MIGRAPHX_THROW("Invalid CK config");
auto avg_distance = std::inner_product(
p.first.begin(),
p.first.begin() + 3,
inputs.begin(),
0.0f,
std::plus<>{},
[](const auto& x, const auto& y) { return matrix_distance(x, y) / 3.0f; });
return std::make_pair(avg_distance, p.second);
});
std::sort(w.begin(), w.end());
std::size_t default_value = 4;
if(not w.empty())
default_value = w.front().second;
auto tuning_val = value_of(MIGRAPHX_CK_TUNING_VALUE{}, default_value);
std::cout << "*********** Warning: CK try tuning: " << tuning_val << std::endl;
return tuning_val;
}
return it->second;
}
struct ck_gemm_compiler : compiler<ck_gemm_compiler>
{
// Name of the CK layout tag for `s`: ColumnMajor when the shape is transposed
// (see transposed_matrix), RowMajor otherwise.
static std::string get_layout(const shape& s)
{
    if(transposed_matrix(s))
        return "ck::tensor_layout::gemm::ColumnMajor";
    return "ck::tensor_layout::gemm::RowMajor";
}
// Maps the element type of `s` to the matching ck::host::DataType.
// Handles half, float, int8 and int32; any other type raises "Unsupported ck type".
static ck::host::DataType get_type(const shape& s)
{
    const auto element_type = s.type();
    if(element_type == shape::half_type)
        return ck::host::DataType::Half;
    if(element_type == shape::float_type)
        return ck::host::DataType::Float;
    if(element_type == shape::int8_type)
        return ck::host::DataType::Int8;
    if(element_type == shape::int32_type)
        return ck::host::DataType::Int32;
    MIGRAPHX_THROW("Unsupported ck type");
}
// Applies `f` to every element of [start, last) and renders the results as
// "ck::Tuple<a,b,...>" (comma separated, no spaces).
template <class Iterator, class F>
static std::string ck_tuple(Iterator start, Iterator last, F f)
{
    std::vector<std::string> elements;
    for(auto it = start; it != last; ++it)
        elements.push_back(f(*it));
    return "ck::Tuple<" + join_strings(elements, ",") + ">";
}
static std::vector<shape> adjust_inputs(std::vector<shape> inputs, bool& swap_inputs)
{
swap_inputs = false;
auto c_shape = inputs.back();
if(not transposed_matrix(c_shape))
return inputs;
std::vector<int64_t> perm(c_shape.lens().size());
std::iota(perm.begin(), perm.end(), 0);
std::swap(perm[perm.size() - 1], perm[perm.size() - 2]);
std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](shape s) {
return reorder_shape(s, perm);
});
swap_inputs = true;
return inputs;
}
// Product of every dimension of `s` except the last two (1 when there are none).
static std::size_t get_batch_count(const shape& s)
{
    const auto& dims  = s.lens();
    std::size_t count = 1;
    for(auto it = dims.rbegin() + 2; it != dims.rend(); ++it)
        count *= *it;
    return count;
}
// Replaces `s` with a two-dimensional shape that absorbs the batch count:
// into the last dimension when `s` is transposed, into the second-to-last
// dimension otherwise. Shapes with at most two dimensions are left alone.
static void fold_batch_dims(shape& s)
{
    const auto dims = s.lens();
    const auto rank = dims.size();
    if(rank <= 2)
        return;

    const auto batches = get_batch_count(s);
    auto rows          = dims.at(rank - 2);
    auto cols          = dims.at(rank - 1);
    if(transposed_matrix(s))
        cols = cols * batches;
    else
        rows = rows * batches;
    s = shape{s.type(), {rows, cols}};
}
// Replaces `s` with a two-dimensional shape made of its last two dimensions.
// Shapes with at most two dimensions are left alone.
static void remove_batch_dims(shape& s)
{
    const auto dims = s.lens();
    const auto rank = dims.size();
    if(rank > 2)
    {
        const auto rows = dims.at(rank - 2);
        const auto cols = dims.at(rank - 1);
        s               = shape{s.type(), {rows, cols}};
    }
}
// Operator names associated with this compiler: "ck_gemm" and "gpu::ck_gemm".
std::vector<std::string> names() const { return {"ck_gemm", "gpu::ck_gemm"}; }
// Checks whether the batch dimensions of `s` (all but the last two) form a
// standard shape once their strides are divided by the product of the last two
// dimensions. Shapes with fewer than three dimensions always pass.
static bool standard_batch(const shape& s)
{
    const auto& dims = s.lens();
    const auto rank  = dims.size();
    if(rank < 3)
        return true;

    const auto& all_strides    = s.strides();
    const auto matrix_elements = dims[rank - 2] * dims[rank - 1];

    std::vector<std::size_t> batch_lens(dims.begin(), dims.end() - 2);
    std::vector<std::size_t> batch_strides(all_strides.begin(), all_strides.end() - 2);
    for(auto& stride : batch_strides)
        stride = stride / matrix_elements;

    return shape{s.type(), batch_lens, batch_strides}.standard();
}
// Returns true when both conditions hold:
//  - every shape in inputs[2 .. size-2] passes standard_batch, and
//  - every stride of inputs[1] except the last two is zero.
bool can_fold_batch(const std::vector<shape>& inputs) const
{
    const auto& b_shape = inputs[1];
    const bool extras_standard =
        std::all_of(inputs.begin() + 2, inputs.end() - 1, [](const auto& input) {
            return standard_batch(input);
        });
    if(not extras_standard)
        return false;

    const auto& b_strides = b_shape.strides();
    return std::none_of(
        b_strides.begin(), b_strides.end() - 2, [](auto stride) { return stride != 0; });
}
// Builds the CK problem description for a GEMM.
//
// inputs[0] and inputs[1] are the A and B operands, inputs.back() is the
// output, and anything in between is described through ds_layout / ds_type.
// M is multiplied by the batch count when can_fold_batch(inputs) holds.
// The final element-wise operation is taken from v["post"] when that key is
// present and is "ck_passthrough" otherwise.
ck::host::device_gemm_multiple_d::Problem create_problem(const std::vector<shape>& inputs,
                                                         const value& v) const
{
    const auto& a_shape = inputs[0];
    const auto& b_shape = inputs[1];
    const auto& c_shape = inputs.back();

    // cppcheck-suppress unreadVariable
    auto rank        = a_shape.ndim();
    auto batch_count = get_batch_count(c_shape);
    auto m           = c_shape.lens()[rank - 2];
    if(can_fold_batch(inputs))
        m = m * batch_count;
    auto n = c_shape.lens().back();
    auto k = a_shape.lens().back();

    const bool trans_a = transposed_matrix(a_shape);
    const bool trans_b = transposed_matrix(b_shape);
    const bool trans_e = transposed_matrix(c_shape);
    const auto a_type  = get_type(a_shape);
    const auto b_type  = get_type(b_shape);
    const auto e_type  = get_type(c_shape);

    // Layout and element type of every input between B and the output.
    std::vector<bool> ds_layout;
    std::vector<ck::host::DataType> ds_type;
    for(auto it = inputs.begin() + 2; it != inputs.end() - 1; ++it)
    {
        ds_layout.push_back(transposed_matrix(*it));
        ds_type.push_back(get_type(*it));
    }

    const std::string ck_passthrough = "ck_passthrough";
    assert(inputs.size() < 4 or v.contains("post"));
    const std::string cde_op =
        v.contains("post") ? v.at("post").to<std::string>() : ck_passthrough;

    return ck::host::device_gemm_multiple_d::Problem{m,
                                                     n,
                                                     k,
                                                     trans_a,
                                                     trans_b,
                                                     trans_e,
                                                     ds_layout,
                                                     a_type,
                                                     b_type,
                                                     e_type,
                                                     ds_type,
                                                     ck_passthrough,
                                                     ck_passthrough,
                                                     cde_op};
}
// Generates and compiles the HIP kernel for one CK GEMM.
//
// The solution index comes from v["tuning_value"] when present, otherwise
// from get_tuning_for. When the batch can be folded, the launch uses the
// solution's grid size as is and the kernel sees 2-D "virtual" input shapes;
// otherwise the grid is scaled by the batch count and the inputs are passed
// through unchanged.
operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
{
    const auto& a_shape = inputs[0];
    const auto& b_shape = inputs[1];
    const auto& c_shape = inputs.back();

    auto tuning_value = v.get("tuning_value", 4);
    if(not v.contains("tuning_value"))
        tuning_value = get_tuning_for({a_shape, b_shape, c_shape});

    auto batch_count          = get_batch_count(c_shape);
    auto problem              = create_problem(inputs, v);
    const auto include_header = problem.GetIncludeHeader();
    const auto candidates     = problem.GetSolutions(ctx.get_current_device().get_gfx_name());
    // at() throws if the tuning value does not index an available solution.
    const auto& chosen          = candidates.at(tuning_value);
    const auto kernel_template  = chosen.template_str;
    const auto blocks_per_batch = chosen.grid_size;
    const auto block_size       = chosen.block_size;

    hip_compile_options options;
    options.additional_src_files = ck_headers();

    // Evaluated once and reused for both the grid size and the virtual inputs.
    const bool fold = can_fold_batch(inputs);
    auto grid_size  = fold ? blocks_per_batch : batch_count * blocks_per_batch;
    options.set_launch_params(v, grid_size * block_size, block_size);
    options.inputs      = inputs;
    options.output      = c_shape;
    options.kernel_name = v.get("kernel", "ck_gemm_kernel");

    if(fold)
    {
        // A and all remaining inputs have their batch folded in; B simply
        // drops its batch dimensions.
        auto vinputs = inputs;
        fold_batch_dims(vinputs[0]);
        remove_batch_dims(vinputs[1]);
        for(auto it = vinputs.begin() + 2; it != vinputs.end(); ++it)
            fold_batch_dims(*it);
        options.virtual_inputs = vinputs;
    }
    else
    {
        options.virtual_inputs = inputs;
    }

    if(v.get("check", false) or enabled(MIGRAPHX_CK_DEBUG{}))
        options.params += " -DMIGRAPHX_CK_CHECK=1";

    auto src = interpolate_string(ck_gemm_kernel,
                                  {{"solution", kernel_template},
                                   {"include", include_header},
                                   {"params", enum_params(inputs.size(), "void * private_p")},
                                   {"args", enum_params(inputs.size(), "private_p")},
                                   {"blocks_per_batch", to_string(blocks_per_batch)},
                                   {"preamble", v.get("preamble", std::string{})},
                                   {"kernel", options.kernel_name}});

    return compile_hip_code_object(src, options);
}
// Builds the settings value used to compile `ins`.
//
// Starts from the operation's own value and sets "kernel" to the default
// kernel name. If the instruction has module inputs, the first module is
// turned into a pointwise preamble, "post" is pointed at the adaptor around
// it, and "kernel" is replaced by a name derived from the module's ops.
value create_settings(instruction_ref ins, const operation& op) const
{
    auto settings      = op.to_value();
    settings["kernel"] = "ck_gemm_kernel";

    const auto& mods = ins->module_inputs();
    if(mods.empty())
        return settings;

    auto* post_module    = mods.front();
    settings["preamble"] = generate_pointwise(*post_module, "post_ck_gemm_function") +
                           "\nMIGRAPHX_LIFT_CLASS(post_ck_gemm, post_ck_gemm_function);";
    settings["post"]   = "ck_function_adaptor<post_ck_gemm>";
    settings["kernel"] = "ck_gemm_" + generate_name_from_ops(*post_module) + "_kernel";
    return settings;
}
// Compiles the GEMM for `ins` and returns the compiled operation together
// with the callback that swaps it into the module.
//
// A non-null `solution` is forwarded as the "tuning_value" setting. When
// MIGRAPHX_LOG_CK_GEMM is enabled the callback also prints the A, B and
// output shapes (the output retyped to A's type) as JSON on stdout.
compiler_replace
compile(context& ctx, instruction_ref ins, const operation& op, const value& solution) const
{
    auto shapes   = to_shapes(ins->inputs());
    auto settings = create_settings(ins, op);
    if(not solution.is_null())
        settings["tuning_value"] = solution;

    return {compile_op(ctx, shapes, settings),
            [=](module& m, instruction_ref target, const operation& code_object) {
                if(enabled(MIGRAPHX_LOG_CK_GEMM{}))
                {
                    std::vector<shape> logged_shapes{
                        shapes[0], shapes[1], shapes.back().with_type(shapes[0].type())};
                    std::cout << "gpu::ck_gemm: " << to_json_string(to_value(logged_shapes))
                              << std::endl;
                }
                m.replace_instruction(target, code_object, target->inputs());
            }};
}
// Produces the tuning configuration for `ins`, or nullopt when tuning is
// neither requested via `exhaustive` nor enabled through MIGRAPHX_TUNE_CK.
//
// The candidate solutions are the indices 0..N-1 of the solutions the
// problem reports for the current device; the problem key is the value form
// of the A, B and output shapes.
optional<tuning_config>
get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive) const
{
    if(not(exhaustive or enabled(MIGRAPHX_TUNE_CK{})))
        return nullopt;

    tuning_config tc;
    auto shapes     = to_shapes(ins->inputs());
    auto problem    = create_problem(shapes, create_settings(ins, op));
    auto candidates = problem.GetSolutions(ctx.get_current_device().get_gfx_name());

    // One entry per available solution, numbered from zero.
    tc.solutions.resize(candidates.size());
    std::iota(tc.solutions.begin(), tc.solutions.end(), 0);

    std::vector<shape> gemm_shapes{shapes[0], shapes[1], shapes.back()};
    tc.problem = to_value(gemm_shapes);
    return tc;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -47,7 +47,7 @@ ${preamble} ...@@ -47,7 +47,7 @@ ${preamble}
extern "C" { extern "C" {
__global__ void ${kernel}(${params}) MIGRAPHX_GLOBAL void ${kernel}(${params})
{ {
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, ${concat_params}, auto... xs) { transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, ${concat_params}, auto... xs) {
concat<${axis}>(${concat_args})(${post}, y, xs...); concat<${axis}>(${concat_args})(${post}, y, xs...);
...@@ -108,7 +108,7 @@ struct concat_compiler : compiler<concat_compiler> ...@@ -108,7 +108,7 @@ struct concat_compiler : compiler<concat_compiler>
v["post"] = "MIGRAPHX_LIFT(post_concat)"; v["post"] = "MIGRAPHX_LIFT(post_concat)";
v["kernel"] = "concat_" + generate_name_from_ops(*pm) + "_kernel"; v["kernel"] = "concat_" + generate_name_from_ops(*pm) + "_kernel";
} }
return replace(compile_op(ctx, to_shapes(ins->inputs()), v)); return compile_op(ctx, to_shapes(ins->inputs()), v);
} }
}; };
......
...@@ -44,7 +44,7 @@ namespace migraphx { ...@@ -44,7 +44,7 @@ namespace migraphx {
extern "C" { extern "C" {
__global__ void gather_kernel(void* in_data, void* in_indices, void* output) MIGRAPHX_GLOBAL void gather_kernel(void* in_data, void* in_indices, void* output)
{ {
make_tensors()(in_data, in_indices, output)([](auto&&... xs) { make_tensors()(in_data, in_indices, output)([](auto&&... xs) {
gather<${axis}>(xs...); gather<${axis}>(xs...);
...@@ -80,7 +80,7 @@ struct gather_compiler : compiler<gather_compiler> ...@@ -80,7 +80,7 @@ struct gather_compiler : compiler<gather_compiler>
compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
{ {
return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value())); return compile_op(ctx, to_shapes(ins->inputs()), op.to_value());
} }
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment