"git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "c4129727d49b14d2a20ceffb493456f5e7875991"
Commit f12064ee authored by umangyadav's avatar umangyadav
Browse files

Merge branch 'develop' into resnet50_partition

parents 2c4f70be 6f1c947f
...@@ -400,7 +400,11 @@ struct dnnl_extend_op : dnnl_op<Derived, Primitive> ...@@ -400,7 +400,11 @@ struct dnnl_extend_op : dnnl_op<Derived, Primitive>
} }
// dnnl has some issues with non-packed inputs // dnnl has some issues with non-packed inputs
void required(const check_shapes& cs) const { cs.packed_or_broadcasted(); } template <class T>
void required(const check_shapes<T>& cs) const
{
cs.packed_or_broadcasted();
}
std::string name() const { return "dnnl::" + op.name(); } std::string name() const { return "dnnl::" + op.name(); }
shape compute_shape(std::vector<shape> inputs) const shape compute_shape(std::vector<shape> inputs) const
......
...@@ -61,7 +61,7 @@ namespace cpu { ...@@ -61,7 +61,7 @@ namespace cpu {
std::string target::name() const { return "cpu"; } std::string target::name() const { return "cpu"; }
// cppcheck-suppress constParameter // cppcheck-suppress constParameterReference
std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_options&) const std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_options&) const
{ {
auto& ctx = any_cast<context>(gctx); auto& ctx = any_cast<context>(gctx);
......
...@@ -48,7 +48,7 @@ include(Embed) ...@@ -48,7 +48,7 @@ include(Embed)
file(GLOB KERNEL_FILES CONFIGURE_DEPENDS file(GLOB KERNEL_FILES CONFIGURE_DEPENDS
${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/*.hpp) ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/*.hpp)
message(STATUS "KERNEL_FILES: ${KERNEL_FILES}") message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
add_embed_library(migraphx_kernels ${KERNEL_FILES}) add_embed_library(migraphx_kernels ${KERNEL_FILES} RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/)
file(GLOB DEVICE_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp) file(GLOB DEVICE_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp)
add_library(migraphx_device ${DEVICE_GPU_SRCS}) add_library(migraphx_device ${DEVICE_GPU_SRCS})
......
...@@ -331,7 +331,7 @@ static std::vector<std::string> get_op_names(const module& m) ...@@ -331,7 +331,7 @@ static std::vector<std::string> get_op_names(const module& m)
{ {
if(starts_with(ins.name(), "@")) if(starts_with(ins.name(), "@"))
continue; continue;
if(ins.name() == "multibroadcast") if(contains({"multibroadcast", "contiguous"}, ins.name()))
continue; continue;
if(ins.name() == "pointwise") if(ins.name() == "pointwise")
{ {
......
...@@ -167,7 +167,7 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option ...@@ -167,7 +167,7 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
[](auto&& p) { [](auto&& p) {
auto&& name = p.first; auto&& name = p.first;
auto&& c = p.second; auto&& c = p.second;
auto path = fs::path{"migraphx"} / "kernels" / name; auto path = name;
return src_file{path, c}; return src_file{path, c};
}); });
srcs.push_back(src_file{fs::path{"main.cpp"}, srcs.push_back(src_file{fs::path{"main.cpp"},
......
...@@ -41,7 +41,7 @@ struct index ...@@ -41,7 +41,7 @@ struct index
__device__ index_int nglobal() const { return blockDim.x * gridDim.x; } // NOLINT __device__ index_int nglobal() const { return blockDim.x * gridDim.x; } // NOLINT
__device__ index_int nlocal() const { return blockDim.x; } // NOLINT __device__ index_int nlocal() const { return blockDim.x; } // NOLINT
template <class F> template <class F>
__device__ void global_stride(index_int n, F f) const __device__ void global_stride(index_int n, F f) const
...@@ -81,6 +81,12 @@ inline auto launch(hipStream_t stream, index_int global, index_int local) ...@@ -81,6 +81,12 @@ inline auto launch(hipStream_t stream, index_int global, index_int local)
dim3 nthreads(local); dim3 nthreads(local);
// cppcheck-suppress UseDeviceLaunch // cppcheck-suppress UseDeviceLaunch
hipLaunchKernelGGL((launcher<f_type>), nblocks, nthreads, 0, stream, f); hipLaunchKernelGGL((launcher<f_type>), nblocks, nthreads, 0, stream, f);
hipError_t kernel_launch_status = hipGetLastError();
if(kernel_launch_status != hipSuccess)
{
MIGRAPHX_THROW("MIGraphX device kernel failed to launch with error: " +
std::string(hipGetErrorString(kernel_launch_status)));
}
}; };
} }
......
...@@ -124,7 +124,7 @@ void nary_broadcast_vec_impl( ...@@ -124,7 +124,7 @@ void nary_broadcast_vec_impl(
buffer[i] = binput.data()[i]; buffer[i] = binput.data()[i];
} }
__syncthreads(); __syncthreads();
auto* bp = as_pointer(buffer); const auto* bp = as_pointer(buffer);
// Process the data // Process the data
for(size_t i = idx.global; i < nelements; i += nglobal) for(size_t i = idx.global; i < nelements; i += nglobal)
{ {
...@@ -219,7 +219,7 @@ void nary_double_broadcast_vec_impl( ...@@ -219,7 +219,7 @@ void nary_double_broadcast_vec_impl(
buffer[i + bdim_vec_len] = binput2.data()[i]; buffer[i + bdim_vec_len] = binput2.data()[i];
} }
__syncthreads(); __syncthreads();
auto* bp = as_pointer(buffer); const auto* bp = as_pointer(buffer);
// Process the data // Process the data
for(size_t i = idx.global; i < nelements; i += nglobal) for(size_t i = idx.global; i < nelements; i += nglobal)
{ {
......
...@@ -72,12 +72,12 @@ struct hip_heap_vector ...@@ -72,12 +72,12 @@ struct hip_heap_vector
index_int l = 2 * index + 1; index_int l = 2 * index + 1;
index_int r = 2 * index + 2; index_int r = 2 * index + 2;
if(l < n && compare(data[data_index(l)], data[data_index(index)])) if(l < n and compare(data[data_index(l)], data[data_index(index)]))
{ {
index = l; index = l;
} }
if(r < n && compare(data[data_index(r)], data[data_index(index)])) if(r < n and compare(data[data_index(r)], data[data_index(index)]))
{ {
index = r; index = r;
if(compare(data[data_index(l)], data[data_index(r)])) if(compare(data[data_index(l)], data[data_index(r)]))
......
...@@ -31,20 +31,6 @@ namespace migraphx { ...@@ -31,20 +31,6 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
// Fallback overload, selected through the rank<0> tag: builds the architecture
// name as "gfx" followed by the decimal value of props.gcnArch.
template <class HipDeviceProp>
std::string get_arch_name(rank<0>, const HipDeviceProp& props)
{
return "gfx" + std::to_string(props.gcnArch);
}
// Overload selected through the rank<1> tag. The trailing decltype makes it
// participate in overload resolution only when std::string(props.gcnArchName)
// is a valid expression; it then returns that name as a std::string.
template <class HipDeviceProp>
auto get_arch_name(rank<1>, const HipDeviceProp& props) -> decltype(std::string(props.gcnArchName))
{
return std::string(props.gcnArchName);
}
// Entry point for hipDeviceProp_t: forwards to the tagged overloads with rank<1>{}.
// NOTE(review): presumably rank<1> converts to rank<0>, so the gcnArchName
// overload wins when viable and the gcnArch fallback is used otherwise --
// confirm against migraphx/rank.hpp.
std::string get_arch_name(const hipDeviceProp_t& props) { return get_arch_name(rank<1>{}, props); }
int get_device_id() int get_device_id()
{ {
int device; int device;
...@@ -60,7 +46,7 @@ std::string get_device_name() ...@@ -60,7 +46,7 @@ std::string get_device_name()
auto status = hipGetDeviceProperties(&props, get_device_id()); auto status = hipGetDeviceProperties(&props, get_device_id());
if(status != hipSuccess) if(status != hipSuccess)
MIGRAPHX_THROW("Failed to get device properties"); MIGRAPHX_THROW("Failed to get device properties");
return get_arch_name(props); return props.gcnArchName;
} }
} // namespace gpu } // namespace gpu
......
...@@ -86,7 +86,7 @@ struct mlir_op ...@@ -86,7 +86,7 @@ struct mlir_op
size_t param_cnt = 0; size_t param_cnt = 0;
std::vector<std::string> names = mod->get_parameter_names(); std::vector<std::string> names = mod->get_parameter_names();
std::sort(names.begin(), names.end()); std::sort(names.begin(), names.end());
for(std::string param_name : names) for(const std::string& param_name : names)
{ {
ins_shapes[mod->get_parameter(param_name)] = inputs[param_cnt++]; ins_shapes[mod->get_parameter(param_name)] = inputs[param_cnt++];
} }
...@@ -210,42 +210,47 @@ struct find_mlir_op ...@@ -210,42 +210,47 @@ struct find_mlir_op
return false; return false;
} }
const std::initializer_list<std::string> any_type_ops = {"@literal", "@param", "@return"}; const std::initializer_list<std::string> any_type_ops = {"@literal", "@param", "@return"};
const std::initializer_list<std::string> no_bool_ops = {"convolution", const std::initializer_list<std::string> no_bool_ops = {
"quant_convolution", "convolution",
"dot", "quant_convolution",
"quant_dot", "dot",
"add", "quant_dot",
"clip", "add",
"relu", "clip",
"sub", "relu",
"mul", "sub",
"div", "mul",
"pow", "div",
"where", "pow",
"quantizelinear", "where",
"dequantizelinear", "quantizelinear",
"abs", "dequantizelinear",
"neg"}; "abs",
const std::initializer_list<std::string> fp_only_ops = {"ceil", "neg",
"erf", };
"exp", const std::initializer_list<std::string> fp_only_ops = {
"floor", "ceil",
"log", "erf",
"recip", "exp",
"rsqrt", "floor",
"sigmoid" "log",
"softmax", "recip",
"tanh"}; "rsqrt",
// There are bugs in MLIR right now for models using sigmoid so disable it for now
// "sigmoid",
"softmax",
"tanh",
};
bool is_float = contains({type_t::float_type, type_t::half_type}, result_type); bool is_float = contains({type_t::float_type, type_t::half_type}, result_type);
if(contains(any_type_ops, name)) if(contains(any_type_ops, name))
return true; return true;
if(result_type != type_t::bool_type && contains(no_bool_ops, name)) if(result_type != type_t::bool_type and contains(no_bool_ops, name))
return true; return true;
if(is_float && contains(fp_only_ops, name)) if(is_float and contains(fp_only_ops, name))
return true; return true;
// Only conversions between floating types are known to be unambiguously // Only conversions between floating types are known to be unambiguously
// supported. // supported.
if(is_float && name == "convert") if(is_float and name == "convert")
{ {
return std::all_of(i.inputs().begin(), i.inputs().end(), [](const auto& arg) { return std::all_of(i.inputs().begin(), i.inputs().end(), [](const auto& arg) {
return contains({type_t::float_type, type_t::half_type}, arg->get_shape().type()); return contains({type_t::float_type, type_t::half_type}, arg->get_shape().type());
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/optional.hpp> #include <migraphx/optional.hpp>
#include <migraphx/rank.hpp> #include <migraphx/rank.hpp>
#include <migraphx/gpu/tuning_config.hpp>
#include <functional> #include <functional>
namespace migraphx { namespace migraphx {
...@@ -68,12 +69,6 @@ struct compiler_replace ...@@ -68,12 +69,6 @@ struct compiler_replace
} }
}; };
struct tuning_config
{
value problem;
std::vector<value> solutions;
};
using compiler_compile = using compiler_compile =
std::function<compiler_replace(context&, instruction_ref, operation, const value&)>; std::function<compiler_replace(context&, instruction_ref, operation, const value&)>;
using compiler_compile_op = using compiler_compile_op =
......
...@@ -46,13 +46,7 @@ using hip_event_ptr = MIGRAPHX_MANAGE_PTR(hipEvent_t, hipEventDestroy); ...@@ -46,13 +46,7 @@ using hip_event_ptr = MIGRAPHX_MANAGE_PTR(hipEvent_t, hipEventDestroy);
struct hip_device struct hip_device
{ {
hip_device() hip_device() : device_props{} { add_stream(); }
{
device_props.gcnArchName[0] = '\0';
device_props.gcnArch = 0;
device_props.multiProcessorCount = 0;
add_stream();
}
hip_device(std::size_t id, std::size_t n) : device_id(id) hip_device(std::size_t id, std::size_t n) : device_id(id)
{ {
...@@ -171,7 +165,7 @@ struct hip_device ...@@ -171,7 +165,7 @@ struct hip_device
std::size_t stream_id() const { return current_stream; } std::size_t stream_id() const { return current_stream; }
std::string get_device_name() const { return get_arch_name(device_props); } std::string get_device_name() const { return device_props.gcnArchName; }
std::string get_gfx_name() const { return trim(split_string(get_device_name(), ':').front()); } std::string get_gfx_name() const { return trim(split_string(get_device_name(), ':').front()); }
......
...@@ -33,8 +33,6 @@ namespace migraphx { ...@@ -33,8 +33,6 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
MIGRAPHX_GPU_EXPORT std::string get_arch_name(const hipDeviceProp_t& props);
MIGRAPHX_GPU_EXPORT std::string get_device_name(); MIGRAPHX_GPU_EXPORT std::string get_device_name();
MIGRAPHX_GPU_EXPORT int get_device_id(); MIGRAPHX_GPU_EXPORT int get_device_id();
......
...@@ -92,7 +92,7 @@ struct hip_sync_stream ...@@ -92,7 +92,7 @@ struct hip_sync_stream
return inputs.front(); return inputs.front();
} }
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const argument compute(const context& ctx, const shape&, const std::vector<argument>& args) const
{ {
gpu_sync(ctx); gpu_sync(ctx);
if(args.empty()) if(args.empty())
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <migraphx/gpu/config.hpp> #include <migraphx/gpu/config.hpp>
#include <migraphx/gpu/code_object_op.hpp> #include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/instruction_ref.hpp> #include <migraphx/instruction_ref.hpp>
#include <migraphx/gpu/tuning_config.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -36,16 +37,20 @@ struct module; ...@@ -36,16 +37,20 @@ struct module;
namespace gpu { namespace gpu {
MIGRAPHX_GPU_EXPORT std::string dump_mlir(const module& m); MIGRAPHX_GPU_EXPORT std::string dump_mlir(const module& m);
MIGRAPHX_GPU_EXPORT code_object_op compile_mlir(const context& migraphx_ctx,
MIGRAPHX_GPU_EXPORT code_object_op compile_mlir(const context& ctx,
module m, module m,
const std::vector<instruction_ref>& inputs); const std::vector<instruction_ref>& inputs,
const value& solution);
MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m, MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m,
instruction_ref ins, instruction_ref ins,
code_object_op co, code_object_op co,
const std::vector<instruction_ref>& inputs); const std::vector<instruction_ref>& inputs);
MIGRAPHX_GPU_EXPORT tuning_config get_tuning_config_mlir(const context& migraphx_ctx,
module m,
const std::vector<shape>& inputs);
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
#define MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// Pairs a tuning problem with a list of solutions for it.
// NOTE(review): presumably this is what a compiler's get_tuning_config()
// returns to the tuning driver -- confirm against callers.
struct tuning_config
{
// Description of the problem, stored as a generic migraphx value.
value problem;
// Solutions associated with `problem`, each stored as a generic migraphx value.
// NOTE(review): presumably each entry is a candidate that can be handed back
// to the compiler as its `solution` argument -- confirm against callers.
std::vector<value> solutions;
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP
...@@ -300,7 +300,8 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler> ...@@ -300,7 +300,8 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler>
const auto& b_shape = inputs[1]; const auto& b_shape = inputs[1];
const auto& c_shape = inputs.back(); const auto& c_shape = inputs.back();
auto rank = a_shape.lens().size(); // cppcheck-suppress unreadVariable
auto rank = a_shape.ndim();
auto batch_count = get_batch_count(c_shape); auto batch_count = get_batch_count(c_shape);
auto m = c_shape.lens()[rank - 2]; auto m = c_shape.lens()[rank - 2];
......
...@@ -36,11 +36,12 @@ struct mlir_compiler : compiler<mlir_compiler> ...@@ -36,11 +36,12 @@ struct mlir_compiler : compiler<mlir_compiler>
operation compile_op(context&, const std::vector<shape>&, const value&) const { return {}; } operation compile_op(context&, const std::vector<shape>&, const value&) const { return {}; }
compiler_replace compile(context& ctx, instruction_ref ins, const operation&) const compiler_replace
compile(const context& ctx, instruction_ref ins, const operation&, const value& solution) const
{ {
auto* smod = ins->module_inputs().front(); auto* smod = ins->module_inputs().front();
assert(smod->get_parameter_names().size() == ins->inputs().size() - 1); assert(smod->get_parameter_names().size() == ins->inputs().size() - 1);
return insert(compile_mlir(ctx, *smod, ins->inputs())); return insert(compile_mlir(ctx, *smod, ins->inputs(), solution));
} }
compiler_replace insert(code_object_op co) const compiler_replace insert(code_object_op co) const
...@@ -50,6 +51,18 @@ struct mlir_compiler : compiler<mlir_compiler> ...@@ -50,6 +51,18 @@ struct mlir_compiler : compiler<mlir_compiler>
m.replace_instruction(ins, mlir); m.replace_instruction(ins, mlir);
}}; }};
} }
// Returns the tuning configuration for `ins` when exhaustive tuning is
// requested, and nullopt otherwise. The configuration is obtained from
// get_tuning_config_mlir using the shapes of the instruction's inputs and the
// first module attached to the instruction.
optional<tuning_config> get_tuning_config(const context& ctx,
                                          instruction_ref ins,
                                          const operation&,
                                          bool exhaustive) const
{
    if(exhaustive)
    {
        // Keep the original evaluation order: input shapes first, then the submodule.
        auto input_shapes = to_shapes(ins->inputs());
        auto* submodule   = ins->module_inputs().front();
        return get_tuning_config_mlir(ctx, *submodule, input_shapes);
    }
    return nullopt;
}
}; };
} // namespace gpu } // namespace gpu
......
...@@ -72,7 +72,7 @@ struct pointwise_compiler : compiler<pointwise_compiler> ...@@ -72,7 +72,7 @@ struct pointwise_compiler : compiler<pointwise_compiler>
hip_compile_options options; hip_compile_options options;
options.inputs = inputs; options.inputs = inputs;
options.output = inputs.back(); options.output = inputs.back();
options.virtual_inputs = reduce_dims(inputs); options.virtual_inputs = reduce_dims(normalize_permutation(inputs));
options.params = "-Wno-float-equal"; options.params = "-Wno-float-equal";
auto axis = find_fast_axis(options.virtual_inputs); auto axis = find_fast_axis(options.virtual_inputs);
auto vec = vectorize::elements(ctx, axis, options.virtual_inputs); auto vec = vectorize::elements(ctx, axis, options.virtual_inputs);
......
...@@ -84,7 +84,7 @@ static shape get_reduced_shape(const shape& s, const std::vector<T>& axes) ...@@ -84,7 +84,7 @@ static shape get_reduced_shape(const shape& s, const std::vector<T>& axes)
std::fill(lens.begin(), lens.end(), 1); std::fill(lens.begin(), lens.end(), 1);
for(const auto& axis : axes) for(const auto& axis : axes)
lens[axis] = s.lens()[axis]; lens[axis] = s.lens()[axis];
return shape{s.type(), lens}; return s.with_lens(lens);
} }
template <class T> template <class T>
...@@ -93,7 +93,7 @@ static shape get_output_shape(const shape& s, const std::vector<T>& axes) ...@@ -93,7 +93,7 @@ static shape get_output_shape(const shape& s, const std::vector<T>& axes)
auto lens = s.lens(); auto lens = s.lens();
for(const auto& axis : axes) for(const auto& axis : axes)
lens[axis] = 1; lens[axis] = 1;
return shape{s.type(), lens}; return s.with_lens(lens);
} }
template <class ReduceLens> template <class ReduceLens>
...@@ -228,7 +228,7 @@ struct fused_reduce_compiler : compiler<fused_reduce_compiler> ...@@ -228,7 +228,7 @@ struct fused_reduce_compiler : compiler<fused_reduce_compiler>
auto virtual_inputs = inputs; auto virtual_inputs = inputs;
virtual_inputs.push_back(get_reduced_shape(inputs.front(), axes)); virtual_inputs.push_back(get_reduced_shape(inputs.front(), axes));
virtual_inputs.push_back(get_output_shape(inputs.front(), axes)); virtual_inputs.push_back(get_output_shape(inputs.front(), axes));
virtual_inputs = reduce_dims(virtual_inputs); virtual_inputs = reduce_dims(normalize_permutation(virtual_inputs));
auto reduce_output_shape = virtual_inputs.back(); auto reduce_output_shape = virtual_inputs.back();
virtual_inputs.pop_back(); virtual_inputs.pop_back();
auto reduction_shape = virtual_inputs.back(); auto reduction_shape = virtual_inputs.back();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment