Commit 4a39a0f7 authored by Shucai Xiao

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into add-conv_bn_add-test

parents 5564172e bb827865
@@ -31,7 +31,7 @@ struct dnnl_layernorm : dnnl_op<dnnl_layernorm, dnnl::layer_normalization_forwar...
     get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
     {
         return {dnnl::prop_kind::forward_inference,
-                m.at(DNNL_ARG_SRC),
+                m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
                 1e-12f,
                 dnnl::normalization_flags::none};
     }
...
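The MIGRAPHX_DNNL_PREFIX change recurs through every DNNL kernel in this merge, and the MIGRAPHX_ENABLE_ZENDNN guard added later in the CPU lowering suggests why: the macro selects the argument constant for whichever DNNL flavor the build targets. A minimal sketch of such a macro, assuming those two libraries (hypothetical; the real definition lives in the MIGraphX dnnl headers, not in this diff):

// Hypothetical sketch -- not the actual MIGraphX definition. Token-pastes
// the argument tag onto the active library's prefix, so the same kernel
// source resolves to DNNL_ARG_SRC (oneDNN) or ZENDNN_ARG_SRC (ZenDNN).
#ifdef MIGRAPHX_ENABLE_ZENDNN
#define MIGRAPHX_DNNL_PREFIX(x) ZENDNN_##x
#else
#define MIGRAPHX_DNNL_PREFIX(x) DNNL_##x
#endif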
@@ -12,7 +12,7 @@ struct dnnl_logsoftmax : dnnl_extend_op<dnnl_logsoftmax, dnnl::logsoftmax_forwar...
     get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
     {
         int axis = this->op.axis;
-        return {dnnl::prop_kind::forward_inference, m.at(DNNL_ARG_SRC_0), axis};
+        return {dnnl::prop_kind::forward_inference, m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)), axis};
     }
 };
...
@@ -66,7 +66,10 @@ struct cpu_im2col
     }
     static std::string name() { return "cpu::im2col"; }
-    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        return op.normalize_compute_shape(inputs);
+    }
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
@@ -389,8 +392,10 @@ struct cpu_apply
         extend_op("concat", "dnnl::concat");
         extend_op("contiguous", "dnnl::reorder");
         extend_op("convolution", "dnnl::convolution");
+#ifndef MIGRAPHX_ENABLE_ZENDNN
         extend_op("deconvolution", "dnnl::deconvolution");
         extend_op("dot", "dnnl::dot");
+#endif
         extend_op("erf", "cpu::erf");
         extend_op("gather", "cpu::gather");
         extend_op("logsoftmax", "dnnl::logsoftmax");
@@ -437,7 +442,7 @@ struct cpu_apply
         }
     }
-    instruction_ref apply_pow(instruction_ref ins)
+    instruction_ref apply_pow(instruction_ref ins) const
     {
         auto beta = read_scalar<float>(ins->inputs()[1]);
         if(beta.empty())
@@ -448,7 +453,7 @@ struct cpu_apply
                        {ins->inputs().front()});
     }
-    instruction_ref apply_pooling(instruction_ref ins)
+    instruction_ref apply_pooling(instruction_ref ins) const
     {
         auto&& op = ins->get_operator();
         auto v = op.to_value();
@@ -476,30 +481,20 @@ struct cpu_apply
         return {r.at<T>()};
     }
-    instruction_ref replace(instruction_ref ins, const operation& op)
+    instruction_ref replace(instruction_ref ins, const operation& op) const
     {
         return replace(ins, op, ins->inputs());
     }
     instruction_ref
-    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs)
+    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs) const
     {
         inputs.push_back(insert_allocation(ins, ins->get_shape()));
         return modl->replace_instruction(ins, op, inputs);
     }
-    instruction_ref insert_allocation(instruction_ref ins, const shape& s)
+    instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
     {
-        auto ins_alias = instruction::get_output_alias(ins);
-        if(last->name() == "@return" and prog_output_names.count(ins_alias) > 0)
-        {
-            return modl->add_parameter(prog_output_names[ins_alias], s);
-        }
-        else if(ins == last)
-        {
-            return modl->add_parameter("output", s);
-        }
         return modl->insert_instruction(ins, make_op("cpu::allocate", {{"shape", to_value(s)}}));
     }
 };
...
@@ -12,7 +12,7 @@ struct dnnl_lrn : dnnl_extend_op<dnnl_lrn, dnnl::lrn_forward, op::lrn>
     {
         return {dnnl::prop_kind::forward_inference,
                 dnnl::algorithm::lrn_across_channels,
-                m.at(DNNL_ARG_SRC_0),
+                m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)),
                 this->op.size,
                 this->op.alpha,
                 this->op.beta,
...
@@ -63,7 +63,7 @@ struct cpu_pooling : auto_register_op<cpu_pooling<Op>>
     shape compute_shape(std::vector<shape> inputs) const
     {
         inputs.pop_back();
-        return op.compute_shape(inputs);
+        return op.normalize_compute_shape(inputs);
     }
     std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
@@ -125,19 +125,22 @@ template struct cpu_pooling<max_pool>;
 struct dnnl_pooling : dnnl_extend_op<dnnl_pooling, dnnl::pooling_forward, op::pooling>
 {
-    std::vector<int> arg_map(int) const { return {DNNL_ARG_SRC}; }
+    std::vector<int> arg_map(int) const { return {MIGRAPHX_DNNL_PREFIX(ARG_SRC)}; }
     dnnl::pooling_forward::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
     {
         auto algo = op.mode == "max" ? dnnl::algorithm::pooling_max : dnnl::algorithm::pooling_avg;
+        auto kdims = op.kdims();
+        std::vector<size_t> padding_l(op.padding.begin(), op.padding.begin() + kdims);
+        std::vector<size_t> padding_r(op.padding.begin() + kdims, op.padding.end());
         return {dnnl::prop_kind::forward_inference,
                 algo,
-                m.at(DNNL_ARG_SRC),
-                m.at(DNNL_ARG_DST),
+                m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
+                m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)),
                 to_dnnl_dims(op.stride),
                 to_dnnl_dims(op.lengths),
-                to_dnnl_dims(op.padding),
-                to_dnnl_dims(op.padding)};
+                to_dnnl_dims(padding_l),
+                to_dnnl_dims(padding_r)};
     }
 };
...
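As the split in the new code implies, the pooling op's padding attribute now carries asymmetric pads as one flat vector, {begin-pads..., end-pads...}, so the descriptor must cut it at kdims() instead of passing the same vector twice. A small self-contained illustration of the split (values made up):

#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    // 2-D pooling: padding = {top, left, bottom, right}
    std::vector<std::size_t> padding = {0, 1, 2, 3};
    std::size_t kdims = 2; // what op.kdims() would report for a 2-D kernel
    std::vector<std::size_t> padding_l(padding.begin(), padding.begin() + kdims); // {0, 1}
    std::vector<std::size_t> padding_r(padding.begin() + kdims, padding.end());   // {2, 3}
    assert(padding_l.size() == kdims and padding_r.size() == kdims);
}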
#include <migraphx/config.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/context.hpp>
#include <migraphx/cpu/context.hpp>
#include <migraphx/register_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
struct cpu_preallocate : auto_register_op<cpu_preallocate>
{
shape s;
std::string id = "";
argument data;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.s, "shape"), f(self.id, "id"));
}
std::string name() const { return "cpu::preallocate"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(0);
return s;
}
argument compute(context&, const shape&, const std::vector<argument>&) const { return data; }
void finalize(context&, const shape&, const std::vector<shape>&) { data = argument(s); }
lifetime get_lifetime() const { return lifetime::global; }
};
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
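The new cpu::preallocate op allocates its buffer once in finalize(), reports lifetime::global so the memory-coloring pass leaves the buffer alone, and then hands back that same argument on every compute() call. A hedged lifecycle sketch, assuming the struct above is visible at the call site and that cpu::context is default-constructible (neither is shown in this diff):

// Sketch only: exercises the lifecycle of cpu_preallocate as defined above.
migraphx::cpu::cpu_preallocate op;
op.s = migraphx::shape{migraphx::shape::float_type, {16}};

migraphx::cpu::context ctx;
op.finalize(ctx, op.s, {});         // runs once at compile time: data = argument(s)
auto a = op.compute(ctx, op.s, {}); // returns the preallocated buffer...
auto b = op.compute(ctx, op.s, {}); // ...and the very same buffer again on the next run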
@@ -37,7 +37,11 @@ struct dnnl_reduction : dnnl_op<dnnl_reduction, dnnl::reduction>
     dnnl::reduction::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
     {
-        return {to_dnnl_algo(algo), m.at(DNNL_ARG_SRC), m.at(DNNL_ARG_DST), 0, 0};
+        return {to_dnnl_algo(algo),
+                m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
+                m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)),
+                0,
+                0};
     }
 };
...
@@ -27,7 +27,7 @@ struct dnnl_reorder : dnnl_op<dnnl_reorder, dnnl::reorder>
     };
     desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
     {
-        return {m.at(DNNL_ARG_SRC), m.at(DNNL_ARG_DST)};
+        return {m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)), m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST))};
     }
     auto get_primitive_desc(const desc& d, const dnnl::primitive_attr& attr) const
...
@@ -11,7 +11,7 @@ struct dnnl_softmax : dnnl_extend_op<dnnl_softmax, dnnl::softmax_forward, op::so...
     dnnl::softmax_forward::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
     {
         int axis = this->op.axis;
-        return {dnnl::prop_kind::forward_inference, m.at(DNNL_ARG_SRC_0), axis};
+        return {dnnl::prop_kind::forward_inference, m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)), axis};
     }
 };
...
@@ -3,7 +3,6 @@
 #include <migraphx/check_context.hpp>
 #include <migraphx/adjust_allocation.hpp>
 #include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/decompose.hpp>
 #include <migraphx/eliminate_allocation.hpp>
 #include <migraphx/eliminate_common_subexpression.hpp>
 #include <migraphx/eliminate_concat.hpp>
@@ -14,14 +13,16 @@
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/propagate_constant.hpp>
 #include <migraphx/register_target.hpp>
-#include <migraphx/remap.hpp>
 #include <migraphx/rewrite_batchnorm.hpp>
 #include <migraphx/rewrite_pooling.hpp>
+#include <migraphx/rewrite_quantization.hpp>
 #include <migraphx/rewrite_rnn.hpp>
 #include <migraphx/schedule.hpp>
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/simplify_algebra.hpp>
+#include <migraphx/simplify_qdq.hpp>
 #include <migraphx/simplify_reshapes.hpp>
+#include <migraphx/preallocate_param.hpp>
 #include <migraphx/cpu/fuse_ops.hpp>
 #include <migraphx/cpu/write_literals.hpp>
 #include <migraphx/cpu/allocation_model.hpp>
@@ -45,9 +46,9 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti...
     std::set<shape::type_t> unsupported_types(shape::types().begin(), shape::types().end());
     unsupported_types.erase(shape::type_t::float_type);
     return {normalize_ops{},
-            eliminate_data_type{unsupported_types, shape::type_t::float_type},
+            rewrite_quantization{},
             dead_code_elimination{},
-            decompose{},
+            eliminate_data_type{unsupported_types, shape::type_t::float_type},
             dead_code_elimination{},
             simplify_reshapes{},
             eliminate_identity{},
@@ -76,6 +77,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti...
             write_literals{},
             dead_code_elimination{},
             memory_coloring{"cpu::allocate"},
+            dead_code_elimination{},
+            preallocate_param{"scratch", cpu_allocation_model{}},
             dead_code_elimination{}};
 }
...
@@ -41,6 +41,7 @@ add_library(migraphx_device
     device/equal.cpp
     device/erf.cpp
     device/exp.cpp
+    device/fill.cpp
     device/floor.cpp
     device/gather.cpp
     device/gelu.cpp
@@ -58,9 +59,12 @@ add_library(migraphx_device
     device/mul.cpp
     device/mul_add.cpp
     device/mul_add_relu.cpp
+    device/multinomial.cpp
+    device/nonzero.cpp
     device/pad.cpp
     device/pow.cpp
     device/prelu.cpp
+    device/prefix_scan_sum.cpp
     device/recip.cpp
     device/reduce_max.cpp
     device/reduce_mean.cpp
@@ -68,9 +72,11 @@ add_library(migraphx_device
     device/reduce_sum.cpp
     device/reduce_prod.cpp
     device/relu.cpp
+    device/reverse.cpp
     device/rnn_variable_seq_lens.cpp
     device/round.cpp
     device/rsqrt.cpp
+    device/scatter.cpp
     device/sigmoid.cpp
     device/sign.cpp
     device/sin.cpp
@@ -81,7 +87,9 @@ add_library(migraphx_device
     device/sub.cpp
     device/tan.cpp
    device/tanh.cpp
+    device/topk.cpp
     device/unary_not.cpp
+    device/where.cpp
 )
 set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
 rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION})
@@ -116,10 +124,12 @@ add_library(migraphx_gpu
     code_object_op.cpp
     compile_hip.cpp
     compile_hip_code_object.cpp
+    compile_pointwise.cpp
     concat.cpp
     convert.cpp
     convolution.cpp
     deconvolution.cpp
+    device_name.cpp
     eliminate_workspace.cpp
     elu.cpp
     fuse_ops.cpp
@@ -131,21 +141,26 @@ add_library(migraphx_gpu
     kernel.cpp
     lowering.cpp
     logsoftmax.cpp
+    loop.cpp
     lrn.cpp
     leaky_relu.cpp
     mlir_conv.cpp
+    multinomial.cpp
+    nonzero.cpp
     pack_args.cpp
     pack_int8_args.cpp
     pad.cpp
     pooling.cpp
+    preallocate_param.cpp
     quant_convolution.cpp
+    reverse.cpp
     rnn_variable_seq_lens.cpp
     rocblas.cpp
-    softmax.cpp
+    scatter.cpp
     schedule_model.cpp
+    softmax.cpp
     sync_device.cpp
     target.cpp
+    topk.cpp
     write_literals.cpp
 )
 set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
@@ -184,12 +199,16 @@ register_migraphx_gpu_ops(hip_
    logical_and
    logical_or
    logical_xor
+   loop
    max
    min
    mul
+   multinomial
+   nonzero
    pad
    pow
    prelu
+   prefix_scan_sum
    recip
    reduce_max
    reduce_mean
@@ -197,8 +216,10 @@ register_migraphx_gpu_ops(hip_
    reduce_prod
    reduce_sum
    relu
+   reverse
    round
    rsqrt
+   scatter
    sigmoid
    sign
    sinh
@@ -209,7 +230,9 @@ register_migraphx_gpu_ops(hip_
    sub
    tanh
    tan
+   topk
    unary_not
+   where
 )
 register_migraphx_gpu_ops(miopen_
    abs
@@ -275,19 +298,27 @@ if(MIGRAPHX_ENABLE_MLIR)
     target_link_libraries(migraphx_gpu PUBLIC ${LIBMLIRMIOPEN})
 endif()
+set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "")
+if(MIGRAPHX_USE_HIPRTC)
+    target_compile_definitions(migraphx_gpu PRIVATE -DMIGRAPHX_USE_HIPRTC=1)
+else()
 # Get flags needed to compile hip
 include(TargetFlags)
 target_flags(HIP_COMPILER_FLAGS hip::device)
 # Remove cuda arch flags
-string(REGEX REPLACE "--cuda-gpu-arch=[^ \t\r\n]+" "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
-string(REGEX REPLACE "--offload-arch=[^ \t\r\n]+" "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
+string(REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
+string(REGEX REPLACE --offload-arch=[a-z0-9:+-]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
+string(REPLACE "$<LINK_LANGUAGE:CXX>" "1" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
+string(REPLACE "SHELL:" "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
 message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}")
 target_compile_definitions(migraphx_gpu PRIVATE
     "-DMIGRAPHX_HIP_COMPILER=${CMAKE_CXX_COMPILER}"
     "-DMIGRAPHX_HIP_COMPILER_FLAGS=${HIP_COMPILER_FLAGS}"
     "-DMIGRAPHX_OFFLOADBUNDLER_BIN=${MIGRAPHX_OFFLOADBUNDLER_BIN}"
     "-DMIGRAPHX_EXTRACT_KERNEL=${MIGRAPHX_EXTRACT_KERNEL}"
+    "-DMIGRAPHX_USE_HIPRTC=0"
 )
+endif()
 # Check miopen find mode api
 include(CheckLibraryExists)
@@ -305,6 +336,8 @@ target_compile_definitions(migraphx_gpu PUBLIC -D__HIP_PLATFORM_HCC__=1)
 target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
 target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
+add_subdirectory(driver)
 rocm_install_targets(
     TARGETS migraphx_gpu migraphx_device
     INCLUDE
...
@@ -11,6 +11,11 @@ operation gpu_allocation_model::allocate(const shape& s) const
     return make_op(name(), {{"shape", to_value(s)}});
 }
+operation gpu_allocation_model::preallocate(const shape& s, const std::string& id) const
+{
+    return make_op("hip::hip_allocate_memory", {{"shape", to_value(s)}, {"id", id}});
+}
 std::string gpu_allocation_model::copy() const { return "hip::copy"; }
 } // namespace gpu
...
File mode changed from 100644 to 100755
 #include <migraphx/gpu/compile_hip.hpp>
 #include <migraphx/errors.hpp>
 #include <migraphx/stringutils.hpp>
+#include <migraphx/env.hpp>
+#include <cassert>
+#include <iostream>
+#if MIGRAPHX_USE_HIPRTC
+#include <hip/hiprtc.h>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/env.hpp>
+#else
 #include <migraphx/compile_src.hpp>
 #include <migraphx/process.hpp>
-#include <cassert>
+#endif
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG);
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
+#if MIGRAPHX_USE_HIPRTC
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_HIPRTC)
+std::string hiprtc_error(hiprtcResult err, const std::string& msg)
+{
+    return "hiprtc: " + (hiprtcGetErrorString(err) + (": " + msg));
+}
+void hiprtc_check_error(hiprtcResult err, const std::string& msg, const std::string& ctx)
+{
+    if(err != HIPRTC_SUCCESS)
+        throw make_exception(ctx, hiprtc_error(err, msg));
+}
+#define MIGRAPHX_HIPRTC(...) \
+    hiprtc_check_error(__VA_ARGS__, #__VA_ARGS__, MIGRAPHX_MAKE_SOURCE_CTX())
+#define MIGRAPHX_HIPRTC_THROW(error, msg) MIGRAPHX_THROW(hiprtc_error(error, msg))
+// Workaround hiprtc's broken API
+void hiprtc_program_destroy(hiprtcProgram prog) { hiprtcDestroyProgram(&prog); }
+using hiprtc_program_ptr = MIGRAPHX_MANAGE_PTR(hiprtcProgram, hiprtc_program_destroy);
+template <class... Ts>
+hiprtc_program_ptr hiprtc_program_create(Ts... xs)
+{
+    hiprtcProgram prog = nullptr;
+    auto result = hiprtcCreateProgram(&prog, xs...);
+    hiprtc_program_ptr p{prog};
+    if(result != HIPRTC_SUCCESS)
+        MIGRAPHX_HIPRTC_THROW(result, "Create program failed.");
+    return p;
+}
+struct hiprtc_program
+{
+    struct string_array
+    {
+        std::vector<std::string> strings{};
+        std::vector<const char*> c_strs{};
+        string_array() {}
+        string_array(const string_array&) = delete;
+        std::size_t size() const { return strings.size(); }
+        const char** data() { return c_strs.data(); }
+        void push_back(std::string s)
+        {
+            strings.push_back(std::move(s));
+            c_strs.push_back(strings.back().c_str());
+        }
+    };
+    hiprtc_program_ptr prog = nullptr;
+    string_array headers{};
+    string_array include_names{};
+    std::string cpp_src = "";
+    std::string cpp_name = "";
+    hiprtc_program(const std::vector<src_file>& srcs)
+    {
+        for(auto&& src : srcs)
+        {
+            std::string content{src.content.first, src.content.second};
+            std::string path = src.path.string();
+            if(src.path.extension().string() == ".cpp")
+            {
+                cpp_src = std::move(content);
+                cpp_name = std::move(path);
+            }
+            else
+            {
+                headers.push_back(std::move(content));
+                include_names.push_back(std::move(path));
+            }
+        }
+        prog = hiprtc_program_create(cpp_src.c_str(),
+                                     cpp_name.c_str(),
+                                     headers.size(),
+                                     headers.data(),
+                                     include_names.data());
+    }
+    void compile(const std::vector<std::string>& options)
+    {
+        if(enabled(MIGRAPHX_TRACE_HIPRTC{}))
+            std::cout << "hiprtc " << join_strings(options, " ") << " " << cpp_name << std::endl;
+        std::vector<const char*> c_options;
+        std::transform(options.begin(),
+                       options.end(),
+                       std::back_inserter(c_options),
+                       [](const std::string& s) { return s.c_str(); });
+        auto result = hiprtcCompileProgram(prog.get(), c_options.size(), c_options.data());
+        std::cerr << log() << std::endl;
+        if(result != HIPRTC_SUCCESS)
+            MIGRAPHX_HIPRTC_THROW(result, "Compilation failed.");
+    }
+    std::string log()
+    {
+        std::size_t n = 0;
+        MIGRAPHX_HIPRTC(hiprtcGetProgramLogSize(prog.get(), &n));
+        if(n < 2)
+            return {};
+        std::vector<char> buffer(n);
+        MIGRAPHX_HIPRTC(hiprtcGetProgramLog(prog.get(), buffer.data()));
+        assert(buffer.back() == 0);
+        return {buffer.begin(), buffer.end() - 1};
+    }
+    std::vector<char> get_code_obj()
+    {
+        std::size_t n = 0;
+        MIGRAPHX_HIPRTC(hiprtcGetCodeSize(prog.get(), &n));
+        std::vector<char> buffer(n);
+        MIGRAPHX_HIPRTC(hiprtcGetCode(prog.get(), buffer.data()));
+        return buffer;
+    }
+};
+std::vector<std::vector<char>>
+compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch)
+{
+    hiprtc_program prog(srcs);
+    auto options = split_string(params, ' ');
+    if(enabled(MIGRAPHX_GPU_DEBUG{}))
+        options.push_back("-DMIGRAPHX_DEBUG");
+    if(std::none_of(options.begin(), options.end(), [](const std::string& s) {
+           return starts_with(s, "--std=") or starts_with(s, "-std=");
+       }))
+        options.push_back("-std=c++17");
+    options.push_back("-fno-gpu-rdc");
+    options.push_back(" -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3"));
+    options.push_back("-Wno-cuda-compat");
+    options.push_back("--cuda-gpu-arch=" + arch);
+    prog.compile(options);
+    return {prog.get_code_obj()};
+}
+#else // MIGRAPHX_USE_HIPRTC
 bool is_hcc_compiler()
 {
     static const auto result = ends_with(MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER), "hcc");
@@ -41,9 +197,12 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std...
     {
         params += " --cuda-gpu-arch=" + arch;
         params += " --cuda-device-only";
-        params += " -O3 ";
+        params += " -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3") + " ";
     }
+    if(enabled(MIGRAPHX_GPU_DEBUG{}))
+        params += " -DMIGRAPHX_DEBUG";
     params += " -Wno-unused-command-line-argument -Wno-cuda-compat ";
     params += MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER_FLAGS);
@@ -71,6 +230,8 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std...
     return {compiler.compile(srcs)};
 }
+#endif // MIGRAPHX_USE_HIPRTC
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
...
@@ -2,9 +2,9 @@
 #include <migraphx/gpu/compile_hip.hpp>
 #include <migraphx/gpu/code_object_op.hpp>
 #include <migraphx/gpu/context.hpp>
+#include <migraphx/gpu/device_name.hpp>
 #include <migraphx/context.hpp>
 #include <migraphx_kernels.hpp>
-#include <migraphx/rank.hpp>
 #include <migraphx/stringutils.hpp>
 #include <hip/hip_runtime_api.h>
@@ -12,36 +12,6 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
-template <class HipDeviceProp>
-std::string get_arch_name(rank<0>, const HipDeviceProp& props)
-{
-    return "gfx" + std::to_string(props.gcnArch);
-}
-template <class HipDeviceProp>
-auto get_arch_name(rank<1>, const HipDeviceProp& props) -> decltype(std::string(props.gcnArchName))
-{
-    return std::string(props.gcnArchName);
-}
-int get_device_id()
-{
-    int device;
-    auto status = hipGetDevice(&device);
-    if(status != hipSuccess)
-        MIGRAPHX_THROW("No device");
-    return device;
-}
-std::string get_device_name()
-{
-    hipDeviceProp_t props{};
-    auto status = hipGetDeviceProperties(&props, get_device_id());
-    if(status != hipSuccess)
-        MIGRAPHX_THROW("Failed to get device properties");
-    return get_arch_name(rank<1>{}, props);
-}
 template <class T>
 std::string generate_index_ints(const std::vector<T>& v)
 {
@@ -98,6 +68,31 @@ __content__
     return replace_string(args_hpp, "__content__", inner);
 }
+const std::vector<std::string>& compiler_warnings()
+{
+    static std::vector<std::string> warnings = {"-Weverything",
+                                                "-Wno-c++98-compat",
+                                                "-Wno-c++98-compat-pedantic",
+                                                "-Wno-conversion",
+                                                "-Wno-double-promotion",
+                                                "-Wno-exit-time-destructors",
+                                                "-Wno-extra-semi",
+                                                "-Wno-extra-semi-stmt",
+                                                "-Wno-float-conversion",
+                                                "-Wno-gnu-anonymous-struct",
+                                                "-Wno-gnu-zero-variadic-macro-arguments",
+                                                "-Wno-missing-prototypes",
+                                                "-Wno-nested-anon-types",
+                                                "-Wno-padded",
+                                                "-Wno-shorten-64-to-32",
+                                                "-Wno-sign-conversion",
+                                                "-Wno-sign-compare",
+                                                "-Wno-unused-command-line-argument",
+                                                "-Wno-weak-vtables",
+                                                "-Wno-c99-extensions"};
+    return warnings;
+}
 operation compile_hip_code_object(const std::string& content, hip_compile_options options)
 {
     std::vector<src_file> srcs;
@@ -112,10 +107,14 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option...
     });
     srcs.push_back(src_file{fs::path{"main.cpp"},
                             std::make_pair(content.data(), content.data() + content.size())});
-    auto args_hpp = generate_args_hpp(options.inputs);
+    auto args_hpp =
+        generate_args_hpp(options.reduced_inputs.empty() ? options.inputs : options.reduced_inputs);
     srcs.push_back(src_file{fs::path{"args.hpp"},
                             std::make_pair(args_hpp.data(), args_hpp.data() + args_hpp.size())});
-    options.params += " -I.";
+    options.params += " -DMIGRAPHX_NGLOBAL=" + std::to_string(options.global);
+    options.params += " -DMIGRAPHX_NLOCAL=" + std::to_string(options.local);
+    options.params += " " + join_strings(compiler_warnings(), " ");
+    options.params += " -Werror";
     auto cos = compile_hip_src(srcs, std::move(options.params), get_device_name());
     if(cos.size() != 1)
         MIGRAPHX_THROW("No code object");
...
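The reduced_inputs preferred above are produced by reduce_dims in the new compile_pointwise.cpp that follows; the idea is to collapse adjacent dimensions that are contiguous in every input so the generated args.hpp describes fewer, larger axes. A hedged illustration of the expected effect (assuming reduce_dims folds fully packed shapes down to a single axis, which this diff implies but does not show):

#include <migraphx/reduce_dims.hpp>
#include <migraphx/shape.hpp>
#include <iostream>
#include <vector>

int main()
{
    using migraphx::shape;
    // Two standard row-major inputs: every axis boundary is contiguous in
    // both, so the pair should reduce to single-axis shapes of 24 elements.
    std::vector<shape> inputs = {shape{shape::float_type, {2, 3, 4}},
                                 shape{shape::float_type, {2, 3, 4}}};
    for(const auto& s : migraphx::reduce_dims(inputs))
        std::cout << s << std::endl; // expected: a {24}-element standard shape
}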
#include <migraphx/gpu/compile_pointwise.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
static const char* const pointwise_kernel = R"__migraphx__(
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/pointwise.hpp>
#include <args.hpp>
using namespace migraphx;
extern "C" {
__global__ void kernel(${params})
{
pointwise(${lambda}, ${args});
}
}
int main() {}
)__migraphx__";
std::string enum_params(std::size_t count, std::string param)
{
std::vector<std::string> items(count);
transform(range(count), items.begin(), [&](auto i) { return param + std::to_string(i); });
return join_strings(items, ",");
}
std::size_t compute_global(std::size_t n, std::size_t local = 1024)
{
std::size_t groups = (n + local - 1) / local;
std::size_t nglobal = std::min<std::size_t>(256, groups) * local;
return nglobal;
}
operation compile_pointwise(context&, const std::vector<shape>& inputs, const std::string& lambda)
{
hip_compile_options options;
options.global = compute_global(inputs.front().elements());
options.local = 1024;
options.inputs = inputs;
options.output = inputs.back();
options.reduced_inputs = reduce_dims(inputs);
auto src = interpolate_string(pointwise_kernel,
{{"params", enum_params(inputs.size(), "void * private_p")},
{"args", enum_params(inputs.size(), "private_p")},
{"lambda", lambda}});
return compile_hip_code_object(src, options);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
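compute_global above rounds the element count up to whole work-groups of local threads and then caps the launch at 256 groups; the MIGRAPHX_NGLOBAL/MIGRAPHX_NLOCAL defines added in compile_hip_code_object feed these numbers into the kernel. A standalone restatement of the arithmetic with two worked values:

#include <algorithm>
#include <cstddef>
#include <iostream>

// Same arithmetic as compute_global above, restated for a quick check.
std::size_t compute_global(std::size_t n, std::size_t local = 1024)
{
    std::size_t groups = (n + local - 1) / local;      // ceil(n / local)
    return std::min<std::size_t>(256, groups) * local; // cap at 256 work-groups
}

int main()
{
    std::cout << compute_global(1000) << "\n";   // 1 group    -> 1024 threads
    std::cout << compute_global(300000) << "\n"; // 293 groups -> capped: 262144
}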
@@ -11,7 +11,7 @@ shape miopen_convolution::compute_shape(const std::vector<shape>& inputs) const
     check_shapes{inputs, *this}.has(4).standard();
     std::vector<shape> conv_inputs(inputs.begin(), inputs.begin() + 2);
     check_shapes{conv_inputs, *this}.max_ndims(5);
-    return op.compute_shape(conv_inputs);
+    return op.normalize_compute_shape(conv_inputs);
 }
 inline shape reshape_if_1d(const shape& input)
...
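The op.compute_shape to op.normalize_compute_shape rename here matches the same change in the CPU lowering and pooling above; normalize_compute_shape is the variant that runs after operator attributes have been normalized, for example negative axes wrapped to the tensor rank by the normalize_ops pass at the head of the CPU pass list. A toy sketch of that kind of fix-up (illustrative only, not the library's code):

#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative only: wrap negative axes into [0, rank), the sort of
// normalization applied before normalize_compute_shape is called.
std::vector<std::int64_t> normalize_axes(std::vector<std::int64_t> axes, std::size_t rank)
{
    for(auto& a : axes)
        if(a < 0)
            a += static_cast<std::int64_t>(rank);
    return axes; // normalize_axes({-1, 0}, 4) -> {3, 0}
}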
#include <migraphx/gpu/device/fill.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void fill(hipStream_t stream, const argument& result, unsigned long val)
{
nary(stream, result)([=]() __device__ { return val; });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP
#include <migraphx/requires.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
template <class... Ts>
using common_type = typename std::common_type<Ts...>::type;
template <class T, MIGRAPHX_REQUIRES(is_floating_point<T>{})>
__device__ bool float_equal_device(T x, T y)
{
return std::isfinite(x) and std::isfinite(y) and
std::nextafter(x, std::numeric_limits<T>::lowest()) <= y and
std::nextafter(x, std::numeric_limits<T>::max()) >= y;
}
template <class T, MIGRAPHX_REQUIRES(not is_floating_point<T>{})>
__device__ bool float_equal_device(T x, T y)
{
return x == y;
}
template <class T, class U>
__device__ bool float_equal(T x, U y)
{
return float_equal_device<common_type<T, U>>(x, y);
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
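float_equal_device above accepts y if it lies within one representable step (ULP) of x in either direction, so rounding noise passes while real differences fail, and non-finite values always compare unequal. A host-side sketch of the same predicate using the std:: equivalents (the device build resolves these through the HIP math headers and additionally gates on its own is_floating_point trait):

#include <cmath>
#include <iostream>
#include <limits>

// One-ULP-tolerant comparison, same shape as float_equal_device above.
template <class T>
bool float_equal_host(T x, T y)
{
    return std::isfinite(x) and std::isfinite(y) and
           std::nextafter(x, std::numeric_limits<T>::lowest()) <= y and
           std::nextafter(x, std::numeric_limits<T>::max()) >= y;
}

int main()
{
    double a = 0.1 + 0.2;                          // 0.30000000000000004: one ULP above 0.3
    std::cout << (a == 0.3) << "\n";               // 0: exact comparison fails
    std::cout << float_equal_host(a, 0.3) << "\n"; // 1: within one ULP, treated as equal
}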
@@ -352,7 +352,8 @@ bool broadcastable(bool& divisible_by_4,
     auto b_len = result.get_shape().lens()[b_idx];
     auto b_stride = result.get_shape().strides()[b_idx];
     assert(bshape.lens()[b_idx] == b_len);
-    if(b_len <= max_size and std::none_of(std::next(b_it), strides.end(), not_zero))
+    if(b_len <= max_size and std::none_of(std::next(b_it), strides.end(), not_zero) and
+       is_divisor_encodable(b_stride * b_len))
     {
         divisible_by_4 = (b_len % 4 == 0) and (b_stride % 4 == 0) and
...