"examples/vscode:/vscode.git/clone" did not exist on "6c5f0de7139ee538536478146ff6b3ac5c4ed055"
Commit 4f63c691 authored by charlie

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into dyn_ref_broadcast

parents f02f5d98 e19f78ae
@@ -268,7 +268,9 @@ jobs:
       lcov --directory . --capture --output-file $(pwd)/coverage.info
       lcov --remove $(pwd)/coverage.info '/usr/*' --output-file $(pwd)/coverage.info
       lcov --list $(pwd)/coverage.info
-      curl -s https://codecov.io/bash | bash
+      curl -Os https://uploader.codecov.io/latest/linux/codecov
+      chmod +x codecov
+      ./codecov -t ${CODECOV_TOKEN}
       echo "Uploaded"
   linux-fpga:
@@ -364,5 +366,7 @@ jobs:
       # lcov --directory . --capture --output-file $(pwd)/coverage.info
       # lcov --remove $(pwd)/coverage.info '/usr/*' --output-file $(pwd)/coverage.info
       # lcov --list $(pwd)/coverage.info
-      # curl -s https://codecov.io/bash | bash
-      # echo "Uploaded"
\ No newline at end of file
+      # curl -Os https://uploader.codecov.io/latest/linux/codecov
+      # chmod +x codecov
+      # ./codecov -t ${CODECOV_TOKEN}
+      # echo "Uploaded"
@@ -26,6 +26,8 @@ on:
         required: true
         default: '-s'
+concurrency: "perftest-${{ github.head_ref || github.base_ref || 'schedule' }}"
 jobs:
   release:
     uses: rocmsoftwareplatform/migraphx-benchmark/.github/workflows/perf-test.yml@main
...
@@ -212,6 +212,7 @@ rocm_enable_cppcheck(
         ConfigurationNotChecked
         unmatchedSuppression
         unusedFunction
+        ctuPointerArith
         noExplicitConstructor
         passedByValue
         unusedStructMember
...
@@ -33,7 +33,7 @@ def rocmtestnode(Map conf) {
         }
     }
     node(name) {
-        withEnv(['HSA_ENABLE_SDMA=0', 'MIOPEN_DEBUG_GCN_ASM_KERNELS=0']) {
+        withEnv(['HSA_ENABLE_SDMA=0']) {
             stage("checkout ${variant}") {
                 checkout scm
             }
...
@@ -25,6 +25,6 @@ pfultz2/rocm-recipes
 facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
 ccache@v4.1
 pcre,pfultz2/pcre@8.45 -H sha256:d6f7182602a775a7d500a0cedca6449af0400c6493951513046d17615ed0bf11
-danmar/cppcheck@2.8 -DHAVE_RULES=1
+danmar/cppcheck@2.9 -DHAVE_RULES=1
 RadeonOpenCompute/rocm-cmake@1ebf7e7bc61bb5e949c171562b421264065230a7 --build
 -f requirements.txt
@@ -50,8 +50,8 @@ struct layernorm_matcher
     {
         return f("div")(arg(0)(x_minus_mean()),
-                        arg(1)(skip_broadcasts(f("sqrt")(
-                            arg(0)(f("add")(either_arg(0, 1)(variance(), has_value(1e-12f))))))));
+                        arg(1)(skip_broadcasts(f("sqrt")(arg(0)(
+                            f("add")(either_arg(0, 1)(variance(), is_constant().bind("eps"))))))));
     }
     auto matcher() const { return layernorm_onnx(); }
...
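For reference, the expression this matcher fuses is the layer-norm tail y = (x - mean(x)) / sqrt(var(x) + eps); the change binds the epsilon constant as "eps" instead of requiring the literal 1e-12. A standalone sketch of that computation with illustrative values (plain C++, not MIGraphX code):

    // Reference for the matched expression: y = (x - mean) / sqrt(var + eps).
    #include <cmath>
    #include <cstdio>
    #include <numeric>
    #include <vector>

    int main()
    {
        std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f};
        float eps  = 1e-5f; // now matched as any bound constant, not a fixed 1e-12
        float mean = std::accumulate(x.begin(), x.end(), 0.0f) / x.size();
        float var  = 0.0f;
        for(float v : x)
            var += (v - mean) * (v - mean);
        var /= x.size();
        for(float v : x)
            std::printf("%f\n", (v - mean) / std::sqrt(var + eps));
        return 0;
    }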
@@ -40,7 +40,6 @@ struct fmod : binary<fmod>
         a["commutative"] = false;
         return a;
     }
-    std::string point_function() const { return "fmod"; }
     auto apply() const
     {
         return [](auto x, auto y) { return std::fmod(x, y); };
...
@@ -38,9 +38,9 @@ struct mod : binary<mod>
     {
         auto a = base_attributes();
         a["commutative"] = false;
+        a["point_op"] = "${function:fmod}((${function:remainder}(${0}, ${1})) + ${1}, ${1})";
         return a;
     }
-    std::string point_function() const { return "mod"; }
     auto apply() const
     {
         return [](auto x, auto y) { return std::fmod((std::remainder(x, y)) + y, y); };
...
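The two operators differ only in whose sign the result follows: fmod keeps the sign of the dividend (as std::fmod does), while mod keeps the sign of the divisor, which is what the fmod(remainder(x, y) + y, y) expression in apply(), and the new point_op string, computes. A small standalone check:

    // fmod vs mod: -7 fmod 3 == -1 (sign of dividend), -7 mod 3 == 2 (sign of divisor).
    #include <cmath>
    #include <cstdio>

    int main()
    {
        double x = -7.0, y = 3.0;
        double fmod_result = std::fmod(x, y);
        double mod_result  = std::fmod(std::remainder(x, y) + y, y); // same formula as op::mod::apply()
        std::printf("fmod(%g, %g) = %g\n", x, y, fmod_result); // prints -1
        std::printf("mod(%g, %g)  = %g\n", x, y, mod_result);  // prints 2
        return 0;
    }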
@@ -28,6 +28,7 @@
 #include <algorithm>
 #include <migraphx/rank.hpp>
 #include <migraphx/config.hpp>
+#include <vector>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -59,28 +60,35 @@ inline stream_range_container<Range> stream_range(const Range& r)
 namespace detail {
-inline void stream_write_value_impl(rank<2>, std::ostream& os, const std::string& x) { os << x; }
+template <class T>
+auto stream_write_value_impl(rank<1>, std::ostream& os, const T& x) -> decltype(os << x, void())
+{
+    os << x;
+}
-template <class Range>
-auto stream_write_value_impl(rank<1>, std::ostream& os, const Range& r)
-    -> decltype(r.begin(), r.end(), void())
+template <class T>
+void stream_write_value_impl(rank<1>, std::ostream& os, const std::vector<T>& r)
 {
     os << "{";
     os << stream_range(r);
     os << "}";
 }
-template <class T>
-void stream_write_value_impl(rank<0>, std::ostream& os, const T& x)
+template <class Range>
+auto stream_write_value_impl(rank<0>, std::ostream& os, const Range& r)
+    -> decltype(r.begin(), r.end(), void())
 {
-    os << x;
+    os << "{";
+    os << stream_range(r);
+    os << "}";
 }
 } // namespace detail
 template <class T>
 void stream_write_value(std::ostream& os, const T& x)
 {
-    detail::stream_write_value_impl(rank<2>{}, os, x);
+    detail::stream_write_value_impl(rank<1>{}, os, x);
 }
 } // namespace MIGRAPHX_INLINE_NS
...
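rank<N> here is a tag-dispatch helper: rank<N> derives from rank<N-1>, so an overload taking a higher rank wins overload resolution, and the trailing decltype removes candidates that would not compile. A minimal standalone sketch of the same idiom (the rank template is re-declared locally for illustration, it is not the header's definition):

    // Tag dispatch by rank: the rank<1> overload wins when its expression compiles,
    // otherwise resolution falls back to the rank<0> overload.
    #include <iostream>
    #include <vector>

    template <int N>
    struct rank : rank<N - 1> {};
    template <>
    struct rank<0> {};

    template <class T>
    auto print_impl(rank<1>, std::ostream& os, const T& x) -> decltype(os << x, void())
    {
        os << x;
    }

    template <class Range>
    void print_impl(rank<0>, std::ostream& os, const Range& r)
    {
        os << "{ ";
        for(const auto& e : r)
            os << e << " ";
        os << "}";
    }

    template <class T>
    void print(std::ostream& os, const T& x)
    {
        print_impl(rank<1>{}, os, x); // start dispatch at the highest rank
    }

    int main()
    {
        print(std::cout, 42);                        // streamable -> rank<1> overload
        std::cout << "\n";
        print(std::cout, std::vector<int>{1, 2, 3}); // not streamable -> rank<0> overload
        std::cout << "\n";
        return 0;
    }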
@@ -184,6 +184,12 @@ struct value
         {
         }
         explicit binary(std::size_t s) : base(s) {}
+        friend std::ostream& operator<<(std::ostream& os, const binary& obj)
+        {
+            os << "{binary_object: " << obj.size() << "}";
+            return os;
+        }
     };
     value() = default;
...
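The new hidden-friend operator<< gives value::binary a compact printable form ("{binary_object: <size>}") instead of streaming raw bytes. The same idiom sketched on a stand-in type, purely as an illustration (not the MIGraphX class):

    // Hidden-friend stream insertion: found via ADL, prints a summary rather than raw bytes.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    struct binary_blob
    {
        std::vector<unsigned char> data;
        std::size_t size() const { return data.size(); }

        friend std::ostream& operator<<(std::ostream& os, const binary_blob& obj)
        {
            os << "{binary_object: " << obj.size() << "}";
            return os;
        }
    };

    int main()
    {
        binary_blob b{{0xde, 0xad, 0xbe, 0xef}};
        std::cout << b << "\n"; // prints {binary_object: 4}
        return 0;
    }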
@@ -385,9 +385,13 @@ instruction_ref module::move_instruction(instruction_ref src, instruction_ref ds
 instruction_ref module::move_instructions(instruction_ref src, instruction_ref dst)
 {
-    this->move_instruction(src, dst);
     for(auto ins : src->inputs())
-        this->move_instruction(ins, src);
+    {
+        if(not contains(this->impl->instructions, ins))
+            continue;
+        this->move_instructions(ins, dst);
+    }
+    this->move_instruction(src, dst);
     return src;
 }
...
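move_instructions now recurses through src's inputs first, skipping anything not owned by this module, and moves src itself last, so the whole dependency chain lands before dst in topological order; the old version only hoisted the direct inputs. A toy sketch of that ordering idea, using a plain map instead of the real module and instruction types:

    // Recursively emit a node's dependencies before the node itself (topological order),
    // mirroring the "move inputs first, then the instruction" structure above.
    #include <iostream>
    #include <map>
    #include <set>
    #include <vector>

    using graph = std::map<char, std::vector<char>>; // node -> inputs

    void emit(const graph& g, char node, std::set<char>& done, std::vector<char>& order)
    {
        if(done.count(node) != 0)
            return;
        for(char in : g.at(node)) // dependencies first...
            emit(g, in, done, order);
        done.insert(node);
        order.push_back(node); // ...then the node itself
    }

    int main()
    {
        graph g = {{'a', {}}, {'b', {'a'}}, {'c', {'a', 'b'}}};
        std::set<char> done;
        std::vector<char> order;
        emit(g, 'c', done, order);
        for(char n : order)
            std::cout << n << ' '; // prints: a b c
        std::cout << "\n";
        return 0;
    }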
@@ -24,7 +24,7 @@
 #include <migraphx/onnx/op_parser.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/make_op.hpp>
-#include <migraphx/op/batch_norm_inference.hpp>
+#include <migraphx/instruction.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -36,28 +36,63 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
     instruction_ref parse(const op_desc& /*opd*/,
                           const onnx_parser& parser,
-                          onnx_parser::node_info info,
-                          const std::vector<instruction_ref>& args) const
+                          const onnx_parser::node_info& info,
+                          std::vector<instruction_ref> args) const
     {
         float epsilon = 1e-5f;
-        float momentum = 0.9f;
-        op::batch_norm_inference::bn_infer_mode_t bn_mode = op::batch_norm_inference::spatial;
         if(contains(info.attributes, "epsilon"))
         {
             epsilon = parser.parse_value(info.attributes.at("epsilon")).at<float>();
         }
-        if(contains(info.attributes, "momentum"))
-        {
-            momentum = parser.parse_value(info.attributes.at("momentum")).at<float>();
-        }
-        if(contains(info.attributes, "spatial"))
-        {
-            bn_mode = (parser.parse_value(info.attributes.at("spatial")).at<uint64_t>() > 0)
-                          ? op::batch_norm_inference::spatial
-                          : op::batch_norm_inference::per_activation;
-        }
-        op::batch_norm_inference op{epsilon, momentum, bn_mode};
-        return info.add_instruction(op, args);
+        auto x_lens = args[0]->get_shape().lens();
+        auto x_type = args[0]->get_shape().type();
+        if(std::any_of(args.cbegin() + 1, args.cend(), [](auto a) {
+               return a->get_shape().lens().size() != 1;
+           }))
+        {
+            MIGRAPHX_THROW("PARSE_BATCHNORM: argument scale, bias, mean, or var rank != 1");
+        }
+        if(x_lens.size() == 1)
+        {
+            auto rt = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0.5}});
+            auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}});
+            auto n0 = info.add_broadcastable_binary_op("sub", args[0], args[3]);
+            auto d0 = info.add_broadcastable_binary_op("add", args[4], eps);
+            auto d1 = info.add_broadcastable_binary_op("pow", d0, rt);
+            auto div0 = info.add_broadcastable_binary_op("div", n0, d1);
+            auto r0 = info.add_broadcastable_binary_op("mul", div0, args[1]);
+            return info.add_broadcastable_binary_op("add", r0, args[2]);
+        }
+        else if(x_lens.size() > 2)
+        {
+            // unsqueeze tensors of shape (C) to broadcast correctly
+            std::vector<int64_t> unsqueeze_axes(x_lens.size() - 2);
+            std::iota(unsqueeze_axes.begin(), unsqueeze_axes.end(), 1);
+            auto rt = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0.5}});
+            auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}});
+            auto scale_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[1]);
+            auto bias_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[2]);
+            auto mean_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[3]);
+            auto var_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[4]);
+            auto numer = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze);
+            auto var_eps = info.add_broadcastable_binary_op("add", var_unsqueeze, eps);
+            auto denom = info.add_broadcastable_binary_op("pow", var_eps, rt);
+            auto div0 = info.add_broadcastable_binary_op("div", numer, denom);
+            auto r0 = info.add_broadcastable_binary_op("mul", div0, scale_unsqueeze);
+            return info.add_broadcastable_binary_op("add", r0, bias_unsqueeze);
+        }
+        else
+        {
+            // num dims either 0 or 2
+            MIGRAPHX_THROW("PARSE_BATCHNORM: rank " + std::to_string(x_lens.size()) +
+                           " input tensor, unhandled data format");
+        }
     }
 };
...
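With the dedicated batch_norm_inference operator removed, the parser now emits the inference formula directly as elementwise ops: y = scale * (x - mean) / sqrt(var + eps) + bias, with the rank-1 scale/bias/mean/var tensors unsqueezed so they broadcast over the trailing spatial axes. A standalone reference computation for a single NCHW element (illustrative numbers only):

    // Reference batch-norm inference for one element:
    // y[n][c][h][w] = scale[c] * (x[n][c][h][w] - mean[c]) / sqrt(var[c] + eps) + bias[c]
    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main()
    {
        float eps = 1e-5f;
        std::vector<float> scale = {2.0f}, bias = {1.0f}, mean = {0.5f}, var = {4.0f}; // one channel
        float x = 3.0f;    // x[n][0][h][w]
        std::size_t c = 0; // channel index
        float y = scale[c] * (x - mean[c]) / std::sqrt(var[c] + eps) + bias[c];
        std::printf("%f\n", y); // approximately 2 * 2.5 / 2 + 1 = 3.5
        return 0;
    }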
@@ -72,7 +72,7 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
     if(conflict_table.find(vn) != conflict_table.end())
     {
-        std::set<int>& vn_set = conflict_table[vn];
+        const std::set<int>& vn_set = conflict_table[vn];
         for(const auto& iter : vn_set)
         {
             live_range* range = live_ranges[iter];
@@ -267,8 +267,8 @@ void memory_coloring_impl::verify()
 {
     for(int i = 0; i < num_of_lives; ++i)
     {
-        live_interval& interval = live_intervals[i];
-        live_range& segment = interval.segment;
+        const live_interval& interval = live_intervals[i];
+        const live_range& segment = interval.segment;
         if(segment.begin == invalid_offset)
         {
@@ -284,7 +284,7 @@ void memory_coloring_impl::verify()
         int vn = segment.vn;
         if(conflict_table.find(vn) != conflict_table.end())
         {
-            std::set<int>& vn_set = conflict_table[vn];
+            const std::set<int>& vn_set = conflict_table[vn];
             for(const auto& iter : vn_set)
             {
                 live_range* range = live_ranges[iter];
@@ -319,8 +319,8 @@ void memory_coloring_impl::dump_intervals()
     {
         std::cout << " segment:" << i;
         std::cout << " =>";
-        std::set<int>& table = conflict_table[i];
-        for(auto& iter : table)
+        const std::set<int>& table = conflict_table[i];
+        for(const auto& iter : table)
         {
             std::cout << (iter) << ",";
         }
@@ -357,7 +357,7 @@ void live_interval::dump()
     std::cout << "id:" << id;
     segment.dump();
     std::cout << " uses:";
-    for(auto& iter : use_points)
+    for(const auto& iter : use_points)
     {
         std::cout << " " << get_ins_enum(iter) << ",";
     }
...
@@ -73,7 +73,7 @@ void insert_submod_allocations(instruction_ref ins, module& mod, const allocatio
         name_shapes.insert(ps.begin(), ps.end());
     }
-    for(auto& pn : name_shapes)
+    for(const auto& pn : name_shapes)
     {
         const auto& s = pn.second;
         instruction_ref output{};
...
@@ -57,12 +57,14 @@ auto conv_const_weights()
 auto reduction() { return match::name_contains("reduce"); }
+// conv(x, w) * a => conv(x, a * w)
 struct find_mul_conv
 {
     auto matcher() const
     {
-        return match::name("mul")(match::either_arg(0, 1)(conv_const_weights().bind("conv"),
-                                                          match::name("broadcast").bind("a")));
+        return match::name("mul")(
+            match::either_arg(0, 1)(conv_const_weights().bind("conv"),
+                                    match::name("broadcast", "multibroadcast").bind("a")));
     }
     void apply(module& m, const match::matcher_result& r) const
@@ -72,14 +74,35 @@ struct find_mul_conv
         auto a_ins = r.instructions["a"];
         auto w_ins = r.instructions["w"];
-        auto broadcast_op = any_cast<op::broadcast>(a_ins->get_operator());
-        if(broadcast_op.axis != 1)
+        const auto& a_input_lens = a_ins->inputs().front()->get_shape().lens();
+        std::size_t num_not_one_dims = std::count_if(
+            a_input_lens.cbegin(), a_input_lens.cend(), [](auto dim) { return dim != 1; });
+        if(num_not_one_dims > 1)
+            return;
+        // check broadcasted along channels
+        const auto& a_lens = a_ins->get_shape().lens();
+        const auto& a_strides = a_ins->get_shape().strides();
+        auto is_broadcasted_axis = [](auto len, auto stride) { return len == 1 or stride == 0; };
+        if(a_strides.at(1) != 1)
+            return;
+        if(not is_broadcasted_axis(a_lens.front(), a_strides.front()))
+            return;
+        if(not std::equal(a_lens.begin() + 2,
+                          a_lens.end(),
+                          a_strides.begin() + 2,
+                          a_strides.end(),
+                          is_broadcasted_axis))
             return;
+        auto sq = m.insert_instruction(ins, make_op("squeeze"), a_ins->inputs().front());
         auto new_a = m.insert_instruction(
-            ins,
-            make_op("broadcast", {{"axis", 0}, {"out_lens", w_ins->get_shape().lens()}}),
-            a_ins->inputs().front());
+            ins, make_op("broadcast", {{"axis", 0}, {"out_lens", w_ins->get_shape().lens()}}), sq);
         auto new_mul = m.insert_instruction(ins, make_op("mul"), new_a, w_ins);
         auto new_conv = m.insert_instruction(
             ins, conv_ins->get_operator(), conv_ins->inputs().front(), new_mul);
@@ -412,6 +435,24 @@ struct find_concat_op
     }
 };
+void move_instructions_back(module& m, instruction_ref pos, std::vector<instruction_ref> inss)
+{
+    auto start = range(m.begin(), pos);
+    for(auto ins : iterator_for(start))
+    {
+        auto it = std::find(inss.begin(), inss.end(), ins);
+        if(it != inss.end())
+            inss.erase(it);
+    }
+    for(auto ins : inss)
+    {
+        if(not m.has_instruction(ins))
+            continue;
+        move_instructions_back(m, pos, ins->inputs());
+        m.move_instruction(ins, pos);
+    }
+}
 std::vector<instruction_ref> get_splits(instruction_ref ins)
 {
     std::vector<instruction_ref> result;
@@ -587,8 +628,7 @@ struct find_splits
         }))
             return;
-        for(auto data : data_args)
-            m.move_instructions(data, ins);
+        move_instructions_back(m, ins, data_args);
         auto slice_op = any_cast<op::slice>(splits.front()->get_operator());
         assert(not slice_op.axes.empty());
@@ -841,8 +881,7 @@ struct find_conv_dot_horiz_fusion
             concat_axis = axis;
         }
-        for(auto arg : args)
-            m.move_instructions(arg, input);
+        move_instructions_back(m, input, args);
         // TODO: Check if axes match
         auto concat =
             m.insert_instruction(input, make_op("concat", {{"axis", concat_axis}}), args);
...
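The find_mul_conv rewrite relies on convolution being linear in the weights: multiplying the output by a per-output-channel constant a is the same as scaling each filter by a, hence the comment conv(x, w) * a => conv(x, a * w). The new stride test checks that a really is broadcast along the channel axis only: every axis except axis 1 must have length 1 or stride 0. A standalone sketch of that check with hypothetical lens/strides values:

    // Broadcast-along-channels check: every axis except axis 1 must have len == 1 or stride == 0.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    bool broadcast_along_channels(const std::vector<std::size_t>& lens,
                                  const std::vector<std::size_t>& strides)
    {
        auto is_broadcasted_axis = [](std::size_t len, std::size_t stride) {
            return len == 1 or stride == 0;
        };
        if(strides.at(1) != 1)
            return false; // the channel axis must be the one real (densely stored) axis
        if(not is_broadcasted_axis(lens.front(), strides.front()))
            return false; // batch axis
        for(std::size_t i = 2; i < lens.size(); ++i)
            if(not is_broadcasted_axis(lens[i], strides[i])) // spatial axes
                return false;
        return true;
    }

    int main()
    {
        // e.g. a shape-(C) tensor broadcast to NCHW: lens {1, 3, 4, 4}, strides {0, 1, 0, 0}
        std::cout << broadcast_along_channels({1, 3, 4, 4}, {0, 1, 0, 0}) << "\n";   // 1
        std::cout << broadcast_along_channels({2, 3, 4, 4}, {48, 16, 4, 1}) << "\n"; // 0
        return 0;
    }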
@@ -35,6 +35,7 @@ add_library(migraphx_cpu
     dnnl.cpp
     eltwise.cpp
     erf.cpp
+    fmod.cpp
     fuse_ops.cpp
     gather.cpp
     gemm.cpp
@@ -42,6 +43,7 @@ add_library(migraphx_cpu
     logsoftmax.cpp
     lowering.cpp
     lrn.cpp
+    mod.cpp
     preallocate.cpp
     pooling.cpp
     reduction.cpp
...
@@ -21,22 +21,16 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_COS_HPP
-#define MIGRAPHX_GUARD_RTGLIB_COS_HPP
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/cos.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/cpu/pointwise.hpp>
+#include <migraphx/op/fmod.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-struct hip_cos : unary_device<hip_cos, device::cos>
-{
-};
-} // namespace gpu
+namespace cpu {
+template struct cpu_binary<op::fmod>;
+} // namespace cpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
-#endif
@@ -43,6 +43,8 @@
 #include <migraphx/op/argmax.hpp>
 #include <migraphx/op/argmin.hpp>
 #include <migraphx/op/rnn_var_sl_last_output.hpp>
+#include <migraphx/op/mod.hpp>
+#include <migraphx/op/fmod.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/par_dfor.hpp>
...
@@ -21,22 +21,16 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_EXP_HPP
-#define MIGRAPHX_GUARD_RTGLIB_EXP_HPP
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/exp.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/cpu/pointwise.hpp>
+#include <migraphx/op/mod.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-struct hip_exp : unary_device<hip_exp, device::exp>
-{
-};
-} // namespace gpu
+namespace cpu {
+template struct cpu_binary<op::mod>;
+} // namespace cpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
-#endif
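The new fmod.cpp and mod.cpp translation units explicitly instantiate cpu_binary for op::fmod and op::mod, which presumably gives the CPU target an elementwise kernel built around each op's apply() lambda. Roughly what such an elementwise binary kernel does, sketched with std::transform (a plain analogue, not the cpu_binary implementation):

    // Elementwise application of a binary op's lambda over two buffers,
    // analogous in spirit to a CPU pointwise kernel for op::mod.
    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main()
    {
        auto mod_op = [](auto x, auto y) { return std::fmod((std::remainder(x, y)) + y, y); };
        std::vector<double> a = {-7.0, 7.0, -7.0};
        std::vector<double> b = {3.0, 3.0, -3.0};
        std::vector<double> out(a.size());
        std::transform(a.begin(), a.end(), b.begin(), out.begin(), mod_op);
        for(double v : out)
            std::printf("%g ", v); // prints: 2 1 -1
        std::printf("\n");
        return 0;
    }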
@@ -39,81 +39,9 @@ file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
 message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
 add_embed_library(migraphx_kernels ${KERNEL_FILES})
-add_library(migraphx_device
-    device/acos.cpp
-    device/acosh.cpp
-    device/add.cpp
-    device/add_clip.cpp
-    device/add_relu.cpp
-    device/add_sigmoid.cpp
-    device/add_tanh.cpp
-    device/argmax.cpp
-    device/argmin.cpp
-    device/asin.cpp
-    device/asinh.cpp
-    device/atan.cpp
-    device/atanh.cpp
-    device/ceil.cpp
-    device/clip.cpp
-    device/concat.cpp
-    device/contiguous.cpp
-    device/convert.cpp
-    device/cos.cpp
-    device/cosh.cpp
-    device/div.cpp
-    device/equal.cpp
-    device/erf.cpp
-    device/exp.cpp
-    device/fill.cpp
-    device/floor.cpp
-    device/gather.cpp
-    device/gelu.cpp
-    device/greater.cpp
-    device/int8_gemm_pack.cpp
-    device/layernorm.cpp
-    device/less.cpp
-    device/log.cpp
-    device/logical_and.cpp
-    device/logical_or.cpp
-    device/logical_xor.cpp
-    device/logsoftmax.cpp
-    device/max.cpp
-    device/min.cpp
-    device/mul.cpp
-    device/mul_add.cpp
-    device/mul_add_relu.cpp
-    device/multinomial.cpp
-    device/nonzero.cpp
-    device/pad.cpp
-    device/pow.cpp
-    device/prelu.cpp
-    device/prefix_scan_sum.cpp
-    device/recip.cpp
-    device/reduce_max.cpp
-    device/reduce_mean.cpp
-    device/reduce_min.cpp
-    device/reduce_sum.cpp
-    device/reduce_prod.cpp
-    device/relu.cpp
-    device/reverse.cpp
-    device/rnn_variable_seq_lens.cpp
-    device/round.cpp
-    device/rsqrt.cpp
-    device/scatter.cpp
-    device/sigmoid.cpp
-    device/sign.cpp
-    device/sin.cpp
-    device/sinh.cpp
-    device/softmax.cpp
-    device/sqdiff.cpp
-    device/sqrt.cpp
-    device/sub.cpp
-    device/tan.cpp
-    device/tanh.cpp
-    device/topk.cpp
-    device/unary_not.cpp
-    device/where.cpp
-)
+file(GLOB DEVICE_GPU_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp)
+add_library(migraphx_device ${DEVICE_GPU_SRCS})
 add_library(compile_for_gpu INTERFACE)
 target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
 target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored)
@@ -151,15 +79,12 @@ add_library(migraphx_gpu
     argmax.cpp
     argmin.cpp
     batch_norm_inference.cpp
-    clip.cpp
     code_object_op.cpp
     compile_ops.cpp
     compile_gen.cpp
     compile_hip.cpp
     compile_hip_code_object.cpp
     compiler.cpp
-    concat.cpp
-    convert.cpp
     convolution.cpp
     deconvolution.cpp
     device_name.cpp
@@ -192,7 +117,6 @@ add_library(migraphx_gpu
     rocblas.cpp
     scatter.cpp
     schedule_model.cpp
-    softmax.cpp
     sync_device.cpp
     target.cpp
     topk.cpp
@@ -207,68 +131,18 @@ function(register_migraphx_gpu_ops PREFIX)
     endforeach()
 endfunction()
 register_migraphx_gpu_ops(hip_
-    acosh
-    acos
-    add
     argmax
     argmin
-    asinh
-    asin
-    atanh
-    atan
-    ceil
-    clip
-    concat
-    convert
-    cosh
-    cos
-    div
-    equal
-    erf
-    exp
-    floor
     gather
-    greater
-    less
-    log
     logsoftmax
-    logical_and
-    logical_or
-    logical_xor
     loop
-    max
-    min
-    mul
     multinomial
     nonzero
     pad
-    pow
-    prelu
     prefix_scan_sum
-    recip
-    reduce_max
-    reduce_mean
-    reduce_min
-    reduce_prod
-    reduce_sum
-    relu
     reverse
-    round
-    rsqrt
     scatter
-    sigmoid
-    sign
-    sinh
-    sin
-    softmax
-    sqdiff
-    sqrt
-    sub
-    tanh
-    tan
     topk
-    unary_not
-    where
 )
 register_migraphx_gpu_ops(miopen_
     abs
@@ -365,9 +239,18 @@ endif()
 include(CheckLibraryExists)
 get_target_property(MIOPEN_LOCATION MIOpen LOCATION)
 check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API)
+check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
+if(HAS_FIND_2_API)
+    target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
+    message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
+else()
+    message(STATUS "MIOpen does not have Find-2.0 API")
+endif()
 if(HAS_FIND_MODE_API)
     target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API)
-    message(STATUS "MIOpen has find mode api")
+    message(STATUS "MIGraphx is using Find Mode API of MIOpen")
 else()
     message(STATUS "MIOpen does not have find mode api")
 endif()
...