Unverified Commit 66483df6 authored by Chris Austen, committed by GitHub

Merge branch 'develop' into simplify_1_mul_div_ops

parents 9310bff0 40118191
@@ -268,7 +268,9 @@ jobs:
         lcov --directory . --capture --output-file $(pwd)/coverage.info
         lcov --remove $(pwd)/coverage.info '/usr/*' --output-file $(pwd)/coverage.info
         lcov --list $(pwd)/coverage.info
-        curl -s https://codecov.io/bash | bash
+        curl -Os https://uploader.codecov.io/latest/linux/codecov
+        chmod +x codecov
+        ./codecov -t ${CODECOV_TOKEN}
         echo "Uploaded"
   linux-fpga:
@@ -364,5 +366,7 @@ jobs:
 #        lcov --directory . --capture --output-file $(pwd)/coverage.info
 #        lcov --remove $(pwd)/coverage.info '/usr/*' --output-file $(pwd)/coverage.info
 #        lcov --list $(pwd)/coverage.info
-#        curl -s https://codecov.io/bash | bash
-#        echo "Uploaded"
\ No newline at end of file
+#        curl -Os https://uploader.codecov.io/latest/linux/codecov
+#        chmod +x codecov
+#        ./codecov -t ${CODECOV_TOKEN}
+#        echo "Uploaded"
...
@@ -26,7 +26,7 @@ on:
         required: true
         default: '-s'
-concurrency: benchmark
+concurrency: "perftest-${{ github.head_ref || github.base_ref || 'schedule' }}"
 jobs:
   release:
...
@@ -212,6 +212,7 @@ rocm_enable_cppcheck(
         ConfigurationNotChecked
         unmatchedSuppression
         unusedFunction
+        ctuPointerArith
         noExplicitConstructor
         passedByValue
         unusedStructMember
...
@@ -86,7 +86,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
 ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
-RUN cget -p /usr/local install ROCmSoftwarePlatform/llvm-project-mlir@d2cb9e580550e92ab75a0a417e7a4abd02a24edf -DBUILD_MIXR_TARGET=On
+RUN cget -p /usr/local install ROCmSoftwarePlatform/llvm-project-mlir@e8e77eb16be413d301ea8509726d47f265d9011f -DBUILD_MIXR_TARGET=On
 ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
 ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
...
@@ -33,7 +33,7 @@ def rocmtestnode(Map conf) {
         }
     }
     node(name) {
-        withEnv(['HSA_ENABLE_SDMA=0', 'MIOPEN_DEBUG_GCN_ASM_KERNELS=0']) {
+        withEnv(['HSA_ENABLE_SDMA=0']) {
             stage("checkout ${variant}") {
                 checkout scm
             }
...
@@ -25,6 +25,6 @@ pfultz2/rocm-recipes
 facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
 ccache@v4.1
 pcre,pfultz2/pcre@8.45 -H sha256:d6f7182602a775a7d500a0cedca6449af0400c6493951513046d17615ed0bf11
-danmar/cppcheck@2.8 -DHAVE_RULES=1
+danmar/cppcheck@2.9 -DHAVE_RULES=1
 RadeonOpenCompute/rocm-cmake@1ebf7e7bc61bb5e949c171562b421264065230a7 --build
 -f requirements.txt
@@ -84,6 +84,12 @@ argument
    Construct an argument from a python buffer. This can include numpy arrays.

+.. py:method:: data_ptr()
+
+   Returns the address to the underlying argument data.
+
+   :rtype: int
+
 .. py:method:: get_shape()

    Returns the shape of the argument.
@@ -113,7 +119,16 @@ argument
    :param shape s: Shape of argument to fill.
    :param int value: Value to fill in the argument.
-   :rtype argument
+   :rtype: argument
+
+.. py:function:: argument_from_pointer(shape, address)
+
+   Create argument from data stored in given address without copy.
+
+   :param shape shape: Shape of the data stored in address.
+   :param long address: Memory address of data from another source
+   :rtype: argument

 target
 ------
...
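Note: the two documented additions above, `data_ptr()` and `argument_from_pointer(shape, address)`, together allow zero-copy data exchange from Python. A minimal usage sketch, assuming a built `migraphx` Python module and NumPy; only the names documented above are used:

```python
import numpy as np
import migraphx

# Construct an argument from a NumPy array via the buffer protocol.
data = np.arange(6, dtype=np.float32).reshape(2, 3)
arg = migraphx.argument(data)

# data_ptr() returns the raw address of the underlying buffer as an int.
addr = arg.data_ptr()

# argument_from_pointer() wraps that memory without copying it, so `alias`
# views the same storage as `arg`.
alias = migraphx.argument_from_pointer(arg.get_shape(), addr)
print(alias.tolist())
```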
@@ -50,8 +50,8 @@ struct layernorm_matcher
     {
         return f("div")(arg(0)(x_minus_mean()),
-                        arg(1)(skip_broadcasts(f("sqrt")(
-                            arg(0)(f("add")(either_arg(0, 1)(variance(), has_value(1e-12f))))))));
+                        arg(1)(skip_broadcasts(f("sqrt")(arg(0)(
+                            f("add")(either_arg(0, 1)(variance(), is_constant().bind("eps"))))))));
     }
     auto matcher() const { return layernorm_onnx(); }
...
@@ -40,7 +40,6 @@ struct fmod : binary<fmod>
         a["commutative"] = false;
         return a;
     }
-    std::string point_function() const { return "fmod"; }
     auto apply() const
     {
         return [](auto x, auto y) { return std::fmod(x, y); };
...
@@ -38,9 +38,9 @@ struct mod : binary<mod>
     {
         auto a           = base_attributes();
         a["commutative"] = false;
+        a["point_op"] = "${function:fmod}((${function:remainder}(${0}, ${1})) + ${1}, ${1})";
         return a;
     }
-    std::string point_function() const { return "mod"; }
     auto apply() const
     {
         return [](auto x, auto y) { return std::fmod((std::remainder(x, y)) + y, y); };
...
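For reference, the new `point_op` string mirrors the C++ lambda: `mod` produces a result with the sign of the divisor (Python/NumPy-style modulo), whereas `fmod` keeps the sign of the dividend. A quick illustrative check in plain Python, not part of the change:

```python
import math

def migraphx_mod(x, y):
    # same formula as the point_op string and the C++ lambda above
    return math.fmod(math.remainder(x, y) + y, y)

for x, y in [(-7, 3), (7, -3), (7.5, 2.0)]:
    assert math.isclose(migraphx_mod(x, y), x % y)    # matches Python's %, sign of divisor
    print(x, y, migraphx_mod(x, y), math.fmod(x, y))  # fmod keeps the dividend's sign
```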
@@ -24,7 +24,7 @@
 #include <migraphx/onnx/op_parser.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/make_op.hpp>
-#include <migraphx/op/batch_norm_inference.hpp>
+#include <migraphx/instruction.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -36,28 +36,63 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
     instruction_ref parse(const op_desc& /*opd*/,
                           const onnx_parser& parser,
-                          onnx_parser::node_info info,
-                          const std::vector<instruction_ref>& args) const
+                          const onnx_parser::node_info& info,
+                          const std::vector<instruction_ref> args) const
     {
         float epsilon = 1e-5f;
-        float momentum = 0.9f;
-        op::batch_norm_inference::bn_infer_mode_t bn_mode = op::batch_norm_inference::spatial;
         if(contains(info.attributes, "epsilon"))
         {
             epsilon = parser.parse_value(info.attributes.at("epsilon")).at<float>();
         }
-        if(contains(info.attributes, "momentum"))
+        auto x_lens = args[0]->get_shape().lens();
+        auto x_type = args[0]->get_shape().type();
+        if(std::any_of(args.cbegin() + 1, args.cend(), [](auto a) {
+               return a->get_shape().lens().size() != 1;
+           }))
+        {
+            MIGRAPHX_THROW("PARSE_BATCHNORM: argument scale, bias, mean, or var rank != 1");
+        }
+        if(x_lens.size() == 1)
+        {
+            auto rt   = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0.5}});
+            auto eps  = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}});
+            auto n0   = info.add_broadcastable_binary_op("sub", args[0], args[3]);
+            auto d0   = info.add_broadcastable_binary_op("add", args[4], eps);
+            auto d1   = info.add_broadcastable_binary_op("pow", d0, rt);
+            auto div0 = info.add_broadcastable_binary_op("div", n0, d1);
+            auto r0   = info.add_broadcastable_binary_op("mul", div0, args[1]);
+            return info.add_broadcastable_binary_op("add", r0, args[2]);
+        }
+        else if(x_lens.size() > 2)
         {
-            momentum = parser.parse_value(info.attributes.at("momentum")).at<float>();
+            // unsqueeze tensors of shape (C) to broadcast correctly
+            std::vector<int64_t> unsqueeze_axes(x_lens.size() - 2);
+            std::iota(unsqueeze_axes.begin(), unsqueeze_axes.end(), 1);
+            auto rt  = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0.5}});
+            auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}});
+            auto scale_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[1]);
+            auto bias_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[2]);
+            auto mean_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[3]);
+            auto var_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[4]);
+            auto numer   = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze);
+            auto var_eps = info.add_broadcastable_binary_op("add", var_unsqueeze, eps);
+            auto denom   = info.add_broadcastable_binary_op("pow", var_eps, rt);
+            auto div0    = info.add_broadcastable_binary_op("div", numer, denom);
+            auto r0      = info.add_broadcastable_binary_op("mul", div0, scale_unsqueeze);
+            return info.add_broadcastable_binary_op("add", r0, bias_unsqueeze);
         }
-        if(contains(info.attributes, "spatial"))
+        else
         {
-            bn_mode = (parser.parse_value(info.attributes.at("spatial")).at<uint64_t>() > 0)
-                          ? op::batch_norm_inference::spatial
-                          : op::batch_norm_inference::per_activation;
+            // num dims either 0 or 2
+            MIGRAPHX_THROW("PARSE_BATCHNORM: rank " + std::to_string(x_lens.size()) +
+                           " input tensor, unhandled data format");
         }
-        op::batch_norm_inference op{epsilon, momentum, bn_mode};
-        return info.add_instruction(op, args);
     }
 };
...
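The rewritten parser decomposes ONNX BatchNormalization (inference mode) into elementwise ops, y = scale * (x - mean) / sqrt(var + eps) + bias, unsqueezing the rank-1 per-channel tensors so they broadcast over an NCHW input. An illustrative NumPy sketch of the same arithmetic (the `unsq` helper is just for this example, not parser code):

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((2, 3, 4, 4)).astype(np.float32)  # NCHW input
scale, bias, mean = (rng.standard_normal(3).astype(np.float32) for _ in range(3))
var = rng.random(3).astype(np.float32)                    # variances are non-negative
eps = 1e-5

# "unsqueeze" (C,) -> (C, 1, 1) so per-channel tensors broadcast over (N, C, H, W)
def unsq(t):
    return t.reshape(3, 1, 1)

y = (x - unsq(mean)) / np.power(unsq(var) + eps, 0.5) * unsq(scale) + unsq(bias)
print(y.shape)  # (2, 3, 4, 4)
```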
@@ -72,7 +72,7 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
         if(conflict_table.find(vn) != conflict_table.end())
         {
-            std::set<int>& vn_set = conflict_table[vn];
+            const std::set<int>& vn_set = conflict_table[vn];
             for(const auto& iter : vn_set)
             {
                 live_range* range = live_ranges[iter];
@@ -267,8 +267,8 @@ void memory_coloring_impl::verify()
 {
     for(int i = 0; i < num_of_lives; ++i)
     {
-        live_interval& interval = live_intervals[i];
-        live_range& segment     = interval.segment;
+        const live_interval& interval = live_intervals[i];
+        const live_range& segment     = interval.segment;
         if(segment.begin == invalid_offset)
         {
@@ -284,7 +284,7 @@ void memory_coloring_impl::verify()
         int vn = segment.vn;
         if(conflict_table.find(vn) != conflict_table.end())
         {
-            std::set<int>& vn_set = conflict_table[vn];
+            const std::set<int>& vn_set = conflict_table[vn];
             for(const auto& iter : vn_set)
             {
                 live_range* range = live_ranges[iter];
@@ -319,8 +319,8 @@ void memory_coloring_impl::dump_intervals()
     {
         std::cout << " segment:" << i;
         std::cout << " =>";
-        std::set<int>& table = conflict_table[i];
-        for(auto& iter : table)
+        const std::set<int>& table = conflict_table[i];
+        for(const auto& iter : table)
         {
             std::cout << (iter) << ",";
         }
@@ -357,7 +357,7 @@ void live_interval::dump()
     std::cout << "id:" << id;
     segment.dump();
     std::cout << " uses:";
-    for(auto& iter : use_points)
+    for(const auto& iter : use_points)
     {
         std::cout << " " << get_ins_enum(iter) << ",";
     }
...
@@ -264,12 +264,13 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
     py::class_<migraphx::argument>(m, "argument", py::buffer_protocol())
         .def_buffer([](migraphx::argument& x) -> py::buffer_info { return to_buffer_info(x); })
-        .def("__init__",
-             [](migraphx::argument& x, py::buffer b) {
-                 py::buffer_info info = b.request();
-                 new(&x) migraphx::argument(to_shape(info), info.ptr);
-             })
+        .def(py::init([](py::buffer b) {
+            py::buffer_info info = b.request();
+            return migraphx::argument(to_shape(info), info.ptr);
+        }))
         .def("get_shape", &migraphx::argument::get_shape)
+        .def("data_ptr",
+             [](migraphx::argument& x) { return reinterpret_cast<std::uintptr_t>(x.data()); })
         .def("tolist",
              [](migraphx::argument& x) {
                  py::list l{x.get_shape().elements()};
...
@@ -73,7 +73,7 @@ void insert_submod_allocations(instruction_ref ins, module& mod, const allocatio
         name_shapes.insert(ps.begin(), ps.end());
     }

-    for(auto& pn : name_shapes)
+    for(const auto& pn : name_shapes)
     {
         const auto& s = pn.second;
         instruction_ref output{};
...
@@ -57,12 +57,14 @@ auto conv_const_weights()
 auto reduction() { return match::name_contains("reduce"); }

+// conv(x, w) * a => conv(x, a * w)
 struct find_mul_conv
 {
     auto matcher() const
     {
-        return match::name("mul")(match::either_arg(0, 1)(conv_const_weights().bind("conv"),
-                                                          match::name("broadcast").bind("a")));
+        return match::name("mul")(
+            match::either_arg(0, 1)(conv_const_weights().bind("conv"),
+                                    match::name("broadcast", "multibroadcast").bind("a")));
     }

     void apply(module& m, const match::matcher_result& r) const
@@ -72,14 +74,35 @@ struct find_mul_conv
         auto a_ins = r.instructions["a"];
         auto w_ins = r.instructions["w"];

-        auto broadcast_op = any_cast<op::broadcast>(a_ins->get_operator());
-        if(broadcast_op.axis != 1)
+        const auto& a_input_lens = a_ins->inputs().front()->get_shape().lens();
+        std::size_t num_not_one_dims = std::count_if(
+            a_input_lens.cbegin(), a_input_lens.cend(), [](auto dim) { return dim != 1; });
+        if(num_not_one_dims > 1)
+            return;
+
+        // check broadcasted along channels
+        const auto& a_lens    = a_ins->get_shape().lens();
+        const auto& a_strides = a_ins->get_shape().strides();
+        auto is_broadcasted_axis = [](auto len, auto stride) { return len == 1 or stride == 0; };
+        if(a_strides.at(1) != 1)
+            return;
+        if(not is_broadcasted_axis(a_lens.front(), a_strides.front()))
+            return;
+        if(not std::equal(a_lens.begin() + 2,
+                          a_lens.end(),
+                          a_strides.begin() + 2,
+                          a_strides.end(),
+                          is_broadcasted_axis))
             return;

+        auto sq    = m.insert_instruction(ins, make_op("squeeze"), a_ins->inputs().front());
         auto new_a = m.insert_instruction(
-            ins,
-            make_op("broadcast", {{"axis", 0}, {"out_lens", w_ins->get_shape().lens()}}),
-            a_ins->inputs().front());
+            ins, make_op("broadcast", {{"axis", 0}, {"out_lens", w_ins->get_shape().lens()}}), sq);
         auto new_mul  = m.insert_instruction(ins, make_op("mul"), new_a, w_ins);
         auto new_conv = m.insert_instruction(
             ins, conv_ins->get_operator(), conv_ins->inputs().front(), new_mul);
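The new comment states the identity this matcher relies on: conv(x, w) * a == conv(x, a * w) when `a` is a constant broadcast along the output channels. An illustrative NumPy check for a 1x1 convolution, independent of the MIGraphX code:

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((1, 3, 4, 4))  # NCHW input
w = rng.standard_normal((2, 3, 1, 1))  # 1x1 conv weights: (C_out, C_in, kH, kW)
a = rng.standard_normal(2)             # one scale per output channel

def conv1x1(x, w):
    # a 1x1 convolution is a contraction over the input-channel dimension
    return np.einsum("nchw,kcij->nkhw", x, w)

lhs = conv1x1(x, w) * a.reshape(1, 2, 1, 1)  # conv(x, w) * a
rhs = conv1x1(x, w * a.reshape(2, 1, 1, 1))  # conv(x, a * w)
assert np.allclose(lhs, rhs)
```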
@@ -1052,20 +1075,35 @@ struct find_split_reshape
         auto rsp_lens    = rsp->get_shape().lens();
         auto rsp_strides = rsp->get_shape().strides();
         rsp_strides.insert(rsp_strides.begin(), rsp_strides[0] * rsp_lens[0]);
-        auto ait = std::find(rsp_strides.begin(), rsp_strides.end(), slc_dim_size);
+
+        auto ait     = std::find(rsp_strides.begin(), rsp_strides.end(), slc_dim_size);
+        int rsp_axis = -1;
         if(ait == rsp_strides.end())
         {
             return;
         }
-        int rsp_axis = std::distance(rsp_strides.begin(), ait);
+        else if(ait == rsp_strides.end() - 1)
+        {
+            // edge case
+            // slice_dim == 1, in that case it could match with last stride of 1.
+            // it should accumulate lengths from last dim in that case. discount 1 to avoid going
+            // out of bounds.
+            assert(slc_dim_size == 1);
+            rsp_axis = std::distance(rsp_strides.begin(), ait) - 1;
+        }
+        else
+        {
+            rsp_axis = std::distance(rsp_strides.begin(), ait);
+        }

         // calculate reshape output shape
         std::vector<int64_t> vec_dims(vec_rsp.size());
         std::transform(vec_rsp.begin(), vec_rsp.end(), vec_dims.begin(), [&](auto is) {
             return is->get_shape().lens()[rsp_axis];
         });
         std::vector<int64_t> rsp_out_lens(rsp_lens.begin(), rsp_lens.end());
         rsp_out_lens[rsp_axis] = std::accumulate(vec_dims.begin(), vec_dims.end(), std::int64_t{0});

         // insert the reshape instruction and add contiguous if needed
...
@@ -271,6 +271,44 @@ struct find_nested_slice
     }
 };

+struct find_concat_multibroadcasts
+{
+    auto matcher() const
+    {
+        return match::name("concat")(match::all_of[match::inputs()](match::name("multibroadcast")));
+    }
+
+    void apply(module& m, const match::matcher_result& mr) const
+    {
+        auto ins        = mr.result;
+        auto op         = any_cast<op::concat>(ins->get_operator());
+        auto out_lens   = ins->get_shape().lens();
+        auto inputs     = ins->inputs();
+        auto in_strides = inputs.front()->get_shape().strides();
+
+        // Only apply when concat axis is not a broadcasted dimension
+        if(std::any_of(inputs.begin(), inputs.end(), [&](auto i) {
+               return i->get_shape().strides()[op.axis] == 0;
+           }))
+        {
+            return;
+        }
+
+        // Use inputs of multibroadcast ops as inputs to new concat op
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [](auto i) {
+            return i->inputs().front();
+        });
+
+        // Reduce axis by number of leading broadcasted dimensions
+        if(inputs.front()->get_shape().lens().size() < out_lens.size())
+            op.axis -= std::count(in_strides.begin(), in_strides.begin() + op.axis, 0);
+
+        auto concat = m.insert_instruction(ins, op, inputs);
+        m.replace_instruction(
+            ins, migraphx::make_op("multibroadcast", {{"out_lens", out_lens}}), concat);
+    }
+};
+
 struct find_concat_transpose
 {
     auto matcher() const
@@ -764,6 +802,7 @@ void simplify_reshapes::apply(module& m) const
         find_reshaper{},
         find_transpose{},
         find_concat_transpose{},
+        find_concat_multibroadcasts{},
         find_nested_convert{},
         find_nested_slice{},
         find_nested_concat{},
...
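The new `find_concat_multibroadcasts` matcher pulls a `concat` in front of its `multibroadcast` inputs, which is valid as long as the concat axis is not itself a broadcast axis. An illustrative NumPy sketch of the equivalence:

```python
import numpy as np

a = np.arange(3).reshape(3, 1)  # axis 1 will be broadcast, axis 0 will not
b = np.arange(3, 6).reshape(3, 1)

# concat(multibroadcast(a), multibroadcast(b)) along axis 0 (a non-broadcast axis)
before = np.concatenate([np.broadcast_to(a, (3, 4)), np.broadcast_to(b, (3, 4))], axis=0)

# multibroadcast(concat(a, b)): concatenate the original inputs, then broadcast once
after = np.broadcast_to(np.concatenate([a, b], axis=0), (6, 4))

assert np.array_equal(before, after)
```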
@@ -35,6 +35,7 @@ add_library(migraphx_cpu
     dnnl.cpp
     eltwise.cpp
     erf.cpp
+    fmod.cpp
     fuse_ops.cpp
     gather.cpp
     gemm.cpp
@@ -42,6 +43,7 @@ add_library(migraphx_cpu
     logsoftmax.cpp
     lowering.cpp
     lrn.cpp
+    mod.cpp
     preallocate.cpp
     pooling.cpp
     reduction.cpp
...
@@ -21,22 +21,16 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_COS_HPP
-#define MIGRAPHX_GUARD_RTGLIB_COS_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/cos.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/cpu/pointwise.hpp>
+#include <migraphx/op/fmod.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
+namespace cpu {

-struct hip_cos : unary_device<hip_cos, device::cos>
-{
-};
+template struct cpu_binary<op::fmod>;

-} // namespace gpu
+} // namespace cpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
-
-#endif
@@ -43,6 +43,8 @@
 #include <migraphx/op/argmax.hpp>
 #include <migraphx/op/argmin.hpp>
 #include <migraphx/op/rnn_var_sl_last_output.hpp>
+#include <migraphx/op/mod.hpp>
+#include <migraphx/op/fmod.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/par_dfor.hpp>
...
@@ -21,22 +21,16 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_EXP_HPP
-#define MIGRAPHX_GUARD_RTGLIB_EXP_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/exp.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/cpu/pointwise.hpp>
+#include <migraphx/op/mod.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
+namespace cpu {

-struct hip_exp : unary_device<hip_exp, device::exp>
-{
-};
+template struct cpu_binary<op::mod>;

-} // namespace gpu
+} // namespace cpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
-
-#endif