Commit a5c87ec5 authored by Paul

Merge branch 'develop' into jit-reduce-reg

parents dae94657 3f49f8eb
name: Add items to GH project

on:
  pull_request:
    types:
      - opened
  issues:
    types:
      - opened

jobs:
  add-to-project:
    name: Add PRs and issues to MIGX project
    runs-on: ubuntu-latest
    steps:
      - uses: actions/add-to-project@v0.4.0
        with:
          project-url: https://github.com/orgs/ROCmSoftwarePlatform/projects/20
          github-token: ${{ secrets.TEST_PR_WORKFLOW }}
@@ -87,7 +87,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
 ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
-RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@0f38fb33f518b53b94b541feb9b079668c5518e8 -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
+RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@78b706fe9879587ab98b6614ae539265374a3fae -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
 ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
 ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
...
@@ -11,16 +11,22 @@ def rocmtestnode(Map conf) {
     def image = 'migraphxlib'
     env.CCACHE_COMPRESSLEVEL = 7
     env.CCACHE_DIR = ccache
-    def cmake_build = { compiler, flags ->
+    def cmake_build = { bconf ->
+        def compiler = bconf.get("compiler", "/opt/rocm/llvm/bin/clang++")
+        def flags = bconf.get("flags", "")
+        def gpu_debug = bconf.get("gpu_debug", "0")
         def cmd = """
-            env
             ulimit -c unlimited
             echo "leak:dnnl::impl::malloc" > suppressions.txt
             export LSAN_OPTIONS="suppressions=\$(pwd)/suppressions.txt"
+            export MIGRAPHX_GPU_DEBUG=${gpu_debug}
+            export CXX=${compiler}
+            export CXXFLAGS='-Werror'
+            env
             rm -rf build
             mkdir build
             cd build
-            CXX=${compiler} CXXFLAGS='-Werror' cmake -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache ${flags} ..
+            cmake -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache ${flags} ..
             make -j\$(nproc) generate all doc package check VERBOSE=1
         """
         echo cmd
@@ -93,28 +99,32 @@ rocmtest clang_debug: rocmnode('vega') { cmake_build ->
     stage('Hip Clang Debug') {
         def sanitizers = "undefined"
         def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
-        cmake_build("/opt/rocm/llvm/bin/clang++", "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
+        cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
+    }
+}, clang_gpu_debug: rocmnode('vega') { cmake_build ->
+    stage('Hip Clang GPU Debug') {
+        cmake_build(flags: "-DCMAKE_BUILD_TYPE=release", gpu_debug: true)
     }
 }, clang_release: rocmnode('vega') { cmake_build ->
     stage('Hip Clang Release') {
-        cmake_build("/opt/rocm/llvm/bin/clang++", "-DCMAKE_BUILD_TYPE=release")
+        cmake_build(flags: "-DCMAKE_BUILD_TYPE=release")
         stash includes: 'build/*.deb', name: 'migraphx-package'
     }
 }, mlir_debug: rocmnode('vega') { cmake_build ->
     stage('MLIR Debug') {
         def sanitizers = "undefined"
         def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
-        cmake_build("/opt/rocm/llvm/bin/clang++", "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
+        cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
     }
 }, clang_asan: rocmnode('nogpu') { cmake_build ->
     stage('Clang ASAN') {
         def sanitizers = "undefined,address"
         def debug_flags = "-g -O2 -fno-omit-frame-pointer -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
-        cmake_build("/opt/rocm/llvm/bin/clang++", "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_GPU=Off -DMIGRAPHX_ENABLE_CPU=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
+        cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_GPU=Off -DMIGRAPHX_ENABLE_CPU=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
     }
 }//, clang_release_navi: rocmnode('navi21') { cmake_build ->
 //    stage('HIP Clang Release Navi') {
-//        cmake_build("/opt/rocm/llvm/bin/clang++", "-DCMAKE_BUILD_TYPE=release")
+//        cmake_build(flags: "-DCMAKE_BUILD_TYPE=release")
 //    }
 //}
...
@@ -59,18 +59,29 @@ struct pad
     std::string name() const { return "pad"; }

     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1);
-        auto&& idims = inputs.front().lens();
-        std::vector<std::size_t> rdims(idims.begin(), idims.end());
-        std::size_t num_dims = rdims.size();
-        for(std::size_t i = 0; i < num_dims; i++)
+        check_shapes{inputs, *this, true}.has(1);
+        const auto& s0 = inputs.front();
+        if(s0.dynamic())
         {
-            rdims[i] += pads[i] + pads[i + num_dims];
+            auto out_dyn_dims = s0.dyn_dims();
+            for(std::size_t i = 0; i < s0.ndim(); ++i)
+            {
+                out_dyn_dims[i] += pads[i] + pads[i + s0.ndim()];
+            }
+            return {s0.type(), out_dyn_dims};
+        }
+        else
+        {
+            auto&& idims = s0.lens();
+            std::vector<std::size_t> rdims(idims.begin(), idims.end());
+            std::size_t num_dims = rdims.size();
+            for(std::size_t i = 0; i < num_dims; i++)
+            {
+                rdims[i] += pads[i] + pads[i + num_dims];
+            }
+            shape s{s0.type(), rdims};
+            return s;
         }
-        shape s{inputs.front().type(), rdims};
-        return s;
     }
     std::size_t pad_ndims() const
...
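For readers following the dynamic-shape branch above: each pad amount is added to both ends of a dimension's [min, max] range. Below is a minimal standalone sketch of that arithmetic; dyn_dim is an illustrative stand-in for migraphx::shape::dynamic_dimension, not the real class.

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative stand-in: a [min, max] range plus an "optimal" hint (0 = none).
struct dyn_dim
{
    std::size_t min, max, opt;
};

int main()
{
    // One dynamic axis [1,4] and one fixed axis [3,3], padded by
    // pads = {before0, before1, after0, after1} = {1, 0, 2, 0}.
    std::vector<dyn_dim> dims = {{1, 4, 0}, {3, 3, 0}};
    std::vector<std::size_t> pads = {1, 0, 2, 0};
    std::size_t ndim = dims.size();
    for(std::size_t i = 0; i < ndim; ++i)
    {
        // mirrors: out_dyn_dims[i] += pads[i] + pads[i + s0.ndim()];
        dims[i].min += pads[i] + pads[i + ndim];
        dims[i].max += pads[i] + pads[i + ndim];
    }
    std::cout << dims[0].min << "-" << dims[0].max << "\n"; // 4-7
    std::cout << dims[1].min << "-" << dims[1].max << "\n"; // 3-3
}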
@@ -26,6 +26,7 @@
 #include <migraphx/op/name.hpp>
 #include <migraphx/check_shapes.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/tensor_view.hpp>
 #include <migraphx/shape_for_each.hpp>
@@ -105,18 +106,41 @@ struct reduce_op : op_name<Derived>
         return tuned_axes;
     }

+    /**
+     * @brief Returns a shape in which the axis or axes named
+     * for reduction by this op are set to size 1.
+     *
+     * @param inputs list of input shapes
+     * @return shape
+     */
     shape normalize_compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1);
+        check_shapes{inputs, *this, true}.has(1);
         auto s = inputs.at(0);
-        auto lens = s.lens();
-        auto tuned_axes = tune_axes(lens.size());
-        for(auto axis : tuned_axes)
+        if(s.dynamic())
         {
-            lens[axis] = 1;
+            auto output_dyn_dims = s.dyn_dims();
+            auto tuned_axes = tune_axes(output_dyn_dims.size());
+            for(const auto& axis : tuned_axes)
+            {
+                // At the time of writing, there's no functional difference between
+                // an optimum of 0 (no opt) and 1.
+                output_dyn_dims[axis] = {1, 1, 0};
+            }
+            return shape{s.type(), output_dyn_dims};
+        }
+        else
+        {
+            auto lens = s.lens();
+            auto tuned_axes = tune_axes(lens.size());
+            for(const auto& axis : tuned_axes)
+            {
+                lens[axis] = 1;
+            }
+            return inputs[0].with_lens(lens);
         }
-        return inputs[0].with_lens(lens);
     }

     template <class T>
@@ -124,7 +148,7 @@ struct reduce_op : op_name<Derived>
                    const std::vector<T>& in_lens,
                    std::vector<T>& out_lens) const
     {
-        for(auto axis : tuned_axes)
+        for(const auto& axis : tuned_axes)
         {
             out_lens[axis] = in_lens[axis];
         }
@@ -151,17 +175,17 @@ struct reduce_op : op_name<Derived>
         static_cast<const Derived&>(*this).output(batch_shape)(val);
     }

-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
         auto arg_lens = args.front().get_shape().lens();
         auto tuned_axes = tune_axes(arg_lens.size());
-        std::vector<std::size_t> batch_lens(output_shape.lens().size(), 1);
+        std::vector<std::size_t> batch_lens(dyn_out.computed_shape.lens().size(), 1);
         tune_dims(tuned_axes, arg_lens, batch_lens);
-        shape batch_shape{output_shape.type(), batch_lens};
+        shape batch_shape{dyn_out.computed_shape.type(), batch_lens};
         visit_all(result, args[0])([&](auto output, auto input) {
-            par_for(output_shape.elements(), [&](auto i) {
-                auto out_idx = output_shape.multi(i);
+            par_for(dyn_out.computed_shape.elements(), [&](auto i) {
+                auto out_idx = dyn_out.computed_shape.multi(i);
                 this->reduce(input, batch_shape, tuned_axes, out_idx, output);
             });
         });
...
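The dynamic branch of normalize_compute_shape collapses each reduced axis to the fixed range {1, 1} with an opt of 0 (no optimal hint). A standalone sketch of that collapse, again with an illustrative dyn_dim stand-in rather than the MIGraphX type:

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative stand-in for migraphx::shape::dynamic_dimension.
struct dyn_dim
{
    std::size_t min, max, opt;
};

int main()
{
    // Input dims {[1,4], [3,3], [5,5]}, reducing axis 2.
    std::vector<dyn_dim> out_dyn_dims = {{1, 4, 0}, {3, 3, 0}, {5, 5, 0}};
    out_dyn_dims[2] = {1, 1, 0}; // the reduced axis collapses to a fixed 1
    for(const auto& dd : out_dyn_dims)
        std::cout << "[" << dd.min << "," << dd.max << "] "; // [1,4] [3,3] [1,1]
    std::cout << "\n";
}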
@@ -28,6 +28,7 @@
 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -46,14 +47,60 @@ struct reshape
     value attributes() const { return {{"require_std_shape", true}}; }

     std::string name() const { return "reshape"; }

-    shape compute_shape(std::vector<shape> inputs) const
+    shape dyn_compute_shape(shape s0) const
+    {
+        auto dyn_dims = s0.dyn_dims();
+        auto num_not_fixed = std::count_if(
+            dyn_dims.cbegin(), dyn_dims.cend(), [](auto dd) { return not dd.is_fixed(); });
+        if(num_not_fixed != 1)
+        {
+            MIGRAPHX_THROW("Reshape: Only supports one non-fixed dynamic_dimension");
+        }
+        // track number of fixed elements in input and output
+        std::size_t num_dims_ele = 1;
+        std::size_t num_dd_ele = 1;
+        for(std::size_t i = 0; i < dyn_dims.size(); ++i)
+        {
+            if(dyn_dims[i].is_fixed())
+            {
+                num_dims_ele *= dims[i];
+                num_dd_ele *= dyn_dims[i].min;
+            }
+            else
+            {
+                if(dims[i] != 0 and dims[i] != -1)
+                {
+                    MIGRAPHX_THROW(
+                        "Reshape: Non-fixed dynamic_dimension doesn't match with 0 or -1 "
+                        "output dimension");
+                }
+            }
+        }
+        if(num_dims_ele != num_dd_ele)
+        {
+            MIGRAPHX_THROW("Reshape: Number of fixed elements must match. Input: " +
+                           std::to_string(num_dd_ele) + " Output: " + std::to_string(num_dims_ele));
+        }
+        // construct output dynamic shape from dims attribute
+        std::vector<shape::dynamic_dimension> output_dyn_dims(dims.size());
+        std::transform(dims.cbegin(),
+                       dims.cend(),
+                       dyn_dims.cbegin(),
+                       output_dyn_dims.begin(),
+                       [](std::size_t dim, auto dyn_dim) {
+                           if(not dyn_dim.is_fixed())
+                               return dyn_dim;
+                           return shape::dynamic_dimension{dim, dim};
+                       });
+        return {s0.type(), output_dyn_dims};
+    }
+
+    shape static_compute_shape(std::vector<shape> inputs, std::size_t n_neg_dims) const
     {
-        check_shapes{inputs, *this}.has(1).standard();
+        check_shapes{inputs, *this}.standard();
         auto&& idims = inputs.front().lens();
         std::vector<std::size_t> rdims(dims.begin(), dims.end());
-        auto n_neg_dims = std::count(dims.begin(), dims.end(), -1);
-        if(n_neg_dims > 1)
-            MIGRAPHX_THROW("Reshape: Dimensions for reshape can only have one -1 dim");
         for(std::size_t i = 0; i < dims.size(); i++)
         {
@@ -86,9 +133,26 @@ struct reshape
         return s;
     }

-    argument compute(shape output_shape, std::vector<argument> args) const
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(1);
+        auto n_neg_dims = std::count(dims.begin(), dims.end(), -1);
+        if(n_neg_dims > 1)
+            MIGRAPHX_THROW("Reshape: Dimensions for reshape can only have one -1 dim");
+        auto s0 = inputs[0];
+        if(s0.dynamic())
+        {
+            return dyn_compute_shape(s0);
+        }
+        else
+        {
+            return static_compute_shape(inputs, n_neg_dims);
+        }
+    }
+
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        return args[0].reshape(output_shape);
+        return args[0].reshape(dyn_out.computed_shape);
     }

     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
...
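dyn_compute_shape accepts exactly one non-fixed dimension and requires the product of the fixed dimensions to match the product of the corresponding fixed entries in dims. A standalone sketch of that element-count check, with illustrative types in place of the MIGraphX ones:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative stand-in for migraphx::shape::dynamic_dimension.
struct dyn_dim
{
    std::size_t min, max;
    bool is_fixed() const { return min == max; }
};

int main()
{
    // Input dims {[1,10], [3,3], [4,4]} reshaped with dims = {-1, 3, 4}:
    // the single non-fixed axis lines up with the -1 entry.
    std::vector<dyn_dim> dyn_dims = {{1, 10}, {3, 3}, {4, 4}};
    std::vector<int64_t> dims = {-1, 3, 4};
    std::size_t num_dims_ele = 1, num_dd_ele = 1;
    for(std::size_t i = 0; i < dyn_dims.size(); ++i)
    {
        if(dyn_dims[i].is_fixed())
        {
            num_dims_ele *= dims[i];        // fixed entries of the dims attribute
            num_dd_ele *= dyn_dims[i].min;  // fixed input dimensions
        }
    }
    std::cout << std::boolalpha << (num_dims_ele == num_dd_ele) << "\n"; // true: 12 == 12
}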
@@ -107,6 +107,13 @@ struct shape
         friend bool operator==(const std::size_t& x, const dynamic_dimension& y);
         friend bool operator!=(const dynamic_dimension& x, const std::size_t& y);
         friend bool operator!=(const std::size_t& x, const dynamic_dimension& y);
+
+        // add and subtract a fixed std::size_t dimension
+        dynamic_dimension& operator+=(const std::size_t& x);
+        dynamic_dimension& operator-=(const std::size_t& x);
+        friend dynamic_dimension operator+(const dynamic_dimension& x, const std::size_t& y);
+        friend dynamic_dimension operator+(const std::size_t& x, const dynamic_dimension& y);
+        friend dynamic_dimension operator-(const dynamic_dimension& x, const std::size_t& y);
     };

     static const std::vector<type_t>& types();
...
@@ -110,9 +110,19 @@ instruction_ref onnx_parser::node_info::add_bias(const std::vector<instruction_r
 {
     if(args.size() == 3)
     {
-        auto bias_bcast = mod->add_instruction(
-            make_op("broadcast", {{"axis", axis}, {"out_lens", curr_ins->get_shape().lens()}}),
-            args[2]);
+        instruction_ref bias_bcast;
+        // if curr_ins has a dynamic output shape, use the two-input broadcast
+        if(curr_ins->get_shape().dynamic())
+        {
+            bias_bcast =
+                mod->add_instruction(make_op("broadcast", {{"axis", axis}}), args[2], curr_ins);
+        }
+        else
+        {
+            bias_bcast = mod->add_instruction(
+                make_op("broadcast", {{"axis", axis}, {"out_lens", curr_ins->get_shape().lens()}}),
+                args[2]);
+        }
         return mod->add_instruction(make_op("add"), curr_ins, bias_bcast);
     }
     return curr_ins;
...
@@ -43,55 +43,79 @@ struct parse_matmul : op_parser<parse_matmul>
                        const onnx_parser::node_info& info,
                        std::vector<instruction_ref> args) const
     {
-        auto l0 = args[0];
-        auto l1 = args[1];
-        auto l0_lens = l0->get_shape().lens();
-        auto l1_lens = l1->get_shape().lens();
-        // args[0] is a vector, prepend 1 to the shape
-        bool is_a_prepended = false;
-        if(l0_lens.size() == 1)
-        {
-            is_a_prepended = true;
-            l0_lens.insert(l0_lens.begin(), 1);
-            l0 = info.add_instruction(make_op("unsqueeze", {{"axes", {0}}}), args[0]);
-        }
-
-        bool is_b_appended = false;
-        if(l1_lens.size() == 1)
-        {
-            is_b_appended = true;
-            l1_lens.push_back(1);
-            l1 = info.add_instruction(make_op("unsqueeze", {{"axes", {1}}}), args[1]);
-        }
-        instruction_ref bl0 = l0;
-        instruction_ref bl1 = l1;
-        if(not std::equal(
-               l0_lens.rbegin() + 2, l0_lens.rend(), l1_lens.rbegin() + 2, l1_lens.rend()))
-        {
-            auto l0_it = l0_lens.begin() + l0_lens.size() - 2;
-            std::vector<std::size_t> l0_broadcasted_lens(l0_lens.begin(), l0_it);
-            auto l1_it = l1_lens.begin() + l1_lens.size() - 2;
-            std::vector<std::size_t> l1_broadcasted_lens(l1_lens.begin(), l1_it);
-            auto output_lens = compute_broadcasted_lens(l0_broadcasted_lens, l1_broadcasted_lens);
-            l0_broadcasted_lens = output_lens;
-            l0_broadcasted_lens.insert(l0_broadcasted_lens.end(), l0_it, l0_lens.end());
-            l1_broadcasted_lens = output_lens;
-            l1_broadcasted_lens.insert(l1_broadcasted_lens.end(), l1_it, l1_lens.end());
-            if(l0_lens != l0_broadcasted_lens)
-            {
-                bl0 = info.add_instruction(
-                    make_op("multibroadcast", {{"out_lens", l0_broadcasted_lens}}), l0);
-            }
-            if(l1_lens != l1_broadcasted_lens)
-            {
-                bl1 = info.add_instruction(
-                    make_op("multibroadcast", {{"out_lens", l1_broadcasted_lens}}), l1);
-            }
-        }
-        instruction_ref dot_res = info.add_instruction(make_op(opd.op_name), bl0, bl1);
-        int64_t num_axis = static_cast<int64_t>(dot_res->get_shape().lens().size());
+        auto a0 = args[0];
+        auto a1 = args[1];
+        auto s0 = a0->get_shape();
+        auto s1 = a1->get_shape();
+
+        instruction_ref dot_res;
+        bool is_a_prepended = false;
+        bool is_b_appended = false;
+        if(s0.ndim() == 1)
+        {
+            is_a_prepended = true;
+            a0 = info.add_instruction(make_op("unsqueeze", {{"axes", {0}}}), args[0]);
+        }
+        if(s1.ndim() == 1)
+        {
+            is_b_appended = true;
+            a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {1}}}), args[1]);
+        }
+        if(s0.dynamic() or s1.dynamic())
+        {
+            if(opd.op_name == "quant_dot")
+            {
+                MIGRAPHX_THROW("PARSE_MATMUL: dynamic MatMulInteger not supported");
+            }
+            auto s0_dds = a0->get_shape().to_dynamic().dyn_dims();
+            auto s1_dds = a1->get_shape().to_dynamic().dyn_dims();
+
+            // TODO: handling this case requires a new multibroadcast mode
+            if(not std::equal(
+                   s0_dds.rbegin() + 2, s0_dds.rend(), s1_dds.rbegin() + 2, s1_dds.rend()))
+            {
+                MIGRAPHX_THROW("PARSE_MATMUL: dynamic shape broadcasting not supported");
+            }
+
+            dot_res = info.add_instruction(make_op(opd.op_name), a0, a1);
+        }
+        else
+        {
+            auto s0_lens = a0->get_shape().lens();
+            auto s1_lens = a1->get_shape().lens();
+            instruction_ref ba0 = a0;
+            instruction_ref ba1 = a1;
+            // try broadcasting if dimensions other than last two do not match
+            if(not std::equal(
+                   s0_lens.rbegin() + 2, s0_lens.rend(), s1_lens.rbegin() + 2, s1_lens.rend()))
+            {
+                auto l0_it = s0_lens.begin() + s0_lens.size() - 2;
+                std::vector<std::size_t> l0_broadcasted_lens(s0_lens.begin(), l0_it);
+                auto l1_it = s1_lens.begin() + s1_lens.size() - 2;
+                std::vector<std::size_t> l1_broadcasted_lens(s1_lens.begin(), l1_it);
+                auto output_lens =
+                    compute_broadcasted_lens(l0_broadcasted_lens, l1_broadcasted_lens);
+                l0_broadcasted_lens = output_lens;
+                l0_broadcasted_lens.insert(l0_broadcasted_lens.end(), l0_it, s0_lens.end());
+                l1_broadcasted_lens = output_lens;
+                l1_broadcasted_lens.insert(l1_broadcasted_lens.end(), l1_it, s1_lens.end());
+                if(s0_lens != l0_broadcasted_lens)
+                {
+                    ba0 = info.add_instruction(
+                        make_op("multibroadcast", {{"out_lens", l0_broadcasted_lens}}), a0);
+                }
+                if(s1_lens != l1_broadcasted_lens)
+                {
+                    ba1 = info.add_instruction(
+                        make_op("multibroadcast", {{"out_lens", l1_broadcasted_lens}}), a1);
+                }
+            }
+            dot_res = info.add_instruction(make_op(opd.op_name), ba0, ba1);
+        }
+
+        // squeeze the appended or prepended dimensions
+        int64_t num_axis = dot_res->get_shape().ndim();
         if(is_a_prepended)
         {
             dot_res = info.add_instruction(make_op("squeeze", {{"axes", {num_axis - 2}}}), dot_res);
...
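The batch-dimension test above compares everything except the last two (matrix) dimensions by walking both shapes from the back. A small standalone illustration of the same std::equal pattern:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// The parser only multibroadcasts when the dims other than the last two
// differ; std::equal over reverse iterators skips the trailing matrix dims.
int main()
{
    std::vector<std::size_t> a = {2, 3, 4, 5}; // batch dims {2, 3}
    std::vector<std::size_t> b = {2, 3, 5, 6}; // batch dims {2, 3}
    bool same_batch = std::equal(a.rbegin() + 2, a.rend(), b.rbegin() + 2, b.rend());
    std::cout << std::boolalpha << same_batch << "\n"; // true -> no multibroadcast needed
}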
@@ -147,7 +147,13 @@ struct parse_pad : op_parser<parse_pad>
     {
         auto mode = info.attributes.at("mode").s();
         if(mode == "reflect")
+        {
+            if(args.front()->get_shape().dynamic())
+            {
+                MIGRAPHX_THROW("PARSE_PAD: reflect padding with dynamic shape not supported");
+            }
             return reflect_pad(info, pads, args.front());
+        }
         if(mode != "constant")
         {
             MIGRAPHX_THROW(
...
@@ -68,8 +68,7 @@ instruction_ref parse_reduce_oper(const std::string& op_name,
         }
         else
         {
-            std::size_t n_dim = args.front()->get_shape().lens().size();
-            axes.resize(n_dim);
+            axes.resize(args.front()->get_shape().ndim());
             std::iota(axes.begin(), axes.end(), 0);
         }
     }
...
@@ -49,7 +49,7 @@ struct parse_reshape : op_parser<parse_reshape>
         if(args.size() == 2)
         {
             auto s = args[1]->eval();
-            check_arg_empty(s, "Reshape: dynamic shape is not supported");
+            check_arg_empty(s, "Reshape: non-constant shape input is not supported");
             s.visit([&](auto v) { copy(v, std::back_inserter(dims)); });
         }
...
@@ -504,6 +504,31 @@ bool shape::dynamic_dimension::is_fixed() const { return this->min == this->max;

 bool shape::dynamic_dimension::has_optimal() const { return opt != 0; }

+shape::dynamic_dimension& shape::dynamic_dimension::operator+=(const std::size_t& x)
+{
+    this->min += x;
+    this->max += x;
+    if(this->opt != 0)
+    {
+        this->opt += x;
+    }
+    return *this;
+}
+
+shape::dynamic_dimension& shape::dynamic_dimension::operator-=(const std::size_t& x)
+{
+    assert(this->min >= x);
+    assert(this->max >= x);
+    this->min -= x;
+    this->max -= x;
+    if(this->opt != 0)
+    {
+        assert(this->opt >= x);
+        this->opt -= x;
+    }
+    return *this;
+}
+
 bool operator==(const shape::dynamic_dimension& x, const shape::dynamic_dimension& y)
 {
     // don't check opt if both are fixed
@@ -529,6 +554,23 @@ bool operator==(const std::size_t& x, const shape::dynamic_dimension& y) { retur
 bool operator!=(const shape::dynamic_dimension& x, const std::size_t& y) { return not(x == y); }
 bool operator!=(const std::size_t& x, const shape::dynamic_dimension& y) { return not(x == y); }

+shape::dynamic_dimension operator+(const shape::dynamic_dimension& x, const std::size_t& y)
+{
+    auto dd = x;
+    return dd += y;
+}
+
+shape::dynamic_dimension operator+(const std::size_t& x, const shape::dynamic_dimension& y)
+{
+    return y + x;
+}
+
+shape::dynamic_dimension operator-(const shape::dynamic_dimension& x, const std::size_t& y)
+{
+    auto dd = x;
+    return dd -= y;
+}
+
 bool operator==(const shape& x, const shape& y)
 {
     if(x.dynamic() and y.dynamic())
...
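A standalone sketch of the semantics these operators implement (dyn_dim is an illustrative stand-in, not the MIGraphX class): min and max always shift by the fixed amount, while an opt of 0, meaning no optimal hint, is left alone rather than shifted.

#include <cassert>
#include <cstddef>

struct dyn_dim
{
    std::size_t min, max, opt;
    dyn_dim& operator+=(std::size_t x)
    {
        min += x;
        max += x;
        if(opt != 0) // mirrors the committed operator+=: 0 stays 0
            opt += x;
        return *this;
    }
};

int main()
{
    dyn_dim with_hint{2, 6, 4};
    with_hint += 1;
    assert(with_hint.min == 3 and with_hint.max == 7 and with_hint.opt == 5);

    dyn_dim no_hint{2, 6, 0};
    no_hint += 1;
    assert(no_hint.opt == 0); // "no optimal" is preserved, not shifted to 1
    return 0;
}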
@@ -1065,11 +1065,23 @@ struct find_split_reshape
             return;
         }

+        // Only want to apply this optimization if each split output is followed by
+        // a contiguous op and a reshape
+        if(std::any_of(split_outputs.begin(), split_outputs.end(), [](auto i) {
+               if(i->outputs().size() == 1)
+               {
+                   auto cont = i->outputs().front();
+                   return cont->outputs().size() != 1;
+               }
+               return false;
+           }))
+        {
+            return;
+        }
+
         std::vector<instruction_ref> vec_rsp(split_outputs.size());
         std::transform(split_outputs.begin(), split_outputs.end(), vec_rsp.begin(), [](auto i) {
-            assert(i->outputs().size() == 1);
             auto cont = i->outputs().front();
-            assert(cont->outputs().size() == 1);
             return cont->outputs().front();
         });
...
@@ -763,16 +763,23 @@ struct find_transpose_slice
         // Compute axis before transpose to use for unsqueeze
         auto perm = ins->get_operator().to_value()["permutation"].to_vector<int64_t>();
         auto preaxis = std::find(perm.begin(), perm.end(), axis) - perm.begin();
-        // Make unsqeeze
+        // Make unsqueeze
+        std::vector<int64_t> steps(sdistance.size());
+        std::transform(
+            slice.axes.begin(),
+            slice.axes.end(),
+            sdistance.begin(),
+            steps.begin(),
+            [&](const auto ax, const auto sdis) { return ins->get_shape().lens().at(ax) / sdis; });
         auto unsqueeze = m.insert_instruction(
-            ins, make_op("unsqueeze", {{"axes", {preaxis}}, {"steps", sdistance}}), ins->inputs());
+            ins, make_op("unsqueeze", {{"axes", {preaxis}}, {"steps", steps}}), ins->inputs());
         // Make transpose
         std::transform(perm.begin(), perm.end(), perm.begin(), [&](auto i) {
-            if(i > preaxis)
+            if(i >= preaxis)
                 return i + 1;
             return i;
         });
-        perm.insert(perm.begin(), preaxis + 1);
+        perm.insert(perm.begin(), preaxis);
         auto transpose =
             m.insert_instruction(ins, make_op("transpose", {{"permutation", perm}}), unsqueeze);
         // Slice and squeeze
...
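The fix above derives the unsqueeze steps from each sliced axis length divided by its slice distance, rather than passing the distances through directly. A standalone sketch of that computation, with illustrative values:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    // mirrors: steps[i] = lens.at(slice.axes[i]) / sdistance[i]
    std::vector<int64_t> lens = {12, 8};      // lengths of the sliced axes
    std::vector<int64_t> sdistance = {4, 2};  // slice distances on those axes
    std::vector<int64_t> steps(sdistance.size());
    std::transform(lens.begin(), lens.end(), sdistance.begin(), steps.begin(),
                   [](int64_t len, int64_t sdis) { return len / sdis; });
    for(auto s : steps)
        std::cout << s << " "; // 3 4
    std::cout << "\n";
}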
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// NOLINTNEXTLINE
static const char* const gather_kernel = R"__migraphx__(
#include <migraphx/kernels/gather.hpp>
#include <migraphx/kernels/ops.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/generic_constant.hpp>
#include <args.hpp>
namespace migraphx {
extern "C" {
__global__ void gather_kernel(void* in_data, void* in_indices, void* output)
{
    make_tensors()(in_data, in_indices, output)([](auto&&... xs) {
        gather<${axis}>(xs...);
    });
}
}

} // namespace migraphx

)__migraphx__";

struct gather_compiler : compiler<gather_compiler>
{
    std::vector<std::string> names() const { return {"gather"}; }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        hip_compile_options options;
        const auto& out_s = inputs.back();
        options.set_launch_params(v, compute_global_for(ctx, out_s.elements()));
        options.inputs = inputs;
        options.output = out_s;
        options.kernel_name = "gather_kernel";
        options.virtual_inputs = inputs;
        auto axis = v.at("axis").to<std::string>();
        auto src = interpolate_string(gather_kernel, {{"axis", axis}});
        return compile_hip_code_object(src, options);
    }

    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
    {
        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -156,16 +156,25 @@ struct reduce_compiler : compiler<reduce_compiler>
     compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
     {
         value v = value::object{};
-        auto reduce_elements = get_reduce_elements(ins->inputs());
         if(op.name() == "reduce_sum")
         {
             v["reduction"] = "op::sum{}";
         }
         else if(op.name() == "reduce_mean")
         {
-            v["reduction"] = "op::sum{}";
-            v["write"] = "op::mean{" + std::to_string(reduce_elements) + "}";
+            auto reduce_elements = get_reduce_elements(ins->inputs());
+            auto reduce_type = ins->inputs().front()->get_shape().type();
+            v["reduction"] = "op::sum{}";
+            std::string mean = "op::mean{" + std::to_string(reduce_elements) + "}";
+            // Use float accumulator when reduction size is too large for half
+            if(reduce_type == shape::half_type and reduce_elements > 16384)
+                v["read"] = "compose(" + mean + ", op::convert_to<float>{})";
+            else if(contains({shape::float_type, shape::half_type, shape::double_type},
+                             reduce_type))
+                v["read"] = mean;
+            else
+                v["write"] = mean;
         }
         else if(op.name() == "reduce_max")
         {
...
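On the accumulator choice above: a low-precision accumulator stops growing once the running sum's spacing exceeds the addend, which is why reduce_mean reads through op::convert_to<float> for large half reductions. Float shows the same failure mode at 2^24 that half, with its 11-bit significand, hits orders of magnitude earlier; a standalone demonstration:

#include <iostream>

int main()
{
    // Summing 20 million ones in float stalls once the sum reaches 2^24,
    // because past that point acc + 1.0f rounds back to acc.
    float acc = 0.0f;
    for(long i = 0; i < 20'000'000; ++i)
        acc += 1.0f;
    std::cout << acc << "\n"; // 1.67772e+07 (stuck at 2^24), not 2e7
}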
@@ -187,6 +187,14 @@ constexpr auto fold(F f)
     return [=](auto&&... xs) { return fold_impl(f, static_cast<decltype(xs)&&>(xs)...); };
 }

+template <class... Fs>
+constexpr auto compose(Fs... fs)
+{
+    return fold([](auto f, auto g) {
+        return [=](auto&&... xs) { return f(g(static_cast<decltype(xs)>(xs)...)); };
+    })(fs...);
+}
+
 template <class... Ts>
 constexpr auto pack(Ts... xs)
 {
...
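compose as added above folds a list of callables right to left, so compose(f, g)(x) is f(g(x)); the reduce_mean change earlier uses it as compose(mean, op::convert_to<float>{}). A standalone two-function sketch of the same idea (compose2 here is illustrative, not the header's variadic version):

#include <iostream>

// compose2(f, g) returns a callable that applies g first, then f.
template <class F, class G>
constexpr auto compose2(F f, G g)
{
    return [=](auto&&... xs) { return f(g(static_cast<decltype(xs)&&>(xs)...)); };
}

int main()
{
    auto mean_of = [](float total) { return total / 8; };
    auto to_float = [](int x) { return static_cast<float>(x); };
    auto mean_as_read = compose2(mean_of, to_float); // like compose(mean, convert_to<float>)
    std::cout << mean_as_read(36) << "\n";            // 4.5
}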
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_KERNELS_GATHER_HPP
#define MIGRAPHX_GUARD_KERNELS_GATHER_HPP
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/shape.hpp>
#include <migraphx/kernels/algorithm.hpp>
#include <migraphx/kernels/tensor_view.hpp>
namespace migraphx {
template <int Axis, class Input, class Indices>
constexpr auto gather_shape(Input input, Indices indices)
{
    auto lengths = input.lens;
    lengths[Axis] = indices.elements();
    return make_shape(lengths, input.strides);
}

template <int Axis, class Input, class Indices, class Output>
__device__ void gather(Input input, Indices indices, Output output)
{
    auto ind = make_index();
    auto axis_dim_size = input.get_shape().lens[Axis];
    constexpr auto out_comp = gather_shape<Axis>(get_shape_c<Input>{}, get_shape_c<Indices>{});
    ind.global_stride(output.get_shape().elements(), [&](auto i) {
        auto idx = out_comp.multi(i);
        auto in_index = indices[idx[Axis]];
        auto new_in_index = (in_index < 0) ? in_index + axis_dim_size : in_index;
        idx[Axis] = new_in_index;
        output[i] = input[idx];
    });
}
} // namespace migraphx
#endif
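A host-side sketch of the gather semantics above, reduced to a 1-D input: each output element copies input[indices[i]], with negative indices wrapped by the axis length exactly as new_in_index is computed in the kernel. The values here are illustrative.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::vector<float> input = {10.f, 11.f, 12.f, 13.f};
    std::vector<int64_t> indices = {3, -1, 0};
    const int64_t axis_dim_size = static_cast<int64_t>(input.size());
    std::vector<float> output(indices.size());
    for(std::size_t i = 0; i < indices.size(); ++i)
    {
        auto in_index = indices[i];
        // mirrors: new_in_index = (in_index < 0) ? in_index + axis_dim_size : in_index
        auto new_in_index = (in_index < 0) ? in_index + axis_dim_size : in_index;
        output[i] = input[new_in_index];
    }
    for(auto v : output)
        std::cout << v << " "; // 13 13 10
    std::cout << "\n";
}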
@@ -132,9 +132,14 @@ MIGRAPHX_DEVICE_MATH_FOR(float, fmod, ::fmodf)

 // Builtin half functions
 MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, abs, ::__habs)
+MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, ceil, ::hceil)
+MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, cos, ::hcos)
 MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, exp, ::hexp)
+MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, floor, ::hfloor)
+MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, isnan, ::__hisnan)
 MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, log, ::hlog)
 MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, rsqrt, ::hrsqrt)
+// MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, sin, ::hsin)
 MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, sqrt, ::hsqrt)

 // Use float to compute half overload
@@ -144,16 +149,11 @@ MIGRAPHX_DEVICE_MATH_HALF(asin, ::asin)
 MIGRAPHX_DEVICE_MATH_HALF(asinh, ::asinh)
 MIGRAPHX_DEVICE_MATH_HALF(atan, ::atan)
 MIGRAPHX_DEVICE_MATH_HALF(atanh, ::atanh)
-MIGRAPHX_DEVICE_MATH_HALF(ceil, ::ceil)
-MIGRAPHX_DEVICE_MATH_HALF(cos, ::cos)
 MIGRAPHX_DEVICE_MATH_HALF(cosh, ::cosh)
 MIGRAPHX_DEVICE_MATH_HALF(erf, ::erf)
-MIGRAPHX_DEVICE_MATH_HALF(floor, ::floor)
-MIGRAPHX_DEVICE_MATH_HALF(isnan, ::isnan)
 MIGRAPHX_DEVICE_MATH_HALF(pow, ::pow)
 MIGRAPHX_DEVICE_MATH_HALF(remainder, ::remainder)
 MIGRAPHX_DEVICE_MATH_HALF(round, ::round)
-MIGRAPHX_DEVICE_MATH_HALF(sin, ::sin)
 MIGRAPHX_DEVICE_MATH_HALF(sinh, ::sinh)
 MIGRAPHX_DEVICE_MATH_HALF(tan, ::tan)
 MIGRAPHX_DEVICE_MATH_HALF(tanh, ::tanh)
@@ -166,19 +166,19 @@ MIGRAPHX_DEVICE_MATH_HALF(fmod, ::fmod)
 // at this time are: exp2, exp10, log2, log10, isinf
 MIGRAPHX_DEVICE_MATH_HALF2(abs, ::__habs2)
 MIGRAPHX_DEVICE_MATH_HALF2(ceil, ::h2ceil)
-MIGRAPHX_DEVICE_MATH_HALF2(floor, ::h2floor)
-MIGRAPHX_DEVICE_MATH_HALF2(sin, ::h2sin)
 MIGRAPHX_DEVICE_MATH_HALF2(cos, ::h2cos)
 MIGRAPHX_DEVICE_MATH_HALF2(exp, ::h2exp)
-MIGRAPHX_DEVICE_MATH_HALF2(exp2, ::h2exp2)
 MIGRAPHX_DEVICE_MATH_HALF2(exp10, ::h2exp10)
-MIGRAPHX_DEVICE_MATH_HALF2(log2, ::h2log2)
+MIGRAPHX_DEVICE_MATH_HALF2(exp2, ::h2exp2)
+MIGRAPHX_DEVICE_MATH_HALF2(floor, ::h2floor)
+MIGRAPHX_DEVICE_MATH_HALF2(isinf, ::__hisinf2)
+MIGRAPHX_DEVICE_MATH_HALF2(isnan, ::__hisnan2)
 MIGRAPHX_DEVICE_MATH_HALF2(log, ::h2log)
 MIGRAPHX_DEVICE_MATH_HALF2(log10, ::h2log10)
+MIGRAPHX_DEVICE_MATH_HALF2(log2, ::h2log2)
 MIGRAPHX_DEVICE_MATH_HALF2(rsqrt, ::h2rsqrt)
+// MIGRAPHX_DEVICE_MATH_HALF2(sin, ::h2sin)
 MIGRAPHX_DEVICE_MATH_HALF2(sqrt, ::h2sqrt)
-MIGRAPHX_DEVICE_MATH_HALF2(isinf, ::__hisinf2)
-MIGRAPHX_DEVICE_MATH_HALF2(isnan, ::__hisnan2)

 template <class T, class U>
 constexpr auto where(bool cond, const T& a, const U& b)
@@ -218,6 +218,14 @@ constexpr auto min(const T& a, const U& b)
     return min<common_type_t<T, U>>(a, b);
 }

+// Sin for half is broken on hip, so use cos instead
+template <class T, MIGRAPHX_REQUIRES(is_same<vec_type<T>, half>{})>
+constexpr T sin(T x)
+{
+    constexpr const T shift = M_PI_2;
+    return migraphx::cos(shift - x);
+}
+
 MIGRAPHX_DEVICE_MATH_VEC(abs)
 MIGRAPHX_DEVICE_MATH_VEC(acos)
 MIGRAPHX_DEVICE_MATH_VEC(acosh)
...
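A quick standalone check of the identity behind the workaround, sin(x) = cos(pi/2 - x), evaluated in float here since the header applies it to half, where ::hsin is unreliable:

#include <cmath>
#include <iostream>

int main()
{
    const float half_pi = 1.5707964f; // pi/2, spelled out to stay portable
    for(float x : {-1.0f, 0.0f, 0.5f, 2.0f})
        std::cout << x << ": " << std::sin(x) << " vs " << std::cos(half_pi - x) << "\n";
}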