Commit a797f890 authored by Paul Fultz II, committed by mvermeulen
Browse files

Fix bug in bert accuracy (#385)

* Fix bug in bert accuracy

* Formatting

* add another test

* Fix add and overflow

* Formatting

* Fix bug in shape_for_each

* Use front instead of iterator

* Use result.front()

* Split add_unary files

* Formatting

* Fix incorrect last index

* Remove comment

* Inline function

* Fix carry check

* Fix metadata errors

* Formatting

* Reflow

* Reflow
parent a625f7b4
...@@ -14,11 +14,12 @@ void shape_for_each(const migraphx::shape& s, F f) ...@@ -14,11 +14,12 @@ void shape_for_each(const migraphx::shape& s, F f)
// Ensure calls to f use const ref to vector // Ensure calls to f use const ref to vector
auto call = [&f](const std::vector<std::size_t>& i) { f(i); }; auto call = [&f](const std::vector<std::size_t>& i) { f(i); };
std::vector<std::size_t> indices(s.lens().size()); std::vector<std::size_t> indices(s.lens().size());
for(std::size_t i = 0; i < s.elements(); i++) shape ss{s.type(), s.lens()};
for(std::size_t i = 0; i < ss.elements(); i++)
{ {
std::transform(s.strides().begin(), std::transform(ss.strides().begin(),
s.strides().end(), ss.strides().end(),
s.lens().begin(), ss.lens().begin(),
indices.begin(), indices.begin(),
[&](std::size_t stride, std::size_t len) { [&](std::size_t stride, std::size_t len) {
assert(len > 0 and stride > 0); assert(len > 0 and stride > 0);
......
...@@ -945,7 +945,7 @@ struct onnx_parser ...@@ -945,7 +945,7 @@ struct onnx_parser
l_val.visit([&](auto val) { l_val.visit([&](auto val) {
using val_type = std::remove_cv_t<typename decltype(val)::value_type>; using val_type = std::remove_cv_t<typename decltype(val)::value_type>;
// l_val contains only one element // l_val contains only one element
std::vector<val_type> out_vec(s.elements(), *val.begin()); std::vector<val_type> out_vec(s.elements(), val.front());
l_out = literal(s, out_vec); l_out = literal(s, out_vec);
}); });
......
...@@ -32,8 +32,10 @@ struct shape_impl ...@@ -32,8 +32,10 @@ struct shape_impl
assert(m_lens.size() == m_strides.size()); assert(m_lens.size() == m_strides.size());
// assert(std::any_of(m_strides.begin(), m_strides.end(), [](auto x) { return x > 0; }) and // assert(std::any_of(m_strides.begin(), m_strides.end(), [](auto x) { return x > 0; }) and
// "At least one stride must be non-zero"); // "At least one stride must be non-zero");
m_standard = this->elements() == this->element_space() and m_standard =
std::is_sorted(m_strides.rbegin(), m_strides.rend()); this->elements() == this->element_space() and
std::is_sorted(m_strides.rbegin(), m_strides.rend()) and
std::none_of(m_strides.begin(), m_strides.end(), [](auto x) { return x == 0; });
} }
shape::type_t m_type; shape::type_t m_type;
std::vector<std::size_t> m_lens; std::vector<std::size_t> m_lens;
...@@ -160,7 +162,21 @@ bool shape::packed() const { return this->elements() == this->element_space(); } ...@@ -160,7 +162,21 @@ bool shape::packed() const { return this->elements() == this->element_space(); }
bool shape::transposed() const bool shape::transposed() const
{ {
if(this->broadcasted())
{
// TODO: Use a filter_iterator instead
std::vector<std::size_t> s;
s.reserve(this->strides().size());
std::copy_if(this->strides().begin(),
this->strides().end(),
std::back_inserter(s),
[](std::size_t x) { return x != 0; });
return not std::is_sorted(s.rbegin(), s.rend());
}
else
{
return not std::is_sorted(this->strides().rbegin(), this->strides().rend()); return not std::is_sorted(this->strides().rbegin(), this->strides().rend());
}
} }
bool shape::broadcasted() const bool shape::broadcasted() const
......
...@@ -11,51 +11,55 @@ if(NOT TARGET MIOpen) ...@@ -11,51 +11,55 @@ if(NOT TARGET MIOpen)
endif() endif()
add_library(migraphx_device add_library(migraphx_device
device/acos.cpp
device/add.cpp device/add.cpp
device/add_clip.cpp
device/add_relu.cpp
device/add_sigmoid.cpp
device/add_tanh.cpp
device/argmax.cpp device/argmax.cpp
device/argmin.cpp device/argmin.cpp
device/max.cpp
device/min.cpp
device/mul_add.cpp
device/exp.cpp
device/erf.cpp
device/log.cpp
device/sin.cpp
device/cos.cpp
device/tan.cpp
device/sinh.cpp
device/cosh.cpp
device/tanh.cpp
device/asin.cpp device/asin.cpp
device/acos.cpp
device/atan.cpp device/atan.cpp
device/relu.cpp device/ceil.cpp
device/add_unary.cpp device/clip.cpp
device/concat.cpp
device/contiguous.cpp device/contiguous.cpp
device/logsoftmax.cpp
device/softmax.cpp
device/sigmoid.cpp
device/convert.cpp device/convert.cpp
device/mul.cpp device/cos.cpp
device/concat.cpp device/cosh.cpp
device/pad.cpp device/div.cpp
device/erf.cpp
device/exp.cpp
device/floor.cpp
device/gather.cpp device/gather.cpp
device/sub.cpp
device/int8_gemm_pack.cpp device/int8_gemm_pack.cpp
device/div.cpp device/log.cpp
device/clip.cpp device/logsoftmax.cpp
device/reduce_sum.cpp device/max.cpp
device/rsqrt.cpp device/min.cpp
device/round.cpp device/mul.cpp
device/sqrt.cpp device/mul_add.cpp
device/mul_add_relu.cpp
device/pad.cpp
device/pow.cpp
device/reduce_max.cpp
device/reduce_mean.cpp device/reduce_mean.cpp
device/reduce_min.cpp device/reduce_min.cpp
device/reduce_max.cpp device/reduce_sum.cpp
device/pow.cpp device/relu.cpp
device/sqdiff.cpp device/round.cpp
device/rsqrt.cpp
device/sigmoid.cpp
device/sign.cpp device/sign.cpp
device/ceil.cpp device/sin.cpp
device/floor.cpp device/sinh.cpp
device/softmax.cpp
device/sqdiff.cpp
device/sqrt.cpp
device/sub.cpp
device/tan.cpp
device/tanh.cpp
) )
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device) set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_set_soversion(migraphx_device ${PROJECT_VERSION}) rocm_set_soversion(migraphx_device ${PROJECT_VERSION})
......
#include <migraphx/gpu/device/add_unary.hpp> #include <migraphx/gpu/device/add_clip.hpp>
#include <migraphx/gpu/device/nary.hpp> #include <migraphx/gpu/device/nary.hpp>
namespace migraphx { namespace migraphx {
...@@ -6,16 +6,6 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -6,16 +6,6 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace device { namespace device {
void mul_add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3)
{
nary(stream, result, arg1, arg2, arg3)(
[](auto x, auto a, auto b) { return std::max<decltype(a * x + b)>(0, a * x + b); });
}
void add_clip(hipStream_t stream, void add_clip(hipStream_t stream,
const argument& result, const argument& result,
const argument& arg1, const argument& arg1,
...@@ -28,32 +18,6 @@ void add_clip(hipStream_t stream, ...@@ -28,32 +18,6 @@ void add_clip(hipStream_t stream,
}); });
} }
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2)
{
nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return std::max<decltype(x + y)>(0, x + y); });
}
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2)
{
nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return 1.f / (1.f + ::exp(to_hip_type(-(x + y)))); });
}
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2)
{
nary(stream, result, arg1, arg2)([](auto x, auto y) { return ::tanh(to_hip_type(x + y)); });
}
void add_clip(hipStream_t stream, void add_clip(hipStream_t stream,
const argument& result, const argument& result,
const argument& arg1, const argument& arg1,
...@@ -67,36 +31,6 @@ void add_clip(hipStream_t stream, ...@@ -67,36 +31,6 @@ void add_clip(hipStream_t stream,
}); });
} }
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3)
{
nary(stream, result, arg1, arg2, arg3)(
[](auto x, auto y, auto z) { return std::max<decltype(x + y + z)>(0, x + y + z); });
}
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3)
{
nary(stream, result, arg1, arg2, arg3)(
[](auto x, auto y, auto z) { return 1.f / (1.f + ::exp(to_hip_type(-(x + y + z)))); });
}
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3)
{
nary(stream, result, arg1, arg2, arg3)(
[](auto x, auto y, auto z) { return ::tanh(to_hip_type(x + y + z)); });
}
} // namespace device } // namespace device
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + ReLU, two-input overload.
// Hands nary a functor that sums its two operands and returns max(0, sum),
// where the zero is converted to the type of the sum.
void add_relu(hipStream_t stream,
              const argument& result,
              const argument& arg1,
              const argument& arg2)
{
    nary(stream, result, arg1, arg2)([](auto lhs, auto rhs) {
        // Name the sum once so the comparison type and the value stay in sync.
        auto sum = lhs + rhs;
        return std::max<decltype(sum)>(0, sum);
    });
}
// Fused add + ReLU, three-input overload.
// Hands nary a functor that sums its three operands (left to right) and
// returns max(0, sum), where the zero is converted to the type of the sum.
void add_relu(hipStream_t stream,
              const argument& result,
              const argument& arg1,
              const argument& arg2,
              const argument& arg3)
{
    nary(stream, result, arg1, arg2, arg3)([](auto first, auto second, auto third) {
        auto sum = first + second + third;
        return std::max<decltype(sum)>(0, sum);
    });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/add_sigmoid.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + sigmoid, two-input overload.
// Hands nary a functor that evaluates 1 / (1 + exp(-(x + y))) on its two
// operands; the negated sum goes through to_hip_type before ::exp.
void add_sigmoid(hipStream_t stream,
                 const argument& result,
                 const argument& arg1,
                 const argument& arg2)
{
    nary(stream, result, arg1, arg2)([](auto lhs, auto rhs) {
        auto negated_sum = -(lhs + rhs);
        return 1.f / (1.f + ::exp(to_hip_type(negated_sum)));
    });
}
// Fused add + sigmoid, three-input overload.
// Hands nary a functor that evaluates 1 / (1 + exp(-(x + y + z))) on its
// three operands; the negated sum goes through to_hip_type before ::exp.
void add_sigmoid(hipStream_t stream,
                 const argument& result,
                 const argument& arg1,
                 const argument& arg2,
                 const argument& arg3)
{
    nary(stream, result, arg1, arg2, arg3)([](auto first, auto second, auto third) {
        auto negated_sum = -(first + second + third);
        return 1.f / (1.f + ::exp(to_hip_type(negated_sum)));
    });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/add_tanh.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + tanh, two-input overload.
// Hands nary a functor that sums its two operands, converts the sum with
// to_hip_type, and returns ::tanh of it.
void add_tanh(hipStream_t stream,
              const argument& result,
              const argument& arg1,
              const argument& arg2)
{
    nary(stream, result, arg1, arg2)([](auto lhs, auto rhs) {
        auto sum = lhs + rhs;
        return ::tanh(to_hip_type(sum));
    });
}
// Fused add + tanh, three-input overload.
// Hands nary a functor that sums its three operands (left to right), converts
// the sum with to_hip_type, and returns ::tanh of it.
void add_tanh(hipStream_t stream,
              const argument& result,
              const argument& arg1,
              const argument& arg2,
              const argument& arg3)
{
    nary(stream, result, arg1, arg2, arg3)([](auto first, auto second, auto third) {
        auto sum = first + second + third;
        return ::tanh(to_hip_type(sum));
    });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -135,20 +135,21 @@ struct hip_array ...@@ -135,20 +135,21 @@ struct hip_array
MIGRAPHX_DEVICE_CONSTEXPR hip_array carry(hip_array result) const MIGRAPHX_DEVICE_CONSTEXPR hip_array carry(hip_array result) const
{ {
std::ptrdiff_t rem = 0; uint32_t overflow = 0;
for(std::ptrdiff_t i = result.size() - 1; i >= 0; i--) for(std::ptrdiff_t i = result.size() - 1; i > 0; i--)
{ {
auto z = result[i] + rem; auto z = result[i] + overflow;
rem = z - std::ptrdiff_t(d[i]) + 1; // Reset overflow
if(rem > 0) overflow = 0;
z -= rem; // Compute overflow using while loop instead of mod
else while(z >= d[i])
rem = 0; {
z -= d[i];
overflow += 1;
}
result[i] = z; result[i] = z;
} }
// Add overflows to the back result[0] += overflow;
if(rem > 0)
result.back() += rem;
return result; return result;
} }
}; };
......
...@@ -9,7 +9,7 @@ namespace gpu { ...@@ -9,7 +9,7 @@ namespace gpu {
namespace device { namespace device {
constexpr const std::size_t fast_div_shift = 42; constexpr const std::size_t fast_div_shift = 42;
MIGRAPHX_DEVICE_CONSTEXPR std::size_t encode_divisor(std::size_t divisor) inline std::size_t encode_divisor(std::size_t divisor)
{ {
if(divisor == 0) if(divisor == 0)
return 0; return 0;
...@@ -19,7 +19,7 @@ MIGRAPHX_DEVICE_CONSTEXPR std::size_t encode_divisor(std::size_t divisor) ...@@ -19,7 +19,7 @@ MIGRAPHX_DEVICE_CONSTEXPR std::size_t encode_divisor(std::size_t divisor)
inline constexpr bool is_divisor_encodable(std::size_t i) inline constexpr bool is_divisor_encodable(std::size_t i)
{ {
return i < std::size_t{1} << (fast_div_shift / 2); return i < (std::size_t{1} << (fast_div_shift / 2));
} }
MIGRAPHX_DEVICE_CONSTEXPR std::size_t fast_div(std::size_t dividend, std::size_t encoded_divisor) MIGRAPHX_DEVICE_CONSTEXPR std::size_t fast_div(std::size_t dividend, std::size_t encoded_divisor)
......
...@@ -15,18 +15,12 @@ struct multi_index ...@@ -15,18 +15,12 @@ struct multi_index
{ {
using hip_index = hip_array<std::size_t, N>; using hip_index = hip_array<std::size_t, N>;
hip_index id{}; hip_index id{};
std::size_t stride = 0; hip_index stride{};
MIGRAPHX_DEVICE_CONSTEXPR hip_index add_stride(hip_index i) const
{
i.back() += stride;
return i;
}
template <class F> template <class F>
MIGRAPHX_DEVICE_CONSTEXPR void for_stride(hip_index n, F f) const MIGRAPHX_DEVICE_CONSTEXPR void for_stride(hip_index n, F f) const
{ {
for(hip_index i = id; i < n; i = n.carry(add_stride(i))) for(hip_index i = id; i < n; i = n.carry(i + stride))
{ {
f(i); f(i);
} }
...@@ -37,7 +31,7 @@ template <std::size_t N> ...@@ -37,7 +31,7 @@ template <std::size_t N>
MIGRAPHX_DEVICE_CONSTEXPR multi_index<N> MIGRAPHX_DEVICE_CONSTEXPR multi_index<N>
make_multi_index(const hip_shape<N>& s, std::size_t i, std::size_t n) make_multi_index(const hip_shape<N>& s, std::size_t i, std::size_t n)
{ {
return {s.multi(i), n}; return {s.multi(i), s.multi(n)};
} }
template <std::size_t N> template <std::size_t N>
...@@ -51,13 +45,22 @@ template <std::size_t N> ...@@ -51,13 +45,22 @@ template <std::size_t N>
inline auto mi_launch(hipStream_t stream, const hip_shape<N>& s, std::size_t local = 1024) inline auto mi_launch(hipStream_t stream, const hip_shape<N>& s, std::size_t local = 1024)
{ {
assert(s.standard); assert(s.standard);
assert(s.elements() > 0);
std::size_t n = s.elements(); std::size_t n = s.elements();
std::size_t groups = (n + local - 1) / local; std::size_t groups = (n + local - 1) / local;
std::size_t nglobal = std::min<std::size_t>(128, groups) * local; std::size_t nglobal = std::min<std::size_t>(128, groups) * local;
assert(groups > 0);
assert(nglobal > 0);
auto nglobal_multi = s.multi(nglobal);
// Skip checking this, since this will cause metadata to not be generated
// for some unknown reason.
//
// assert(std::any_of(nglobal_multi.begin(), nglobal_multi.end(), [](auto x){return x>0;}));
return [=](auto f) { return [=](auto f) {
launch(stream, nglobal, local)([=](auto idx) { launch(stream, nglobal, local)([=](auto idx) {
auto midx = make_multi_index(s, idx.global, nglobal); auto midx = make_multi_index(s, idx.global, nglobal_multi);
midx.for_stride(s.lens, [&](auto i) { f(i); }); midx.for_stride(s.lens, [&](auto i) { f(i); });
}); });
}; };
......
...@@ -304,7 +304,8 @@ void nary_impl(hipStream_t stream, F f, argument result, Arguments... args) ...@@ -304,7 +304,8 @@ void nary_impl(hipStream_t stream, F f, argument result, Arguments... args)
MIGRAPHX_TRACE_NARY_FUNCTION MIGRAPHX_TRACE_NARY_FUNCTION
const auto shapes = make_array(args.get_shape()...); const auto shapes = make_array(args.get_shape()...);
const bool standard = all_of(shapes, [](const shape& s) { return s.standard(); }); const bool standard = all_of(shapes, [](const shape& s) { return s.standard(); });
const bool packed = all_of(shapes, [](const shape& s) { return s.packed(); }); const bool packed =
all_of(shapes, [](const shape& s) { return s.packed() and not s.broadcasted(); });
const bool same_shapes = const bool same_shapes =
all_of(shapes, [&](const shape& s) { return s == result.get_shape(); }); all_of(shapes, [&](const shape& s) { return s == result.get_shape(); });
const bool same_input_shapes = all_of(shapes, [&](const shape& s) { return s == shapes[0]; }); const bool same_input_shapes = all_of(shapes, [&](const shape& s) { return s == shapes[0]; });
......
...@@ -70,14 +70,15 @@ struct hip_shape ...@@ -70,14 +70,15 @@ struct hip_shape
{ {
hip_index result; hip_index result;
std::size_t tidx = idx; std::size_t tidx = idx;
for(std::ptrdiff_t is = result.size() - 1; is >= 0; is--) for(std::ptrdiff_t is = result.size() - 1; is > 0; is--)
{ {
// result[is] = tidx % lens[is]; // result[is] = tidx % lens[is];
// tidx = tdix / lens[is]; // tidx = tidx / lens[is];
auto q = fast_div(tidx, divs[is]); auto q = fast_div(tidx, divs[is]);
result[is] = remainder(q, tidx, lens[is]); result[is] = remainder(q, tidx, lens[is]);
tidx = q; tidx = q;
} }
result[0] = tidx;
return result; return result;
} }
}; };
......
#include <migraphx/gpu/device/add_unary.hpp> #include <migraphx/gpu/device/mul_add.hpp>
#include <migraphx/gpu/device/nary.hpp> #include <migraphx/gpu/device/nary.hpp>
namespace migraphx { namespace migraphx {
......
#include <migraphx/gpu/device/mul_add_relu.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused multiply-add + ReLU.
// Hands nary a functor taking (x, a, b) from (arg1, arg2, arg3) that computes
// a * x + b and returns max(0, that value), with the zero converted to the
// type of the multiply-add result.
void mul_add_relu(hipStream_t stream,
                  const argument& result,
                  const argument& arg1,
                  const argument& arg2,
                  const argument& arg3)
{
    nary(stream, result, arg1, arg2, arg3)([](auto value, auto scale, auto offset) {
        auto affine = scale * value + offset;
        return std::max<decltype(affine)>(0, affine);
    });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -5,7 +5,11 @@ ...@@ -5,7 +5,11 @@
#include <migraphx/gpu/convolution.hpp> #include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/oper.hpp> #include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/mul_add.hpp> #include <migraphx/gpu/device/mul_add.hpp>
#include <migraphx/gpu/device/add_unary.hpp> #include <migraphx/gpu/device/add_clip.hpp>
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/add_sigmoid.hpp>
#include <migraphx/gpu/device/add_tanh.hpp>
#include <migraphx/gpu/device/mul_add_relu.hpp>
#include <migraphx/gpu/device/add.hpp> #include <migraphx/gpu/device/add.hpp>
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/array.hpp> #include <migraphx/array.hpp>
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_UNARY_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_UNARY_HPP #define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_CLIP_HPP
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
...@@ -11,12 +11,6 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -11,12 +11,6 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace device { namespace device {
void mul_add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
void add_clip(hipStream_t stream, void add_clip(hipStream_t stream,
const argument& result, const argument& result,
const argument& arg1, const argument& arg1,
...@@ -24,21 +18,6 @@ void add_clip(hipStream_t stream, ...@@ -24,21 +18,6 @@ void add_clip(hipStream_t stream,
float max, float max,
float min); float min);
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
void add_clip(hipStream_t stream, void add_clip(hipStream_t stream,
const argument& result, const argument& result,
const argument& arg1, const argument& arg1,
...@@ -47,24 +26,6 @@ void add_clip(hipStream_t stream, ...@@ -47,24 +26,6 @@ void add_clip(hipStream_t stream,
float max, float max,
float min); float min);
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
} // namespace device } // namespace device
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + ReLU, two-input overload.
// The definition added in this commit passes the functor max(0, x + y) to
// nary(stream, result, arg1, arg2).
// NOTE(review): presumably applied elementwise with output written to
// `result` - confirm against nary.
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
// Fused add + ReLU, three-input overload.
// The definition added in this commit passes the functor max(0, x + y + z) to
// nary(stream, result, arg1, arg2, arg3).
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_SIGMOID_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_SIGMOID_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + sigmoid, two-input overload.
// The definition added in this commit passes the functor
// 1 / (1 + exp(-(x + y))) to nary(stream, result, arg1, arg2).
// NOTE(review): presumably applied elementwise with output written to
// `result` - confirm against nary.
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
// Fused add + sigmoid, three-input overload.
// The definition added in this commit passes the functor
// 1 / (1 + exp(-(x + y + z))) to nary(stream, result, arg1, arg2, arg3).
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_TANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_TANH_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + tanh, two-input overload.
// The definition added in this commit passes the functor tanh(x + y) to
// nary(stream, result, arg1, arg2).
// NOTE(review): presumably applied elementwise with output written to
// `result` - confirm against nary.
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
// Fused add + tanh, three-input overload.
// The definition added in this commit passes the functor tanh(x + y + z) to
// nary(stream, result, arg1, arg2, arg3).
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment