Commit a797f890 authored by Paul Fultz II, committed by mvermeulen
Browse files

Fix bug in bert accuracy (#385)

* Fix bug in bert accuracy

* Formatting

* add another test

* Fix add and overflow

* Formatting

* Fix bug in shape_for_each

* Use front instead of iterator

* Use result.front()

* Split add_unary files

* Formatting

* Fix incorrect last index

* Remove comment

* Inline function

* Fix carry check

* Fix metadata errors

* Formatting

* Reflow

* Reflow
parent a625f7b4
......@@ -14,11 +14,12 @@ void shape_for_each(const migraphx::shape& s, F f)
// Ensure calls to f use const ref to vector
auto call = [&f](const std::vector<std::size_t>& i) { f(i); };
std::vector<std::size_t> indices(s.lens().size());
for(std::size_t i = 0; i < s.elements(); i++)
shape ss{s.type(), s.lens()};
for(std::size_t i = 0; i < ss.elements(); i++)
{
std::transform(s.strides().begin(),
s.strides().end(),
s.lens().begin(),
std::transform(ss.strides().begin(),
ss.strides().end(),
ss.lens().begin(),
indices.begin(),
[&](std::size_t stride, std::size_t len) {
assert(len > 0 and stride > 0);
......
......@@ -945,7 +945,7 @@ struct onnx_parser
l_val.visit([&](auto val) {
using val_type = std::remove_cv_t<typename decltype(val)::value_type>;
// l_val contains only one element
std::vector<val_type> out_vec(s.elements(), *val.begin());
std::vector<val_type> out_vec(s.elements(), val.front());
l_out = literal(s, out_vec);
});
......
......@@ -32,8 +32,10 @@ struct shape_impl
assert(m_lens.size() == m_strides.size());
// assert(std::any_of(m_strides.begin(), m_strides.end(), [](auto x) { return x > 0; }) and
// "At least one stride must be non-zero");
m_standard = this->elements() == this->element_space() and
std::is_sorted(m_strides.rbegin(), m_strides.rend());
m_standard =
this->elements() == this->element_space() and
std::is_sorted(m_strides.rbegin(), m_strides.rend()) and
std::none_of(m_strides.begin(), m_strides.end(), [](auto x) { return x == 0; });
}
shape::type_t m_type;
std::vector<std::size_t> m_lens;
......@@ -160,7 +162,21 @@ bool shape::packed() const { return this->elements() == this->element_space(); }
bool shape::transposed() const
{
if(this->broadcasted())
{
// TODO: Use a filter_iterator instead
std::vector<std::size_t> s;
s.reserve(this->strides().size());
std::copy_if(this->strides().begin(),
this->strides().end(),
std::back_inserter(s),
[](std::size_t x) { return x != 0; });
return not std::is_sorted(s.rbegin(), s.rend());
}
else
{
return not std::is_sorted(this->strides().rbegin(), this->strides().rend());
}
}
bool shape::broadcasted() const
......
......@@ -11,51 +11,55 @@ if(NOT TARGET MIOpen)
endif()
add_library(migraphx_device
device/acos.cpp
device/add.cpp
device/add_clip.cpp
device/add_relu.cpp
device/add_sigmoid.cpp
device/add_tanh.cpp
device/argmax.cpp
device/argmin.cpp
device/max.cpp
device/min.cpp
device/mul_add.cpp
device/exp.cpp
device/erf.cpp
device/log.cpp
device/sin.cpp
device/cos.cpp
device/tan.cpp
device/sinh.cpp
device/cosh.cpp
device/tanh.cpp
device/asin.cpp
device/acos.cpp
device/atan.cpp
device/relu.cpp
device/add_unary.cpp
device/ceil.cpp
device/clip.cpp
device/concat.cpp
device/contiguous.cpp
device/logsoftmax.cpp
device/softmax.cpp
device/sigmoid.cpp
device/convert.cpp
device/mul.cpp
device/concat.cpp
device/pad.cpp
device/cos.cpp
device/cosh.cpp
device/div.cpp
device/erf.cpp
device/exp.cpp
device/floor.cpp
device/gather.cpp
device/sub.cpp
device/int8_gemm_pack.cpp
device/div.cpp
device/clip.cpp
device/reduce_sum.cpp
device/rsqrt.cpp
device/round.cpp
device/sqrt.cpp
device/log.cpp
device/logsoftmax.cpp
device/max.cpp
device/min.cpp
device/mul.cpp
device/mul_add.cpp
device/mul_add_relu.cpp
device/pad.cpp
device/pow.cpp
device/reduce_max.cpp
device/reduce_mean.cpp
device/reduce_min.cpp
device/reduce_max.cpp
device/pow.cpp
device/sqdiff.cpp
device/reduce_sum.cpp
device/relu.cpp
device/round.cpp
device/rsqrt.cpp
device/sigmoid.cpp
device/sign.cpp
device/ceil.cpp
device/floor.cpp
device/sin.cpp
device/sinh.cpp
device/softmax.cpp
device/sqdiff.cpp
device/sqrt.cpp
device/sub.cpp
device/tan.cpp
device/tanh.cpp
)
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_set_soversion(migraphx_device ${PROJECT_VERSION})
......
#include <migraphx/gpu/device/add_unary.hpp>
#include <migraphx/gpu/device/add_clip.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
......@@ -6,16 +6,6 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void mul_add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3)
{
nary(stream, result, arg1, arg2, arg3)(
[](auto x, auto a, auto b) { return std::max<decltype(a * x + b)>(0, a * x + b); });
}
void add_clip(hipStream_t stream,
const argument& result,
const argument& arg1,
......@@ -28,32 +18,6 @@ void add_clip(hipStream_t stream,
});
}
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2)
{
nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return std::max<decltype(x + y)>(0, x + y); });
}
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2)
{
nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return 1.f / (1.f + ::exp(to_hip_type(-(x + y)))); });
}
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2)
{
nary(stream, result, arg1, arg2)([](auto x, auto y) { return ::tanh(to_hip_type(x + y)); });
}
void add_clip(hipStream_t stream,
const argument& result,
const argument& arg1,
......@@ -67,36 +31,6 @@ void add_clip(hipStream_t stream,
});
}
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3)
{
nary(stream, result, arg1, arg2, arg3)(
[](auto x, auto y, auto z) { return std::max<decltype(x + y + z)>(0, x + y + z); });
}
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3)
{
nary(stream, result, arg1, arg2, arg3)(
[](auto x, auto y, auto z) { return 1.f / (1.f + ::exp(to_hip_type(-(x + y + z)))); });
}
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3)
{
nary(stream, result, arg1, arg2, arg3)(
[](auto x, auto y, auto z) { return ::tanh(to_hip_type(x + y + z)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + ReLU over two inputs.
// Builds a functor that returns max(0, x + y) for the operand values it is
// called with and hands it to `nary` together with the stream, the result
// argument and both inputs. (How `nary` iterates over the arguments is defined
// elsewhere -- presumably elementwise; confirm against nary.hpp.)
void add_relu(hipStream_t stream,
              const argument& result,
              const argument& arg1,
              const argument& arg2)
{
    auto relu_of_sum = [](auto x, auto y) {
        auto sum = x + y;
        // The explicit template argument makes the literal 0 convert to the sum's type.
        return std::max<decltype(x + y)>(0, sum);
    };
    nary(stream, result, arg1, arg2)(relu_of_sum);
}
// Fused add + ReLU over three inputs.
// Same as the two-input overload, except the functor handed to `nary` returns
// max(0, x + y + z).
void add_relu(hipStream_t stream,
              const argument& result,
              const argument& arg1,
              const argument& arg2,
              const argument& arg3)
{
    auto relu_of_sum = [](auto x, auto y, auto z) {
        auto sum = x + y + z;
        // The explicit template argument makes the literal 0 convert to the sum's type.
        return std::max<decltype(x + y + z)>(0, sum);
    };
    nary(stream, result, arg1, arg2, arg3)(relu_of_sum);
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/add_sigmoid.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + sigmoid over two inputs.
// The functor handed to `nary` evaluates 1 / (1 + exp(-(x + y))); the negated
// sum goes through to_hip_type before reaching ::exp.
void add_sigmoid(hipStream_t stream,
                 const argument& result,
                 const argument& arg1,
                 const argument& arg2)
{
    auto sigmoid_of_sum = [](auto x, auto y) {
        auto sum = x + y;
        return 1.f / (1.f + ::exp(to_hip_type(-sum)));
    };
    nary(stream, result, arg1, arg2)(sigmoid_of_sum);
}
// Fused add + sigmoid over three inputs.
// The functor handed to `nary` evaluates 1 / (1 + exp(-(x + y + z))); the
// negated sum goes through to_hip_type before reaching ::exp.
void add_sigmoid(hipStream_t stream,
                 const argument& result,
                 const argument& arg1,
                 const argument& arg2,
                 const argument& arg3)
{
    auto sigmoid_of_sum = [](auto x, auto y, auto z) {
        auto sum = x + y + z;
        return 1.f / (1.f + ::exp(to_hip_type(-sum)));
    };
    nary(stream, result, arg1, arg2, arg3)(sigmoid_of_sum);
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/add_tanh.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + tanh over two inputs.
// The functor handed to `nary` returns ::tanh of (x + y) after the sum has been
// passed through to_hip_type.
void add_tanh(hipStream_t stream,
              const argument& result,
              const argument& arg1,
              const argument& arg2)
{
    auto tanh_of_sum = [](auto x, auto y) {
        return ::tanh(to_hip_type(x + y));
    };
    nary(stream, result, arg1, arg2)(tanh_of_sum);
}
// Fused add + tanh over three inputs.
// The functor handed to `nary` returns ::tanh of (x + y + z) after the sum has
// been passed through to_hip_type.
void add_tanh(hipStream_t stream,
              const argument& result,
              const argument& arg1,
              const argument& arg2,
              const argument& arg3)
{
    auto tanh_of_sum = [](auto x, auto y, auto z) {
        return ::tanh(to_hip_type(x + y + z));
    };
    nary(stream, result, arg1, arg2, arg3)(tanh_of_sum);
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -135,20 +135,21 @@ struct hip_array
MIGRAPHX_DEVICE_CONSTEXPR hip_array carry(hip_array result) const
{
std::ptrdiff_t rem = 0;
for(std::ptrdiff_t i = result.size() - 1; i >= 0; i--)
uint32_t overflow = 0;
for(std::ptrdiff_t i = result.size() - 1; i > 0; i--)
{
auto z = result[i] + rem;
rem = z - std::ptrdiff_t(d[i]) + 1;
if(rem > 0)
z -= rem;
else
rem = 0;
auto z = result[i] + overflow;
// Reset overflow
overflow = 0;
// Compute overflow using while loop instead of mod
while(z >= d[i])
{
z -= d[i];
overflow += 1;
}
result[i] = z;
}
// Add overflows to the back
if(rem > 0)
result.back() += rem;
result[0] += overflow;
return result;
}
};
......
......@@ -9,7 +9,7 @@ namespace gpu {
namespace device {
constexpr const std::size_t fast_div_shift = 42;
MIGRAPHX_DEVICE_CONSTEXPR std::size_t encode_divisor(std::size_t divisor)
inline std::size_t encode_divisor(std::size_t divisor)
{
if(divisor == 0)
return 0;
......@@ -19,7 +19,7 @@ MIGRAPHX_DEVICE_CONSTEXPR std::size_t encode_divisor(std::size_t divisor)
inline constexpr bool is_divisor_encodable(std::size_t i)
{
return i < std::size_t{1} << (fast_div_shift / 2);
return i < (std::size_t{1} << (fast_div_shift / 2));
}
MIGRAPHX_DEVICE_CONSTEXPR std::size_t fast_div(std::size_t dividend, std::size_t encoded_divisor)
......
......@@ -15,18 +15,12 @@ struct multi_index
{
using hip_index = hip_array<std::size_t, N>;
hip_index id{};
std::size_t stride = 0;
MIGRAPHX_DEVICE_CONSTEXPR hip_index add_stride(hip_index i) const
{
i.back() += stride;
return i;
}
hip_index stride{};
template <class F>
MIGRAPHX_DEVICE_CONSTEXPR void for_stride(hip_index n, F f) const
{
for(hip_index i = id; i < n; i = n.carry(add_stride(i)))
for(hip_index i = id; i < n; i = n.carry(i + stride))
{
f(i);
}
......@@ -37,7 +31,7 @@ template <std::size_t N>
MIGRAPHX_DEVICE_CONSTEXPR multi_index<N>
make_multi_index(const hip_shape<N>& s, std::size_t i, std::size_t n)
{
return {s.multi(i), n};
return {s.multi(i), s.multi(n)};
}
template <std::size_t N>
......@@ -51,13 +45,22 @@ template <std::size_t N>
inline auto mi_launch(hipStream_t stream, const hip_shape<N>& s, std::size_t local = 1024)
{
assert(s.standard);
assert(s.elements() > 0);
std::size_t n = s.elements();
std::size_t groups = (n + local - 1) / local;
std::size_t nglobal = std::min<std::size_t>(128, groups) * local;
assert(groups > 0);
assert(nglobal > 0);
auto nglobal_multi = s.multi(nglobal);
// Skip checking this, since this will cause metadata to not be generated
// for some unknown reason.
//
// assert(std::any_of(nglobal_multi.begin(), nglobal_multi.end(), [](auto x){return x>0;}));
return [=](auto f) {
launch(stream, nglobal, local)([=](auto idx) {
auto midx = make_multi_index(s, idx.global, nglobal);
auto midx = make_multi_index(s, idx.global, nglobal_multi);
midx.for_stride(s.lens, [&](auto i) { f(i); });
});
};
......
......@@ -304,7 +304,8 @@ void nary_impl(hipStream_t stream, F f, argument result, Arguments... args)
MIGRAPHX_TRACE_NARY_FUNCTION
const auto shapes = make_array(args.get_shape()...);
const bool standard = all_of(shapes, [](const shape& s) { return s.standard(); });
const bool packed = all_of(shapes, [](const shape& s) { return s.packed(); });
const bool packed =
all_of(shapes, [](const shape& s) { return s.packed() and not s.broadcasted(); });
const bool same_shapes =
all_of(shapes, [&](const shape& s) { return s == result.get_shape(); });
const bool same_input_shapes = all_of(shapes, [&](const shape& s) { return s == shapes[0]; });
......
......@@ -70,14 +70,15 @@ struct hip_shape
{
hip_index result;
std::size_t tidx = idx;
for(std::ptrdiff_t is = result.size() - 1; is >= 0; is--)
for(std::ptrdiff_t is = result.size() - 1; is > 0; is--)
{
// result[is] = tidx % lens[is];
// tidx = tdix / lens[is];
// tidx = tidx / lens[is];
auto q = fast_div(tidx, divs[is]);
result[is] = remainder(q, tidx, lens[is]);
tidx = q;
}
result[0] = tidx;
return result;
}
};
......
#include <migraphx/gpu/device/add_unary.hpp>
#include <migraphx/gpu/device/mul_add.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
......
#include <migraphx/gpu/device/mul_add_relu.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused multiply-add + ReLU.
// The functor handed to `nary` receives its operands in the order
// (x, a, b) = (arg1, arg2, arg3) and returns max(0, a * x + b).
void mul_add_relu(hipStream_t stream,
                  const argument& result,
                  const argument& arg1,
                  const argument& arg2,
                  const argument& arg3)
{
    auto relu_of_fma = [](auto x, auto a, auto b) {
        auto scaled = a * x + b;
        // The explicit template argument makes the literal 0 convert to the expression's type.
        return std::max<decltype(a * x + b)>(0, scaled);
    };
    nary(stream, result, arg1, arg2, arg3)(relu_of_fma);
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -5,7 +5,11 @@
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/mul_add.hpp>
#include <migraphx/gpu/device/add_unary.hpp>
#include <migraphx/gpu/device/add_clip.hpp>
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/add_sigmoid.hpp>
#include <migraphx/gpu/device/add_tanh.hpp>
#include <migraphx/gpu/device/mul_add_relu.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/array.hpp>
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_UNARY_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_UNARY_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_CLIP_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
......@@ -11,12 +11,6 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void mul_add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
void add_clip(hipStream_t stream,
const argument& result,
const argument& arg1,
......@@ -24,21 +18,6 @@ void add_clip(hipStream_t stream,
float max,
float min);
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
void add_clip(hipStream_t stream,
const argument& result,
const argument& arg1,
......@@ -47,24 +26,6 @@ void add_clip(hipStream_t stream,
float max,
float min);
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + ReLU over two inputs.
// The implementation forwards stream, result, arg1 and arg2 to `nary` with a
// functor that returns max(0, x + y).
// NOTE(review): `result` is presumably the output argument written by `nary`,
// and `stream` the HIP stream the work is launched on -- confirm against nary.hpp.
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
// Fused add + ReLU over three inputs.
// The implementation forwards stream, result and arg1..arg3 to `nary` with a
// functor that returns max(0, x + y + z).
// NOTE(review): `result` is presumably the output argument written by `nary` --
// confirm against nary.hpp.
void add_relu(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_SIGMOID_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_SIGMOID_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + sigmoid over two inputs.
// The implementation forwards stream, result, arg1 and arg2 to `nary` with a
// functor that returns 1 / (1 + exp(-(x + y))).
// NOTE(review): `result` is presumably the output argument written by `nary` --
// confirm against nary.hpp.
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
// Fused add + sigmoid over three inputs.
// The implementation forwards stream, result and arg1..arg3 to `nary` with a
// functor that returns 1 / (1 + exp(-(x + y + z))).
// NOTE(review): `result` is presumably the output argument written by `nary` --
// confirm against nary.hpp.
void add_sigmoid(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_TANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ADD_TANH_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
// Fused add + tanh over two inputs.
// The implementation forwards stream, result, arg1 and arg2 to `nary` with a
// functor that returns tanh(x + y).
// NOTE(review): `result` is presumably the output argument written by `nary` --
// confirm against nary.hpp.
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2);
// Fused add + tanh over three inputs.
// The implementation forwards stream, result and arg1..arg3 to `nary` with a
// functor that returns tanh(x + y + z).
// NOTE(review): `result` is presumably the output argument written by `nary` --
// confirm against nary.hpp.
void add_tanh(hipStream_t stream,
const argument& result,
const argument& arg1,
const argument& arg2,
const argument& arg3);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment