Commit ca15d48a authored by Khalique's avatar Khalique
Browse files

Merge branch 'master' of https://github.com/ROCmSoftwarePlatform/MIGraph into globalavgpool

parents a3a89d67 f76e2490
...@@ -20,6 +20,9 @@ add_library(migraph ...@@ -20,6 +20,9 @@ add_library(migraph
rocm_clang_tidy_check(migraph) rocm_clang_tidy_check(migraph)
target_include_directories(migraph PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) target_include_directories(migraph PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
find_path(HALF_INCLUDE_DIR half.hpp)
target_include_directories(migraph SYSTEM PUBLIC ${HALF_INCLUDE_DIR})
add_subdirectory(onnx) add_subdirectory(onnx)
add_subdirectory(targets/cpu) add_subdirectory(targets/cpu)
if(MIGRAPH_ENABLE_GPU) if(MIGRAPH_ENABLE_GPU)
......
...@@ -3,23 +3,24 @@ ...@@ -3,23 +3,24 @@
#include <migraph/argument.hpp> #include <migraph/argument.hpp>
#include <migraph/literal.hpp> #include <migraph/literal.hpp>
#include <migraph/type_traits.hpp>
#include <random> #include <random>
namespace migraph { namespace migraph {
template <class T, MIGRAPH_REQUIRES(std::is_floating_point<T>{})> template <class T, MIGRAPH_REQUIRES(is_floating_point<T>{})>
constexpr T normalize(unsigned long z) constexpr T normalize(unsigned long z)
{ {
if(z == 0) if(z == 0)
return 0; return T(0);
const auto max = 32; const auto max = 32;
const double range = max / 2; // NOLINT const double range = max / 2; // NOLINT
double result = (z % max) / range; double result = (z % max) / range;
result -= 1; result -= 1;
return result; return T(result);
} }
template <class T, MIGRAPH_REQUIRES(std::is_signed<T>{} and not std::is_floating_point<T>{})> template <class T, MIGRAPH_REQUIRES(is_signed<T>{} and not is_floating_point<T>{})>
constexpr T normalize(unsigned long z) constexpr T normalize(unsigned long z)
{ {
const auto max = std::numeric_limits<T>::max(); const auto max = std::numeric_limits<T>::max();
...@@ -27,7 +28,7 @@ constexpr T normalize(unsigned long z) ...@@ -27,7 +28,7 @@ constexpr T normalize(unsigned long z)
return half_max - (z % max); return half_max - (z % max);
} }
template <class T, MIGRAPH_REQUIRES(not std::is_signed<T>{} and std::is_integral<T>{})> template <class T, MIGRAPH_REQUIRES(not is_signed<T>{} and std::is_integral<T>{})>
constexpr T normalize(unsigned long z) constexpr T normalize(unsigned long z)
{ {
const auto max = std::numeric_limits<T>::max(); const auto max = std::numeric_limits<T>::max();
......
/*=============================================================================
Copyright (c) 2017 Paul Fultz II
half.hpp
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#ifndef MIGRAPH_GUARD_RTGLIB_HALF_HPP
#define MIGRAPH_GUARD_RTGLIB_HALF_HPP
#include <half.hpp>
namespace migraph {

// 16-bit floating-point type, provided by the external half-precision
// library (<half.hpp>).
using half = half_float::half;

namespace detail {

// Identity mapping by default: deduce<T>::type is T itself.
template <class T>
struct deduce
{
using type = T;
};

// Arithmetic on half_float::half yields lazy expression proxies of type
// half_float::detail::expr; collapse those back to plain half so callers
// never store the proxy type.
template <>
struct deduce<half_float::detail::expr>
{
using type = half;
};
} // namespace detail

// deduce<T>: the storage type to use for a value of type T — maps the half
// library's expression proxies to half, and every other type to itself.
template <class T>
using deduce = typename detail::deduce<T>::type;
} // namespace migraph
#endif
...@@ -20,10 +20,10 @@ struct literal : raw_data<literal> ...@@ -20,10 +20,10 @@ struct literal : raw_data<literal>
{ {
literal() {} literal() {}
template <class T> template <class U, class T = deduce<U>>
literal(T x) : buffer(make_shared_array<char>(sizeof(T))), m_shape(shape::get_type<T>{}) literal(U x) : buffer(make_shared_array<char>(sizeof(T))), m_shape(shape::get_type<T>{})
{ {
static_assert(std::is_trivial<T>{}, "Literals can only be trivial types"); static_assert(std::is_trivially_copyable<T>{}, "Literals can only be trivial types");
*(reinterpret_cast<T*>(buffer.get())) = x; *(reinterpret_cast<T*>(buffer.get())) = x;
} }
...@@ -31,7 +31,7 @@ struct literal : raw_data<literal> ...@@ -31,7 +31,7 @@ struct literal : raw_data<literal>
literal(const shape& s, const std::vector<T>& x) literal(const shape& s, const std::vector<T>& x)
: buffer(make_shared_array<char>(s.bytes())), m_shape(s) : buffer(make_shared_array<char>(s.bytes())), m_shape(s)
{ {
static_assert(std::is_trivial<T>{}, "Literals can only be trivial types"); static_assert(std::is_trivially_copyable<T>{}, "Literals can only be trivial types");
fill(x.begin(), x.end()); fill(x.begin(), x.end());
} }
...@@ -39,7 +39,7 @@ struct literal : raw_data<literal> ...@@ -39,7 +39,7 @@ struct literal : raw_data<literal>
literal(const shape& s, const std::initializer_list<T>& x) literal(const shape& s, const std::initializer_list<T>& x)
: buffer(make_shared_array<char>(s.bytes())), m_shape(s) : buffer(make_shared_array<char>(s.bytes())), m_shape(s)
{ {
static_assert(std::is_trivial<T>{}, "Literals can only be trivial types"); static_assert(std::is_trivially_copyable<T>{}, "Literals can only be trivial types");
fill(x.begin(), x.end()); fill(x.begin(), x.end());
} }
...@@ -101,7 +101,7 @@ literal transform(literal l, F f) ...@@ -101,7 +101,7 @@ literal transform(literal l, F f)
literal result; literal result;
l.visit([&](auto x) { l.visit([&](auto x) {
using type = std::remove_cv_t<typename decltype(x)::value_type>; using type = std::remove_cv_t<typename decltype(x)::value_type>;
std::vector<type> output(x.size(), 0.0); std::vector<type> output(x.size(), type(0));
std::transform(x.begin(), x.end(), output.begin(), f); std::transform(x.begin(), x.end(), output.begin(), f);
result = literal{l.get_shape(), output}; result = literal{l.get_shape(), output};
}); });
...@@ -115,7 +115,7 @@ literal transform(literal l1, literal l2, F f) ...@@ -115,7 +115,7 @@ literal transform(literal l1, literal l2, F f)
literal result; literal result;
visit_all(l1, l2)([&](auto x, auto y) { visit_all(l1, l2)([&](auto x, auto y) {
using type = std::remove_cv_t<typename decltype(x)::value_type>; using type = std::remove_cv_t<typename decltype(x)::value_type>;
std::vector<type> output(x.size(), 0.0); std::vector<type> output(x.size(), type(0));
std::transform(x.begin(), x.end(), y.begin(), output.begin(), f); std::transform(x.begin(), x.end(), y.begin(), output.begin(), f);
result = literal{l1.get_shape(), output}; result = literal{l1.get_shape(), output};
}); });
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <memory> #include <memory>
#include <migraph/errors.hpp> #include <migraph/errors.hpp>
#include <migraph/half.hpp>
namespace migraph { namespace migraph {
...@@ -19,6 +20,7 @@ struct shape ...@@ -19,6 +20,7 @@ struct shape
// Add new types here // Add new types here
// clang-format off // clang-format off
#define MIGRAPH_SHAPE_VISIT_TYPES(m) \ #define MIGRAPH_SHAPE_VISIT_TYPES(m) \
m(half_type, half) \
m(float_type, float) \ m(float_type, float) \
m(double_type, double) \ m(double_type, double) \
m(uint8_type, uint8_t) \ m(uint8_type, uint8_t) \
......
/*=============================================================================
Copyright (c) 2017 Paul Fultz II
type_traits.hpp
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#ifndef MIGRAPH_GUARD_RTGLIB_TYPE_TRAITS_HPP
#define MIGRAPH_GUARD_RTGLIB_TYPE_TRAITS_HPP
#include <type_traits>
#include <migraph/half.hpp>
namespace migraph {

// Extends a std:: type trait so it also reports true for one extra type:
// generates (inside namespace migraph) a primary template that forwards to
// std::<trait>, plus a full specialization for T deriving std::true_type.
#define MIGRAPH_DETAIL_EXTEND_TRAIT_FOR(trait, T) \
    template <class X>                            \
    struct trait : std::trait<X>                  \
    {                                             \
    };                                            \
                                                  \
    template <>                                   \
    struct trait<T> : std::true_type              \
    {                                             \
    };

// migraph::half behaves like a signed floating-point arithmetic type even
// though the standard traits do not know about it.
MIGRAPH_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, half)
MIGRAPH_DETAIL_EXTEND_TRAIT_FOR(is_signed, half)
MIGRAPH_DETAIL_EXTEND_TRAIT_FOR(is_arithmetic, half)

// Macro hygiene: this is a detail-only helper, used exclusively above —
// #undef it so it does not leak into every TU that includes this header.
#undef MIGRAPH_DETAIL_EXTEND_TRAIT_FOR

} // namespace migraph
#endif
...@@ -437,7 +437,7 @@ struct relu_op ...@@ -437,7 +437,7 @@ struct relu_op
std::string name() const { return "cpu::relu"; } std::string name() const { return "cpu::relu"; }
auto fcn() const auto fcn() const
{ {
return [](auto x) { return x > 0 ? x : 0; }; return [](auto x) { return std::max(decltype(x){0}, x); };
} }
}; };
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <migraph/gpu/device/tensor.hpp> #include <migraph/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp> #include <migraph/gpu/device/launch.hpp>
#include <migraph/gpu/device/types.hpp>
#include <migraph/functional.hpp> #include <migraph/functional.hpp>
#include <migraph/ranges.hpp> #include <migraph/ranges.hpp>
...@@ -37,10 +38,10 @@ auto nary_nonstandard_impl(hipStream_t stream, F f, argument result, Arguments.. ...@@ -37,10 +38,10 @@ auto nary_nonstandard_impl(hipStream_t stream, F f, argument result, Arguments..
const auto& output_shape = result.get_shape(); const auto& output_shape = result.get_shape();
visit_all(result, args...)([&](auto output, auto... inputs) { visit_all(result, args...)([&](auto output, auto... inputs) {
visit_tensor_size(output_shape.lens().size(), [&](auto ndim) { visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
auto data = pack( auto data = pack(std::make_pair(hip_tensor_descriptor<ndim>{inputs.get_shape()},
std::make_pair(hip_tensor_descriptor<ndim>{inputs.get_shape()}, inputs.data())...); device_cast(inputs.data()))...);
hip_tensor_descriptor<ndim> out_desc(output_shape); hip_tensor_descriptor<ndim> out_desc(output_shape);
auto* outp = output.data(); auto* outp = device_cast(output.data());
gs_launch(stream, output_shape.elements())([=](auto i) { gs_launch(stream, output_shape.elements())([=](auto i) {
data([&](auto&&... ps) { data([&](auto&&... ps) {
auto outidx = out_desc.multi(i); auto outidx = out_desc.multi(i);
...@@ -71,11 +72,11 @@ void trinary_broadcast_vec_impl(hipStream_t stream, ...@@ -71,11 +72,11 @@ void trinary_broadcast_vec_impl(hipStream_t stream,
auto bdim_next_stride = bdim_stride * bdim_len; auto bdim_next_stride = bdim_stride * bdim_len;
visit_all(result, arg1, arg2, arg3)([&](auto output, auto input1, auto input2, auto input3) { visit_all(result, arg1, arg2, arg3)([&](auto output, auto input1, auto input2, auto input3) {
using type = std::remove_cv_t<typename decltype(output)::value_type>; using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
auto* xp = as_vec4(input1.data()); auto* xp = as_vec4(device_cast(input1.data()));
auto* yp = as_vec4(input2.data()); auto* yp = as_vec4(device_cast(input2.data()));
auto* zp = as_vec4(input3.data()); auto* zp = as_vec4(device_cast(input3.data()));
auto* outp = as_vec4(output.data()); auto* outp = as_vec4(device_cast(output.data()));
const std::size_t vec_size = 4; const std::size_t vec_size = 4;
const std::size_t nlocal = 1024; const std::size_t nlocal = 1024;
...@@ -130,11 +131,11 @@ void trinary_broadcast_impl(hipStream_t stream, ...@@ -130,11 +131,11 @@ void trinary_broadcast_impl(hipStream_t stream,
auto bdim_next_stride = bdim_stride * bdim_len; auto bdim_next_stride = bdim_stride * bdim_len;
visit_all(result, arg1, arg2, arg3)([&](auto output, auto input1, auto input2, auto input3) { visit_all(result, arg1, arg2, arg3)([&](auto output, auto input1, auto input2, auto input3) {
using type = std::remove_cv_t<typename decltype(output)::value_type>; using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
auto* xp = input1.data(); auto* xp = device_cast(input1.data());
auto* yp = input2.data(); auto* yp = device_cast(input2.data());
auto* zp = input3.data(); auto* zp = device_cast(input3.data());
auto* outp = output.data(); auto* outp = device_cast(output.data());
const std::size_t nlocal = 1024; const std::size_t nlocal = 1024;
const std::size_t nglobal = 256 * nlocal; const std::size_t nglobal = 256 * nlocal;
...@@ -177,10 +178,10 @@ void binary_broadcast_vec_impl( ...@@ -177,10 +178,10 @@ void binary_broadcast_vec_impl(
auto bdim_next_stride = bdim_stride * bdim_len; auto bdim_next_stride = bdim_stride * bdim_len;
visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) { visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) {
using type = std::remove_cv_t<typename decltype(output)::value_type>; using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
auto* xp = as_vec4(input1.data()); auto* xp = as_vec4(device_cast(input1.data()));
auto* yp = as_vec4(input2.data()); auto* yp = as_vec4(device_cast(input2.data()));
auto* outp = as_vec4(output.data()); auto* outp = as_vec4(device_cast(output.data()));
const std::size_t vec_size = 4; const std::size_t vec_size = 4;
const std::size_t nlocal = 1024; const std::size_t nlocal = 1024;
...@@ -230,10 +231,10 @@ void binary_broadcast_impl( ...@@ -230,10 +231,10 @@ void binary_broadcast_impl(
auto bdim_next_stride = bdim_stride * bdim_len; auto bdim_next_stride = bdim_stride * bdim_len;
visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) { visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) {
using type = std::remove_cv_t<typename decltype(output)::value_type>; using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
auto* xp = input1.data(); auto* xp = device_cast(input1.data());
auto* yp = input2.data(); auto* yp = device_cast(input2.data());
auto* outp = output.data(); auto* outp = device_cast(output.data());
const std::size_t nlocal = 1024; const std::size_t nlocal = 1024;
const std::size_t nglobal = 256 * nlocal; const std::size_t nglobal = 256 * nlocal;
...@@ -265,10 +266,10 @@ void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments. ...@@ -265,10 +266,10 @@ void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments.
// assert(x.get_shape().elements() == y.get_shape().elements()); // assert(x.get_shape().elements() == y.get_shape().elements());
const auto& output_shape = result.get_shape(); const auto& output_shape = result.get_shape();
visit_all(result, args...)([&](auto output, auto... inputs) { visit_all(result, args...)([&](auto output, auto... inputs) {
using type = std::remove_cv_t<typename decltype(output)::value_type>; using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
const std::size_t vec_size = 4; const std::size_t vec_size = 4;
auto data = pack_vec4(inputs.data()...); auto data = pack_vec4(device_cast(inputs.data())...);
auto* outp = as_vec4(output.data()); auto* outp = as_vec4(device_cast(output.data()));
gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) { gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) {
vec4<type> out = outp[i]; vec4<type> out = outp[i];
data( data(
...@@ -290,8 +291,8 @@ void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... a ...@@ -290,8 +291,8 @@ void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... a
// assert(x.get_shape().elements() == y.get_shape().elements()); // assert(x.get_shape().elements() == y.get_shape().elements());
const auto& output_shape = result.get_shape(); const auto& output_shape = result.get_shape();
visit_all(result, args...)([&](auto output, auto... inputs) { visit_all(result, args...)([&](auto output, auto... inputs) {
auto data = pack(inputs.data()...); auto data = pack(device_cast(inputs.data())...);
auto* outp = output.data(); auto* outp = device_cast(output.data());
gs_launch(stream, output_shape.elements())( gs_launch(stream, output_shape.elements())(
[=](auto i) { data([&](auto... xps) { outp[i] = f(xps[i]...); }); }); [=](auto i) { data([&](auto... xps) { outp[i] = f(xps[i]...); }); });
}); });
......
/*=============================================================================
Copyright (c) 2017 Paul Fultz II
types.hpp
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#ifndef MIGRAPH_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#define MIGRAPH_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#include <migraph/half.hpp>
namespace migraph {
namespace gpu {
namespace device {

// On-device representation of a 16-bit float: the compiler-native __fp16.
using gpu_half = __fp16;

namespace detail {
// device_type<T>: the type used in device (GPU) code for a host type T.
template <class T>
struct device_type
{
    using type = T;
};

template <>
struct device_type<half>
{
    using type = gpu_half;
};

// host_type<T>: the type used in host code for a device type T.
template <class T>
struct host_type
{
    using type = T;
};

// BUGFIX: this specialization was previously declared on device_type
// (mapping gpu_half back to half), which inverted device_cast for gpu_half
// and left host_type<gpu_half> as the identity, making host_cast a no-op.
template <>
struct host_type<gpu_half>
{
    using type = half;
};
} // namespace detail

template <class T>
using host_type = typename detail::host_type<T>::type;

template <class T>
using device_type = typename detail::device_type<T>::type;

// Reinterpret a device value as its host equivalent (e.g. gpu_half -> half).
template <class T>
host_type<T> host_cast(T x)
{
    // reinterpret_cast between distinct value types is ill-formed; cast
    // through a reference to reinterpret the object representation instead.
    return reinterpret_cast<host_type<T>&>(x);
}

// Reinterpret a pointer into device memory as a pointer to the host type.
template <class T>
host_type<T>* host_cast(T* x)
{
    return reinterpret_cast<host_type<T>*>(x);
}

// Reinterpret a host value as its device equivalent (e.g. half -> gpu_half).
template <class T>
device_type<T> device_cast(T x)
{
    return reinterpret_cast<device_type<T>&>(x);
}

// Reinterpret a pointer to host data as a pointer to the device type.
template <class T>
device_type<T>* device_cast(T* x)
{
    return reinterpret_cast<device_type<T>*>(x);
}

} // namespace device
} // namespace gpu
} // namespace migraph
#endif
...@@ -41,6 +41,8 @@ inline tensor_descriptor make_tensor(const migraph::shape& s) ...@@ -41,6 +41,8 @@ inline tensor_descriptor make_tensor(const migraph::shape& s)
miopenDataType_t d; miopenDataType_t d;
if(s.type() == shape::float_type) if(s.type() == shape::float_type)
d = miopenFloat; d = miopenFloat;
else if(s.type() == shape::half_type)
d = miopenHalf;
else else
MIGRAPH_THROW("Unsupported type"); MIGRAPH_THROW("Unsupported type");
miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data()); miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data());
......
...@@ -134,7 +134,8 @@ void verify_program() ...@@ -134,7 +134,8 @@ void verify_program()
migraph::program gpu_prog; migraph::program gpu_prog;
auto cpu_arg_f = detach_async([&] { return run_cpu<V>(cpu_prog); }); auto cpu_arg_f = detach_async([&] { return run_cpu<V>(cpu_prog); });
auto gpu_arg = run_gpu<V>(gpu_prog); auto gpu_arg = run_gpu<V>(gpu_prog);
bool passed = verify_args(migraph::get_type_name<V>(), cpu_arg_f.get(), gpu_arg); auto cpu_arg = cpu_arg_f.get();
bool passed = verify_args(migraph::get_type_name<V>(), cpu_arg, gpu_arg);
if(not passed) if(not passed)
{ {
V v; V v;
...@@ -175,6 +176,19 @@ struct test_add ...@@ -175,6 +176,19 @@ struct test_add
} }
}; };
// Verifies elementwise addition of two length-3 half-precision vectors.
struct test_add_half
{
    migraph::program create_program() const
    {
        migraph::program prog;
        const migraph::shape vec3{migraph::shape::half_type, {3}};
        auto lhs = prog.add_parameter("x", vec3);
        auto rhs = prog.add_parameter("y", vec3);
        prog.add_instruction(migraph::op::add{}, lhs, rhs);
        return prog;
    }
};
struct test_mul struct test_mul
{ {
migraph::program create_program() const migraph::program create_program() const
...@@ -383,6 +397,20 @@ struct test_conv_relu ...@@ -383,6 +397,20 @@ struct test_conv_relu
} }
}; };
// Verifies a convolution followed by a relu activation in half precision.
struct test_conv_relu_half
{
    migraph::program create_program() const
    {
        migraph::program prog;
        // Both the data and the filters use the same NCHW half shape.
        const migraph::shape nchw{migraph::shape::half_type, {4, 3, 3, 3}};
        auto data    = prog.add_parameter("x", nchw);
        auto filters = prog.add_parameter("w", nchw);
        auto convd   = prog.add_instruction(migraph::op::convolution{}, data, filters);
        prog.add_instruction(migraph::op::activation{"relu"}, convd);
        return prog;
    }
};
struct test_add_relu struct test_add_relu
{ {
migraph::program create_program() const migraph::program create_program() const
...@@ -710,6 +738,7 @@ int main() ...@@ -710,6 +738,7 @@ int main()
verify_program<test_concat>(); verify_program<test_concat>();
verify_program<test_concat2>(); verify_program<test_concat2>();
verify_program<test_add>(); verify_program<test_add>();
verify_program<test_add_half>();
verify_program<test_mul>(); verify_program<test_mul>();
verify_program<test_scale>(); verify_program<test_scale>();
verify_program<test_triadd>(); verify_program<test_triadd>();
...@@ -725,6 +754,7 @@ int main() ...@@ -725,6 +754,7 @@ int main()
verify_program<test_conv>(); verify_program<test_conv>();
verify_program<test_conv2>(); verify_program<test_conv2>();
verify_program<test_conv_relu>(); verify_program<test_conv_relu>();
verify_program<test_conv_relu_half>();
verify_program<test_add_relu>(); verify_program<test_add_relu>();
verify_program<test_leaky_relu>(); verify_program<test_leaky_relu>();
verify_program<test_conv_pooling>(); verify_program<test_conv_pooling>();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment