Commit ca15d48a authored by Khalique's avatar Khalique
Browse files

Merge branch 'master' of https://github.com/ROCmSoftwarePlatform/MIGraph into globalavgpool

parents a3a89d67 f76e2490
...@@ -20,6 +20,9 @@ add_library(migraph ...@@ -20,6 +20,9 @@ add_library(migraph
rocm_clang_tidy_check(migraph) rocm_clang_tidy_check(migraph)
target_include_directories(migraph PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) target_include_directories(migraph PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
find_path(HALF_INCLUDE_DIR half.hpp)
target_include_directories(migraph SYSTEM PUBLIC ${HALF_INCLUDE_DIR})
add_subdirectory(onnx) add_subdirectory(onnx)
add_subdirectory(targets/cpu) add_subdirectory(targets/cpu)
if(MIGRAPH_ENABLE_GPU) if(MIGRAPH_ENABLE_GPU)
......
...@@ -3,23 +3,24 @@ ...@@ -3,23 +3,24 @@
#include <migraph/argument.hpp> #include <migraph/argument.hpp>
#include <migraph/literal.hpp> #include <migraph/literal.hpp>
#include <migraph/type_traits.hpp>
#include <random> #include <random>
namespace migraph { namespace migraph {
template <class T, MIGRAPH_REQUIRES(std::is_floating_point<T>{})> template <class T, MIGRAPH_REQUIRES(is_floating_point<T>{})>
constexpr T normalize(unsigned long z) constexpr T normalize(unsigned long z)
{ {
if(z == 0) if(z == 0)
return 0; return T(0);
const auto max = 32; const auto max = 32;
const double range = max / 2; // NOLINT const double range = max / 2; // NOLINT
double result = (z % max) / range; double result = (z % max) / range;
result -= 1; result -= 1;
return result; return T(result);
} }
template <class T, MIGRAPH_REQUIRES(std::is_signed<T>{} and not std::is_floating_point<T>{})> template <class T, MIGRAPH_REQUIRES(is_signed<T>{} and not is_floating_point<T>{})>
constexpr T normalize(unsigned long z) constexpr T normalize(unsigned long z)
{ {
const auto max = std::numeric_limits<T>::max(); const auto max = std::numeric_limits<T>::max();
...@@ -27,7 +28,7 @@ constexpr T normalize(unsigned long z) ...@@ -27,7 +28,7 @@ constexpr T normalize(unsigned long z)
return half_max - (z % max); return half_max - (z % max);
} }
template <class T, MIGRAPH_REQUIRES(not std::is_signed<T>{} and std::is_integral<T>{})> template <class T, MIGRAPH_REQUIRES(not is_signed<T>{} and std::is_integral<T>{})>
constexpr T normalize(unsigned long z) constexpr T normalize(unsigned long z)
{ {
const auto max = std::numeric_limits<T>::max(); const auto max = std::numeric_limits<T>::max();
......
/*=============================================================================
Copyright (c) 2017 Paul Fultz II
half.hpp
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#ifndef MIGRAPH_GUARD_RTGLIB_HALF_HPP
#define MIGRAPH_GUARD_RTGLIB_HALF_HPP
#include <half.hpp>
namespace migraph {

// 16-bit floating-point type, provided by the external half-precision
// library (<half.hpp>).
using half = half_float::half;

namespace detail {

// Identity mapping by default: deduce<T>::type is T itself.
template <class T>
struct deduce
{
using type = T;
};

// Arithmetic on half_float::half yields lazy expression proxies of type
// half_float::detail::expr; collapse those back to plain half so callers
// never store the proxy type.
template <>
struct deduce<half_float::detail::expr>
{
using type = half;
};
} // namespace detail

// deduce<T>: the storage type to use for a value of type T — maps the half
// library's expression proxies to half, and every other type to itself.
template <class T>
using deduce = typename detail::deduce<T>::type;
} // namespace migraph
#endif
...@@ -20,10 +20,10 @@ struct literal : raw_data<literal> ...@@ -20,10 +20,10 @@ struct literal : raw_data<literal>
{ {
literal() {} literal() {}
template <class T> template <class U, class T = deduce<U>>
literal(T x) : buffer(make_shared_array<char>(sizeof(T))), m_shape(shape::get_type<T>{}) literal(U x) : buffer(make_shared_array<char>(sizeof(T))), m_shape(shape::get_type<T>{})
{ {
static_assert(std::is_trivial<T>{}, "Literals can only be trivial types"); static_assert(std::is_trivially_copyable<T>{}, "Literals can only be trivial types");
*(reinterpret_cast<T*>(buffer.get())) = x; *(reinterpret_cast<T*>(buffer.get())) = x;
} }
...@@ -31,7 +31,7 @@ struct literal : raw_data<literal> ...@@ -31,7 +31,7 @@ struct literal : raw_data<literal>
literal(const shape& s, const std::vector<T>& x) literal(const shape& s, const std::vector<T>& x)
: buffer(make_shared_array<char>(s.bytes())), m_shape(s) : buffer(make_shared_array<char>(s.bytes())), m_shape(s)
{ {
static_assert(std::is_trivial<T>{}, "Literals can only be trivial types"); static_assert(std::is_trivially_copyable<T>{}, "Literals can only be trivial types");
fill(x.begin(), x.end()); fill(x.begin(), x.end());
} }
...@@ -39,7 +39,7 @@ struct literal : raw_data<literal> ...@@ -39,7 +39,7 @@ struct literal : raw_data<literal>
literal(const shape& s, const std::initializer_list<T>& x) literal(const shape& s, const std::initializer_list<T>& x)
: buffer(make_shared_array<char>(s.bytes())), m_shape(s) : buffer(make_shared_array<char>(s.bytes())), m_shape(s)
{ {
static_assert(std::is_trivial<T>{}, "Literals can only be trivial types"); static_assert(std::is_trivially_copyable<T>{}, "Literals can only be trivial types");
fill(x.begin(), x.end()); fill(x.begin(), x.end());
} }
...@@ -101,7 +101,7 @@ literal transform(literal l, F f) ...@@ -101,7 +101,7 @@ literal transform(literal l, F f)
literal result; literal result;
l.visit([&](auto x) { l.visit([&](auto x) {
using type = std::remove_cv_t<typename decltype(x)::value_type>; using type = std::remove_cv_t<typename decltype(x)::value_type>;
std::vector<type> output(x.size(), 0.0); std::vector<type> output(x.size(), type(0));
std::transform(x.begin(), x.end(), output.begin(), f); std::transform(x.begin(), x.end(), output.begin(), f);
result = literal{l.get_shape(), output}; result = literal{l.get_shape(), output};
}); });
...@@ -115,7 +115,7 @@ literal transform(literal l1, literal l2, F f) ...@@ -115,7 +115,7 @@ literal transform(literal l1, literal l2, F f)
literal result; literal result;
visit_all(l1, l2)([&](auto x, auto y) { visit_all(l1, l2)([&](auto x, auto y) {
using type = std::remove_cv_t<typename decltype(x)::value_type>; using type = std::remove_cv_t<typename decltype(x)::value_type>;
std::vector<type> output(x.size(), 0.0); std::vector<type> output(x.size(), type(0));
std::transform(x.begin(), x.end(), y.begin(), output.begin(), f); std::transform(x.begin(), x.end(), y.begin(), output.begin(), f);
result = literal{l1.get_shape(), output}; result = literal{l1.get_shape(), output};
}); });
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <memory> #include <memory>
#include <migraph/errors.hpp> #include <migraph/errors.hpp>
#include <migraph/half.hpp>
namespace migraph { namespace migraph {
...@@ -19,6 +20,7 @@ struct shape ...@@ -19,6 +20,7 @@ struct shape
// Add new types here // Add new types here
// clang-format off // clang-format off
#define MIGRAPH_SHAPE_VISIT_TYPES(m) \ #define MIGRAPH_SHAPE_VISIT_TYPES(m) \
m(half_type, half) \
m(float_type, float) \ m(float_type, float) \
m(double_type, double) \ m(double_type, double) \
m(uint8_type, uint8_t) \ m(uint8_type, uint8_t) \
......
/*=============================================================================
Copyright (c) 2017 Paul Fultz II
type_traits.hpp
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#ifndef MIGRAPH_GUARD_RTGLIB_TYPE_TRAITS_HPP
#define MIGRAPH_GUARD_RTGLIB_TYPE_TRAITS_HPP
#include <type_traits>
#include <migraph/half.hpp>
namespace migraph {

// Extends a std:: type trait so it also reports true for one extra type:
// generates (inside namespace migraph) a primary template that forwards to
// std::<trait>, plus a full specialization for T deriving std::true_type.
#define MIGRAPH_DETAIL_EXTEND_TRAIT_FOR(trait, T) \
    template <class X>                            \
    struct trait : std::trait<X>                  \
    {                                             \
    };                                            \
                                                  \
    template <>                                   \
    struct trait<T> : std::true_type              \
    {                                             \
    };

// migraph::half behaves like a signed floating-point arithmetic type even
// though the standard traits do not know about it.
MIGRAPH_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, half)
MIGRAPH_DETAIL_EXTEND_TRAIT_FOR(is_signed, half)
MIGRAPH_DETAIL_EXTEND_TRAIT_FOR(is_arithmetic, half)

// Macro hygiene: this is a detail-only helper, used exclusively above —
// #undef it so it does not leak into every TU that includes this header.
#undef MIGRAPH_DETAIL_EXTEND_TRAIT_FOR

} // namespace migraph
#endif
...@@ -437,7 +437,7 @@ struct relu_op ...@@ -437,7 +437,7 @@ struct relu_op
std::string name() const { return "cpu::relu"; } std::string name() const { return "cpu::relu"; }
auto fcn() const auto fcn() const
{ {
return [](auto x) { return x > 0 ? x : 0; }; return [](auto x) { return std::max(decltype(x){0}, x); };
} }
}; };
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <migraph/gpu/device/tensor.hpp> #include <migraph/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp> #include <migraph/gpu/device/launch.hpp>
#include <migraph/gpu/device/types.hpp>
#include <migraph/functional.hpp> #include <migraph/functional.hpp>
#include <migraph/ranges.hpp> #include <migraph/ranges.hpp>
...@@ -37,10 +38,10 @@ auto nary_nonstandard_impl(hipStream_t stream, F f, argument result, Arguments.. ...@@ -37,10 +38,10 @@ auto nary_nonstandard_impl(hipStream_t stream, F f, argument result, Arguments..
const auto& output_shape = result.get_shape(); const auto& output_shape = result.get_shape();
visit_all(result, args...)([&](auto output, auto... inputs) { visit_all(result, args...)([&](auto output, auto... inputs) {
visit_tensor_size(output_shape.lens().size(), [&](auto ndim) { visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
auto data = pack( auto data = pack(std::make_pair(hip_tensor_descriptor<ndim>{inputs.get_shape()},
std::make_pair(hip_tensor_descriptor<ndim>{inputs.get_shape()}, inputs.data())...); device_cast(inputs.data()))...);
hip_tensor_descriptor<ndim> out_desc(output_shape); hip_tensor_descriptor<ndim> out_desc(output_shape);
auto* outp = output.data(); auto* outp = device_cast(output.data());
gs_launch(stream, output_shape.elements())([=](auto i) { gs_launch(stream, output_shape.elements())([=](auto i) {
data([&](auto&&... ps) { data([&](auto&&... ps) {
auto outidx = out_desc.multi(i); auto outidx = out_desc.multi(i);
...@@ -71,11 +72,11 @@ void trinary_broadcast_vec_impl(hipStream_t stream, ...@@ -71,11 +72,11 @@ void trinary_broadcast_vec_impl(hipStream_t stream,
auto bdim_next_stride = bdim_stride * bdim_len; auto bdim_next_stride = bdim_stride * bdim_len;
visit_all(result, arg1, arg2, arg3)([&](auto output, auto input1, auto input2, auto input3) { visit_all(result, arg1, arg2, arg3)([&](auto output, auto input1, auto input2, auto input3) {
using type = std::remove_cv_t<typename decltype(output)::value_type>; using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
auto* xp = as_vec4(input1.data()); auto* xp = as_vec4(device_cast(input1.data()));
auto* yp = as_vec4(input2.data()); auto* yp = as_vec4(device_cast(input2.data()));
auto* zp = as_vec4(input3.data()); auto* zp = as_vec4(device_cast(input3.data()));
auto* outp = as_vec4(output.data()); auto* outp = as_vec4(device_cast(output.data()));
const std::size_t vec_size = 4; const std::size_t vec_size = 4;
const std::size_t nlocal = 1024; const std::size_t nlocal = 1024;
...@@ -130,11 +131,11 @@ void trinary_broadcast_impl(hipStream_t stream, ...@@ -130,11 +131,11 @@ void trinary_broadcast_impl(hipStream_t stream,
auto bdim_next_stride = bdim_stride * bdim_len; auto bdim_next_stride = bdim_stride * bdim_len;
visit_all(result, arg1, arg2, arg3)([&](auto output, auto input1, auto input2, auto input3) { visit_all(result, arg1, arg2, arg3)([&](auto output, auto input1, auto input2, auto input3) {
using type = std::remove_cv_t<typename decltype(output)::value_type>; using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
auto* xp = input1.data(); auto* xp = device_cast(input1.data());
auto* yp = input2.data(); auto* yp = device_cast(input2.data());
auto* zp = input3.data(); auto* zp = device_cast(input3.data());
auto* outp = output.data(); auto* outp = device_cast(output.data());
const std::size_t nlocal = 1024; const std::size_t nlocal = 1024;
const std::size_t nglobal = 256 * nlocal; const std::size_t nglobal = 256 * nlocal;
...@@ -177,10 +178,10 @@ void binary_broadcast_vec_impl( ...@@ -177,10 +178,10 @@ void binary_broadcast_vec_impl(
auto bdim_next_stride = bdim_stride * bdim_len; auto bdim_next_stride = bdim_stride * bdim_len;
visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) { visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) {
using type = std::remove_cv_t<typename decltype(output)::value_type>; using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
auto* xp = as_vec4(input1.data()); auto* xp = as_vec4(device_cast(input1.data()));
auto* yp = as_vec4(input2.data()); auto* yp = as_vec4(device_cast(input2.data()));
auto* outp = as_vec4(output.data()); auto* outp = as_vec4(device_cast(output.data()));
const std::size_t vec_size = 4; const std::size_t vec_size = 4;
const std::size_t nlocal = 1024; const std::size_t nlocal = 1024;
...@@ -230,10 +231,10 @@ void binary_broadcast_impl( ...@@ -230,10 +231,10 @@ void binary_broadcast_impl(
auto bdim_next_stride = bdim_stride * bdim_len; auto bdim_next_stride = bdim_stride * bdim_len;
visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) { visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) {
using type = std::remove_cv_t<typename decltype(output)::value_type>; using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
auto* xp = input1.data(); auto* xp = device_cast(input1.data());
auto* yp = input2.data(); auto* yp = device_cast(input2.data());
auto* outp = output.data(); auto* outp = device_cast(output.data());
const std::size_t nlocal = 1024; const std::size_t nlocal = 1024;
const std::size_t nglobal = 256 * nlocal; const std::size_t nglobal = 256 * nlocal;
...@@ -265,10 +266,10 @@ void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments. ...@@ -265,10 +266,10 @@ void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments.
// assert(x.get_shape().elements() == y.get_shape().elements()); // assert(x.get_shape().elements() == y.get_shape().elements());
const auto& output_shape = result.get_shape(); const auto& output_shape = result.get_shape();
visit_all(result, args...)([&](auto output, auto... inputs) { visit_all(result, args...)([&](auto output, auto... inputs) {
using type = std::remove_cv_t<typename decltype(output)::value_type>; using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
const std::size_t vec_size = 4; const std::size_t vec_size = 4;
auto data = pack_vec4(inputs.data()...); auto data = pack_vec4(device_cast(inputs.data())...);
auto* outp = as_vec4(output.data()); auto* outp = as_vec4(device_cast(output.data()));
gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) { gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) {
vec4<type> out = outp[i]; vec4<type> out = outp[i];
data( data(
...@@ -290,8 +291,8 @@ void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... a ...@@ -290,8 +291,8 @@ void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... a
// assert(x.get_shape().elements() == y.get_shape().elements()); // assert(x.get_shape().elements() == y.get_shape().elements());
const auto& output_shape = result.get_shape(); const auto& output_shape = result.get_shape();
visit_all(result, args...)([&](auto output, auto... inputs) { visit_all(result, args...)([&](auto output, auto... inputs) {
auto data = pack(inputs.data()...); auto data = pack(device_cast(inputs.data())...);
auto* outp = output.data(); auto* outp = device_cast(output.data());
gs_launch(stream, output_shape.elements())( gs_launch(stream, output_shape.elements())(
[=](auto i) { data([&](auto... xps) { outp[i] = f(xps[i]...); }); }); [=](auto i) { data([&](auto... xps) { outp[i] = f(xps[i]...); }); });
}); });
......
/*=============================================================================
Copyright (c) 2017 Paul Fultz II
types.hpp
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#ifndef MIGRAPH_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#define MIGRAPH_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#include <migraph/half.hpp>
namespace migraph {
namespace gpu {
namespace device {

// On-device representation of a 16-bit float: the compiler-native __fp16.
using gpu_half = __fp16;

namespace detail {
// device_type<T>: the type used in device (GPU) code for a host type T.
template <class T>
struct device_type
{
    using type = T;
};

template <>
struct device_type<half>
{
    using type = gpu_half;
};

// host_type<T>: the type used in host code for a device type T.
template <class T>
struct host_type
{
    using type = T;
};

// BUGFIX: this specialization was previously declared on device_type
// (mapping gpu_half back to half), which inverted device_cast for gpu_half
// and left host_type<gpu_half> as the identity, making host_cast a no-op.
template <>
struct host_type<gpu_half>
{
    using type = half;
};
} // namespace detail

template <class T>
using host_type = typename detail::host_type<T>::type;

template <class T>
using device_type = typename detail::device_type<T>::type;

// Reinterpret a device value as its host equivalent (e.g. gpu_half -> half).
template <class T>
host_type<T> host_cast(T x)
{
    // reinterpret_cast between distinct value types is ill-formed; cast
    // through a reference to reinterpret the object representation instead.
    return reinterpret_cast<host_type<T>&>(x);
}

// Reinterpret a pointer into device memory as a pointer to the host type.
template <class T>
host_type<T>* host_cast(T* x)
{
    return reinterpret_cast<host_type<T>*>(x);
}

// Reinterpret a host value as its device equivalent (e.g. half -> gpu_half).
template <class T>
device_type<T> device_cast(T x)
{
    return reinterpret_cast<device_type<T>&>(x);
}

// Reinterpret a pointer to host data as a pointer to the device type.
template <class T>
device_type<T>* device_cast(T* x)
{
    return reinterpret_cast<device_type<T>*>(x);
}

} // namespace device
} // namespace gpu
} // namespace migraph
#endif
...@@ -41,6 +41,8 @@ inline tensor_descriptor make_tensor(const migraph::shape& s) ...@@ -41,6 +41,8 @@ inline tensor_descriptor make_tensor(const migraph::shape& s)
miopenDataType_t d; miopenDataType_t d;
if(s.type() == shape::float_type) if(s.type() == shape::float_type)
d = miopenFloat; d = miopenFloat;
else if(s.type() == shape::half_type)
d = miopenHalf;
else else
MIGRAPH_THROW("Unsupported type"); MIGRAPH_THROW("Unsupported type");
miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data()); miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data());
......
...@@ -134,7 +134,8 @@ void verify_program() ...@@ -134,7 +134,8 @@ void verify_program()
migraph::program gpu_prog; migraph::program gpu_prog;
auto cpu_arg_f = detach_async([&] { return run_cpu<V>(cpu_prog); }); auto cpu_arg_f = detach_async([&] { return run_cpu<V>(cpu_prog); });
auto gpu_arg = run_gpu<V>(gpu_prog); auto gpu_arg = run_gpu<V>(gpu_prog);
bool passed = verify_args(migraph::get_type_name<V>(), cpu_arg_f.get(), gpu_arg); auto cpu_arg = cpu_arg_f.get();
bool passed = verify_args(migraph::get_type_name<V>(), cpu_arg, gpu_arg);
if(not passed) if(not passed)
{ {
V v; V v;
...@@ -175,6 +176,19 @@ struct test_add ...@@ -175,6 +176,19 @@ struct test_add
} }
}; };
// Verifies elementwise addition of two length-3 half-precision vectors.
struct test_add_half
{
    migraph::program create_program() const
    {
        migraph::program prog;
        const migraph::shape vec3{migraph::shape::half_type, {3}};
        auto lhs = prog.add_parameter("x", vec3);
        auto rhs = prog.add_parameter("y", vec3);
        prog.add_instruction(migraph::op::add{}, lhs, rhs);
        return prog;
    }
};
struct test_mul struct test_mul
{ {
migraph::program create_program() const migraph::program create_program() const
...@@ -383,6 +397,20 @@ struct test_conv_relu ...@@ -383,6 +397,20 @@ struct test_conv_relu
} }
}; };
// Verifies a convolution followed by a relu activation in half precision.
struct test_conv_relu_half
{
    migraph::program create_program() const
    {
        migraph::program prog;
        // Both the data and the filters use the same NCHW half shape.
        const migraph::shape nchw{migraph::shape::half_type, {4, 3, 3, 3}};
        auto data    = prog.add_parameter("x", nchw);
        auto filters = prog.add_parameter("w", nchw);
        auto convd   = prog.add_instruction(migraph::op::convolution{}, data, filters);
        prog.add_instruction(migraph::op::activation{"relu"}, convd);
        return prog;
    }
};
struct test_add_relu struct test_add_relu
{ {
migraph::program create_program() const migraph::program create_program() const
...@@ -710,6 +738,7 @@ int main() ...@@ -710,6 +738,7 @@ int main()
verify_program<test_concat>(); verify_program<test_concat>();
verify_program<test_concat2>(); verify_program<test_concat2>();
verify_program<test_add>(); verify_program<test_add>();
verify_program<test_add_half>();
verify_program<test_mul>(); verify_program<test_mul>();
verify_program<test_scale>(); verify_program<test_scale>();
verify_program<test_triadd>(); verify_program<test_triadd>();
...@@ -725,6 +754,7 @@ int main() ...@@ -725,6 +754,7 @@ int main()
verify_program<test_conv>(); verify_program<test_conv>();
verify_program<test_conv2>(); verify_program<test_conv2>();
verify_program<test_conv_relu>(); verify_program<test_conv_relu>();
verify_program<test_conv_relu_half>();
verify_program<test_add_relu>(); verify_program<test_add_relu>();
verify_program<test_leaky_relu>(); verify_program<test_leaky_relu>();
verify_program<test_conv_pooling>(); verify_program<test_conv_pooling>();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment