Commit c321dd0c authored by Shucai Xiao

Merge branch 'int8_miopen_call' into op_capture

parents 36ab4520 e0e28ef7
......@@ -125,6 +125,8 @@ rocm_enable_cppcheck(
functionConst:*program.*
shadowFunction
shadowVar
shadowVariable
unsafeClassDivZero
definePrefix:*test/include/test.hpp
FORCE
INCONCLUSIVE
......
......@@ -99,13 +99,14 @@ rocmtest tidy: rocmnode('rocmtest') { cmake_build ->
| xargs -n 1 -P 1 -I{} -t sh -c \'clang-format-5.0 -style=file {} | diff - {}\'
'''
}
}, clang: rocmnode('vega') { cmake_build ->
}, clang_debug: rocmnode('vega') { cmake_build ->
stage('Clang Debug') {
// TODO: Enanle integer
// TODO: Enable integer
def sanitizers = "undefined"
def debug_flags = "-g -fno-omit-frame-pointer -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
cmake_build("hcc", "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}'")
}
}, clang_release: rocmnode('vega') { cmake_build ->
stage('Clang Release') {
cmake_build("hcc", "-DCMAKE_BUILD_TYPE=release")
}
......
......@@ -353,14 +353,14 @@ MIGRAPHX_PRED_MATCHER(same_input_shapes, instruction_ref ins)
ins->inputs().begin(), ins->inputs().end(), [&](auto x) { return x->get_shape() == s; });
}
MIGRAPHX_BASIC_MATCHER(output, matcher_context& ctx, instruction_ref ins)
MIGRAPHX_BASIC_MATCHER(output, const matcher_context& ctx, instruction_ref ins)
{
if(ins->outputs().size() == 1)
return ins->outputs().front();
return ctx.not_found();
}
MIGRAPHX_BASIC_MATCHER(used_once, matcher_context& ctx, instruction_ref ins)
MIGRAPHX_BASIC_MATCHER(used_once, const matcher_context& ctx, instruction_ref ins)
{
if(ins->outputs().size() == 1)
return ins;
......@@ -411,7 +411,7 @@ inline auto nargs(std::size_t n)
inline auto arg(std::size_t i)
{
return make_basic_fun_matcher([=](matcher_context& ctx, instruction_ref ins) {
return make_basic_fun_matcher([=](const matcher_context& ctx, instruction_ref ins) {
if(i < ins->inputs().size())
return ins->inputs()[i];
return ctx.not_found();
......
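For context, a matcher written against the const-qualified signature introduced above only reads from the context and returns either the matched instruction or ctx.not_found(). The sketch below follows that pattern; has_no_inputs is a hypothetical example, not a matcher added by this commit.

// Hypothetical matcher using the const-correct signature shown above:
// succeed (return the instruction) when it has no inputs, otherwise
// report "no match" through the context.
MIGRAPHX_BASIC_MATCHER(has_no_inputs, const matcher_context& ctx, instruction_ref ins)
{
    if(ins->inputs().empty())
        return ins;
    return ctx.not_found();
}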
......@@ -168,6 +168,7 @@ bool verify_range(R1&& r1, R2&& r2, double tolerance = 80, double* out_error = n
{
double threshold = std::numeric_limits<range_value<R1>>::epsilon() * tolerance;
auto error = rms_range(r1, r2);
// cppcheck-suppress uninitvar
if(out_error != nullptr)
*out_error = error;
return error <= threshold;
......
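As a rough, standalone sketch of the check above (not the library's rms_range implementation): the threshold scales the element type's machine epsilon by the tolerance factor and is compared against the root-mean-square error of the two ranges. Names and the exact error formula below are assumptions; only the epsilon * tolerance threshold comes from the code above.

#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

// Illustrative stand-in for rms_range; assumes r1 and r2 have the same length.
inline double rms_error(const std::vector<double>& r1, const std::vector<double>& r2)
{
    double sum = 0.0;
    for(std::size_t i = 0; i < r1.size(); i++)
        sum += (r1[i] - r2[i]) * (r1[i] - r2[i]);
    return std::sqrt(sum / r1.size());
}

// Illustrative stand-in for verify_range: scale epsilon by the tolerance and
// accept the ranges when the RMS error stays below that threshold.
inline bool verify_tolerance(const std::vector<double>& r1,
                             const std::vector<double>& r2,
                             double tolerance = 80)
{
    double threshold = std::numeric_limits<double>::epsilon() * tolerance;
    return rms_error(r1, r2) <= threshold;
}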
......@@ -1011,9 +1011,10 @@ struct onnx_parser
}
std::vector<operation> vec_actv_funcs(vec_names.size());
std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(), [&](auto& fn) {
return map_actv_funcs[fn];
});
std::transform(vec_names.begin(),
vec_names.end(),
vec_actv_funcs.begin(),
[&](const auto& fn) { return map_actv_funcs[fn]; });
// To be added later
float clip = 0.0;
......@@ -1127,9 +1128,10 @@ struct onnx_parser
}
std::vector<operation> vec_actv_funcs(vec_names.size());
std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(), [&](auto& name) {
return map_actv_funcs[name];
});
std::transform(vec_names.begin(),
vec_names.end(),
vec_actv_funcs.begin(),
[&](const auto& name) { return map_actv_funcs[name]; });
float clip = 0.0;
if(contains(attributes, "clip"))
......@@ -1299,9 +1301,10 @@ struct onnx_parser
}
std::vector<operation> vec_actv_funcs(vec_names.size());
std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(), [&](auto& name) {
return map_actv_funcs[name];
});
std::transform(vec_names.begin(),
vec_names.end(),
vec_actv_funcs.begin(),
[&](const auto& name) { return map_actv_funcs[name]; });
float clip = 0.0;
if(contains(attributes, "clip"))
......
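The three hunks above are the same mechanical reflow: the lambda now takes its element by const reference and the std::transform call is spread across lines. A self-contained illustration of the lookup pattern, with placeholder map contents (the real parser maps activation names to migraphx operations):

#include <algorithm>
#include <map>
#include <string>
#include <vector>

int main()
{
    // Placeholder contents for illustration only.
    std::map<std::string, int> map_actv_funcs = {{"sigmoid", 0}, {"tanh", 1}, {"relu", 2}};
    std::vector<std::string> vec_names        = {"sigmoid", "tanh"};

    // Pre-sized output, filled by looking each name up in the map.
    std::vector<int> vec_actv_funcs(vec_names.size());
    std::transform(vec_names.begin(),
                   vec_names.end(),
                   vec_actv_funcs.begin(),
                   [&](const auto& name) { return map_actv_funcs[name]; });
    return 0;
}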
......@@ -107,7 +107,7 @@ struct memory_coloring_impl
return ins->name() == "check_context";
}
static bool is_disjoin(live_range& range1, live_range& range2)
static bool is_disjoin(const live_range& range1, const live_range& range2)
{
if((range1.size == 0) || (range2.size == 0))
return false;
......
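Only the signature and the empty-range guard appear in this hunk. As a rough sketch of an interval-disjointness test of this kind (field names other than size are assumptions, not from the diff):

// Hypothetical layout of a live range, for illustration only; only `size`
// is visible in the hunk above.
struct live_range_sketch
{
    long begin = 0;
    long end   = 0;
    long size  = 0;
};

// Two ranges are disjoint when one ends before the other begins; empty
// ranges are reported as not disjoint, matching the early return above.
inline bool is_disjoint_sketch(const live_range_sketch& r1, const live_range_sketch& r2)
{
    if((r1.size == 0) || (r2.size == 0))
        return false;
    return (r1.end < r2.begin) || (r2.end < r1.begin);
}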
......@@ -242,7 +242,7 @@ instruction_ref program::remove_instructions(instruction_ref first, instruction_
// TODO: Check every element
assert(has_instruction(first));
std::for_each(first, last, [&](instruction& ins) { ins.clear_arguments(); });
assert(std::all_of(first, last, [&](instruction& ins) { return ins.outputs().empty(); }));
assert(std::all_of(first, last, [&](const instruction& ins) { return ins.outputs().empty(); }));
return impl->instructions.erase(first, last);
}
......
......@@ -674,7 +674,6 @@ void rewrite_rnn::apply_lstm(program& prog, instruction_ref ins) const
std::vector<float> ihc_data(ihc_shape.elements(), 0.0);
migraphx::shape pph_shape{type, {1, 3 * hidden_size}};
std::vector<float> pph_data(pph_shape.elements(), 0.0);
auto actv_funcs = lstm_actv_funcs(ins);
auto lstm_op = any_cast<op::lstm>(ins->get_operator());
......
......@@ -302,17 +302,17 @@ struct cpu_im2col
const std::size_t& stride_h = op.stride[0];
const std::size_t& stride_w = op.stride[1];
auto kdiv2_h = kernel_h / 2;
auto kdiv2_w = kernel_w / 2;
long kdiv2_h = long(kernel_h) / 2;
long kdiv2_w = long(kernel_w) / 2;
// calculate output sizes
const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
const std::size_t col_width = (width - kernel_w + 2 * pad_w) / stride_w + 1;
// account for padding for the starting position of the input pixels
std::size_t iinput = kdiv2_h - pad_h;
long iinput = kdiv2_h - long(pad_h);
// loop over output pixels (ioutput, joutput)
for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
{
std::size_t jinput = kdiv2_w - pad_w;
long jinput = kdiv2_w - long(pad_w);
for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
{
// compute linear index for output
......@@ -321,8 +321,8 @@ struct cpu_im2col
dfor(channels,
kernel_h,
kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
auto idx = iinput + koffset - kdiv2_h;
auto jdx = jinput + loffset - kdiv2_w;
auto idx = iinput + long(koffset) - kdiv2_h;
auto jdx = jinput + long(loffset) - kdiv2_w;
col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
? input(0, c, idx, jdx)
: 0;
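The switch from std::size_t to long in the im2col hunks above matters because the padded start offsets can be negative; with unsigned arithmetic they wrap around instead of going below zero, and the bounds checks (idx >= 0, idx < height) can no longer reject them. A minimal demonstration with illustrative values:

#include <cstddef>
#include <iostream>

int main()
{
    std::size_t kernel_h = 3;
    std::size_t pad_h    = 2;

    // Unsigned subtraction wraps: 1 - 2 becomes a huge positive index.
    std::size_t wrapped = kernel_h / 2 - pad_h;

    // Doing the arithmetic in a signed type keeps the intended -1, which the
    // (idx >= 0) bound check can then reject correctly.
    long signed_offset = long(kernel_h) / 2 - long(pad_h);

    std::cout << wrapped << " vs " << signed_offset << "\n";
    return 0;
}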
......@@ -589,7 +589,7 @@ struct leaky_relu_op
std::string name() const { return "cpu::leaky_relu"; }
auto fcn() const
{
auto& a = op.alpha;
auto a = op.alpha;
return [a](auto x) { return x > 0 ? x : x * a; };
}
};
......@@ -600,7 +600,7 @@ struct elu_op
std::string name() const { return "cpu::elu"; }
auto fcn() const
{
auto& a = op.alpha;
auto a = op.alpha;
return [a](auto x) { return x > 0 ? x : a * std::expm1(x); };
}
};
......
......@@ -24,9 +24,11 @@ add_library(migraphx_device
device/tan.cpp
device/sinh.cpp
device/cosh.cpp
device/tanh.cpp
device/asin.cpp
device/acos.cpp
device/atan.cpp
device/relu.cpp
device/add_relu.cpp
device/contiguous.cpp
device/logsoftmax.cpp
......@@ -71,9 +73,7 @@ add_library(migraphx_gpu
logsoftmax.cpp
contiguous.cpp
concat.cpp
relu.cpp
leaky_relu.cpp
tanh.cpp
batchnorm.cpp
write_literals.cpp
rocblas.cpp
......
......@@ -155,8 +155,8 @@ __device__ void dpp_reduce(T& in, Op op)
__device__ inline void dpp_reduce(float& x, sum)
{
#ifdef MIGRAPHX_USE_CLANG_TIDY
(void)x;
#if defined(MIGRAPHX_USE_CLANG_TIDY) || defined(CPPCHECK)
x = 1;
#else
__asm__ volatile("s_nop 4\n"
"v_add_f32 %0 %0 %0 row_shr:1\n"
......
#include <migraphx/gpu/device/relu.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void relu(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return std::max<decltype(x)>(0, x); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/tanh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void tanh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::tanh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
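Both new device sources follow the same one-line nary() pattern, so adding another pointwise kernel would look much the same. The sketch below is hypothetical: relu6 is not part of this commit, and the relu6.hpp header it assumes would have to be added alongside it, mirroring the relu/tanh headers that follow.

// Hypothetical pointwise kernel in the style of relu/tanh above.
#include <migraphx/gpu/device/relu6.hpp> // assumed header, declared like relu.hpp below
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void relu6(hipStream_t stream, const argument& result, const argument& arg)
{
    // Clamp to [0, 6] elementwise, reusing the std::max pattern from relu.
    nary(stream, result, arg)(
        [](auto x) { return std::min<decltype(x)>(std::max<decltype(x)>(0, x), 6); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx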
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_RELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_RELU_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void relu(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_TANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TANH_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void tanh(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_RELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_RELU_HPP
#include <migraphx/shape.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/relu.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -10,24 +10,8 @@ namespace gpu {
struct context;
struct miopen_relu
struct hip_relu : unary_device<hip_relu, device::relu>
{
shared<activation_descriptor> ad;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return gpu::reflect(self.ad.get(), f);
}
std::string name() const { return "gpu::relu"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
#ifndef MIGRAPHX_GUARD_RTGLIB_TANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_TANH_HPP
#include <migraphx/shape.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/tanh.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct miopen_tanh
struct hip_tanh : unary_device<hip_tanh, device::tanh>
{
shared<activation_descriptor> ad;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return gpu::reflect(self.ad.get(), f);
}
std::string name() const { return "gpu::tanh"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
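Both activation headers shrink to the same one-liner: the MIOpen descriptor reflection, name(), compute_shape(), compute(), and output_alias() boilerplate is now supplied by the unary_device base. Porting a further elementwise operator would follow the same shape; the declaration below is hypothetical and assumes a matching device::relu6 kernel like the one sketched earlier.

// Hypothetical: not part of this commit; mirrors hip_relu/hip_tanh above.
struct hip_relu6 : unary_device<hip_relu6, device::relu6>
{
};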
......@@ -86,10 +86,8 @@ struct miopen_apply
void init()
{
this->last = instruction::get_output_alias(std::prev(prog->end()));
add_miopen_simple_op<miopen_relu>("relu", make_relu);
add_miopen_simple_op<miopen_sigmoid>("sigmoid", make_sigmoid);
add_miopen_simple_op<miopen_abs>("abs", make_abs);
add_miopen_simple_op<miopen_tanh>("tanh", make_tanh);
add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu);
add_miopen_extend_op<miopen_elu, op::elu>("elu", make_elu);
......@@ -104,6 +102,7 @@ struct miopen_apply
add_generic_op<hip_tan>("tan");
add_generic_op<hip_sinh>("sinh");
add_generic_op<hip_cosh>("cosh");
add_generic_op<hip_tanh>("tanh");
add_generic_op<hip_asin>("asin");
add_generic_op<hip_acos>("acos");
add_generic_op<hip_atan>("atan");
......@@ -115,6 +114,7 @@ struct miopen_apply
add_generic_op<hip_rsqrt>("rsqrt");
add_generic_op<hip_pow>("pow");
add_generic_op<hip_sqdiff>("sqdiff");
add_generic_op<hip_relu>("relu");
add_generic_op<hip_sign>("sign");
add_extend_op<miopen_gemm, op::dot>("dot");
......
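With the device-side kernels in place, lowering only needs the generic registration shown above instead of the MIOpen-specific path. A hypothetical new operator would be wired up the same way inside miopen_apply::init(); hip_relu6 and the "relu6" name are illustrative only.

// Hypothetical registration, following add_generic_op<hip_tanh>("tanh") and
// add_generic_op<hip_relu>("relu") above.
add_generic_op<hip_relu6>("relu6");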
#include <migraphx/gpu/relu.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_relu::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_relu::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx