Commit f292aa44 authored by Khalique

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into depthwise_conv

parents cc320df0 a713a6d3
......@@ -9,19 +9,60 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
bool try_compute_shape(const operation& op, const std::vector<instruction_ref>& args)
static bool try_compute_shape(instruction_ref ins, const std::vector<shape>& inputs)
{
try
{
compute_shape(op, args);
shape new_shape = ins->get_operator().compute_shape(inputs);
// If the new output shape is standard, there is no need to check this
// instruction's consumers
if(new_shape.standard())
{
return true;
}
// If the shape is unchanged, the contiguous can also be removed
if(new_shape == ins->get_shape())
{
return true;
}
auto outputs = ins->outputs();
// If the instruction has no outputs, it is the last instruction; it produces a
// non-standard shape, so removing the contiguous would change the final
// output shape
if(outputs.empty())
{
return false;
}
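// Propagate the tentative shape to every consumer and verify that each one
// can still compute a valid output shape without the contiguous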
for(auto output : outputs)
{
auto args = output->inputs();
std::vector<shape> input_shapes(args.size());
std::transform(args.begin(), args.end(), input_shapes.begin(), [&](auto& arg) {
return (arg == ins) ? new_shape : arg->get_shape();
});
if(!try_compute_shape(output, input_shapes))
{
return false;
}
}
}
catch(...)
{
return false;
}
return true;
}
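// Overload taking instruction arguments: convert them to shapes and recurse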
static bool try_compute_shape(instruction_ref ins, const std::vector<instruction_ref>& args)
{
auto inputs = to_shapes(args);
return try_compute_shape(ins, inputs);
}
void eliminate_contiguous::apply(program& p) const
{
for(auto ins : iterator_for(p))
......@@ -44,7 +85,7 @@ void eliminate_contiguous::apply(program& p) const
auto new_args = args;
auto prev = arg->inputs().front();
replace(new_args, arg, prev);
if(try_compute_shape(ins->get_operator(), new_args))
if(try_compute_shape(ins, new_args))
{
instruction::replace_argument(ins, arg, prev);
}
......
......@@ -13,10 +13,16 @@ struct binary : op_name<Derived>
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs}.has(2).same_type().same_dims();
const auto& s = inputs.front();
if(s.scalar() and s.elements() == 1)
return {s.type()};
return {s.type(), s.lens()};
auto s0 = inputs.at(0);
auto s1 = inputs.at(1);
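// When both inputs share the same packed shape, preserve that (possibly
// non-standard) layout; otherwise fall back to a standard shape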
if(s0 == s1 and s0.packed())
{
return s0;
}
else
{
return {s0.type(), s0.lens()};
}
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
......
#ifndef MIGRAPHX_GUARD_OPERATORS_CLIP_HPP
#define MIGRAPHX_GUARD_OPERATORS_CLIP_HPP
#include <array>
#include <migraphx/op/unary.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
#include <limits>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct clip : unary<clip>
{
float max_val = std::numeric_limits<float>::max();
// lowest() is the most negative finite float; min() would be the smallest positive normal value
float min_val = std::numeric_limits<float>::lowest();
clip() {}
clip(float max, float min) : max_val(max), min_val(min) {}
auto apply() const
{
auto max = max_val;
auto min = min_val;
return [max, min](auto x) {
using type = decltype(x);
return std::min(std::max(type(min), x), type(max));
};
}
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.max_val, "max"), f(self.min_val, "min"));
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -30,7 +30,7 @@ struct gather
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
check_shapes{inputs, *this}.has(2).standard();
auto lens = inputs[0].lens();
int n_dim = static_cast<int>(lens.size());
if(axis >= n_dim || axis < -n_dim)
......
......@@ -29,7 +29,7 @@ struct logsoftmax
std::string name() const { return "logsoftmax"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs}.has(1);
check_shapes{inputs}.has(1).standard();
if(axis < 0 || axis > inputs[0].lens().size())
{
MIGRAPHX_THROW("LogSoftMax: input axis value " + std::to_string(axis) +
......
......@@ -13,7 +13,15 @@ struct unary : op_name<Derived>
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs}.has(1);
return inputs.at(0);
auto s = inputs.at(0);
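// A packed input shape can be returned unchanged; otherwise return a standard
// shape with the same type and dimensions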
if(s.packed())
{
return s;
}
else
{
return {s.type(), s.lens()};
}
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
......
......@@ -11,6 +11,7 @@
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/binary.hpp>
#include <migraphx/op/broadcast.hpp>
#include <migraphx/op/clip.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/op/concat.hpp>
#include <migraphx/op/contiguous.hpp>
......
......@@ -63,6 +63,7 @@ struct onnx_parser
add_variadic_op("Max", op::max{});
add_variadic_op("Min", op::min{});
add_mem_op("Clip", &onnx_parser::parse_clip);
add_mem_op("LRN", &onnx_parser::parse_lrn);
add_mem_op("ImageScaler", &onnx_parser::parse_imagescaler);
add_mem_op("LeakyRelu", &onnx_parser::parse_leaky_relu);
......@@ -225,6 +226,22 @@ struct onnx_parser
});
}
instruction_ref parse_clip(const std::string&,
const attribute_map& attributes,
std::vector<instruction_ref> args)
{
op::clip op;
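// Older ONNX Clip versions carry the bounds as optional attributes; keep the
// operator defaults when an attribute is absent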
if(contains(attributes, "max"))
{
op.max_val = parse_value(attributes.at("max")).at<float>();
}
if(contains(attributes, "min"))
{
op.min_val = parse_value(attributes.at("min")).at<float>();
}
return prog.add_instruction(op, std::move(args));
}
instruction_ref
parse_softmax(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
{
......
......@@ -140,6 +140,21 @@ struct cpu_lrn
}
};
struct clip_op
{
op::clip op;
std::string name() const { return "cpu::clip"; }
auto fcn() const
{
auto max = op.max_val;
auto min = op.min_val;
return [max, min](auto x) {
using type = decltype(x);
return std::min(std::max(type(min), x), type(max));
};
}
};
struct cpu_convolution
{
op::convolution op;
......@@ -591,13 +606,35 @@ struct cpu_unary
{
Op op;
std::string name() const { return op.name(); }
shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs}.has(1);
auto s = inputs.at(0);
if(s.packed())
{
return s;
}
else
{
return {s.type(), s.lens()};
}
}
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
result.visit([&](auto output) {
args[0].visit([&](auto input) {
std::transform(input.begin(), input.end(), output.begin(), op.fcn());
if(input.get_shape().standard())
{
std::transform(input.begin(), input.end(), output.begin(), op.fcn());
}
else
{
shape_for_each(output.get_shape(), [&](const auto& idx) {
output(idx.begin(), idx.end()) = op.fcn()(input(idx.begin(), idx.end()));
});
}
});
});
......@@ -771,12 +808,28 @@ struct cpu_binary
{
Op op;
std::string name() const { return "cpu::" + op.name(); }
shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs}.has(2).same_type().same_dims();
auto s0 = inputs.at(0);
auto s1 = inputs.at(1);
if(s0 == s1 and s0.packed())
{
return s0;
}
else
{
return {s0.type(), s0.lens()};
}
}
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
if(input1.get_shape().packed() and input2.get_shape().packed())
auto s1 = input1.get_shape();
auto s2 = input2.get_shape();
if(s1 == s2 and s1.standard())
{
std::transform(
input1.begin(), input1.end(), input2.begin(), output.begin(), op.fcn());
......@@ -789,6 +842,7 @@ struct cpu_binary
});
}
});
return result;
}
};
......@@ -818,6 +872,7 @@ struct cpu_apply
apply_map["batch_norm_inference"] =
extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
apply_map["lrn"] = extend_op<cpu_lrn, op::lrn>();
apply_map["clip"] = extend_op<cpu_unary<clip_op>, op::clip>();
apply_map["contiguous"] = extend_op<cpu_contiguous, op::contiguous>();
apply_map["pad"] = extend_op<cpu_pad, op::pad>();
apply_map["concat"] = extend_op<cpu_concat, op::concat>();
......
......@@ -32,6 +32,7 @@ add_library(migraphx_device
device/pad.cpp
device/gather.cpp
device/sub.cpp
device/clip.cpp
)
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_clang_tidy_check(migraphx_device)
......@@ -66,6 +67,7 @@ add_library(migraphx_gpu
lrn.cpp
schedule_model.cpp
adjust_allocation.cpp
clip.cpp
)
set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
rocm_clang_tidy_check(migraphx_gpu)
......
......@@ -7,7 +7,7 @@ namespace gpu {
shape miopen_abs::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
check_shapes{inputs, *this}.has(2).packed();
return inputs.at(0);
}
......
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/clip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_clip::compute_shape(std::vector<shape> inputs) const
{
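// The trailing input is the preallocated output buffer added during GPU
// lowering; drop it before delegating to the operator's shape computation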
inputs.pop_back();
return op.compute_shape(inputs);
}
argument hip_clip::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::clip(ctx.get_stream().get(), args.back(), args.front(), op.max_val, op.min_val);
return args.back();
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/clip.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void clip(hipStream_t stream,
const argument& result,
const argument& arg1,
const float max,
const float min)
{
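// Element-wise kernel that clamps every value of arg1 into [min, max]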
nary(stream, result, arg1)(
[max, min](auto x) { return std::min<decltype(x)>(std::max<decltype(x)>(min, x), max); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/clip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct hip_clip
{
op::clip op;
std::string name() const { return "gpu::clip"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
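// The result aliases the last argument, i.e. the preallocated output buffer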
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_CLIP_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void clip(hipStream_t stream, const argument& result, const argument& arg1, float max, float min);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -45,7 +45,15 @@ struct unary_device : oper<Derived>
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2);
return inputs.at(1);
auto s = inputs.at(0);
if(s.packed())
{
return s;
}
else
{
return {s.type(), s.lens()};
}
}
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
......@@ -66,7 +74,16 @@ struct binary_device : oper<Derived>
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(3);
return inputs.at(2);
auto s0 = inputs.at(0);
auto s1 = inputs.at(1);
if(s0 == s1 and s0.packed())
{
return s0;
}
else
{
return {s0.type(), s0.lens()};
}
}
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
......
......@@ -45,6 +45,7 @@
#include <migraphx/gpu/pad.hpp>
#include <migraphx/gpu/gather.hpp>
#include <migraphx/gpu/lrn.hpp>
#include <migraphx/gpu/clip.hpp>
#include <utility>
#include <functional>
#include <algorithm>
......@@ -101,6 +102,7 @@ struct miopen_apply
add_extend_op<hip_logsoftmax, op::logsoftmax>("logsoftmax");
add_extend_op<hip_gather, op::gather>("gather");
add_extend_op<hip_pad, op::pad>("pad");
add_extend_op<hip_clip, op::clip>("clip");
add_lrn_op();
add_convolution_op();
......
......@@ -7,7 +7,7 @@ namespace gpu {
shape miopen_tanh::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
check_shapes{inputs, *this}.has(2).packed();
return inputs.at(0);
}
......
......@@ -1557,4 +1557,21 @@ TEST_CASE(fp16_test)
EXPECT(migraphx::verify_range(results_vector, gold));
}
TEST_CASE(clip_test)
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3}};
auto l = p.add_literal(migraphx::literal{s, {-1.0, 0.0, 10.0}});
migraphx::op::clip op;
op.max_val = 6.0;
op.min_val = 0.0;
p.add_instruction(op, l);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> results_vector(3);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
std::vector<float> gold = {0.0, 0.0, 6.0};
EXPECT(migraphx::verify_range(results_vector, gold));
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
#include <migraphx/eliminate_contiguous.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/sin.hpp>
#include <migraphx/op/slice.hpp>
#include <migraphx/op/transpose.hpp>
#include <migraphx/op/contiguous.hpp>
#include <basic_ops.hpp>
......@@ -36,7 +40,46 @@ TEST_CASE(non_standard_op)
p.add_instruction(pass_op{}, c);
auto count = std::distance(p.begin(), p.end());
p.compile(eliminate_contiguous_target{});
EXPECT(std::distance(p.begin(), p.end()) == count);
}
TEST_CASE(transpose_gemm)
{
migraphx::program p;
auto l = p.add_literal(get_2x2());
auto t = p.add_instruction(migraphx::op::transpose{{1, 0}}, l);
auto c = p.add_instruction(migraphx::op::contiguous{}, t);
auto ic = p.add_instruction(migraphx::op::identity{}, c);
p.add_instruction(migraphx::op::dot{}, ic, l);
auto count = std::distance(p.begin(), p.end());
p.compile(eliminate_contiguous_target{});
EXPECT(std::distance(p.begin(), p.end()) == (count - 1));
}
TEST_CASE(transpose_standard_op)
{
migraphx::program p;
auto l = p.add_literal(get_2x2());
auto t = p.add_instruction(migraphx::op::transpose{{1, 0}}, l);
auto c = p.add_instruction(migraphx::op::contiguous{}, t);
auto sn = p.add_instruction(migraphx::op::sin{}, c);
p.add_instruction(pass_standard_op{}, sn);
auto count = std::distance(p.begin(), p.end());
p.compile(eliminate_contiguous_target{});
EXPECT(std::distance(p.begin(), p.end()) == count);
}
TEST_CASE(no_packed_unary_op)
{
migraphx::program p;
auto l = p.add_literal(get_2x2());
auto t = p.add_instruction(migraphx::op::slice{{1}, {1}, {2}}, l);
auto c = p.add_instruction(migraphx::op::contiguous{}, t);
auto sn = p.add_instruction(migraphx::op::sin{}, c);
p.add_instruction(pass_standard_op{}, sn);
auto count = std::distance(p.begin(), p.end());
p.compile(eliminate_contiguous_target{});
EXPECT(std::distance(p.begin(), p.end()) == count - 1);
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }