Commit 087c205e authored by Paul

Merge from develop

parents a3a9e469 e15b8333
......@@ -94,6 +94,12 @@ constexpr void each_args(F)
{
}
template <class F, class T>
auto unpack(F f, T& x)
{
return sequence_c<std::tuple_size<T>{}>([&](auto... is) { f(std::get<is>(x)...); });
}
/// Implements a fix-point combinator
template <class R, class F>
detail::fix_f<R, F> fix(F f)
......
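These two helpers are used later by `par_dfor`. Here is a minimal, self-contained sketch of what they accomplish, with `std::apply` standing in for the hand-rolled index sequence and an illustrative `fix_f` that is an assumption, not the actual `detail::fix_f`:

```cpp
#include <iostream>
#include <tuple>
#include <utility>

// Illustrative fix-point wrapper: f receives a handle to itself as its
// first argument, enabling recursion in an unnamed lambda.
template <class F>
struct fix_f
{
    F f;
    template <class... Ts>
    auto operator()(Ts&&... xs) const
    {
        return f(*this, std::forward<Ts>(xs)...);
    }
};

template <class F>
fix_f<F> fix(F f)
{
    return {std::move(f)};
}

int main()
{
    // unpack-style expansion: std::apply does what the hunk builds by hand
    // from an index sequence over std::tuple_size.
    auto t = std::make_tuple(1, 2, 3);
    std::apply([](int a, int b, int c) { std::cout << a + b + c << "\n"; }, t); // 6

    auto factorial =
        fix([](auto self, int n) -> int { return n <= 1 ? 1 : n * self(n - 1); });
    std::cout << factorial(5) << "\n"; // 120
}
```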
......@@ -14,6 +14,7 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
shape compute_shape(const operation& op, const std::vector<instruction_ref>& args);
std::vector<shape> to_shapes(const std::vector<instruction_ref>& args);
struct instruction
{
......@@ -71,7 +72,11 @@ struct instruction
static void
replace(instruction_ref ins, operation o, const shape& r, std::vector<instruction_ref> args);
static instruction_ref get_output_alias(instruction_ref ins);
argument eval() const;
void finalize(context& ctx);
static instruction_ref get_output_alias(instruction_ref ins, bool shallow = false);
private:
// internal
......
......@@ -17,9 +17,9 @@ struct iterator_for_range
struct iterator
{
base_iterator i;
base_iterator operator*() { return i; }
base_iterator operator*() const { return i; }
base_iterator operator++() { return ++i; }
bool operator!=(const iterator& rhs) { return i != rhs.i; }
bool operator!=(const iterator& rhs) const { return i != rhs.i; }
};
iterator begin()
......
......@@ -22,8 +22,8 @@ struct literal : raw_data<literal>
{
literal() {}
template <class U, class T = deduce<U>>
literal(U x) : buffer(make_shared_array<char>(sizeof(T))), m_shape(shape::get_type<T>{})
template <class U, class T = deduce<U>, shape::type_t ShapeType = shape::get_type<T>{}>
literal(U x) : buffer(make_shared_array<char>(sizeof(T))), m_shape(ShapeType)
{
static_assert(std::is_trivially_copyable<T>{}, "Literals can only be trivial types");
*(reinterpret_cast<T*>(buffer.get())) = x;
......
......@@ -10,7 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
template <typename T>
std::shared_ptr<T> make_shared_array(size_t size)
{
return std::shared_ptr<T>(new T[size], std::default_delete<T[]>());
return std::shared_ptr<T>(new T[size], std::default_delete<T[]>()); // NOLINT
}
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -214,7 +214,6 @@ void find_matches(program& p, Ms&&... ms)
bool match = false;
each_args(
[&](auto&& m) {
// cppcheck-suppress knownConditionTrueFalse
if(match)
return;
auto r = match_instruction(p, ins, m.matcher());
......
......@@ -7,6 +7,24 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct unknown
{
std::string op;
std::string name() const { return "unknown:" + op; }
shape compute_shape(std::vector<shape> input) const
{
if(input.empty())
return {};
else
return input.front();
}
friend std::ostream& operator<<(std::ostream& os, const unknown& x)
{
os << x.name();
return os;
}
};
/// Create a program from an onnx file
program parse_onnx(const std::string& name);
......
......@@ -7,17 +7,17 @@
#include <memory>
#include <type_traits>
#include <utility>
#include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/context.hpp>
#include <migraphx/auto_any_cast.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct context;
#ifdef DOXYGEN
/// The operation interface represents an action an instruction will perform. All
......@@ -26,6 +26,8 @@ struct operation
{
/// A unique name identifying the operation
std::string name() const;
/// An optional method that can be used to finalize the operator before running
void finalize(context& ctx);
/// This is used to compute the resulting shape from an operation. If an
/// operation cannot be run with input shapes, then it should throw an
/// exception.
......@@ -53,6 +55,11 @@ struct operation
friend std::ostream& operator<<(std::ostream& os, const operation& op);
};
/// Returns true if the operation does not require a context to run compute
bool is_context_free(const operation& x);
/// Returns true if the operation has a finalize method
bool has_finalize(const operation& x);
#else
namespace operation_stream {
......@@ -89,7 +96,7 @@ auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name())
} // namespace operation_equal
template <class T>
auto compute_op(rank<1>,
auto compute_op(rank<2>,
const T& x,
context& ctx,
const shape& output_shape,
......@@ -99,6 +106,14 @@ auto compute_op(rank<1>,
return x.compute(auto_any_cast(ctx), output_shape, input);
}
template <class T>
auto compute_op(
rank<1>, const T& x, context&, const shape& output_shape, const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input))
{
return x.compute(output_shape, input);
}
template <class T>
argument compute_op(rank<0>, const T& x, context&, const shape&, const std::vector<argument>&)
{
......@@ -110,7 +125,53 @@ template <class T>
argument
compute_op(const T& x, context& ctx, const shape& output_shape, const std::vector<argument>& input)
{
return compute_op(rank<1>{}, x, ctx, output_shape, input);
return compute_op(rank<2>{}, x, ctx, output_shape, input);
}
template <class T>
auto compute_op(rank<2>, const T& x, const shape& output_shape, const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input))
{
return x.compute(output_shape, input);
}
template <class T>
auto compute_op(rank<1>, const T& x, const shape& output_shape, const std::vector<argument>& input)
-> decltype(x.compute(auto_any_cast(std::declval<context&>()), output_shape, input))
{
std::string name = x.name();
MIGRAPHX_THROW("Not computable without a context: " + name);
}
template <class T>
argument compute_op(rank<0>, const T& x, const shape&, const std::vector<argument>&)
{
std::string name = x.name();
MIGRAPHX_THROW("Not computable: " + name);
}
template <class T>
argument compute_op(const T& x, const shape& output_shape, const std::vector<argument>& input)
{
return compute_op(rank<2>{}, x, output_shape, input);
}
template <class T>
auto is_context_free_op(rank<1>,
const T& x,
const shape& output_shape,
const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input), std::true_type{});
template <class T>
auto is_context_free_op(rank<0>, const T&, const shape&, const std::vector<argument>&)
-> std::false_type;
template <class T>
auto is_context_free_op(const T& x) -> decltype(is_context_free_op(
rank<1>{}, x, std::declval<const shape&>(), std::declval<std::vector<argument>>()))
{
return {};
}
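The `rank<2>`/`rank<1>`/`rank<0>` parameters form an overload-priority ladder: a higher rank converts to every lower one, so the most specific overload that compiles wins and SFINAE silently discards the rest. A minimal sketch of the idiom, with a stand-in `rank` that is assumed here, not the migraphx definition:

```cpp
#include <cstddef>
#include <iostream>
#include <string>

template <std::size_t N>
struct rank : rank<N - 1>
{
};
template <>
struct rank<0>
{
};

struct with_ctx
{
    std::string compute(int ctx) const { return "ctx:" + std::to_string(ctx); }
};
struct ctx_free
{
    std::string compute() const { return "context-free"; }
};

// Preferred overload: viable only when x.compute(ctx) is well-formed.
template <class T>
auto compute(rank<1>, const T& x, int ctx) -> decltype(x.compute(ctx))
{
    return x.compute(ctx);
}
// Fallback: rank<1> converts to rank<0>, so this is tried second.
template <class T>
auto compute(rank<0>, const T& x, int) -> decltype(x.compute())
{
    return x.compute();
}
template <class T>
std::string compute(const T& x, int ctx)
{
    return compute(rank<1>{}, x, ctx);
}

int main()
{
    std::cout << compute(with_ctx{}, 7) << "\n"; // ctx:7
    std::cout << compute(ctx_free{}, 7) << "\n"; // context-free
}
```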
template <class T>
......@@ -132,15 +193,57 @@ int output_alias_op(const T& x, const std::vector<shape>& shapes)
return output_alias_op(rank<1>{}, x, shapes);
}
template <class T>
auto finalize_op(
rank<1>, T& x, context& ctx, const shape& output_shape, const std::vector<shape>& input)
-> decltype(x.finalize(auto_any_cast(ctx), output_shape, input), void())
{
x.finalize(auto_any_cast(ctx), output_shape, input);
}
template <class T>
void finalize_op(rank<0>, T&, context&, const shape&, const std::vector<shape>&)
{
}
template <class T>
void finalize_op(T& x, context& ctx, const shape& output_shape, const std::vector<shape>& input)
{
finalize_op(rank<1>{}, x, ctx, output_shape, input);
}
template <class T>
auto has_finalize_op(
rank<1>, T& x, context& ctx, const shape& output_shape, const std::vector<shape>& input)
-> decltype(x.finalize(auto_any_cast(ctx), output_shape, input), std::true_type{});
template <class T>
auto has_finalize_op(rank<0>, T&, context&, const shape&, const std::vector<shape>&)
-> std::false_type;
template <class T>
auto has_finalize_op(const T&) -> decltype(has_finalize_op(rank<1>{},
std::declval<T&>(),
std::declval<context&>(),
std::declval<const shape&>(),
std::declval<std::vector<shape>>()))
{
return {};
}
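Note that these probe overloads are declared but never defined; only the return type of the winning overload is inspected, yielding `std::true_type` or `std::false_type` at compile time. A stand-alone sketch of the same detection trick, under illustrative names:

```cpp
#include <cstddef>
#include <type_traits>
#include <utility>

template <std::size_t N>
struct rank : rank<N - 1>
{
};
template <>
struct rank<0>
{
};

// Declared only: the decltype of the chosen overload carries the answer.
template <class T>
auto has_finalize_impl(rank<1>, T& x) -> decltype(x.finalize(), std::true_type{});
template <class T>
auto has_finalize_impl(rank<0>, T&) -> std::false_type;

template <class T>
constexpr bool has_finalize_v =
    decltype(has_finalize_impl(rank<1>{}, std::declval<T&>()))::value;

struct finalizable
{
    void finalize();
};
struct plain
{
};

static_assert(has_finalize_v<finalizable>, "detected");
static_assert(!has_finalize_v<plain>, "not detected");

int main() {}
```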
/*
* Type-erased interface for:
*
* struct operation
* {
* std::string name() const;
* bool is_context_free() const;
* bool has_finalize() const;
* int output_alias(const std::vector<shape>& input) const;
* void finalize(context& ctx,const shape& output,const std::vector<shape>& input) ;
* shape compute_shape(const std::vector<shape>& input) const;
* argument compute(context& ctx,const shape& output,const std::vector<argument>& input) const;
* argument compute(const shape& output,const std::vector<argument>& input) const;
* friend std::ostream & operator<<(std::ostream & os,const operation & op) ;
* friend bool operator==(const operation & x,const operation & y) ;
* };
......@@ -210,12 +313,30 @@ struct operation
return (*this).private_detail_te_get_handle().name();
}
bool is_context_free() const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().is_context_free();
}
bool has_finalize() const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().has_finalize();
}
int output_alias(const std::vector<shape>& input) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().output_alias(input);
}
void finalize(context& ctx, const shape& output, const std::vector<shape>& input)
{
assert((*this).private_detail_te_handle_mem_var);
(*this).private_detail_te_get_handle().finalize(ctx, output, input);
}
shape compute_shape(const std::vector<shape>& input) const
{
assert((*this).private_detail_te_handle_mem_var);
......@@ -228,6 +349,12 @@ struct operation
return (*this).private_detail_te_get_handle().compute(ctx, output, input);
}
argument compute(const shape& output, const std::vector<argument>& input) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().compute(output, input);
}
friend std::ostream& operator<<(std::ostream& os, const operation& op)
{
assert(op.private_detail_te_handle_mem_var);
......@@ -240,6 +367,12 @@ struct operation
return x.private_detail_te_get_handle().operator==(y);
}
friend bool is_shared(const operation& private_detail_x, const operation& private_detail_y)
{
return private_detail_x.private_detail_te_handle_mem_var ==
private_detail_y.private_detail_te_handle_mem_var;
}
private:
struct private_detail_te_handle_base_type
{
......@@ -247,13 +380,18 @@ struct operation
virtual std::shared_ptr<private_detail_te_handle_base_type> clone() const = 0;
virtual const std::type_info& type() const = 0;
virtual std::string name() const = 0;
virtual int output_alias(const std::vector<shape>& input) const = 0;
virtual shape compute_shape(const std::vector<shape>& input) const = 0;
virtual std::string name() const = 0;
virtual bool is_context_free() const = 0;
virtual bool has_finalize() const = 0;
virtual int output_alias(const std::vector<shape>& input) const = 0;
virtual void
finalize(context& ctx, const shape& output, const std::vector<shape>& input) = 0;
virtual shape compute_shape(const std::vector<shape>& input) const = 0;
virtual argument
compute(context& ctx, const shape& output, const std::vector<argument>& input) const = 0;
virtual std::ostream& operator_shift_left(std::ostream& os) const = 0;
virtual bool operator==(const operation& y) const = 0;
compute(context& ctx, const shape& output, const std::vector<argument>& input) const = 0;
virtual argument compute(const shape& output, const std::vector<argument>& input) const = 0;
virtual std::ostream& operator_shift_left(std::ostream& os) const = 0;
virtual bool operator==(const operation& y) const = 0;
};
template <typename PrivateDetailTypeErasedT>
......@@ -286,12 +424,26 @@ struct operation
std::string name() const override { return private_detail_te_value.name(); }
bool is_context_free() const override
{
return is_context_free_op(private_detail_te_value);
}
bool has_finalize() const override { return has_finalize_op(private_detail_te_value); }
int output_alias(const std::vector<shape>& input) const override
{
return output_alias_op(private_detail_te_value, input);
}
void finalize(context& ctx, const shape& output, const std::vector<shape>& input) override
{
finalize_op(private_detail_te_value, ctx, output, input);
}
shape compute_shape(const std::vector<shape>& input) const override
{
......@@ -306,6 +458,12 @@ struct operation
return compute_op(private_detail_te_value, ctx, output, input);
}
argument compute(const shape& output, const std::vector<argument>& input) const override
{
return compute_op(private_detail_te_value, output, input);
}
std::ostream& operator_shift_left(std::ostream& os) const override
{
using migraphx::operation_stream::operator<<;
......@@ -385,6 +543,22 @@ inline const ValueType& any_cast(const operation& x)
inline bool operator!=(const operation& x, const operation& y) { return !(x == y); }
inline bool is_context_free(const operation& op) { return op.is_context_free(); }
template <class T>
bool is_context_free(const T& x)
{
return is_context_free_op(x);
}
inline bool has_finalize(const operation& op) { return op.has_finalize(); }
template <class T>
bool has_finalize(const T& x)
{
return has_finalize_op(x);
}
#endif
} // namespace MIGRAPHX_INLINE_NS
......
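The `private_detail_te_*` machinery above is generated handle/model type erasure: a templated model stores the concrete value and forwards virtual calls, while the newly added `is_shared` compares handle identity rather than value equality. A hand-written miniature of the pattern, with illustrative names:

```cpp
#include <iostream>
#include <memory>
#include <string>

struct any_op
{
    template <class T>
    any_op(T x) : handle(std::make_shared<model<T>>(std::move(x)))
    {
    }

    std::string name() const { return handle->name(); }

    // Like the generated is_shared: true only when both wrappers point at
    // the same underlying handle.
    friend bool is_shared(const any_op& x, const any_op& y)
    {
        return x.handle == y.handle;
    }

    private:
    struct concept_t
    {
        virtual ~concept_t()             = default;
        virtual std::string name() const = 0;
    };
    template <class T>
    struct model : concept_t
    {
        T value;
        explicit model(T v) : value(std::move(v)) {}
        std::string name() const override { return value.name(); }
    };
    std::shared_ptr<const concept_t> handle;
};

struct relu_op
{
    std::string name() const { return "relu"; }
};

int main()
{
    any_op a = relu_op{};
    any_op b = a; // copies the shared handle
    std::cout << a.name() << " shared=" << is_shared(a, b) << "\n"; // relu shared=1
}
```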
......@@ -6,6 +6,8 @@
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
......@@ -14,9 +16,16 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
enum padding_mode_t
{
default_, // NOLINT
same,
valid
};
struct not_computable
{
argument compute(context&, const shape&, const std::vector<argument>&) const
argument compute(const shape&, const std::vector<argument>&) const
{
MIGRAPHX_THROW("not computable");
}
......@@ -51,18 +60,38 @@ struct batch_norm_inference
}
};
struct lrn
{
float alpha = 0.0001;
float beta = 0.75;
float bias = 1.0;
int size = 1;
std::string name() const { return "lrn"; }
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.alpha, "alpha"),
f(self.beta, "beta"),
f(self.bias, "bias"),
f(self.size, "size"));
}
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1);
return inputs.front();
}
};
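The new `lrn` operator uses the library's reflect pattern, in which `reflect` hands each field plus its serialized name to a visitor. A hedged sketch of consuming such a pair generically, assuming `pack` merely collects the visitor's results into a tuple (the real migraphx machinery differs in detail):

```cpp
#include <iostream>
#include <tuple>
#include <utility>

template <class... Ts>
auto pack(Ts... xs)
{
    return std::make_tuple(xs...);
}

struct lrn_like
{
    float alpha = 0.0001f;
    float beta  = 0.75f;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.alpha, "alpha"), f(self.beta, "beta"));
    }
};

int main()
{
    lrn_like op;
    auto fields = lrn_like::reflect(
        op, [](auto& value, const char* name) { return std::make_pair(name, value); });
    // Print every reflected field without hard-coding the struct's layout.
    std::apply([](auto... p) { ((std::cout << p.first << "=" << p.second << "\n"), ...); },
               fields);
}
```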
struct convolution
{
std::array<std::size_t, 2> padding = {{0, 0}};
std::array<std::size_t, 2> stride = {{1, 1}};
std::array<std::size_t, 2> dilation = {{1, 1}};
enum padding_mode_t
{
default_, // NOLINT
same,
valid
};
padding_mode_t padding_mode = default_;
int group = 1;
template <class Self, class F>
static auto reflect(Self& self, F f)
......@@ -70,7 +99,8 @@ struct convolution
return pack(f(self.padding, "padding"),
f(self.stride, "stride"),
f(self.dilation, "dilation"),
f(self.padding_mode, "padding_mode"));
f(self.padding_mode, "padding_mode"),
f(self.group, "group"));
}
std::string name() const { return "convolution"; }
......@@ -134,12 +164,7 @@ struct im2col
std::array<std::size_t, 2> padding = {{0, 0}};
std::array<std::size_t, 2> stride = {{1, 1}};
std::array<std::size_t, 2> dilation = {{1, 1}};
enum padding_mode_t
{
default_, // NOLINT
same,
valid
};
padding_mode_t padding_mode = default_;
template <class Self, class F>
......@@ -185,12 +210,14 @@ struct pooling
std::array<std::size_t, 2> padding = {{0, 0}};
std::array<std::size_t, 2> stride = {{1, 1}};
std::array<std::size_t, 2> lengths = {{1, 1}};
padding_mode_t padding_mode = default_;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.mode, "mode"),
f(self.padding, "padding"),
f(self.padding, "padding_mode"),
f(self.stride, "stride"),
f(self.lengths, "lengths"));
}
......@@ -207,7 +234,10 @@ struct pooling
assert(lengths[0] <= (input.lens()[2] + 2 * padding[0]));
assert(lengths[1] <= (input.lens()[3] + 2 * padding[1]));
return {t,
if(padding_mode == default_)
{
return {
t,
{
input.lens()[0],
input.lens()[1],
......@@ -222,6 +252,39 @@ struct pooling
static_cast<float>(stride[1]))) +
1)),
}};
}
else if(padding_mode == same)
{
return {t,
{input.lens()[0],
input.lens()[1],
static_cast<std::size_t>(
std::ceil(static_cast<double>(input.lens()[2]) / stride[0])),
static_cast<std::size_t>(
std::ceil(static_cast<double>(input.lens()[3]) / stride[1]))}};
}
else if(padding_mode == valid)
{
return {t,
{
input.lens()[0],
input.lens()[1],
std::size_t(std::max<std::ptrdiff_t>(
1,
std::ptrdiff_t(std::floor((input.lens()[2] - lengths[0]) /
static_cast<float>(stride[0]))) +
1)),
std::size_t(std::max<std::ptrdiff_t>(
1,
std::ptrdiff_t(std::floor((input.lens()[3] - lengths[1]) /
static_cast<float>(stride[1]))) +
1)),
}};
}
else
{
MIGRAPHX_THROW("Invalid padding mode");
}
}
};
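Per spatial dimension the three branches reduce to closed forms: `default_` gives floor((in + 2*pad - len)/stride) + 1, `same` gives ceil(in/stride), and `valid` gives floor((in - len)/stride) + 1. A quick numeric check:

```cpp
#include <cstddef>
#include <iostream>

// One spatial dimension: input 7, window 3, stride 2, padding 0.
int main()
{
    std::size_t in = 7, len = 3, stride = 2, pad = 0;
    std::size_t def   = (in + 2 * pad - len) / stride + 1; // floor form -> 3
    std::size_t same  = (in + stride - 1) / stride;        // ceil(in/stride) -> 4
    std::size_t valid = (in - len) / stride + 1;           // no padding -> 3
    std::cout << def << " " << same << " " << valid << "\n"; // 3 4 3
}
```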
......@@ -234,10 +297,28 @@ struct leaky_relu
check_shapes{inputs, *this}.has(1);
return inputs.front();
}
friend std::ostream& operator<<(std::ostream& os, const leaky_relu& op)
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.alpha, "alpha"));
}
};
struct elu
{
std::string name() const { return "elu"; }
float alpha;
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1);
return inputs.front();
}
template <class Self, class F>
static auto reflect(Self& self, F f)
{
os << op.name() << ":" << op.alpha;
return os;
return pack(f(self.alpha, "alpha"));
}
};
......@@ -271,14 +352,14 @@ struct transpose
}
std::vector<size_t> output_lens(input_lens.size());
std::vector<size_t> output_strides(input_lens.size());
for(int i = 0; i < output_lens.size(); i++)
for(std::size_t i = 0; i < output_lens.size(); i++)
{
output_lens[i] = input_lens[dims[i]];
output_strides[i] = input_strides[dims[i]];
}
return {t, output_lens, output_strides};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
......@@ -301,6 +382,17 @@ struct contiguous
auto t = inputs.at(0).type();
return {t, lens};
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
assert(output_shape.standard());
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
shape_for_each(output.get_shape(), [&](const auto& idx) {
output(idx.begin(), idx.end()) = input(idx.begin(), idx.end());
});
});
return result;
}
};
struct concat
......@@ -308,7 +400,7 @@ struct concat
std::size_t axis = 0;
std::string name() const { return "concat"; }
std::vector<std::size_t> compute_offsets(const shape& output_shape,
const std::vector<argument> args) const
const std::vector<argument>& args) const
{
std::vector<std::size_t> offsets;
std::vector<std::size_t> offset(args[0].get_shape().lens().size(), 0);
......@@ -352,7 +444,27 @@ struct concat
new_lens[axis] = new_dim_axis;
return {type, new_lens};
}
int output_alias(const std::vector<shape>&) const { return 0; }
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
std::vector<std::size_t> coffsets = compute_offsets(output_shape, args);
for(std::size_t l = 0; l < args.size(); l++)
{
auto argl = args[l];
std::size_t nelements = argl.get_shape().elements();
visit_all(result, argl)([&](auto output, auto input) {
auto slice_shape =
shape{output_shape.type(), input.get_shape().lens(), output_shape.strides()};
auto slice = make_view(slice_shape, output.data() + coffsets[l]);
// cppcheck-suppress useStlAlgorithm
for(std::size_t i = 0; i < nelements; i++)
{
slice[i] = input[i];
}
});
}
return result;
}
};
struct slice
......@@ -419,7 +531,7 @@ struct slice
}
return shape{t, new_lens, old_strides};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
auto input = args[0];
auto offset = compute_offset(input.get_shape()) * output_shape.type_size();
......@@ -469,7 +581,7 @@ struct squeeze
}
return shape{type, new_lens};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
......@@ -508,7 +620,7 @@ struct unsqueeze
}
return shape{type, new_lens};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
......@@ -538,11 +650,16 @@ struct reshape
{
if(dims[i] == 0)
rdims[i] = idims[i];
// since rdims uses std::size_t, a -1 would wrap to the maximum
// value of size_t and make the later computation incorrect
if(dims[i] == -1)
rdims[i] = 1;
}
if(n_neg_dims > 0)
{
size_t missing_dim =
-inputs.front().elements() /
inputs.front().elements() /
std::accumulate(rdims.begin(), rdims.end(), 1, std::multiplies<int64_t>());
for(std::size_t i = 0; i < rdims.size(); i++)
{
......@@ -550,23 +667,146 @@ struct reshape
rdims[i] = missing_dim;
}
}
if(dims.back() == -1)
{
rdims.pop_back();
std::copy(idims.begin() + rdims.size(), idims.end(), std::back_inserter(rdims));
}
shape s{inputs.front().type(), rdims};
if(s.elements() != inputs.front().elements())
MIGRAPHX_THROW("Wrong number of elements for reshape");
return s;
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
int output_alias(const std::vector<shape>&) const { return 0; }
};
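To make the 0 and -1 conventions concrete, a tiny worked example: 0 copies the corresponding input dimension and -1 absorbs whatever extent keeps the element count unchanged.

```cpp
#include <cstdint>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    std::vector<std::size_t> idims = {2, 3, 4}; // 24 elements
    std::vector<int64_t> dims      = {0, -1};   // request {2, ?}
    std::size_t elements = 2 * 3 * 4;
    std::size_t fixed    = idims[0];        // dims[0] == 0 -> copy the 2
    std::size_t inferred = elements / fixed; // dims[1] == -1 -> 24 / 2 = 12
    std::cout << "{" << fixed << ", " << inferred << "}\n"; // {2, 12}
}
```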
struct pad
{
std::vector<int64_t> pads;
float value = 0.0f;
enum pad_op_mode_t
{
constant_pad,
reflect_pad,
edge_pad
};
pad_op_mode_t mode = constant_pad;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.mode, "mode"), f(self.pads, "pads"), f(self.value, "value"));
}
std::string name() const { return "pad"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1);
auto&& idims = inputs.front().lens();
std::vector<std::size_t> rdims(idims.begin(), idims.end());
std::size_t num_dims = rdims.size();
for(std::size_t i = 0; i < num_dims; i++)
{
rdims[i] += pads[i] + pads[i + num_dims];
}
shape s{inputs.front().type(), rdims};
return s;
}
};
struct as_shape
{
shape s;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.s, "shape"));
}
std::string name() const { return "as_shape"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(1).standard();
assert(inputs.front().elements() == s.elements());
return s;
}
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
int output_alias(const std::vector<shape>&) const { return 0; }
};
struct gather
{
int axis = 0;
std::string name() const { return "gather"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
auto lens = inputs[0].lens();
int n_dim = static_cast<int>(lens.size());
if(axis >= n_dim || axis < -n_dim)
{
MIGRAPHX_THROW("Gather: axis is out of range.");
}
// negative axis means counting dimensions from back
int axis_index = (axis < 0) ? (n_dim + axis) : axis;
auto type = inputs[0].type();
lens.erase(lens.begin() + axis_index);
if(!inputs[1].scalar())
{
auto ind_lens = inputs[1].lens();
lens.insert(lens.begin() + axis_index, ind_lens.begin(), ind_lens.end());
}
// for scalar output
if(lens.empty())
{
return {type};
}
return {type, lens};
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
// negative axis means counting dimensions from back
int axis_index =
(axis < 0) ? static_cast<int>(args[0].get_shape().lens().size() + axis) : axis;
// max dimension in axis
visit_all(result, args[0])([&](auto output, auto data) {
args[1].visit([&](auto indices) {
if(output_shape.scalar())
{
output[0] = data[indices.front()];
}
else
{
auto out_lens = data.get_shape().lens();
out_lens[axis_index] = indices.get_shape().elements();
migraphx::shape out_comp_shape{data.get_shape().type(), out_lens};
shape_for_each(out_comp_shape, [&](const auto& out_idx) {
auto data_idx = out_idx;
data_idx[axis_index] = indices[data_idx[axis_index]];
output[out_comp_shape.index(out_idx.begin(), out_idx.end())] =
data(data_idx.begin(), data_idx.end());
});
}
});
});
return result;
}
};
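A worked example of the gather semantics implemented above, on a flat buffer:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// data has shape {3, 2}; indices {0, 2} select rows along axis 0,
// producing output shape {2, 2}.
int main()
{
    std::vector<float> data = {1, 2, 3, 4, 5, 6}; // rows {1,2} {3,4} {5,6}
    std::vector<std::size_t> indices = {0, 2};
    std::vector<float> out;
    for(auto row : indices)
        for(std::size_t j = 0; j < 2; j++)
            out.push_back(data[row * 2 + j]);
    for(auto v : out)
        std::cout << v << " "; // 1 2 5 6
    std::cout << "\n";
}
```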
struct dot
{
float alpha = 1.0;
......@@ -606,7 +846,7 @@ struct identity
{
std::string name() const { return "identity"; }
shape compute_shape(std::vector<shape> inputs) const { return inputs.at(0); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.at(0).data)};
}
......@@ -623,6 +863,11 @@ struct exp : unary
std::string name() const { return "exp"; }
};
struct log : unary
{
std::string name() const { return "log"; }
};
struct sin : unary
{
std::string name() const { return "sin"; }
......@@ -653,6 +898,16 @@ struct atan : unary
std::string name() const { return "atan"; }
};
struct sinh : unary
{
std::string name() const { return "sinh"; }
};
struct cosh : unary
{
std::string name() const { return "cosh"; }
};
struct tanh : unary
{
std::string name() const { return "tanh"; }
......@@ -709,7 +964,7 @@ struct flatten
std::accumulate(lens.begin() + axis, lens.end(), std::size_t{1}, std::multiplies<>{});
return {inputs.at(0).type(), {x, y}};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.front().data)};
}
......@@ -761,7 +1016,7 @@ struct broadcast
return {t, broadcast_shape.lens(), std::move(bcast_strides)};
}
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.at(0).data)};
}
......@@ -803,7 +1058,7 @@ struct multibroadcast
}
return {t, output_lens, bcast_strides};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.at(0).data)};
}
......@@ -819,13 +1074,12 @@ struct scalar
shape compute_shape(std::vector<shape> inputs) const
{
assert(check_shapes{inputs}.has(1).only_dims(1).size() == 1);
auto t = inputs.at(0).type();
auto input = inputs.at(0);
auto t = inputs.at(0).type();
std::vector<std::size_t> strides(scalar_bcast.lens().size(), 0);
return {t, scalar_bcast.lens(), strides};
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
argument compute(shape output_shape, std::vector<argument> args) const
{
return {std::move(output_shape), std::move(args.at(0).data)};
}
......@@ -863,6 +1117,16 @@ struct div : binary
std::string name() const { return "div"; }
};
struct max : binary
{
std::string name() const { return "max"; }
};
struct min : binary
{
std::string name() const { return "min"; }
};
struct load
{
shape s;
......@@ -880,7 +1144,7 @@ struct load
check_shapes{inputs}.has(1);
return s;
}
argument compute(context&, const shape&, const std::vector<argument>& args) const
argument compute(const shape&, const std::vector<argument>& args) const
{
return {s, args[0].data() + offset};
}
......@@ -903,12 +1167,167 @@ struct outline
check_shapes{inputs, *this}.has(0);
return s;
}
argument compute(context&, const shape&, const std::vector<argument>&) const
argument compute(const shape&, const std::vector<argument>&) const { return {s, nullptr}; }
};
// indicate rnn computation direction
enum class rnn_direction
{
forward,
reverse,
bidirectional,
};
struct rnn
{
std::size_t hidden_size = 1;
std::vector<operation> actv_funcs{tanh{}, tanh{}};
rnn_direction direction = rnn_direction::forward;
float clip = 0.0f;
std::string name() const { return "rnn"; }
shape compute_shape(std::vector<shape> inputs) const
{
auto in_dims = inputs[0].lens();
auto hidden_dims = inputs[2].lens();
if(hidden_size != hidden_dims[2])
{
MIGRAPHX_THROW("RNN: hidden size mismatch in attribute and input");
}
std::size_t num_directions = 1;
if(direction == rnn_direction::bidirectional)
{
num_directions = 2;
}
if(num_directions != hidden_dims[0])
{
MIGRAPHX_THROW("RNN: num_direction mismatch in attribute and input");
}
std::vector<std::size_t> out_dims(in_dims);
out_dims.insert(out_dims.begin() + 1, num_directions);
out_dims.back() = hidden_size;
return {inputs[0].type(), out_dims};
}
};
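A quick check of the rnn output-shape rule: `num_directions` is inserted after the sequence dimension and the trailing dimension becomes `hidden_size`.

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Input {seq_len, batch, input_size} -> {seq_len, num_directions, batch, hidden_size}.
int main()
{
    std::vector<std::size_t> in_dims = {10, 4, 8}; // seq=10, batch=4, input=8
    std::size_t num_directions = 2;                // bidirectional
    std::size_t hidden_size    = 16;
    std::vector<std::size_t> out(in_dims);
    out.insert(out.begin() + 1, num_directions);
    out.back() = hidden_size;
    for(auto d : out)
        std::cout << d << " "; // 10 2 4 16
    std::cout << "\n";
}
```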
struct rnn_last_output
{
std::string name() const { return "rnn_last_output"; }
shape compute_shape(std::vector<shape> inputs) const
{
return {s, nullptr};
check_shapes{inputs, *this}.has(1);
auto dims = inputs[0].lens();
// remove the first dimension; the remaining dims are the output shape
dims.erase(dims.begin());
return {inputs[0].type(), dims};
}
};
struct gru
{
std::size_t hidden_size = 1;
std::vector<operation> actv_funcs{sigmoid{}, tanh{}};
rnn_direction direction = rnn_direction::forward;
float clip = 0.0f;
int linear_before_reset = 0;
std::string name() const { return "gru"; }
shape compute_shape(std::vector<shape> inputs) const
{
auto in_dims = inputs[0].lens();
auto hidden_dims = inputs[2].lens();
if(hidden_size != hidden_dims[2])
{
MIGRAPHX_THROW("GRU: hidden size mismatch in attribute and input");
}
std::size_t num_directions = 1;
if(direction == rnn_direction::bidirectional)
{
num_directions = 2;
}
if(num_directions != hidden_dims[0])
{
MIGRAPHX_THROW("GRU: num_direction does not match the direction attribute");
}
std::vector<std::size_t> out_dims(in_dims);
out_dims.insert(out_dims.begin() + 1, num_directions);
out_dims.back() = hidden_size;
return {inputs[0].type(), out_dims};
}
};
struct lstm
{
std::size_t hidden_size = 1;
std::vector<operation> actv_funcs{sigmoid{}, tanh{}, tanh{}};
rnn_direction direction = rnn_direction::forward;
float clip = 0.0f;
int input_forget = 0;
std::string name() const { return "lstm"; }
shape compute_shape(std::vector<shape> inputs) const
{
auto in_dims = inputs[0].lens();
auto hidden_dims = inputs[2].lens();
if(hidden_size != hidden_dims[2])
{
MIGRAPHX_THROW("LSTM: hidden size mismatch in attribute and input");
}
std::size_t num_directions = 1;
if(direction == rnn_direction::bidirectional)
{
num_directions = 2;
}
if(num_directions != hidden_dims[0])
{
MIGRAPHX_THROW("LSTM: num_direction does not match the direction attribute");
}
std::vector<std::size_t> out_dims(in_dims);
out_dims.insert(out_dims.begin() + 1, num_directions);
out_dims.back() = hidden_size;
return {inputs[0].type(), out_dims};
}
};
struct lstm_last_cell_output
{
std::string name() const { return "lstm_last_cell_output"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1);
auto dims = inputs[0].lens();
// remove the first dimension; the remaining dims are the output shape
dims.erase(dims.begin());
return {inputs[0].type(), dims};
}
};
struct undefined
{
std::string name() const { return "undefined"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(0);
return {};
}
argument compute(const shape&, const std::vector<argument>&) const { return {{}, nullptr}; }
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
#ifndef MIGRAPHX_GUARD_RTGLIB_PAR_DFOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_PAR_DFOR_HPP
#include <migraphx/par_for.hpp>
#include <migraphx/functional.hpp>
#include <array>
#include <numeric>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
template <class... Ts>
auto par_dfor(Ts... xs)
{
return [=](auto f) {
using array_type = std::array<std::size_t, sizeof...(Ts)>;
array_type lens = {{static_cast<std::size_t>(xs)...}};
auto n = std::accumulate(lens.begin(), lens.end(), 1, std::multiplies<std::size_t>{});
const std::size_t min_grain = 8;
if(n > 2 * min_grain)
{
array_type strides;
strides.fill(1);
std::partial_sum(lens.rbegin(),
lens.rend() - 1,
strides.rbegin() + 1,
std::multiplies<std::size_t>());
auto size =
std::accumulate(lens.begin(), lens.end(), 1, std::multiplies<std::size_t>());
par_for(size, min_grain, [&](std::size_t i) {
array_type indices;
std::transform(strides.begin(),
strides.end(),
lens.begin(),
indices.begin(),
[&](size_t stride, size_t len) { return (i / stride) % len; });
migraphx::unpack(f, indices);
});
}
else
{
dfor(xs...)(f);
}
};
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
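A hypothetical usage of `par_dfor`: each extent contributes one index argument, and distinct index tuples touch distinct elements, so the parallel writes do not race. The include path is assumed from the guard and the neighboring headers.

```cpp
#include <cstddef>
#include <vector>
#include <migraphx/par_dfor.hpp> // assumed include path, per the guard above

int main()
{
    const std::size_t rows = 64, cols = 64;
    std::vector<float> m(rows * cols);
    // f receives one index per extent, in the order the extents were given.
    migraphx::par_dfor(rows, cols)([&](std::size_t i, std::size_t j) {
        m[i * cols + j] = static_cast<float>(i + j);
    });
}
```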
#ifndef MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP
#include <thread>
#include <cmath>
#include <algorithm>
#include <vector>
#include <cassert>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct joinable_thread : std::thread
{
template <class... Xs>
joinable_thread(Xs&&... xs) : std::thread(std::forward<Xs>(xs)...) // NOLINT
{
}
joinable_thread& operator=(joinable_thread&& other) = default;
joinable_thread(joinable_thread&& other) = default;
~joinable_thread()
{
if(this->joinable())
this->join();
}
};
template <class F>
void par_for_impl(std::size_t n, std::size_t threadsize, F f)
{
if(threadsize <= 1)
{
for(std::size_t i = 0; i < n; i++)
f(i);
}
else
{
std::vector<joinable_thread> threads(threadsize);
// Using const here causes gcc 5 to ICE
#if(!defined(__GNUC__) || __GNUC__ != 5)
const
#endif
std::size_t grainsize = std::ceil(static_cast<double>(n) / threads.size());
std::size_t work = 0;
std::generate(threads.begin(), threads.end(), [=, &work] {
auto result = joinable_thread([=] {
std::size_t start = work;
std::size_t last = std::min(n, work + grainsize);
for(std::size_t i = start; i < last; i++)
{
f(i);
}
});
work += grainsize;
return result;
});
assert(work >= n);
}
}
template <class F>
void par_for(std::size_t n, std::size_t min_grain, F f)
{
const auto threadsize =
std::min<std::size_t>(std::thread::hardware_concurrency(), n / min_grain);
par_for_impl(n, threadsize, f);
}
template <class F>
void par_for(std::size_t n, F f)
{
const int min_grain = 8;
par_for(n, min_grain, f);
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
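Usage sketch of `par_for`: with the default `min_grain` of 8, `threadsize` is min(hardware_concurrency, n/8), and each thread runs one contiguous chunk of ceil(n/threadsize) indices.

```cpp
#include <atomic>
#include <cstddef>
#include <iostream>
#include <migraphx/par_for.hpp>

int main()
{
    // Sum 0..999 across threads; the atomic makes the accumulation safe.
    std::atomic<std::size_t> sum{0};
    migraphx::par_for(1000, [&](std::size_t i) { sum += i; });
    std::cout << sum << "\n"; // 499500
}
```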
......@@ -105,7 +105,13 @@ struct pass
void apply(program& p) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().apply(p);
(*this).private_detail_te_get_handle().apply(p);
}
friend bool is_shared(const pass& private_detail_x, const pass& private_detail_y)
{
return private_detail_x.private_detail_te_handle_mem_var ==
private_detail_y.private_detail_te_handle_mem_var;
}
private:
......@@ -149,7 +155,7 @@ struct pass
std::string name() const override { return private_detail_te_value.name(); }
void apply(program& p) const override { return private_detail_te_value.apply(p); }
void apply(program& p) const override { private_detail_te_value.apply(p); }
PrivateDetailTypeErasedT private_detail_te_value;
};
......
......@@ -91,16 +91,22 @@ struct program
shape get_shape() const;
context& get_context() const;
instruction_ref validate() const;
void compile(const target& t, tracer trace = tracer{});
void finalize();
void perf_report(std::ostream& os, std::size_t n, parameter_map params) const;
void debug_print() const;
void debug_print(instruction_ref ins) const;
void debug_print(const std::vector<instruction_ref>& inss) const;
void dry_run(parameter_map params) const;
friend std::ostream& operator<<(std::ostream& os, const program& p);
friend bool operator==(const program& x, const program& y);
friend bool operator!=(const program& x, const program& y) { return !(x == y); }
......
#ifndef MIGRAPHX_GUARD_RTGLIB_REWRITE_RNN_HPP
#define MIGRAPHX_GUARD_RTGLIB_REWRITE_RNN_HPP
#include <string>
#include <vector>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct program;
/**
* Rewrite rnn to gemm and add.
*/
struct rewrite_rnn
{
std::string name() const { return "rewrite_rnn"; }
void apply(program& prog) const;
private:
// for vanilla rnn operators
void apply_vanilla_rnn(program& prog, instruction_ref ins) const;
std::vector<instruction_ref> vanilla_rnn_cell(bool is_forward,
program& prog,
instruction_ref ins,
instruction_ref input,
instruction_ref w,
instruction_ref r,
instruction_ref bias,
instruction_ref ih,
operation& actv_func) const;
std::vector<operation> vanilla_rnn_actv_funcs(instruction_ref ins) const;
// for gru operators
void apply_gru(program& prog, instruction_ref ins) const;
std::vector<instruction_ref> gru_cell(bool is_forward,
program& prog,
instruction_ref ins,
std::vector<instruction_ref> inputs,
int linear_before_reset,
const operation& actv_func1,
const operation& actv_func2) const;
std::vector<operation> gru_actv_funcs(instruction_ref ins) const;
// for lstm operators
void apply_lstm(program& prog, instruction_ref ins) const;
std::vector<instruction_ref> lstm_cell(bool is_forward,
program& prog,
instruction_ref ins,
std::vector<instruction_ref> inputs,
const operation& actv_func1,
const operation& actv_func2,
const operation& actv_func3) const;
std::vector<operation> lstm_actv_funcs(instruction_ref ins) const;
};
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -35,22 +35,22 @@ struct shape
m(uint64_type, uint64_t)
// clang-format on
#define MIGRAPHX_SHAPE_ENUM_TYPES(x, t) x,
#define MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES(x, t) x,
enum type_t
{
MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_ENUM_TYPES)
MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES)
};
#undef MIGRAPHX_SHAPE_ENUM_TYPES
#undef MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES
template <class T, class = void>
struct get_type;
#define MIGRAPHX_SHAPE_GET_TYPE(x, t) \
#define MIGRAPHX_SHAPE_GENERATE_GET_TYPE(x, t) \
template <class T> \
struct get_type<t, T> : std::integral_constant<type_t, x> \
{ \
};
MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_GET_TYPE)
#undef MIGRAPHX_SHAPE_GET_TYPE
MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_GENERATE_GET_TYPE)
#undef MIGRAPHX_SHAPE_GENERATE_GET_TYPE
template <class T>
struct get_type<const T> : get_type<T>
......@@ -62,6 +62,19 @@ struct shape
shape(type_t t, std::vector<std::size_t> l);
shape(type_t t, std::vector<std::size_t> l, std::vector<std::size_t> s);
template <class Range>
shape(type_t t, const Range& l) : shape(t, std::vector<std::size_t>(l.begin(), l.end()))
{
}
template <class Range1, class Range2>
shape(type_t t, const Range1& l, const Range2& s)
: shape(t,
std::vector<std::size_t>(l.begin(), l.end()),
std::vector<std::size_t>(s.begin(), s.end()))
{
}
type_t type() const;
const std::vector<std::size_t>& lens() const;
const std::vector<std::size_t>& strides() const;
......@@ -141,6 +154,8 @@ struct shape
{
return reinterpret_cast<const T*>(buffer) + n;
}
type_t type_enum() const { return get_type<T>{}; }
};
template <class Visitor>
......@@ -148,14 +163,22 @@ struct shape
{
switch(this->type())
{
#define MIGRAPHX_SHAPE_VISITOR_CASE(x, t) \
#define MIGRAPHX_SHAPE_GENERATE_VISITOR_CASE(x, t) \
case x: v(as<t>()); return;
MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_VISITOR_CASE)
#undef MIGRAPHX_SHAPE_VISITOR_CASE
MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_GENERATE_VISITOR_CASE)
#undef MIGRAPHX_SHAPE_GENERATE_VISITOR_CASE
}
MIGRAPHX_THROW("Unknown type");
}
template <class Visitor>
static void visit_types(Visitor v)
{
#define MIGRAPHX_SHAPE_GENERATE_VISITOR_ALL(x, t) v(as<t>());
MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_GENERATE_VISITOR_ALL)
#undef MIGRAPHX_SHAPE_GENERATE_VISITOR_ALL
}
private:
std::shared_ptr<const shape_impl> impl;
......
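The renamed macros follow the X-macro pattern: a single list macro owns the (enum, type) pairs, and each use-site stamps out code per entry, which is why the generator macros are defined and immediately undefined. A tiny self-contained version with an illustrative two-entry list:

```cpp
#include <cstdint>
#include <iostream>

#define MY_VISIT_TYPES(m) \
    m(float_type, float)  \
    m(int32_type, int32_t)

// Generate the enum from the list.
#define MY_GENERATE_ENUM(x, t) x,
enum type_t
{
    MY_VISIT_TYPES(MY_GENERATE_ENUM)
};
#undef MY_GENERATE_ENUM

// Generate one switch case per list entry, passing a value of that type.
template <class Visitor>
void visit(type_t type, Visitor v)
{
#define MY_GENERATE_CASE(x, t) \
    case x: v(t{}); return;
    switch(type)
    {
        MY_VISIT_TYPES(MY_GENERATE_CASE)
    }
#undef MY_GENERATE_CASE
}

int main()
{
    visit(int32_type, [](auto x) { std::cout << sizeof(x) << "\n"; }); // 4
}
```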
......@@ -127,6 +127,12 @@ struct target
return (*this).private_detail_te_get_handle().get_context();
}
friend bool is_shared(const target& private_detail_x, const target& private_detail_y)
{
return private_detail_x.private_detail_te_handle_mem_var ==
private_detail_y.private_detail_te_handle_mem_var;
}
private:
struct private_detail_te_handle_base_type
{
......
......@@ -124,6 +124,8 @@ struct tensor_view
return m_data + this->size();
}
std::vector<T> to_vector() const { return std::vector<T>(this->begin(), this->end()); }
friend std::ostream& operator<<(std::ostream& os, const tensor_view<T>& x)
{
if(!x.empty())
......@@ -164,7 +166,7 @@ bool operator!=(const tensor_view<T>& x, const tensor_view<U>& y)
}
template <class T>
tensor_view<T> make_view(shape s, T* data)
tensor_view<T> make_view(const shape& s, T* data)
{
return {s, data};
}
......
......@@ -18,7 +18,7 @@ const std::string& get_type_name()
name = typeid(PrivateMigraphTypeNameProbe).name();
name = name.substr(7);
#else
const char parameter_name[] = "PrivateMigraphTypeNameProbe =";
const char parameter_name[] = "PrivateMigraphTypeNameProbe ="; // NOLINT
name = __PRETTY_FUNCTION__;
......
......@@ -97,7 +97,7 @@ const std::vector<instruction_ref>& instruction::outputs() const { return output
bool operator==(const instruction& x, const instruction& y)
{
if(not(x.result == y.result and x.op == y.op and x.arguments == y.arguments))
if(std::tie(x.result, x.op, x.arguments) != std::tie(y.result, y.op, y.arguments))
return false;
if(x.name() == "@literal")
return x.lit == y.lit;
......@@ -162,25 +162,54 @@ void instruction::replace_argument(instruction_ref old, instruction_ref new_ins)
old->remove_output(*this);
}
std::vector<shape> compute_shapes(const std::vector<instruction_ref>& args)
argument instruction::eval() const
{
std::vector<shape> shapes(args.size());
std::transform(
args.begin(), args.end(), shapes.begin(), [](instruction_ref i) { return i->get_shape(); });
return shapes;
if(op.name() == "@literal")
{
return this->get_literal().get_argument();
}
if(is_context_free(op))
{
std::vector<argument> args;
for(auto&& arg : this->inputs())
{
argument a = arg->eval();
if(a.empty())
return {};
args.push_back(a);
}
return op.compute(result, args);
}
return {};
}
instruction_ref instruction::get_output_alias(instruction_ref ins)
void instruction::finalize(context& ctx)
{
auto i = ins->get_operator().output_alias(compute_shapes(ins->inputs()));
if(has_finalize(this->op))
this->op.finalize(ctx, this->get_shape(), to_shapes(this->inputs()));
}
instruction_ref instruction::get_output_alias(instruction_ref ins, bool shallow)
{
auto i = ins->get_operator().output_alias(to_shapes(ins->inputs()));
if(i < 0)
return ins;
if(shallow)
return ins->inputs().at(i);
return get_output_alias(ins->inputs().at(i));
}
std::vector<shape> to_shapes(const std::vector<instruction_ref>& args)
{
std::vector<shape> shapes(args.size());
std::transform(
args.begin(), args.end(), shapes.begin(), [](instruction_ref i) { return i->get_shape(); });
return shapes;
}
shape compute_shape(const operation& op, const std::vector<instruction_ref>& args)
{
return op.compute_shape(compute_shapes(args));
return op.compute_shape(to_shapes(args));
}
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -14,7 +14,10 @@
auto reverse_int(unsigned int i)
{
unsigned char c1, c2, c3, c4;
unsigned char c1;
unsigned char c2;
unsigned char c3;
unsigned char c4;
c1 = i & 255u;
c2 = (i >> 8u) & 255u;
c3 = (i >> 16u) & 255u;
......@@ -32,7 +35,9 @@ read_mnist_images(const std::string& full_path, int& number_of_images, int& imag
if(file.is_open())
{
int magic_number = 0, n_rows = 0, n_cols = 0;
int magic_number = 0;
int n_rows = 0;
int n_cols = 0;
file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
magic_number = reverse_int(magic_number);
......