gaoqiong / MIGraphX · Commits

Commit ac972127 (unverified)
Authored Apr 19, 2019 by Paul Fultz II; committed by GitHub on Apr 19, 2019

Merge branch 'develop' into propogate-constant

Parents: 68858a5b 849f7d92
Changes: 32 files in total; this page shows 20 changed files with 288 additions and 74 deletions (+288 −74).
Changed files:

src/CMakeLists.txt                        +1   −0
src/fwd_conv_batchnorm_rewrite.cpp        +1   −1
src/include/migraphx/op/broadcast.hpp     +11  −13
src/include/migraphx/op/common.hpp        +2   −0
src/include/migraphx/op/gather.hpp        +7   −0
src/include/migraphx/op/gru.hpp           +10  −0
src/include/migraphx/op/logsoftmax.hpp    +7   −0
src/include/migraphx/op/lstm.hpp          +9   −0
src/include/migraphx/op/rnn.hpp           +9   −0
src/include/migraphx/op/scalar.hpp        +9   −3
src/include/migraphx/pass_manager.hpp     +25  −0
src/include/migraphx/program.hpp          +1   −0
src/include/migraphx/ranges.hpp           +1   −1
src/onnx/onnx.cpp                         +6   −6
src/pass_manager.cpp                      +42  −0
src/program.cpp                           +43  −27
src/rewrite_rnn.cpp                       +31  −19
src/tf/tf.cpp                             +70  −1
test/auto_contiguous_test.cpp             +2   −2
test/cpu_dot_op_test.cpp                  +1   −1
src/CMakeLists.txt
@@ -20,6 +20,7 @@ add_library(migraphx
     program.cpp
     shape.cpp
     schedule.cpp
+    pass_manager.cpp
     simplify_algebra.cpp
     simplify_reshapes.cpp
     opt/memory_coloring.cpp
src/fwd_conv_batchnorm_rewrite.cpp
@@ -63,7 +63,7 @@ void fwd_conv_batchnorm_rewrite::apply(program& p) const
             auto l_weights = p.add_literal({weights.get_shape(), new_weights.data()});
             auto l_bias    = p.add_literal({new_bias.get_shape(), new_bias.data()});
             auto c = p.replace_instruction(conv_ins, conv_op, {conv_ins->inputs()[0], l_weights});
-            auto b = p.insert_instruction(ins, op::broadcast{1, c->get_shape()}, l_bias);
+            auto b = p.insert_instruction(ins, op::broadcast{1, c->get_shape().lens()}, l_bias);
             p.replace_instruction(ins, op::add{}, {c, b});
         }
     }
src/include/migraphx/op/broadcast.hpp
@@ -27,38 +27,36 @@ namespace op {
 struct broadcast
 {
     uint64_t axis = 0;
+    std::vector<std::size_t> broadcast_lens;
 
     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
-        return pack(f(self.axis, "axis"));
+        return pack(f(self.axis, "axis"), f(self.broadcast_lens, "dims"));
     }
 
-    shape broadcast_shape;
     std::string name() const { return "broadcast"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
         auto t     = inputs.at(0).type();
         auto input = inputs.at(0);
 
-        std::vector<size_t> bcast_strides(broadcast_shape.lens().size(), 0);
-        if(std::all_of(broadcast_shape.lens().cbegin(), broadcast_shape.lens().cend(), [&](auto x) {
-               return x == 1;
-           }))
+        std::vector<size_t> bcast_strides(broadcast_lens.size(), 0);
+        if(std::all_of(
+               broadcast_lens.cbegin(), broadcast_lens.cend(), [&](auto x) { return x == 1; }))
         {
             if(axis != 0)
-                MIGRAPHX_THROW("when broadcasting tensor of size 1, axis should be 0");
-            return {t, broadcast_shape.lens(), std::move(bcast_strides)};
+                MIGRAPHX_THROW(
+                    "BROADCAST: when broadcasting tensor of size 1, axis should be 0");
+            return {t, broadcast_lens, std::move(bcast_strides)};
         }
         else
         {
-            assert(broadcast_shape.lens().size() - axis >= input.lens().size());
+            assert(broadcast_lens.size() - axis >= input.lens().size());
             if(!std::equal(
-                   input.lens().begin(), input.lens().end(), broadcast_shape.lens().begin() + axis))
-                MIGRAPHX_THROW("when broadcasting success sizes must match");
+                   input.lens().begin(), input.lens().end(), broadcast_lens.begin() + axis))
+                MIGRAPHX_THROW("BROADCAST: when broadcasting success sizes must match");
             std::copy(
                 input.strides().begin(), input.strides().end(), bcast_strides.begin() + axis);
-            return {t, broadcast_shape.lens(), std::move(bcast_strides)};
+            return {t, broadcast_lens, std::move(bcast_strides)};
         }
     }
 
     argument compute(shape output_shape, std::vector<argument> args) const
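Usage note, not part of this commit: after this change the op stores the output dimensions directly, so call sites construct it from lens() rather than from a full shape object, as the call-site updates later in this diff show. A minimal sketch, where p, input, and bias are assumed to be an existing program and two of its instructions:

    // op::broadcast{axis, dims}: the second field is now a std::vector<std::size_t>
    // of output lengths (serialised as "dims" by reflect) instead of a migraphx::shape.
    auto out_lens = input->get_shape().lens();
    auto b = p.add_instruction(migraphx::op::broadcast{1, out_lens}, bias);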
src/include/migraphx/op/common.hpp
@@ -31,6 +31,8 @@ enum class rnn_direction
     bidirectional,
 };
 
+std::ostream& operator<<(std::ostream& os, rnn_direction v);
+
 } // namespace op
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/include/migraphx/op/gather.hpp
@@ -19,6 +19,13 @@ namespace op {
 struct gather
 {
     int axis = 0;
 
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.axis, "axis"));
+    }
+
     std::string name() const { return "gather"; }
     shape compute_shape(std::vector<shape> inputs) const
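For context, not part of this commit: the reflect members added to gather and the other operators below all follow the same idiom, handing each settable field and its serialized name to a visitor. A self-contained sketch of that idiom, using std::make_tuple as a stand-in for MIGraphX's real pack machinery and a hypothetical printing visitor:

    #include <iostream>
    #include <tuple>

    struct gather_like
    {
        int axis = 0;

        // Mirrors the reflect added in this commit: expose each field with its name.
        template <class Self, class F>
        static auto reflect(Self& self, F f)
        {
            return std::make_tuple(f(self.axis, "axis")); // stand-in for migraphx's pack()
        }
    };

    int main()
    {
        gather_like g;
        g.axis = 2;
        // A toy visitor: print "name=value" for every reflected field.
        gather_like::reflect(g, [](auto& field, const char* name) {
            std::cout << name << "=" << field << "\n";
            return 0;
        });
    }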
src/include/migraphx/op/gru.hpp
@@ -27,6 +27,16 @@ struct gru
     float clip = 0.0f;
     int linear_before_reset = 0;
 
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.hidden_size, "hidden_size"),
+                    f(self.actv_funcs, "actv_func"),
+                    f(self.direction, "direction"),
+                    f(self.clip, "clip"),
+                    f(self.linear_before_reset, "linear_before_reset"));
+    }
+
     std::string name() const { return "gru"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
src/include/migraphx/op/logsoftmax.hpp
@@ -19,6 +19,13 @@ namespace op {
 struct logsoftmax
 {
     int axis = 1;
 
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.axis, "axis"));
+    }
+
     std::string name() const { return "logsoftmax"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
src/include/migraphx/op/lstm.hpp
@@ -25,6 +25,15 @@ struct lstm
     float clip = 0.0f;
     int input_forget = 0;
 
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.hidden_size, "hidden_size"),
+                    f(self.actv_funcs, "actv_func"),
+                    f(self.direction, "direction"),
+                    f(self.input_forget, "input_forget"));
+    }
+
     std::string name() const { return "lstm"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
src/include/migraphx/op/rnn.hpp
@@ -25,6 +25,15 @@ struct rnn
     rnn_direction direction = rnn_direction::forward;
     float clip = 0.0f;
 
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.hidden_size, "hidden_size"),
+                    f(self.actv_funcs, "actv_func"),
+                    f(self.direction, "direction"),
+                    f(self.clip, "clip"));
+    }
+
     std::string name() const { return "rnn"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
src/include/migraphx/op/scalar.hpp
@@ -18,7 +18,13 @@ namespace op {
 struct scalar
 {
-    shape scalar_bcast;
+    std::vector<std::size_t> scalar_bcast_lens;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.scalar_bcast_lens, "scalar_bcst_dims"));
+    }
 
     std::string name() const { return "scalar"; }
@@ -26,8 +32,8 @@ struct scalar
     {
         assert(check_shapes{inputs}.has(1).only_dims(1).size() == 1);
         auto t = inputs.at(0).type();
-        std::vector<std::size_t> strides(scalar_bcast.lens().size(), 0);
-        return {t, scalar_bcast.lens(), strides};
+        std::vector<std::size_t> strides(scalar_bcast_lens.size(), 0);
+        return {t, scalar_bcast_lens, strides};
     }
 
     argument compute(shape output_shape, std::vector<argument> args) const
src/include/migraphx/pass_manager.hpp (new file, mode 100644)
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_PASS_MANAGER_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_PASS_MANAGER_HPP

#include <list>
#include <unordered_map>
#include <migraphx/operation.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/builtin.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/target.hpp>
#include <migraphx/tracer.hpp>
#include <migraphx/env.hpp>
#include <migraphx/config.hpp>
#include <algorithm>
#include <iostream>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

void run_passes(program& prog, const std::vector<pass>& passes, tracer trace = tracer{});

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
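Usage sketch, not part of this commit: the new header lets callers run a pass pipeline outside of program::compile, with the same per-pass tracing and debug-build validation that compile performs (see src/pass_manager.cpp below). A minimal sketch, where the pipeline contents are left to the caller:

    #include <iostream>
    #include <vector>
    #include <migraphx/pass_manager.hpp>

    // Apply an externally assembled pass pipeline to a program, tracing each pass to stdout.
    void optimize(migraphx::program& prog, const std::vector<migraphx::pass>& pipeline)
    {
        migraphx::run_passes(prog, pipeline, migraphx::tracer{std::cout});
    }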
src/include/migraphx/program.hpp
@@ -108,6 +108,7 @@ struct program
     void debug_print() const;
     void debug_print(instruction_ref ins) const;
     void debug_print(const std::vector<instruction_ref>& inss) const;
+    void print_graph(std::ostream& os) const;
     void dry_run(parameter_map params) const;
src/include/migraphx/ranges.hpp
@@ -12,7 +12,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace detail {
 
 template <class String, class T>
-auto generic_find_impl(rank<2>, String&& s, const T& x) -> decltype(s.begin() + s.find(x), s.npos)
+auto generic_find_impl(rank<2>, String&& s, const T& x) -> decltype(s.npos, s.begin() + s.find(x))
 {
     auto index = s.find(x);
     if(index == s.npos)
src/onnx/onnx.cpp
@@ -141,8 +141,8 @@ struct onnx_parser
             if(broadcasted != 0)
             {
                 uint64_t axis = parse_value(attributes.at("axis")).at<uint64_t>();
                 auto l =
-                    prog.add_instruction(op::broadcast{axis, args[0]->get_shape()}, args[1]);
+                    prog.add_instruction(op::broadcast{axis, args[0]->get_shape().lens()}, args[1]);
                 return prog.add_instruction(x, args[0], l);
             }
             return prog.add_instruction(x, args);
@@ -306,7 +306,7 @@ struct onnx_parser
         {
             uint64_t axis = 1;
             auto l1 = prog.add_instruction(op, args[0], args[1]);
-            auto l2 = prog.add_instruction(op::broadcast{axis, l1->get_shape()}, args[2]);
+            auto l2 = prog.add_instruction(op::broadcast{axis, l1->get_shape().lens()}, args[2]);
             return prog.add_instruction(op::add{}, l1, l2);
         }
         return prog.add_instruction(op, l0, args[1]);
@@ -671,15 +671,15 @@ struct onnx_parser
             auto&& bias_floats = attributes["bias"].floats();
             bias               = std::vector<float>(bias_floats.begin(), bias_floats.end());
         }
-        auto input_shape = args.front()->get_shape();
+        auto input_lens = args.front()->get_shape().lens();
 
         auto scale_val = prog.add_literal(scale);
         auto bias_vals = prog.add_literal(
             migraphx::literal{migraphx::shape{migraphx::shape::float_type, {bias.size()}}, bias});
 
-        auto scale_tensor = prog.add_instruction(migraphx::op::scalar{input_shape}, scale_val);
+        auto scale_tensor = prog.add_instruction(migraphx::op::scalar{input_lens}, scale_val);
         auto img_scaled   = prog.add_instruction(migraphx::op::mul{}, args.front(), scale_tensor);
-        auto bias_bcast = prog.add_instruction(migraphx::op::broadcast{1, input_shape}, bias_vals);
+        auto bias_bcast = prog.add_instruction(migraphx::op::broadcast{1, input_lens}, bias_vals);
         return prog.add_instruction(migraphx::op::add{}, img_scaled, bias_bcast);
     }
src/pass_manager.cpp (new file, mode 100644)
#include <migraphx/program.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/target.hpp>
#include <migraphx/env.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/time.hpp>
#include <migraphx/iterator_for.hpp>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

void run_passes(program& prog, const std::vector<pass>& passes, tracer trace)
{
    for(auto& p : passes)
    {
        trace("Pass: ", p.name());
        p.apply(prog);
        trace(prog);
#ifndef NDEBUG
        trace("Validate ...");
        auto invalid = prog.validate();
        if(invalid != prog.end())
        {
            auto index = std::distance(prog.begin(), invalid);
            MIGRAPHX_THROW(p.name() + " pass produces invalid program at instruction " +
                           std::to_string(index) + ": " + invalid->name());
        }
        trace();
#endif
    }
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/program.cpp
@@ -7,6 +7,7 @@
 #include <migraphx/ranges.hpp>
 #include <migraphx/time.hpp>
 #include <migraphx/iterator_for.hpp>
+#include <migraphx/pass_manager.hpp>
 #include <iostream>
 #include <sstream>
 #include <algorithm>
@@ -55,7 +56,7 @@ static void print_instruction(std::ostream& os,
 }
 
 template <class F>
-static void print_program(std::ostream& os, const program& p, F annonate)
+static void print_program(const program& p, F print_func)
 {
     std::unordered_map<instruction_ref, std::string> names;
     int count = 0;
@@ -76,11 +77,7 @@ static void print_program(std::ostream& os, const program& p, F annonate)
             (void)arg;
         }
 
-        print_instruction(os, ins, names);
-        annonate(ins, names);
-        os << std::endl;
-
+        print_func(ins, names);
         count++;
     }
@@ -291,23 +288,7 @@ void program::compile(const target& t, tracer trace)
         trace = tracer{std::cout};
     trace(*this);
     trace();
-    for(auto&& p : t.get_passes(this->impl->ctx))
-    {
-        trace("Pass: ", p.name());
-        p.apply(*this);
-        trace(*this);
-#ifndef NDEBUG
-        trace("Validate ...");
-        auto invalid = this->validate();
-        if(invalid != impl->instructions.end())
-        {
-            auto index = std::distance(impl->instructions.begin(), invalid);
-            MIGRAPHX_THROW(p.name() + " pass produces invalid program at instruction " +
-                           std::to_string(index) + ": " + invalid->name());
-        }
-        trace();
-#endif
-    }
+    run_passes(*this, t.get_passes(this->impl->ctx), trace);
     auto invalid = this->validate();
     if(invalid != impl->instructions.end())
     {
@@ -475,10 +456,12 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
     double calculate_overhead_time    = total_time - total_instruction_time;
     double calculate_overhead_percent = calculate_overhead_time * 100.0 / total_time;
 
-    print_program(os, *this, [&](auto ins, auto&&) {
+    print_program(*this, [&](auto ins, const auto& names) {
+        print_instruction(std::cout, ins, names);
         double avg     = common_average(ins_vec[ins]);
         double percent = std::ceil(100.0 * avg / total_instruction_time);
         os << ": " << avg << "ms, " << percent << "%";
+        os << std::endl;
     });
 
     os << std::endl;
@@ -516,7 +499,7 @@ void program::debug_print(instruction_ref ins) const
         return;
     }
     std::stringstream ss;
-    print_program(ss, *this, [&](auto x, auto&& names) {
+    print_program(*this, [&](auto x, const auto& names) {
         if(x == ins)
         {
             print_instruction(std::cout, x, names);
@@ -531,6 +514,32 @@ void program::debug_print(const std::vector<instruction_ref>& inss) const
     std::cout << std::endl;
 }
 
+static std::string enclose_name(const std::string& name)
+{
+    return '"' + replace_string(name, "\"", "\\\"") + '"';
+}
+
+void program::print_graph(std::ostream& os) const
+{
+    os << "digraph {" << std::endl;
+    os << "\trankdir=LR;" << std::endl;
+    print_program(*this, [&](auto ins, const auto& names) {
+        os << "\t" << enclose_name(names.at(ins))
+           << "[label=" << enclose_name(to_string(ins->get_operator())) << "];";
+        os << std::endl;
+        if(!ins->inputs().empty())
+        {
+            for(auto&& arg : ins->inputs())
+            {
+                os << "\t" << enclose_name(names.at(arg)) << " -> " << enclose_name(names.at(ins));
+                os << "[label=" << enclose_name(to_string(ins->get_shape())) << "];";
+                os << std::endl;
+            }
+        }
+    });
+    os << "}" << std::endl;
+}
+
 void program::dry_run(std::unordered_map<std::string, argument> params) const
 {
     auto& ctx = this->impl->ctx;
@@ -539,14 +548,21 @@ void program::dry_run(std::unordered_map<std::string, argument> params) const
 void program::annotate(std::ostream& os, std::function<void(instruction_ref)> a) const
 {
-    print_program(os, *this, [&](auto ins, auto&&) { a(ins); });
+    print_program(*this, [&](auto ins, const auto& names) {
+        print_instruction(os, ins, names);
+        a(ins);
+        os << std::endl;
+    });
 }
 
 bool operator==(const program& x, const program& y) { return to_string(x) == to_string(y); }
 
 std::ostream& operator<<(std::ostream& os, const program& p)
 {
-    print_program(os, p, [](auto&&...) {});
+    print_program(p, [&](auto ins, const auto& names) {
+        print_instruction(os, ins, names);
+        os << std::endl;
+    });
     return os;
 }
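Usage sketch, not part of this commit: the new program::print_graph emits the program as a Graphviz DOT digraph, one node per instruction labelled with its operator and an edge from each input, labelled with a shape. Assuming an already-built program prog:

    #include <iostream>
    #include <migraphx/program.hpp>

    void dump_dot(const migraphx::program& prog)
    {
        // Writes a DOT digraph ("digraph { rankdir=LR; ... }") that can be rendered
        // with `dot -Tpng`. Node names match those used by the textual printout.
        prog.print_graph(std::cout);
    }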
src/rewrite_rnn.cpp
@@ -4,6 +4,7 @@
 #include <migraphx/operators.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/dfor.hpp>
+#include <migraphx/op/common.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -213,7 +214,7 @@ std::vector<instruction_ref> rewrite_rnn::vanilla_rnn_cell(bool is_forward,
         auto wb = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sbias);
         auto rb = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sbias);
         auto b  = prog.insert_instruction(ins, op::add{}, wb, rb);
-        bias    = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, b);
+        bias    = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape().lens()}, b);
     }
 
     instruction_ref hidden_out = prog.end();
@@ -520,25 +521,26 @@ std::vector<instruction_ref> rewrite_rnn::gru_cell(bool is_forward,
     instruction_ref brcst_bh{};
     if(bias != prog.end())
     {
-        auto sbias = prog.insert_instruction(ins, op::squeeze{{0}}, bias);
-        auto wbz   = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sbias);
-        auto wbr   = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sbias);
-        auto wbh   = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sbias);
-        brcst_wbh  = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, wbh);
+        auto sbias          = prog.insert_instruction(ins, op::squeeze{{0}}, bias);
+        auto broadcast_lens = sih->get_shape().lens();
+        auto wbz            = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sbias);
+        auto wbr            = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sbias);
+        auto wbh = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sbias);
+        brcst_wbh = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, wbh);
         auto rbz = prog.insert_instruction(ins, op::slice{{0}, {3 * hs}, {4 * hs}}, sbias);
         auto rbr = prog.insert_instruction(ins, op::slice{{0}, {4 * hs}, {5 * hs}}, sbias);
         auto rbh = prog.insert_instruction(ins, op::slice{{0}, {5 * hs}, {6 * hs}}, sbias);
-        brcst_rbh = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, rbh);
+        brcst_rbh = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, rbh);
         auto bz  = prog.insert_instruction(ins, op::add{}, wbz, rbz);
-        brcst_bz = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, bz);
+        brcst_bz = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, bz);
         auto br  = prog.insert_instruction(ins, op::add{}, wbr, rbr);
-        brcst_br = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, br);
+        brcst_br = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, br);
         auto bh  = prog.insert_instruction(ins, op::add{}, wbh, rbh);
-        brcst_bh = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, bh);
+        brcst_bh = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, bh);
     }
 
     for(long i = 0; i < seq_len; i++)
@@ -945,8 +947,8 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
     auto sih = prog.insert_instruction(ins, op::squeeze{{0}}, ih);
     // initial cell state
     auto sic = prog.insert_instruction(ins, op::squeeze{{0}}, ic);
-    auto ic_shape = sic->get_shape();
+    auto ic_lens = sic->get_shape().lens();
 
     // bias
     instruction_ref bi_brcst{};
@@ -955,26 +957,27 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
     instruction_ref bc_brcst{};
     if(bias != prog.end())
     {
         auto sbias = prog.insert_instruction(ins, op::squeeze{{0}}, bias);
         auto bxi   = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sbias);
         auto bhi   = prog.insert_instruction(ins, op::slice{{0}, {4 * hs}, {5 * hs}}, sbias);
         auto bi    = prog.insert_instruction(ins, op::add{}, bxi, bhi);
-        bi_brcst   = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, bi);
+        bi_brcst   = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bi);
         auto bxo = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sbias);
         auto bho = prog.insert_instruction(ins, op::slice{{0}, {5 * hs}, {6 * hs}}, sbias);
         auto bo  = prog.insert_instruction(ins, op::add{}, bxo, bho);
-        bo_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, bo);
+        bo_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bo);
         auto bxf = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sbias);
         auto bhf = prog.insert_instruction(ins, op::slice{{0}, {6 * hs}, {7 * hs}}, sbias);
         auto bf  = prog.insert_instruction(ins, op::add{}, bxf, bhf);
-        bf_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, bf);
+        bf_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bf);
         auto bxc = prog.insert_instruction(ins, op::slice{{0}, {3 * hs}, {4 * hs}}, sbias);
         auto bhc = prog.insert_instruction(ins, op::slice{{0}, {7 * hs}, {8 * hs}}, sbias);
         auto bc  = prog.insert_instruction(ins, op::add{}, bxc, bhc);
-        bc_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, bc);
+        bc_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bc);
     }
 
     // peep hole
@@ -986,13 +989,13 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
     {
         auto spph = prog.insert_instruction(ins, op::squeeze{{0}}, pph);
         auto pphi = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, spph);
-        pphi_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, pphi);
+        pphi_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, pphi);
         auto ppho = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, spph);
-        ppho_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, ppho);
+        ppho_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, ppho);
        auto pphf = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, spph);
-        pphf_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, pphf);
+        pphf_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, pphf);
     }
 
     for(long i = 0; i < seq_len; ++i)
@@ -1166,5 +1169,14 @@ std::vector<operation> rewrite_rnn::lstm_actv_funcs(instruction_ref ins) const
     }
 }
 
+namespace op {
+std::ostream& operator<<(std::ostream& os, rnn_direction v)
+{
+    std::vector<std::string> rnn_direction_str = {"forward", "reverse", "bidirectional"};
+    os << rnn_direction_str[static_cast<std::underlying_type<rnn_direction>::type>(v)];
+    return os;
+}
+} // namespace op
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
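Usage note, not part of this commit: the operator<< defined above makes op::rnn_direction streamable, which the new reflect members on the RNN operators rely on when an operator is printed or stringified. A minimal sketch:

    #include <iostream>
    #include <migraphx/op/common.hpp>

    int main()
    {
        auto d = migraphx::op::rnn_direction::bidirectional;
        std::cout << d << std::endl; // prints "bidirectional"
    }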
src/tf/tf.cpp
@@ -110,6 +110,7 @@ struct tf_parser
         add_generic_op("Relu", op::relu{});
 
         add_binary_op("Add", op::add{});
+        add_binary_op("Mul", op::mul{});
 
         add_mem_op("AvgPool", &tf_parser::parse_pooling);
         add_mem_op("BiasAdd", &tf_parser::parse_biasadd);
@@ -117,6 +118,7 @@ struct tf_parser
         add_mem_op("Const", &tf_parser::parse_constant);
         add_mem_op("Conv2D", &tf_parser::parse_conv);
         add_mem_op("FusedBatchNorm", &tf_parser::parse_batchnorm);
+        add_mem_op("MatMul", &tf_parser::parse_matmul);
         add_mem_op("MaxPool", &tf_parser::parse_pooling);
         add_mem_op("Mean", &tf_parser::parse_mean);
         add_mem_op("Pack", &tf_parser::parse_pack);
@@ -124,6 +126,7 @@ struct tf_parser
         add_mem_op("Reshape", &tf_parser::parse_reshape);
         add_mem_op("Softmax", &tf_parser::parse_softmax);
         add_mem_op("Squeeze", &tf_parser::parse_squeeze);
+        add_mem_op("StridedSlice", &tf_parser::parse_stridedslice);
     }
 
     template <class F>
@@ -235,7 +238,7 @@ struct tf_parser
     parse_biasadd(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
     {
         uint64_t axis = 1; // assume output of previous layer is in NCHW (broadcast on channel)
-        auto l0 = prog.add_instruction(op::broadcast{axis, args[0]->get_shape()}, args[1]);
+        auto l0 = prog.add_instruction(op::broadcast{axis, args[0]->get_shape().lens()}, args[1]);
         return prog.add_instruction(op::add{}, args[0], l0);
     }
@@ -336,6 +339,32 @@ struct tf_parser
         return prog.add_instruction(op, {args[0], weights});
     }
 
+    instruction_ref
+    parse_matmul(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
+    {
+        bool transa = false;
+        bool transb = false;
+
+        if(contains(attributes, "transpose_a"))
+        {
+            transa = attributes.at("transpose_a").b();
+        }
+        if(contains(attributes, "transpose_b"))
+        {
+            transb = attributes.at("transpose_a").b();
+        }
+
+        std::vector<int64_t> perm(args[0]->get_shape().lens().size());
+        std::iota(perm.begin(), perm.end(), int64_t{0});
+        // swap the last two elements
+        std::iter_swap(perm.end() - 1, perm.end() - 2);
+
+        auto l1 = (transa) ? prog.add_instruction(op::transpose{perm}, args[0]) : args[0];
+        auto l2 = (transb) ? prog.add_instruction(op::transpose{perm}, args[1]) : args[1];
+
+        return prog.add_instruction(op::dot{}, l1, l2);
+    }
+
     instruction_ref
     parse_mean(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
@@ -508,6 +537,46 @@ struct tf_parser
         return prog.add_instruction(op, args[0]);
     }
 
+    instruction_ref parse_stridedslice(const std::string&,
+                                       const attribute_map& attributes,
+                                       std::vector<instruction_ref> args)
+    {
+        op::slice op;
+        auto starts     = args[1]->eval().get<int32_t>().to_vector();
+        auto ends       = args[2]->eval().get<int32_t>().to_vector();
+        size_t num_axes = args[0]->get_shape().lens().size();
+        if(num_axes >= 4)
+        {
+            reorder_data(starts);
+            reorder_data(ends);
+        }
+        op.starts = std::vector<int64_t>(starts.begin(), starts.end());
+        op.ends   = std::vector<int64_t>(ends.begin(), ends.end());
+        op.axes   = std::vector<int64_t>(num_axes);
+        std::iota(op.axes.begin(), op.axes.end(), 0);
+
+        uint32_t shrink_axis_mask = 0;
+        uint32_t bitwise_compare  = 1;
+        std::vector<int64_t> squeeze_axes;
+
+        if(contains(attributes, "shrink_axis_mask"))
+            shrink_axis_mask = static_cast<uint32_t>(attributes.at("shrink_axis_mask").i());
+
+        for(size_t i = 0; i < num_axes; i++)
+        {
+            // the LSB corresponds to axis 0 when determining which axes to squeeze
+            if(((shrink_axis_mask >> i) & bitwise_compare) == 1)
+                squeeze_axes.push_back(i);
+        }
+
+        if(num_axes >= 4)
+        {
+            squeeze_axes = parse_axes(squeeze_axes);
+        }
+
+        auto l0 = prog.add_instruction(op, args[0]);
+        return prog.add_instruction(op::squeeze{squeeze_axes}, l0);
+    }
+
     void parse_graph(const tensorflow::GraphDef& graph)
     {
         nodes = get_nodes(graph, input_nodes);
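Worked example, not part of this commit, of the shrink_axis_mask handling in parse_stridedslice above: the mask's least-significant bit corresponds to axis 0, and every set bit marks an axis to squeeze after slicing. A standalone sketch of just that loop, with a hypothetical mask value:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main()
    {
        uint32_t shrink_axis_mask = 5; // binary 0101: squeeze axes 0 and 2
        std::vector<int64_t> squeeze_axes;
        for(std::size_t i = 0; i < 4; i++) // 4 axes, as in an NHWC tensor
        {
            if(((shrink_axis_mask >> i) & 1u) == 1)
                squeeze_axes.push_back(i);
        }
        for(auto a : squeeze_axes)
            std::cout << a << " "; // prints "0 2"
        std::cout << std::endl;
    }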
test/auto_contiguous_test.cpp
@@ -60,7 +60,7 @@ TEST_CASE(after_literal_broadcast)
     auto l2 = p.add_literal(get_2());
     EXPECT(p.get_shape().standard());
     EXPECT(not p.get_shape().broadcasted());
-    auto b = p.add_instruction(migraphx::op::broadcast{0, l1->get_shape()}, l2);
+    auto b = p.add_instruction(migraphx::op::broadcast{0, l1->get_shape().lens()}, l2);
     p.add_instruction(pass_op{}, b);
     EXPECT(not p.get_shape().standard());
     EXPECT(p.get_shape().broadcasted());
@@ -91,7 +91,7 @@ TEST_CASE(after_param_broadcast)
     auto l2 = p.add_parameter("2", {migraphx::shape::float_type, {2}});
     EXPECT(p.get_shape().standard());
     EXPECT(not p.get_shape().broadcasted());
-    auto b = p.add_instruction(migraphx::op::broadcast{0, l1->get_shape()}, l2);
+    auto b = p.add_instruction(migraphx::op::broadcast{0, l1->get_shape().lens()}, l2);
     p.add_instruction(pass_op{}, b);
     EXPECT(not p.get_shape().standard());
     EXPECT(p.get_shape().broadcasted());
test/cpu_dot_op_test.cpp
@@ -351,7 +351,7 @@ TEST_CASE(gemm_mutli_dim1_2_3)
     float beta     = 0.41;
     auto m12_alpha = p.add_instruction(migraphx::op::dot{alpha, beta}, l1, l2);
     auto l_beta    = p.add_literal(beta);
-    auto b_beta = p.add_instruction(migraphx::op::scalar{m12_alpha->get_shape()}, l_beta);
+    auto b_beta = p.add_instruction(migraphx::op::scalar{m12_alpha->get_shape().lens()}, l_beta);
     auto m3_beta = p.add_instruction(migraphx::op::mul{}, b_beta, l3);
     p.add_instruction(migraphx::op::add{}, m3_beta, m12_alpha);
     p.compile(migraphx::cpu::target{});