Commit 1c3b16d2 authored by Shucai Xiao's avatar Shucai Xiao

merge changes from develop branch

parents 015d1ac4 3d200e1c
syntax = "proto3";
package tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "TensorProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
import "resource_handle.proto";
import "tensor_shape.proto";
import "types.proto";
// Protocol buffer representing a tensor.
message TensorProto {
DataType dtype = 1;
// Shape of the tensor. TODO(touts): sort out the 0-rank issues.
TensorShapeProto tensor_shape = 2;
// Only one of the representations below is set, either "tensor_content" or
// one of the "xxx_val" attributes. We are not using oneof because oneofs
// cannot contain repeated fields and it would require another extra set of
// messages.
// Version number.
//
// In version 0, if the "repeated xxx" representations contain only one
// element, that element is repeated to fill the shape. This makes it easy
// to represent a constant Tensor with a single value.
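//
// Illustrative example (not part of the schema): a DT_FLOAT tensor of shape
// [2, 2] whose float_val holds the single entry 7.0 denotes the constant
// tensor [[7.0, 7.0], [7.0, 7.0]] -- the lone value is repeated to fill the
// shape, as described above.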
int32 version_number = 3;
// Serialized raw tensor content from either Tensor::AsProtoTensorContent or
// memcpy in tensorflow::grpc::EncodeTensorToByteBuffer. This representation
// can be used for all tensor types. The purpose of this representation is to
// reduce serialization overhead during RPC call by avoiding serialization of
// many repeated small items.
bytes tensor_content = 4;
// Type specific representations that make it easy to create tensor protos in
// all languages. Only the representation corresponding to "dtype" can
// be set. The values hold the flattened representation of the tensor in
// row major order.
// DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll
// have some pointless zero padding for each value here.
repeated int32 half_val = 13 [packed = true];
// DT_FLOAT.
repeated float float_val = 5 [packed = true];
// DT_DOUBLE.
repeated double double_val = 6 [packed = true];
// DT_INT32, DT_INT16, DT_INT8, DT_UINT8.
repeated int32 int_val = 7 [packed = true];
// DT_STRING
repeated bytes string_val = 8;
// DT_COMPLEX64. scomplex_val(2*i) and scomplex_val(2*i+1) are real
// and imaginary parts of i-th single precision complex.
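// For example (illustrative), the complex values {1+2i, 3-4i} are stored as
// scomplex_val: [1, 2, 3, -4].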
repeated float scomplex_val = 9 [packed = true];
// DT_INT64
repeated int64 int64_val = 10 [packed = true];
// DT_BOOL
repeated bool bool_val = 11 [packed = true];
// DT_COMPLEX128. dcomplex_val(2*i) and dcomplex_val(2*i+1) are real
// and imaginary parts of i-th double precision complex.
repeated double dcomplex_val = 12 [packed = true];
// DT_RESOURCE
repeated ResourceHandleProto resource_handle_val = 14;
// DT_VARIANT
repeated VariantTensorDataProto variant_val = 15;
// DT_UINT32
repeated uint32 uint32_val = 16 [packed = true];
// DT_UINT64
repeated uint64 uint64_val = 17 [packed = true];
};
// Protocol buffer representing the serialization format of DT_VARIANT tensors.
message VariantTensorDataProto {
// Name of the type of objects being serialized.
string type_name = 1;
// Portions of the object that are not Tensors.
bytes metadata = 2;
// Tensors contained within objects being serialized.
repeated TensorProto tensors = 3;
}
// Protocol buffer representing the shape of tensors.
syntax = "proto3";
option cc_enable_arenas = true;
option java_outer_classname = "TensorShapeProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
package tensorflow;
// Dimensions of a tensor.
message TensorShapeProto {
// One dimension of the tensor.
message Dim {
// Size of the tensor in that dimension.
// This value must be >= -1, but values of -1 are reserved for "unknown"
// shapes (values of -1 mean "unknown" dimension). Certain wrappers
// that work with TensorShapeProto may fail at runtime when deserializing
// a TensorShapeProto containing a dim value of -1.
int64 size = 1;
// Optional name of the tensor dimension.
string name = 2;
};
// Dimensions of the tensor, such as {"input", 30}, {"output", 40}
// for a 30 x 40 2D tensor. If an entry has size -1, this
// corresponds to a dimension of unknown size. The names are
// optional.
//
// The order of entries in "dim" matters: It indicates the layout of the
// values in the tensor in-memory representation.
//
// The first entry in "dim" is the outermost dimension used to layout the
// values, the last entry is the innermost dimension. This matches the
// in-memory layout of RowMajor Eigen tensors.
//
// If "dim.size()" > 0, "unknown_rank" must be false.
repeated Dim dim = 2;
// If true, the number of dimensions in the shape is unknown.
//
// If true, "dim.size()" must be 0.
bool unknown_rank = 3;
};
#include <google/protobuf/text_format.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <graph.pb.h>
#include <iostream>
#include <fstream>
#include <unordered_map>
#include <unordered_set>
#include <functional>
#include <array>
#include <utility>
#include <vector>
#include <migraphx/fallthrough.hpp>
#include <migraphx/program.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/config.hpp>
#include <migraphx/tf.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct tf_parser
{
using attribute_map = std::unordered_map<std::string, tensorflow::AttrValue>;
using node_map = std::unordered_map<std::string, tensorflow::NodeDef>;
// using input_node_map = std::unordered_map<std::string, std::unordered_set<std::string>>;
using op_func = std::function<instruction_ref(attribute_map, std::vector<instruction_ref>)>;
node_map nodes;
std::vector<tensorflow::NodeDef> input_nodes;
std::unordered_map<std::string, instruction_ref> instructions;
program prog = program();
bool is_nhwc = true;
std::unordered_map<std::string, op_func> ops;
std::vector<size_t> parse_axes(const attribute_map& attributes, const std::string& s) const
{
auto attrs = attributes.at(s).list().i();
std::vector<size_t> axes;
copy(attrs.begin(), attrs.end(), std::back_inserter(axes));
if(is_nhwc)
{
std::transform(axes.begin(), axes.end(), axes.begin(), [&](size_t axis) {
return parse_axis(axis);
});
}
return axes;
}
template <class T>
std::vector<T> parse_axes(std::vector<T> axes) const
{
std::vector<T> new_axes;
if(is_nhwc)
{
std::transform(axes.begin(),
axes.end(),
std::back_inserter(new_axes),
[&](size_t axis) { return parse_axis(axis); });
}
return new_axes;
}
// tf stores certain attributes, such as strides and dilations, as 4-D NHWC
// lists whose first and last entries are 1; the relevant spatial values sit in
// the middle. This helper reorders the list into NCHW so the operator member
// variables can be read from indices 2 and 3.
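// For example (illustrative), NHWC strides {1, 2, 2, 1} are reordered to the
// NCHW layout {1, 1, 2, 2}, so indices 2 and 3 hold the spatial values that
// the convolution/pooling operators consume.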
template <class T>
void reorder_data(std::vector<T>& prev_data) const
{
std::vector<T> new_data(prev_data.size());
for(size_t i = 0; i < new_data.size(); i++)
{
auto new_idx = parse_axis(i);
new_data.at(new_idx) = prev_data.at(i);
}
prev_data = new_data;
}
template <class T>
T parse_axis(const T& dim) const
{
T new_dim = dim;
if(is_nhwc)
{
switch(dim)
{
case 0: new_dim = 0; break;
case 1: new_dim = 2; break;
case 2: new_dim = 3; break;
case 3: new_dim = 1; break;
default: break;
}
}
return new_dim;
}
std::vector<int64_t> get_axes(size_t num_axes) const
{
std::vector<int64_t> axes(num_axes);
std::iota(axes.begin(), axes.end(), 0);
return axes;
}
tf_parser()
{
add_generic_op("Identity", op::identity{});
add_generic_op("Relu", op::relu{});
add_binary_op("Add", op::add{});
add_mem_op("AvgPool", &tf_parser::parse_pooling);
add_mem_op("BiasAdd", &tf_parser::parse_biasadd);
add_mem_op("ConcatV2", &tf_parser::parse_concat);
add_mem_op("Const", &tf_parser::parse_constant);
add_mem_op("Conv2D", &tf_parser::parse_conv);
add_mem_op("FusedBatchNorm", &tf_parser::parse_batchnorm);
add_mem_op("MaxPool", &tf_parser::parse_pooling);
add_mem_op("Mean", &tf_parser::parse_mean);
add_mem_op("Pad", &tf_parser::parse_pad);
add_mem_op("Reshape", &tf_parser::parse_reshape);
add_mem_op("Softmax", &tf_parser::parse_softmax);
add_mem_op("Squeeze", &tf_parser::parse_squeeze);
}
template <class F>
void add_op(std::string name, F f)
{
ops.emplace(name, f);
}
// Multi output op
template <class F>
void add_multi_op(std::string name, F f)
{
ops.emplace(name, f);
}
template <class F>
void add_mem_op(std::string name, F f)
{
add_op(name, [=](auto&&... xs) {
return std::mem_fn(f)(*this, name, std::forward<decltype(xs)>(xs)...);
});
}
template <class T>
void add_binary_op(std::string name, T x)
{
add_op(name, [this, x](attribute_map attributes, std::vector<instruction_ref> args) {
if(args.size() != 2)
MIGRAPHX_THROW("binary operators should have 2 operands");
auto l0 = args[1];
if(contains(attributes, "data_format"))
{
if(is_nhwc)
{
l0 = prog.add_instruction(op::transpose{{0, 3, 1, 2}}, args[1]);
}
}
return add_broadcastable_binary_op(args[0], l0, x);
});
}
template <class T>
instruction_ref add_broadcastable_binary_op(instruction_ref arg0, instruction_ref arg1, T x)
{
if(arg0->get_shape() != arg1->get_shape())
{
// Example:
// s0 = (3,2,4,5) and s1 = (2,1,1)
//
// In this case we need to broadcast (:,1,1) portion of
// s1 plus broadcast the 1st dimension of s1
// giving output_lens = (3,2,4,5)
//
// Another example:
// s0 = (3,2,1,5) and s1 = (2,7,5)
// In this case we need to broadcast the (:,:,1:,:) axis
// of s0 plus the 1st dimension of s1 giving
// output_lens = (3,2,7,5)
//
// Get lengths for both arguments
const std::vector<size_t>* s0 = &arg0->get_shape().lens();
const std::vector<size_t>* s1 = &arg1->get_shape().lens();
// Make sure s0 is the smaller size
if(s0->size() > s1->size())
std::swap(s0, s1);
std::vector<size_t> output_lens(*s1);
auto offset = s1->size() - s0->size();
std::transform(s0->begin(),
s0->end(),
s1->begin() + offset,
output_lens.begin() + offset,
[](auto a, auto b) { return std::max(a, b); });
auto l0 = prog.add_instruction(op::multibroadcast{output_lens}, arg0);
auto l1 = prog.add_instruction(op::multibroadcast{output_lens}, arg1);
return prog.add_instruction(x, l0, l1);
}
else
{
return prog.add_instruction(x, {arg0, arg1});
}
}
template <class T>
void add_generic_op(std::string name, T x)
{
add_op(name, [this, x](attribute_map, std::vector<instruction_ref> args) {
return prog.add_instruction(x, args);
});
}
instruction_ref
parse_batchnorm(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
float epsilon = 1e-5f;
float momentum = 0.9f;
op::batch_norm_inference::bn_infer_mode_t bn_mode = op::batch_norm_inference::spatial;
if(contains(attributes, "epsilon"))
{
epsilon = attributes.at("epsilon").f();
}
op::batch_norm_inference op{epsilon, momentum, bn_mode};
return prog.add_instruction(op, std::move(args));
}
instruction_ref
parse_biasadd(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
{
uint64_t axis = 1; // assume output of previous layer is in NCHW (broadcast on channel)
auto l0 = prog.add_instruction(op::broadcast{axis, args[0]->get_shape()}, args[1]);
return prog.add_instruction(op::add{}, args[0], l0);
}
instruction_ref
parse_concat(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
// get index for axis within args
size_t axis_idx = attributes.at("N").i();
size_t axis = parse_axis(args[axis_idx]->eval().at<int64_t>());
op::concat op{axis};
// return only first N arguments (assuming last index is the axis value)
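// e.g. (illustrative) ConcatV2(t0, t1, axis) carries attribute N = 2, so
// args[2] is the evaluated axis and only args[0] and args[1] are concatenated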
return prog.add_instruction(
op, std::vector<instruction_ref>(args.begin(), args.begin() + args.size() - 1));
}
instruction_ref parse_constant(const std::string&,
attribute_map attributes,
const std::vector<instruction_ref>&)
{
literal v = parse_tensor(attributes.at("value").tensor());
auto l0 = prog.add_literal(v);
size_t num_axes = l0->get_shape().lens().size();
if(num_axes >= 4)
{
std::vector<int64_t> transpose_axes = get_axes(num_axes);
reorder_data(transpose_axes);
l0 = prog.add_instruction(op::transpose{transpose_axes}, l0);
}
return l0;
}
instruction_ref
parse_conv(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
op::convolution op;
if(contains(attributes, "padding"))
{
const std::string& pad_mode = attributes.at("padding").s();
if(pad_mode.find("SAME") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::same;
}
else if(pad_mode.find("EXPLICIT") != std::string::npos)
{
std::vector<size_t> padding;
copy(attributes.at("explicit_paddings").list().i(), std::back_inserter(padding));
if(padding.size() != 4)
{
MIGRAPHX_THROW("padding should have 4 values");
}
if(padding[0] != padding[2] || padding[1] != padding[3])
{
MIGRAPHX_THROW("migraphx does not support asymetric padding");
}
op.padding[0] = padding[0];
op.padding[1] = padding[1];
}
}
if(contains(attributes, "strides"))
{
std::vector<size_t> stride;
copy(attributes.at("strides").list().i(), std::back_inserter(stride));
reorder_data(stride);
if(stride.size() != 4)
{
MIGRAPHX_THROW("strides should have 4 values");
}
op.stride[0] = stride[2];
op.stride[1] = stride[3];
}
if(contains(attributes, "dilations"))
{
std::vector<size_t> dilation;
copy(attributes.at("dilations").list().i(), std::back_inserter(dilation));
reorder_data(dilation);
if(dilation.size() != 4)
{
MIGRAPHX_THROW("dilation should have 4 values");
}
op.dilation[0] = dilation[2];
op.dilation[1] = dilation[3];
}
auto weights = args[1];
// check if weights are from a constant
if(weights->name() != "@param")
{
if(is_nhwc)
{
weights = prog.add_instruction(op::transpose{{1, 3, 0, 2}}, args[1]);
}
else
{
weights = prog.add_instruction(op::transpose{{3, 2, 0, 1}}, args[1]);
}
}
return prog.add_instruction(op, {args[0], weights});
}
instruction_ref
parse_mean(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
auto axes = parse_axes(args[1]->eval().get<int32_t>().to_vector());
bool keep_dims = attributes.at("keep_dims").b();
std::vector<int32_t> hw_axes{2, 3};
if(axes == hw_axes and keep_dims)
{
op::pooling op{"average"};
std::vector<size_t> input_dims{args[0]->get_shape().lens()};
op.lengths[0] = input_dims[2];
op.lengths[1] = input_dims[3];
return prog.add_instruction(op, args.front());
}
MIGRAPHX_THROW("MIGraphX does not support mean outside of GlobalAvgPool transformation");
}
instruction_ref
parse_pad(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
{
size_t ndims = args.front()->get_shape().lens().size();
// in tf, the paddings are arranged as a 2d shape (ndims, 2),
// the last dim contains the left padding and right padding respectively
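// For example (illustrative), a 4-D NHWC input with tf paddings
// {{0,0},{1,1},{2,2},{0,0}} becomes per-dimension (left,right) pairs that are
// reordered to NCHW as {{0,0},{0,0},{1,1},{2,2}} and then flattened to
// pads = {0,0,1,2, 0,0,1,2} (all left values followed by all right values).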
std::vector<std::pair<int32_t, int32_t>> pad_per_dim(ndims);
auto tf_padding = args[1]->eval().get<int32_t>().to_vector();
for(size_t i = 0; i < 2 * ndims; i += 2)
{
pad_per_dim[i / 2].first = tf_padding[i];
pad_per_dim[i / 2].second = tf_padding[i + 1];
}
reorder_data(pad_per_dim);
op::pad op;
std::vector<int64_t> pads(ndims * 2);
for(size_t i = 0; i < ndims; i++)
{
pads[i] = pad_per_dim[i].first;
pads[i + ndims] = pad_per_dim[i].second;
}
op.pads = pads;
return prog.add_instruction(op, args.front());
}
instruction_ref parse_pooling(const std::string& name,
attribute_map attributes,
std::vector<instruction_ref> args)
{
op::pooling op{starts_with(name, "Max") ? "max" : "average"};
if(contains(attributes, "padding"))
{
const std::string& pad_mode = attributes.at("padding").s();
if(pad_mode.find("SAME") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::same;
}
else if(pad_mode.find("VALID") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::valid;
}
}
if(contains(attributes, "strides"))
{
std::vector<size_t> stride;
copy(attributes.at("strides").list().i(), std::back_inserter(stride));
reorder_data(stride);
if(stride.size() != 4)
{
MIGRAPHX_THROW("strides should have 4 values");
}
op.stride[0] = stride[2];
op.stride[1] = stride[3];
}
if(contains(attributes, "ksize"))
{
std::vector<size_t> ksize;
copy(attributes.at("ksize").list().i(), std::back_inserter(ksize));
reorder_data(ksize);
if(ksize.size() != 4)
{
MIGRAPHX_THROW("ksize should have 4 values");
}
op.lengths[0] = ksize[2];
op.lengths[1] = ksize[3];
}
return prog.add_instruction(op, args[0]);
}
instruction_ref
parse_reshape(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
{
op::reshape op;
if(args.size() != 2)
MIGRAPHX_THROW("reshape needs 2 arguments (input, new_shape)");
auto s = args[1]->eval();
s.visit([&](auto v) { copy(v, std::back_inserter(op.dims)); });
return prog.add_instruction(op, args[0]);
}
void parse_from(std::istream& is)
{
tensorflow::GraphDef graph;
if(graph.ParseFromIstream(&is))
{
this->parse_graph(graph);
}
else
{
throw std::runtime_error("Failed reading tf file");
}
}
instruction_ref
parse_softmax(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
{
auto dims = args.front()->get_shape().lens();
auto r =
prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1]), 1, 1}}, args.front());
auto s = prog.add_instruction(op::softmax{}, r);
return prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1])}}, s);
}
instruction_ref parse_squeeze(const std::string&,
const attribute_map& attributes,
std::vector<instruction_ref> args)
{
op::squeeze op;
auto axes = parse_axes(attributes, "squeeze_dims");
copy(axes, std::back_inserter(op.axes));
auto args0_dims = args[0]->get_shape().lens();
if(op.axes.empty()) // no squeeze_dims provided, remove any dim that equals 1
{
for(size_t i = 0; i < args0_dims.size(); i++)
{
if(args0_dims.at(i) == 1)
{
op.axes.push_back(i);
}
}
}
return prog.add_instruction(op, args[0]);
}
void parse_graph(const tensorflow::GraphDef& graph)
{
nodes = get_nodes(graph, input_nodes);
for(auto&& input : input_nodes)
{
const std::string& name = input.name();
attribute_map input_attrs = get_attributes(input);
shape::type_t shape_type = parse_type(input_attrs.at("dtype").type());
std::vector<size_t> dims = parse_dims(input_attrs.at("shape").shape());
if(is_nhwc and dims.size() >= 4)
{
reorder_data(dims);
}
shape s = shape{shape_type, dims};
instructions[name] = prog.add_parameter(name, s);
}
for(auto&& p : nodes)
{
this->parse_node(p.first);
}
}
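// Recursively materialize the instruction for node `name`: parse every input
// node first so its instruction_ref exists, then dispatch on the op name.
// Ops without a registered handler are added as placeholder `unknown`
// instructions so the rest of the graph can still be built.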
void parse_node(const std::string& name)
{
if(instructions.count(name) == 0)
{
auto&& node = nodes.at(name);
std::vector<instruction_ref> args;
for(auto&& input : node.input())
{
if(nodes.count(input) > 0)
{
auto&& iname = get_name(nodes.at(input));
assert(name != iname);
this->parse_node(iname);
args.push_back(instructions.at(iname));
}
else
{
args.push_back(instructions.at(input));
}
}
if(ops.count(node.op()) == 0)
{
instructions[name] = prog.add_instruction(unknown{node.op()}, args);
}
else
{
instructions[name] = ops[node.op()](get_attributes(node), args);
}
}
}
static attribute_map get_attributes(const tensorflow::NodeDef& node)
{
attribute_map result;
for(auto&& attr : node.attr())
{
result[attr.first] = attr.second;
}
return result;
}
static std::string get_name(const tensorflow::NodeDef& node) { return node.name(); }
static node_map get_nodes(const tensorflow::GraphDef& graph,
std::vector<tensorflow::NodeDef>& input_nodes)
{
node_map result;
for(auto&& node : graph.node())
{
auto node_name = get_name(node);
// assume each node in graph has an associated name
if(node_name.empty())
MIGRAPHX_THROW("tf node with no name found");
result[node_name] = node;
if(node.op() == "Placeholder")
{
input_nodes.push_back(node);
}
}
return result;
}
static shape::type_t parse_type(const tensorflow::DataType t)
{
shape::type_t shape_type{};
switch(t)
{
case tensorflow::DataType::DT_INVALID:
break; // throw std::runtime_error("Unsupported type UNDEFINED");
case tensorflow::DataType::DT_FLOAT: shape_type = shape::float_type; break;
case tensorflow::DataType::DT_DOUBLE: shape_type = shape::double_type; break;
case tensorflow::DataType::DT_INT32: shape_type = shape::int32_type; break;
case tensorflow::DataType::DT_UINT8:
break; // throw std::runtime_error("Unsupported type UINT8");
case tensorflow::DataType::DT_INT16: shape_type = shape::int16_type; break;
case tensorflow::DataType::DT_INT8: shape_type = shape::int8_type; break;
case tensorflow::DataType::DT_STRING:
break; // throw std::runtime_error("Unsupported type STRING");
case tensorflow::DataType::DT_COMPLEX64:
break; // throw std::runtime_error("Unsupported type COMPLEX64");
case tensorflow::DataType::DT_INT64: shape_type = shape::int64_type; break;
case tensorflow::DataType::DT_BOOL:
break; // throw std::runtime_error("Unsupported type BOOL");
case tensorflow::DataType::DT_QINT8:
break; // throw std::runtime_error("Unsupported type QINT8");
case tensorflow::DataType::DT_QUINT8:
break; // throw std::runtime_error("Unsupported type QUINT8");
case tensorflow::DataType::DT_QINT32:
break; // throw std::runtime_error("Unsupported type QINT32");
case tensorflow::DataType::DT_BFLOAT16:
break; // throw std::runtime_error("Unsupported type BFLOAT16");
case tensorflow::DataType::DT_QINT16:
break; // throw std::runtime_error("Unsupported type QINT16");
case tensorflow::DataType::DT_QUINT16:
break; // throw std::runtime_error("Unsupported type QUINT16");
case tensorflow::DataType::DT_UINT16: shape_type = shape::uint16_type; break;
case tensorflow::DataType::DT_COMPLEX128:
break; // throw std::runtime_error("Unsupported type COMPLEX128");
case tensorflow::DataType::DT_HALF: shape_type = shape::half_type; break;
case tensorflow::DataType::DT_RESOURCE:
break; // throw std::runtime_error("Unsupported type RESOURCE");
case tensorflow::DataType::DT_VARIANT:
break; // throw std::runtime_error("Unsupported type VARIANT");
case tensorflow::DataType::DT_UINT32: shape_type = shape::uint32_type; break;
case tensorflow::DataType::DT_UINT64:
shape_type = shape::uint64_type;
break;
// tf pb should not use these types
case tensorflow::DataType::DT_FLOAT_REF: break;
case tensorflow::DataType::DT_DOUBLE_REF: break;
case tensorflow::DataType::DT_INT32_REF: break;
case tensorflow::DataType::DT_UINT8_REF: break;
case tensorflow::DataType::DT_INT16_REF: break;
case tensorflow::DataType::DT_INT8_REF: break;
case tensorflow::DataType::DT_STRING_REF: break;
case tensorflow::DataType::DT_COMPLEX64_REF: break;
case tensorflow::DataType::DT_INT64_REF: break;
case tensorflow::DataType::DT_BOOL_REF: break;
case tensorflow::DataType::DT_QINT8_REF: break;
case tensorflow::DataType::DT_QUINT8_REF: break;
case tensorflow::DataType::DT_QINT32_REF: break;
case tensorflow::DataType::DT_BFLOAT16_REF: break;
case tensorflow::DataType::DT_QINT16_REF: break;
case tensorflow::DataType::DT_QUINT16_REF: break;
case tensorflow::DataType::DT_UINT16_REF: break;
case tensorflow::DataType::DT_COMPLEX128_REF: break;
case tensorflow::DataType::DT_HALF_REF: break;
case tensorflow::DataType::DT_RESOURCE_REF: break;
case tensorflow::DataType::DT_VARIANT_REF: break;
case tensorflow::DataType::DT_UINT32_REF: break;
case tensorflow::DataType::DT_UINT64_REF: break;
case tensorflow::DataType::DataType_INT_MAX_SENTINEL_DO_NOT_USE_: break;
case tensorflow::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_: break;
}
return shape_type;
}
static literal parse_tensor(const tensorflow::TensorProto& t)
{
std::vector<size_t> dims = parse_dims(t.tensor_shape());
if(dims.empty())
{
dims = {1};
}
size_t shape_size = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<size_t>());
if(!t.tensor_content().empty()) // has raw data
{
const std::string& s = t.tensor_content();
switch(t.dtype())
{
case tensorflow::DataType::DT_INVALID: throw std::runtime_error("");
case tensorflow::DataType::DT_FLOAT:
return literal{{shape::float_type, dims}, s.data()};
case tensorflow::DataType::DT_UINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_INT8: return literal{{shape::int32_type, dims}, s.data()};
case tensorflow::DataType::DT_UINT16:
return literal{{shape::int32_type, dims}, s.data()};
case tensorflow::DataType::DT_INT16:
return literal{{shape::int32_type, dims}, s.data()};
case tensorflow::DataType::DT_INT32:
return literal{{shape::int32_type, dims}, s.data()};
case tensorflow::DataType::DT_INT64:
return literal{{shape::int64_type, dims}, s.data()};
case tensorflow::DataType::DT_STRING: throw std::runtime_error("");
case tensorflow::DataType::DT_BOOL: return literal{{shape::int32_type, dims}, s.data()};
case tensorflow::DataType::DT_HALF: return literal{{shape::half_type, dims}, s.data()};
case tensorflow::DataType::DT_DOUBLE:
return literal{{shape::double_type, dims}, s.data()};
case tensorflow::DataType::DT_UINT32: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT64: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX64: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX128: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT32: throw std::runtime_error("");
case tensorflow::DataType::DT_BFLOAT16: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT16: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT16: throw std::runtime_error("");
case tensorflow::DataType::DT_RESOURCE: throw std::runtime_error("");
case tensorflow::DataType::DT_VARIANT: throw std::runtime_error("");
case tensorflow::DataType::DT_FLOAT_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_DOUBLE_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_STRING_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX64_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT64_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_BOOL_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_BFLOAT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX128_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_HALF_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_RESOURCE_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_VARIANT_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT64_REF: throw std::runtime_error("");
case tensorflow::DataType::DataType_INT_MAX_SENTINEL_DO_NOT_USE_:
throw std::runtime_error("");
case tensorflow::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_:
throw std::runtime_error("");
}
MIGRAPHX_THROW("Invalid tensor type");
}
switch(t.dtype())
{
case tensorflow::DataType::DT_INVALID: throw std::runtime_error("");
case tensorflow::DataType::DT_FLOAT:
return literal{{shape::float_type, dims}, get_data_vals(t.float_val(), shape_size)};
case tensorflow::DataType::DT_UINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_INT8:
return literal{{shape::int32_type, dims}, get_data_vals(t.int_val(), shape_size)};
case tensorflow::DataType::DT_UINT16:
return literal{{shape::int32_type, dims}, get_data_vals(t.int_val(), shape_size)};
case tensorflow::DataType::DT_INT16:
return literal{{shape::int32_type, dims}, get_data_vals(t.int_val(), shape_size)};
case tensorflow::DataType::DT_INT32:
return literal{{shape::int32_type, dims}, get_data_vals(t.int_val(), shape_size)};
case tensorflow::DataType::DT_INT64:
return literal{{shape::int64_type, dims}, get_data_vals(t.int64_val(), shape_size)};
case tensorflow::DataType::DT_STRING: throw std::runtime_error("");
case tensorflow::DataType::DT_BOOL:
return literal{{shape::int32_type, dims}, get_data_vals(t.bool_val(), shape_size)};
case tensorflow::DataType::DT_HALF:
{
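// half_val packs each fp16 value into the low 16 bits of an int32 (see
// tensor.proto), so narrow to uint16_t and reinterpret the raw bits as half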
std::vector<int> data_int32 = get_data_vals(t.half_val(), shape_size);
std::vector<uint16_t> data_uint16(data_int32.begin(), data_int32.end());
std::vector<half> data_half;
std::transform(data_uint16.begin(),
data_uint16.end(),
std::back_inserter(data_half),
[](uint16_t raw_val) { return *reinterpret_cast<half*>(&raw_val); });
return literal{{shape::half_type, dims}, data_half};
}
case tensorflow::DataType::DT_DOUBLE:
return literal{{shape::double_type, dims}, get_data_vals(t.double_val(), shape_size)};
case tensorflow::DataType::DT_UINT32: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT64: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX64: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX128: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT32: throw std::runtime_error("");
case tensorflow::DataType::DT_BFLOAT16: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT16: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT16: throw std::runtime_error("");
case tensorflow::DataType::DT_RESOURCE: throw std::runtime_error("");
case tensorflow::DataType::DT_VARIANT: throw std::runtime_error("");
case tensorflow::DataType::DT_FLOAT_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_DOUBLE_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_STRING_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX64_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT64_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_BOOL_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_BFLOAT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX128_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_HALF_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_RESOURCE_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_VARIANT_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT64_REF: throw std::runtime_error("");
case tensorflow::DataType::DataType_INT_MAX_SENTINEL_DO_NOT_USE_:
throw std::runtime_error("");
case tensorflow::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_:
throw std::runtime_error("");
}
MIGRAPHX_THROW("Invalid tensor type");
}
template <class T>
static std::vector<T> get_data_vals(const google::protobuf::RepeatedField<T>& data,
const size_t& shape_size)
{
std::vector<T> data_vals(shape_size);
// If the proto stores a single value it is broadcast to fill the entire
// shape (see tensor.proto); otherwise copy the stored values into the
// pre-sized buffer rather than appending past it.
if(data.size() == 1)
{
std::fill(data_vals.begin(), data_vals.end(), data[0]);
}
else
copy(data.begin(), data.end(), data_vals.begin());
return data_vals;
}
static std::vector<size_t> parse_dims(const tensorflow::TensorShapeProto& s)
{
std::vector<size_t> dims;
auto input_dims = s.dim();
std::transform(input_dims.begin(),
input_dims.end(),
std::back_inserter(dims),
[](tensorflow::TensorShapeProto_Dim dim) { return dim.size(); });
return dims;
}
};
program parse_tf(const std::string& name, bool is_nhwc)
{
std::fstream input(name.c_str(), std::ios::in | std::ios::binary);
tf_parser parser;
parser.is_nhwc = is_nhwc;
#ifndef NDEBUG
// Log the program when it can't be parsed
try
{
parser.parse_from(input);
}
catch(...)
{
std::cerr << parser.prog << std::endl;
throw;
}
#else
parser.parse_from(input);
#endif
return std::move(parser.prog);
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
syntax = "proto3";
package tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "TypesProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
// LINT.IfChange
enum DataType {
// Not a legal value for DataType. Used to indicate a DataType field
// has not been set.
DT_INVALID = 0;
// Data types that all computation devices are expected to be
// capable of supporting.
DT_FLOAT = 1;
DT_DOUBLE = 2;
DT_INT32 = 3;
DT_UINT8 = 4;
DT_INT16 = 5;
DT_INT8 = 6;
DT_STRING = 7;
DT_COMPLEX64 = 8; // Single-precision complex
DT_INT64 = 9;
DT_BOOL = 10;
DT_QINT8 = 11; // Quantized int8
DT_QUINT8 = 12; // Quantized uint8
DT_QINT32 = 13; // Quantized int32
DT_BFLOAT16 = 14; // Float32 truncated to 16 bits. Only for cast ops.
DT_QINT16 = 15; // Quantized int16
DT_QUINT16 = 16; // Quantized uint16
DT_UINT16 = 17;
DT_COMPLEX128 = 18; // Double-precision complex
DT_HALF = 19;
DT_RESOURCE = 20;
DT_VARIANT = 21; // Arbitrary C++ data types
DT_UINT32 = 22;
DT_UINT64 = 23;
// Do not use! These are only for parameters. Every enum above
// should have a corresponding value below (verified by types_test).
DT_FLOAT_REF = 101;
DT_DOUBLE_REF = 102;
DT_INT32_REF = 103;
DT_UINT8_REF = 104;
DT_INT16_REF = 105;
DT_INT8_REF = 106;
DT_STRING_REF = 107;
DT_COMPLEX64_REF = 108;
DT_INT64_REF = 109;
DT_BOOL_REF = 110;
DT_QINT8_REF = 111;
DT_QUINT8_REF = 112;
DT_QINT32_REF = 113;
DT_BFLOAT16_REF = 114;
DT_QINT16_REF = 115;
DT_QUINT16_REF = 116;
DT_UINT16_REF = 117;
DT_COMPLEX128_REF = 118;
DT_HALF_REF = 119;
DT_RESOURCE_REF = 120;
DT_VARIANT_REF = 121;
DT_UINT32_REF = 122;
DT_UINT64_REF = 123;
}
// LINT.ThenChange(
// https://www.tensorflow.org/code/tensorflow/c/c_api.h,
// https://www.tensorflow.org/code/tensorflow/go/tensor.go,
// https://www.tensorflow.org/code/tensorflow/core/framework/tensor.cc,
// https://www.tensorflow.org/code/tensorflow/core/framework/types.h,
// https://www.tensorflow.org/code/tensorflow/core/framework/types.cc,
// https://www.tensorflow.org/code/tensorflow/python/framework/dtypes.py,
// https://www.tensorflow.org/code/tensorflow/python/framework/function.py)
#include <migraphx/tf.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/verify_args.hpp>
#include <migraphx/instruction.hpp>
template <class T>
auto get_hash(const T& x)
{
return std::hash<T>{}(x);
}
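// run_cpu/run_gpu: build the program from the factory f, compile it for the
// respective target, feed every parameter a reproducible generated argument
// (seeded by a hash of the parameter name), evaluate, and return the result
// so the two backends can be compared element-wise.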
template <class F>
migraphx::argument run_cpu(F f)
{
auto p = f();
p.compile(migraphx::cpu::target{});
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
m[x.first] = migraphx::generate_argument(x.second, get_hash(x.first));
}
auto out = p.eval(m);
std::cout << p << std::endl;
return out;
}
template <class F>
migraphx::argument run_gpu(F f)
{
auto p = f();
p.compile(migraphx::gpu::target{});
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
m[x.first] =
migraphx::gpu::to_gpu(migraphx::generate_argument(x.second, get_hash(x.first)));
}
auto out = migraphx::gpu::from_gpu(p.eval(m));
std::cout << p << std::endl;
return migraphx::gpu::from_gpu(out);
}
template <class F>
void verify_program(const std::string& name, F f, double tolerance = 100)
{
auto x = run_cpu(f);
auto y = run_gpu(f);
migraphx::verify_args(name, x, y, tolerance);
// std::cout << "cpu: " << x << std::endl;
// std::cout << "gpu: " << y << std::endl;
}
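// Verify each instruction of prog in isolation: rebuild a one-instruction
// program whose inputs are fresh parameters (or copies of literal inputs) and
// compare CPU vs GPU results. Internal '@'-prefixed instructions and pure
// data-movement ops (broadcast, transpose, reshape) are skipped.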
void verify_instructions(const migraphx::program& prog, double tolerance = 80)
{
for(auto&& ins : prog)
{
if(ins.name().front() == '@')
continue;
if(ins.name() == "broadcast")
continue;
if(ins.name() == "transpose")
continue;
if(ins.name() == "reshape")
continue;
auto create_program = [&] {
migraphx::program p;
std::vector<migraphx::instruction_ref> inputs;
for(auto&& arg : ins.inputs())
{
if(arg->name() == "@literal")
inputs.push_back(p.add_literal(arg->get_literal()));
else
inputs.push_back(
p.add_parameter(std::to_string(inputs.size()), arg->get_shape()));
}
p.add_instruction(ins.get_operator(), inputs);
return p;
};
try
{
std::cout << "Verify: " << ins.name() << std::endl;
std::cout << create_program() << std::endl;
verify_program(ins.name(), create_program, tolerance);
}
catch(...)
{
std::cout << "Instruction " << ins.name() << " threw an exception." << std::endl;
throw;
}
}
}
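// Verify a truncated copy of the program: everything from n+1 instructions
// before the end onward is removed and the remaining prefix is compared on
// CPU vs GPU. verify_reduced_program below sweeps n across the whole program,
// which helps locate the first instruction that introduces a mismatch.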
template <class F>
void verify_reduced(F f, int n, double tolerance = 80)
{
auto create_program = [&] {
migraphx::program p = f();
auto last = std::prev(p.end(), n + 1);
p.remove_instructions(last, p.end());
return p;
};
std::cout << "Verify: " << std::endl;
std::cout << create_program() << std::endl;
verify_program(std::to_string(n), create_program, tolerance);
}
template <class F>
void verify_reduced_program(F f, double tolerance = 80)
{
migraphx::program p = f();
auto n = std::distance(p.begin(), p.end());
for(std::size_t i = 0; i < n; i++)
{
verify_reduced(f, i, tolerance);
}
}
int main(int argc, char const* argv[])
{
std::vector<std::string> args(argv + 1, argv + argc);
if(not args.empty())
{
bool is_nhwc = true;
if(std::any_of(args.begin(), args.end(), [](const auto& s) { return s == "nchw"; }))
{
is_nhwc = false;
}
std::string file = args.front();
auto p = migraphx::parse_tf(file, is_nhwc);
std::cout << p << std::endl;
if(std::any_of(args.begin(), args.end(), [](const auto& s) { return s == "-i"; }))
{
verify_instructions(p);
}
else if(std::any_of(args.begin(), args.end(), [](const auto& s) { return s == "-r"; }))
{
verify_reduced_program([&] { return migraphx::parse_tf(file, is_nhwc); });
}
else
{
verify_program(file, [&] { return migraphx::parse_tf(file, is_nhwc); });
}
}
}
syntax = "proto3";
package tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "VersionsProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
// Version information for a piece of serialized data
//
// There are different types of versions for each type of data
// (GraphDef, etc.), but they all have the same common shape
// described here.
//
// Each consumer has "consumer" and "min_producer" versions (specified
// elsewhere). A consumer is allowed to consume this data if
//
// producer >= min_producer
// consumer >= min_consumer
// consumer not in bad_consumers
//
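// Illustrative example (not part of the schema): a GraphDef written with
// producer = 27 and min_consumer = 0 can be read by any consumer whose own
// min_producer is <= 27 and whose version is not listed in bad_consumers.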
message VersionDef {
// The version of the code that produced this data.
int32 producer = 1;
// Any consumer below this version is not allowed to consume this data.
int32 min_consumer = 2;
// Specific consumer versions which are disallowed (e.g. due to bugs).
repeated int32 bad_consumers = 3;
};
......@@ -126,6 +126,15 @@ foreach(ONNX_TEST ${ONNX_TESTS})
add_dependencies(check ${TEST_NAME})
endforeach()
# tf test
add_executable(test_tf tf/tf_test.cpp)
rocm_clang_tidy_check(test_tf)
target_link_libraries(test_tf migraphx_tf)
target_include_directories(test_tf PUBLIC include)
add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tf)
add_dependencies(tests test_tf)
add_dependencies(check test_tf)
if(MIGRAPHX_ENABLE_PYTHON)
add_subdirectory(py)
endif()
......
......@@ -1269,6 +1269,176 @@ TEST_CASE(softmax_test)
EXPECT(migraphx::verify_range(results_vector, s));
}
TEST_CASE(logsoftmax_test_axis_0)
{
migraphx::program p;
std::vector<float> a = {
1.93885877, -1.20006269, 0.90960855, 0.42108916, -1.50797544, -1.31047913, 1.07816336,
-1.13288733, -0.86411064, 0.97800238, 0.76631385, 2.07962834, -0.8940665, -1.62855592,
-0.53763057, -1.48165117, -0.64154112, 0.42486547, 0.89330917, -2.42022666, 0.192611,
-0.01257413, -1.5326607, 0.53137897, -1.52383859, 0.46994381, 0.00453619, 0.0066996,
1.58394908, 0.84216752, -0.04137941, -0.88580789, 1.44055158, -0.17621241, -1.98917923,
-0.08610038, 0.79020567, -0.67714548, 0.42774631, 0.1376574, 2.23569227, 1.16681234,
-1.21191456, -0.28411502, -0.18688975, 1.67552548, 2.48357974, 0.95891282, -0.06616535,
-0.99628491, 1.04314606, -1.22943315, 0.76930403, 0.31106618};
std::vector<float> s = {
-2.71138556, -5.85030702, -3.74063578, -4.22915517, -6.15821977, -5.96072346, -3.57208097,
-5.78313166, -5.51435497, -3.67224195, -3.88393048, -2.57061599, -5.54431083, -6.27880025,
-5.1878749, -6.1318955, -5.29178545, -4.22537886, -3.75693516, -7.07047099, -4.45763333,
-4.66281846, -6.18290503, -4.11886536, -6.17408292, -4.18030052, -4.64570814, -4.64354473,
-3.06629525, -3.80807681, -4.69162374, -5.53605222, -3.20969275, -4.82645674, -6.63942356,
-4.73634471, -3.86003866, -5.32738981, -4.22249802, -4.51258693, -2.41455206, -3.48343199,
-5.86215889, -4.93435935, -4.83713408, -2.97471885, -2.16666459, -3.69133151, -4.71640968,
-5.64652924, -3.60709827, -5.87967748, -3.8809403, -4.33917815};
migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
int axis = 0;
p.add_instruction(migraphx::op::logsoftmax{axis}, al);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s));
}
TEST_CASE(logsoftmax_test_axis_1)
{
migraphx::program p;
std::vector<float> a = {
1.93885877, -1.20006269, 0.90960855, 0.42108916, -1.50797544, -1.31047913, 1.07816336,
-1.13288733, -0.86411064, 0.97800238, 0.76631385, 2.07962834, -0.8940665, -1.62855592,
-0.53763057, -1.48165117, -0.64154112, 0.42486547, 0.89330917, -2.42022666, 0.192611,
-0.01257413, -1.5326607, 0.53137897, -1.52383859, 0.46994381, 0.00453619, 0.0066996,
1.58394908, 0.84216752, -0.04137941, -0.88580789, 1.44055158, -0.17621241, -1.98917923,
-0.08610038, 0.79020567, -0.67714548, 0.42774631, 0.1376574, 2.23569227, 1.16681234,
-1.21191456, -0.28411502, -0.18688975, 1.67552548, 2.48357974, 0.95891282, -0.06616535,
-0.99628491, 1.04314606, -1.22943315, 0.76930403, 0.31106618};
std::vector<float> s = {
-1.77931988, -4.91824134, -2.80857010, -3.29708949, -5.22615409, -5.02865778, -2.64001529,
-4.85106598, -4.58228929, -2.74017627, -2.95186480, -1.63855031, -4.61224515, -5.34673457,
-4.25580922, -5.19982982, -4.35971977, -3.29331318, -2.82486948, -6.13840531, -3.52556765,
-3.73075278, -5.25083935, -3.18679968, -5.24201724, -3.24823484, -3.71364246, -4.14309917,
-2.56584969, -3.30763125, -4.19117818, -5.03560666, -2.70924719, -4.32601118, -6.13897800,
-4.23589915, -3.35959310, -4.82694425, -3.72205246, -4.01214137, -1.91410650, -2.98298643,
-5.36171333, -4.43391379, -4.33668852, -2.47427329, -1.66621903, -3.19088595, -4.21596412,
-5.14608368, -3.10665271, -5.37923192, -3.38049474, -3.83873259};
migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
int axis = 1;
p.add_instruction(migraphx::op::logsoftmax{axis}, al);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s));
}
TEST_CASE(logsoftmax_test_axis_2)
{
migraphx::program p;
std::vector<float> a = {
1.93885877, -1.20006269, 0.90960855, 0.42108916, -1.50797544, -1.31047913, 1.07816336,
-1.13288733, -0.86411064, 0.97800238, 0.76631385, 2.07962834, -0.8940665, -1.62855592,
-0.53763057, -1.48165117, -0.64154112, 0.42486547, 0.89330917, -2.42022666, 0.192611,
-0.01257413, -1.5326607, 0.53137897, -1.52383859, 0.46994381, 0.00453619, 0.0066996,
1.58394908, 0.84216752, -0.04137941, -0.88580789, 1.44055158, -0.17621241, -1.98917923,
-0.08610038, 0.79020567, -0.67714548, 0.42774631, 0.1376574, 2.23569227, 1.16681234,
-1.21191456, -0.28411502, -0.18688975, 1.67552548, 2.48357974, 0.95891282, -0.06616535,
-0.99628491, 1.04314606, -1.22943315, 0.76930403, 0.31106618};
std::vector<float> s = {
-0.79763715, -3.93655861, -1.82688737, -2.31540676, -4.24447136, -4.04697505, -1.65833256,
-3.86938325, -3.60060656, -1.81223672, -2.02392525, -0.71061076, -3.68430560, -4.41879502,
-3.32786967, -4.27189027, -3.43178022, -2.36537363, -1.35498658, -4.66852241, -2.05568475,
-2.26086988, -3.78095645, -1.71691678, -3.77213434, -1.77835194, -2.24375956, -2.74631770,
-1.16906822, -1.91084978, -2.79439671, -3.63882519, -1.31246572, -2.92922971, -4.74219653,
-2.83911768, -2.19738500, -3.66473615, -2.55984436, -2.84993327, -0.75189840, -1.82077833,
-4.19950523, -3.27170569, -3.17448042, -1.65286841, -0.84481415, -2.36948107, -3.39455924,
-4.32467880, -2.28524783, -4.55782704, -2.55908986, -3.01732771};
migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
int axis = 2;
p.add_instruction(migraphx::op::logsoftmax{axis}, al);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s));
}
TEST_CASE(logsoftmax_test_axis_3)
{
migraphx::program p;
std::vector<float> a = {
1.93885877, -1.20006269, 0.90960855, 0.42108916, -1.50797544, -1.31047913, 1.07816336,
-1.13288733, -0.86411064, 0.97800238, 0.76631385, 2.07962834, -0.8940665, -1.62855592,
-0.53763057, -1.48165117, -0.64154112, 0.42486547, 0.89330917, -2.42022666, 0.192611,
-0.01257413, -1.5326607, 0.53137897, -1.52383859, 0.46994381, 0.00453619, 0.0066996,
1.58394908, 0.84216752, -0.04137941, -0.88580789, 1.44055158, -0.17621241, -1.98917923,
-0.08610038, 0.79020567, -0.67714548, 0.42774631, 0.1376574, 2.23569227, 1.16681234,
-1.21191456, -0.28411502, -0.18688975, 1.67552548, 2.48357974, 0.95891282, -0.06616535,
-0.99628491, 1.04314606, -1.22943315, 0.76930403, 0.31106618};
std::vector<float> s = {
-0.33690375, -3.47582521, -1.36615397, -0.27936556, -2.20843016, -2.01093385, -0.22551114,
-2.43656183, -2.16778514, -1.57241522, -1.78410375, -0.47078926, -1.06745881, -1.80194823,
-0.71102288, -2.30719726, -1.46708721, -0.40068062, -0.42698261, -3.74051844, -1.12768078,
-1.07891856, -2.59900513, -0.53496546, -2.56139951, -0.56761711, -1.03302473, -2.09771276,
-0.52046328, -1.26224484, -1.76322959, -2.60765807, -0.28129860, -0.81424303, -2.62720985,
-0.72413100, -0.65570381, -2.12305496, -1.01816317, -2.48063402, -0.38259915, -1.45147908,
-1.84310238, -0.91530284, -0.81807757, -1.31692881, -0.50887455, -2.03354147, -1.48767160,
-2.41779116, -0.37836019, -2.56853147, -0.56979429, -1.02803214};
migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
int axis = 3;
p.add_instruction(migraphx::op::logsoftmax{axis}, al);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s));
}
TEST_CASE(logsoftmax_test_axis_4)
{
migraphx::program p;
std::vector<float> a = {
1.93885877, -1.20006269, 0.90960855, 0.42108916, -1.50797544, -1.31047913, 1.07816336,
-1.13288733, -0.86411064, 0.97800238, 0.76631385, 2.07962834, -0.8940665, -1.62855592,
-0.53763057, -1.48165117, -0.64154112, 0.42486547, 0.89330917, -2.42022666, 0.192611,
-0.01257413, -1.5326607, 0.53137897, -1.52383859, 0.46994381, 0.00453619, 0.0066996,
1.58394908, 0.84216752, -0.04137941, -0.88580789, 1.44055158, -0.17621241, -1.98917923,
-0.08610038, 0.79020567, -0.67714548, 0.42774631, 0.1376574, 2.23569227, 1.16681234,
-1.21191456, -0.28411502, -0.18688975, 1.67552548, 2.48357974, 0.95891282, -0.06616535,
-0.99628491, 1.04314606, -1.22943315, 0.76930403, 0.31106618};
std::vector<float> s = {0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000};
migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
int axis = 4;
p.add_instruction(migraphx::op::logsoftmax{axis}, al);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s));
}
TEST_CASE(conv2d_test)
{
migraphx::program p;
......
......@@ -2977,6 +2977,34 @@ struct test_lstm_bidirct_default_actv2
}
};
template <int Axis>
struct test_logsoftmax
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3, 4, 5, 6}};
auto param = p.add_parameter("0", s);
p.add_instruction(migraphx::op::logsoftmax{Axis}, param);
return p;
}
};
template <int Axis>
struct test_logsoftmax_1
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3}};
auto param = p.add_parameter("0", s);
p.add_instruction(migraphx::op::logsoftmax{Axis}, param);
return p;
}
};
int main()
{
verify_program<test_relu_lrn>();
......@@ -3095,4 +3123,11 @@ int main()
verify_program<test_lstm_bidirct_default_actv>();
verify_program<test_lstm_bidirct_default_actv1>();
verify_program<test_lstm_bidirct_default_actv2>();
verify_program<test_logsoftmax<0>>();
verify_program<test_logsoftmax<1>>();
verify_program<test_logsoftmax<2>>();
verify_program<test_logsoftmax<3>>();
verify_program<test_logsoftmax<4>>();
verify_program<test_logsoftmax_1<0>>();
verify_program<test_logsoftmax_1<1>>();
}
[Binary ONNX test data, not renderable as text: the "shape-gather-example" and "constant-scalar-example" graphs, each containing a single Constant node, plus a new "logsoftmax-example" graph with one LogSoftmax node (attribute "axis") mapping input x to output y.]
......@@ -470,8 +470,8 @@ TEST_CASE(flatten_test)
{
migraphx::program p;
auto l0 = p.add_parameter("0", migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}});
p.add_instruction(migraphx::op::flatten{1}, l0);
p.add_instruction(migraphx::op::flatten{2}, l0);
p.add_instruction(migraphx::op::flatten{1}, l0);
auto prog = migraphx::parse_onnx("flatten_test.onnx");
EXPECT(p == prog);
......@@ -524,7 +524,7 @@ TEST_CASE(constant_test)
TEST_CASE(constant_test_scalar)
{
migraphx::program p;
p.add_literal(migraphx::literal{migraphx::shape{migraphx::shape::int32_type}, {1}});
p.add_literal(migraphx::literal{migraphx::shape{migraphx::shape::int32_type, {1}}, {1}});
auto prog = migraphx::parse_onnx("constant_scalar.onnx");
EXPECT(p == prog);
......@@ -666,4 +666,15 @@ TEST_CASE(add_fp16_test)
EXPECT(p == prog);
}
TEST_CASE(logsoftmax)
{
migraphx::program p;
auto l0 = p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {3, 4, 5, 6}});
int axis = 1;
p.add_instruction(migraphx::op::logsoftmax{axis}, l0);
auto prog = migraphx::parse_onnx("logsoftmax_test.onnx");
EXPECT(p == prog);
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
[Binary ONNX diff, not renderable as text: the "sum-example" graph is updated — its Sum node's graph name changes from "test-dropout" to "test-sum" and the output tensor identifier appears to change from 2 to 3.]
[Binary ONNX diff, not renderable as text: the "unknown-example" graph (named "test-unknown") is updated — the Unknown node's input/output tensor numbering is shifted, with the graph output appearing to change from 2 to 3.]
......
......@@ -316,6 +316,61 @@ TEST_CASE(gather)
}
}
TEST_CASE(logsoftmax)
{
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 0;
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}},
migraphx::op::logsoftmax{axis},
input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 1;
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}},
migraphx::op::logsoftmax{axis},
input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 2;
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}},
migraphx::op::logsoftmax{axis},
input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 3;
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}},
migraphx::op::logsoftmax{axis},
input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 4;
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}},
migraphx::op::logsoftmax{axis},
input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 5;
throws_shape(migraphx::op::logsoftmax{axis}, input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = -1;
throws_shape(migraphx::op::logsoftmax{axis}, input);
}
}
TEST_CASE(dot)
{
{
......
[Binary TensorFlow GraphDef test data, not renderable as text: three new graphs, each with two Placeholder inputs carrying shape and dtype attributes — "add_bcast1" (Add with broadcast shapes), "add1" (plain Add), and "bias_add1" (BiasAdd with data_format NHWC).]