merge changes from develop branch

1c3b16d2 · Shucai Xiao · 015d1ac4 · 3d200e1c · 1c3b16d2 · 1c3b16d2
Commit 1c3b16d2 authored Mar 06, 2019 by Shucai Xiao
20 changed files
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -36,6 +36,8 @@ target_include_directories(migraphx SYSTEM PUBLIC $<BUILD_INTERFACE:${HALF_INCLU
 set(PACKAGE_DEPENDS)

 add_subdirectory(onnx)
+add_subdirectory(tf)
+
 add_subdirectory(py)
 add_subdirectory(targets/cpu)
 if(MIGRAPHX_ENABLE_GPU)

--- a/src/include/migraphx/operators.hpp
+++ b/src/include/migraphx/operators.hpp
@@ -978,6 +978,22 @@ struct softmax
    }
 };

+/// Log-softmax operator. The CPU and GPU lowerings treat the dimensions
+/// before `axis` as the batch and normalise over the remaining ones.
+struct logsoftmax
+{
+    /// Index of the first dimension that is normalised.
+    int axis = 1;
+    std::string name() const { return "logsoftmax"; }
+
+    /// Checks the single input and returns its shape unchanged.
+    /// Throws (via MIGRAPHX_THROW) when `axis` is not a valid dimension index
+    /// of the input; check_shapes enforces that exactly one input is given.
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs}.has(1);
+        // Compare as signed values. A valid axis lies in [0, rank - 1]; the
+        // earlier `axis > rank` test accepted axis == rank.
+        const auto rank = static_cast<int>(inputs.at(0).lens().size());
+        if(axis < 0 || axis >= rank)
+        {
+            MIGRAPHX_THROW("LogSoftMax: input axis value " + std::to_string(axis) +
+                           " is out of range");
+        }
+        return inputs.at(0);
+    }
+};
+
 struct flatten
 {
    uint64_t axis = 0;

--- a/src/include/migraphx/tf.hpp
+++ b/src/include/migraphx/tf.hpp
+#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_TF_HPP
+#define MIGRAPHX_GUARD_MIGRAPHLIB_TF_HPP
+
+#include <migraphx/program.hpp>
+#include <migraphx/config.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+
+/// Operator whose name is "unknown:" followed by `op`.
+/// NOTE(review): presumably emitted for graph nodes the parser has no handler
+/// for -- confirm against the parser implementation.
+struct unknown
+{
+    std::string op;
+
+    /// Operator name: the fixed prefix "unknown:" plus the stored op string.
+    std::string name() const { return "unknown:" + op; }
+
+    /// Passes the first input shape through; yields a default-constructed
+    /// shape when there are no inputs.
+    shape compute_shape(std::vector<shape> input) const
+    {
+        return input.empty() ? shape{} : input.front();
+    }
+
+    /// Streams the operator name.
+    friend std::ostream& operator<<(std::ostream& os, const unknown& x)
+    {
+        return os << x.name();
+    }
+};
+
+/// Create a program from a TensorFlow model file.
+/// NOTE(review): `name` is presumably the path of the serialized graph and
+/// `is_nhwc` presumably tells whether the model uses NHWC tensor layout --
+/// confirm against the definition in tf.cpp.
+program parse_tf(const std::string& name, bool is_nhwc);
+
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/onnx/onnx.cpp
+++ b/src/onnx/onnx.cpp
@@ -79,6 +79,7 @@ struct onnx_parser
        add_mem_op("Gemm", &onnx_parser::parse_gemm);
        add_mem_op("BatchNormalization", &onnx_parser::parse_batchnorm);
        add_mem_op("Softmax", &onnx_parser::parse_softmax);
+        add_mem_op("LogSoftmax", &onnx_parser::parse_logsoftmax);
        add_mem_op("Squeeze", &onnx_parser::parse_squeeze);
        add_mem_op("Unsqueeze", &onnx_parser::parse_unsqueeze);
        add_mem_op("Slice", &onnx_parser::parse_slice);
@@ -228,6 +229,19 @@ struct onnx_parser
        return prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1])}}, s);
    }

+    /// Adds an op::logsoftmax instruction for a "LogSoftmax" node.
+    /// The node's "axis" attribute is used when present; otherwise axis is 1.
+    instruction_ref parse_logsoftmax(const std::string&,
+                                     const attribute_map& attributes,
+                                     std::vector<instruction_ref> args)
+    {
+        const int axis =
+            contains(attributes, "axis") ? parse_value(attributes.at("axis")).at<int>() : 1;
+        return prog.add_instruction(op::logsoftmax{axis}, std::move(args));
+    }
+
    instruction_ref
    parse_conv(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
    {
@@ -1149,9 +1163,9 @@ struct onnx_parser
                instructions[name] = prog.add_parameter(name, s);
            }
        }
-        for(auto&& p : nodes)
+        for(auto&& output : graph.output())
        {
-            this->parse_node(p.first);
+            this->parse_node(output.name());
        }
    }


--- a/src/targets/cpu/lowering.cpp
+++ b/src/targets/cpu/lowering.cpp
@@ -626,6 +626,75 @@ struct softmax2d
    }
 };

+/// CPU implementation of op::logsoftmax.
+///
+/// Dimensions before `op.axis` are treated as the batch. Each output element
+/// is input - max - log(sum(exp(input - max))), where max and sum are taken
+/// over all elements that share the same batch index.
+struct cpu_logsoftmax
+{
+    op::logsoftmax op;
+    std::string name() const { return "cpu::logsoftmax"; }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+
+    /// Linear index, within `batch_shape`, of the batch that element `idx`
+    /// belongs to. Only the first `axis` coordinates of `idx` are used; with
+    /// axis == 0 there is a single batch.
+    template <typename T>
+    std::size_t compute_batch_index(const T& idx, const shape& batch_shape, int axis) const
+    {
+        if(axis == 0)
+        {
+            return 0;
+        }
+        // Index straight from the leading coordinates instead of copying them
+        // into a temporary vector for every element.
+        return batch_shape.index(idx.begin(), idx.begin() + axis);
+    }
+
+    /// Computes log-softmax of args[0] into a freshly allocated result of
+    /// shape `output_shape`.
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    {
+        argument result{output_shape};
+        const auto& lens = output_shape.lens();
+        const int axis   = op.axis;
+
+        // The leading `axis` dimensions form the batch; axis == 0 means a
+        // single batch covering the whole tensor.
+        std::vector<std::size_t> batch_lens{};
+        if(axis == 0)
+        {
+            batch_lens.push_back(1);
+        }
+        else
+        {
+            batch_lens.insert(batch_lens.begin(), lens.begin(), lens.begin() + axis);
+        }
+        const shape batch_shape{migraphx::shape::uint32_type, batch_lens};
+
+        visit_all(result, args[0])([&](auto output, auto input) {
+            using value_type = typename decltype(input)::value_type;
+            const auto batch_of = [&](const auto& idx) {
+                return this->compute_batch_index(idx, batch_shape, axis);
+            };
+
+            // Pass 1: per-batch maximum.
+            std::vector<value_type> batch_max(batch_shape.elements(),
+                                              std::numeric_limits<value_type>::lowest());
+            shape_for_each(output_shape, [&](const auto& idx) {
+                auto index       = batch_of(idx);
+                batch_max[index] = std::max(batch_max[index], input(idx.begin(), idx.end()));
+            });
+
+            // Pass 2: shift by the maximum so exp() below only sees values <= 0.
+            shape_for_each(output_shape, [&](const auto& idx) {
+                auto index = batch_of(idx);
+                output(idx.begin(), idx.end()) = input(idx.begin(), idx.end()) - batch_max[index];
+            });
+
+            // Pass 3: per-batch sum of exponentials, then its logarithm.
+            std::vector<value_type> batch_sum(batch_shape.elements(), value_type(0));
+            shape_for_each(output_shape, [&](const auto& idx) {
+                auto index = batch_of(idx);
+                batch_sum[index] += std::exp(output(idx.begin(), idx.end()));
+            });
+
+            for(auto& sum : batch_sum)
+            {
+                sum = std::log(sum);
+            }
+
+            // Pass 4: subtract log-sum-exp from the shifted values.
+            shape_for_each(output_shape, [&](const auto& idx) {
+                auto index = batch_of(idx);
+                output(idx.begin(), idx.end()) -= batch_sum[index];
+            });
+        });
+
+        return result;
+    }
+};
+
 struct add_op
 {
    std::string name() const { return "add"; }
@@ -736,6 +805,7 @@ struct cpu_apply
        apply_map["pad"]        = extend_op<cpu_pad, op::pad>();
        apply_map["concat"]     = extend_op<cpu_concat, op::concat>();
        apply_map["gather"]     = extend_op<cpu_gather, op::gather>();
+        apply_map["logsoftmax"] = extend_op<cpu_logsoftmax, op::logsoftmax>();
        apply_map["leaky_relu"] = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
        apply_map["elu"]        = extend_op<cpu_unary<elu_op>, op::elu>();
        apply_map["identity"]   = simple_op<cpu_unary<identity_op>>();

--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -26,6 +26,7 @@ add_library(migraphx_device
    device/atan.cpp
    device/add_relu.cpp
    device/contiguous.cpp
+    device/logsoftmax.cpp
    device/mul.cpp
    device/concat.cpp
    device/pad.cpp
@@ -48,6 +49,7 @@ add_library(migraphx_gpu
    pooling.cpp
    convolution.cpp
    softmax.cpp
+    logsoftmax.cpp
    contiguous.cpp
    concat.cpp
    relu.cpp

--- a/src/targets/gpu/device/logsoftmax.cpp
+++ b/src/targets/gpu/device/logsoftmax.cpp
+#include <migraphx/shape.hpp>
+#include <migraphx/argument.hpp>
+#include <migraphx/gpu/device/logsoftmax.hpp>
+#include <migraphx/gpu/device/tensor.hpp>
+#include <migraphx/gpu/device/launch.hpp>
+#include <migraphx/gpu/device/types.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <functional>
+#include <numeric>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+
+/// Computes log-softmax on the device and returns the output argument.
+///
+/// The tensor is viewed as `batch_size` rows of `n_dims` elements, where
+/// batch_size is the product of the dimensions before `axis` and n_dims the
+/// product of the remaining ones. args.front() is read as the input and
+/// args.back() is written as the output and returned.
+/// Rows are addressed as i * n_dims, i.e. the buffers are indexed as packed
+/// row-major memory.
+argument logsoftmax(hipStream_t stream,
+                    const migraphx::shape& output_shape,
+                    std::vector<migraphx::argument> args,
+                    int axis)
+{
+    const auto& lens = output_shape.lens();
+    const std::size_t batch_size = std::accumulate(
+        lens.begin(), lens.begin() + axis, std::size_t{1}, std::multiplies<std::size_t>());
+    const std::size_t n_dims = std::accumulate(
+        lens.begin() + axis, lens.end(), std::size_t{1}, std::multiplies<std::size_t>());
+
+    visit_all(args.back(), args.front())([&](auto output, auto input) {
+        const auto* input_ptr = device_cast(input.data());
+        auto* output_ptr      = device_cast(output.data());
+
+        // each thread is for one item in the batch
+        gs_launch(stream, batch_size)([=](auto i) {
+            std::size_t row_start = i * n_dims;
+            // get max
+            auto batch_max = input_ptr[row_start];
+            for(std::size_t j = 1; j < n_dims; ++j)
+            {
+                auto ind  = row_start + j;
+                batch_max = std::max(to_hip_type(batch_max), to_hip_type(input_ptr[ind]));
+            }
+
+            // shift the row by its max; exp() below then only sees values <= 0
+            for(std::size_t j = 0; j < n_dims; ++j)
+            {
+                auto ind        = row_start + j;
+                output_ptr[ind] = input_ptr[ind] - batch_max;
+            }
+
+            // log of the sum of exponentials of the shifted row
+            auto batch_sum = ::exp(to_hip_type(output_ptr[row_start]));
+            for(std::size_t j = 1; j < n_dims; ++j)
+            {
+                auto ind = row_start + j;
+                batch_sum += ::exp(to_hip_type(output_ptr[ind]));
+            }
+            batch_sum = ::log(to_hip_type(batch_sum));
+
+            for(std::size_t j = 0; j < n_dims; ++j)
+            {
+                auto ind = row_start + j;
+                output_ptr[ind] -= batch_sum;
+            }
+        });
+    });
+
+    return args.back();
+}
+
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/include/migraphx/gpu/device/logsoftmax.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/logsoftmax.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_LOGSOFTMAX_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DEVICE_LOGSOFTMAX_HPP
+
+#include <migraphx/argument.hpp>
+#include <migraphx/config.hpp>
+#include <hip/hip_runtime_api.h>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+
+/// Device log-softmax: dimensions before `axis` form the batch and the
+/// remaining ones are normalised together.
+/// The definition in device/logsoftmax.cpp reads args.front() as the input,
+/// writes the result into args.back() and returns args.back().
+argument logsoftmax(hipStream_t stream,
+                    const migraphx::shape& output_shape,
+                    std::vector<migraphx::argument> args,
+                    int axis);
+
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/logsoftmax.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/logsoftmax.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_LOGSOFTMAX_HPP
+#define MIGRAPHX_GUARD_RTGLIB_LOGSOFTMAX_HPP
+
+#include <migraphx/gpu/lowering.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/dfor.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>
+#include <migraphx/gpu/device/add.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+/// GPU counterpart of op::logsoftmax; wraps the reference operator in `op`.
+struct hip_logsoftmax
+{
+    op::logsoftmax op;
+    std::string name() const { return "gpu::logsoftmax"; }
+    shape compute_shape(const std::vector<shape>& inputs) const;
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
+
+    /// The result aliases the last argument.
+    int output_alias(const std::vector<shape>& shapes) const
+    {
+        return static_cast<int>(shapes.size() - 1);
+    }
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/logsoftmax.cpp
+++ b/src/targets/gpu/logsoftmax.cpp
+#include <migraphx/gpu/logsoftmax.hpp>
+#include <migraphx/gpu/device/logsoftmax.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/manage_ptr.hpp>
+#include <migraphx/gpu/miopen.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+// Requires exactly two standard-layout shapes, then derives the result shape
+// from the first one alone through op::logsoftmax.
+shape hip_logsoftmax::compute_shape(const std::vector<shape>& inputs) const
+{
+    check_shapes{inputs, *this}.has(2).standard();
+    const std::vector<shape> first_only{inputs.at(0)};
+    return op.compute_shape(first_only);
+}
+
+// Forwards to device::logsoftmax on the context's stream, passing the
+// arguments through unchanged together with the operator's axis.
+argument hip_logsoftmax::compute(context& ctx,
+                                 const shape& output_shape,
+                                 const std::vector<argument>& args) const
+{
+    return device::logsoftmax(ctx.get_stream().get(), output_shape, args, op.axis);
+}
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/lowering.cpp
+++ b/src/targets/gpu/lowering.cpp
@@ -21,6 +21,7 @@
 #include <migraphx/gpu/leaky_relu.hpp>
 #include <migraphx/gpu/elu.hpp>
 #include <migraphx/gpu/softmax.hpp>
+#include <migraphx/gpu/logsoftmax.hpp>
 #include <migraphx/gpu/add.hpp>
 #include <migraphx/gpu/sub.hpp>
 #include <migraphx/gpu/exp.hpp>
@@ -97,6 +98,7 @@ struct miopen_apply
        add_extend_op<miopen_contiguous, op::contiguous>("contiguous");
        add_extend_op<hip_concat, op::concat>("concat");
        add_extend_op<miopen_softmax, op::softmax>("softmax");
+        add_extend_op<hip_logsoftmax, op::logsoftmax>("logsoftmax");
        add_extend_op<hip_gather, op::gather>("gather");
        add_extend_op<hip_pad, op::pad>("pad");


--- a/src/tf/CMakeLists.txt
+++ b/src/tf/CMakeLists.txt
+# Protobuf is needed to generate and build the TensorFlow graph schema code.
+find_package(Protobuf REQUIRED)
+
+# Generate C++ sources and headers from the .proto files in this directory.
+protobuf_generate_cpp(
+    PROTO_SRCS PROTO_HDRS 
+    graph.proto
+    node_def.proto
+    attr_value.proto
+    tensor.proto
+    tensor_shape.proto
+    resource_handle.proto
+    types.proto
+    function.proto
+    op_def.proto
+    versions.proto
+)
+# Static library with the generated code. The generated headers land in the
+# binary dir, -w turns compiler warnings off for this target, and the code is
+# built position-independent.
+add_library(tf-proto STATIC ${PROTO_SRCS})
+target_include_directories(tf-proto SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${PROTOBUF_INCLUDE_DIR})
+target_compile_options(tf-proto PRIVATE -w)
+target_link_libraries(tf-proto PRIVATE ${PROTOBUF_LIBRARY})
+set_target_properties(tf-proto PROPERTIES POSITION_INDEPENDENT_CODE On)
+
+# Parser library built from tf.cpp, exported under the name "tf".
+add_library(migraphx_tf tf.cpp)
+set_target_properties(migraphx_tf PROPERTIES EXPORT_NAME tf)
+rocm_clang_tidy_check(migraphx_tf)
+target_link_libraries(migraphx_tf PRIVATE tf-proto)
+target_link_libraries(migraphx_tf PUBLIC migraphx)
+
+rocm_install_targets(
+  TARGETS migraphx_tf
+)
+
+# Executable built from read_tf.cpp; only needs the parser library.
+add_executable(read_tf read_tf.cpp)
+rocm_clang_tidy_check(read_tf)
+target_link_libraries(read_tf migraphx_tf)
+
+# verify_tf and perf_tf link both the CPU and GPU targets, so they are only
+# built when GPU support is enabled.
+if(MIGRAPHX_ENABLE_GPU)
+add_executable(verify_tf verify_tf.cpp)
+rocm_clang_tidy_check(verify_tf)
+target_link_libraries(verify_tf migraphx_tf migraphx_cpu migraphx_gpu)
+
+add_executable(perf_tf perf_tf.cpp)
+rocm_clang_tidy_check(perf_tf)
+target_link_libraries(perf_tf migraphx_tf migraphx_cpu migraphx_gpu)
+endif()
--- a/src/tf/attr_value.proto
+++ b/src/tf/attr_value.proto
+syntax = "proto3";
+
+package tensorflow;
+option cc_enable_arenas = true;
+option java_outer_classname = "AttrValueProtos";
+option java_multiple_files = true;
+option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
+import "tensor.proto";
+import "tensor_shape.proto";
+import "types.proto";
+
+// Protocol buffer representing the value for an attr used to configure an Op.
+// Comment indicates the corresponding attr type.  Only the field matching the
+// attr type may be filled.
+message AttrValue {
+  // Payload for any "list(...)" attr. Fields 2-8 use the same field numbers
+  // as the matching scalar alternatives of the `value` oneof below; `func` is
+  // 9 here but 10 in the oneof, where 9 is taken by `placeholder`.
+  // LINT.IfChange
+  message ListValue {
+    repeated bytes s = 2;                        // "list(string)"
+    repeated int64 i = 3 [packed = true];        // "list(int)"
+    repeated float f = 4 [packed = true];        // "list(float)"
+    repeated bool b = 5 [packed = true];         // "list(bool)"
+    repeated DataType type = 6 [packed = true];  // "list(type)"
+    repeated TensorShapeProto shape = 7;         // "list(shape)"
+    repeated TensorProto tensor = 8;             // "list(tensor)"
+    repeated NameAttrList func = 9;              // "list(attr)"
+  }
+  // LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.cc)
+
+  // Exactly one alternative is set; it must match the attr's declared type.
+  oneof value {
+    bytes s = 2;                 // "string"
+    int64 i = 3;                 // "int"
+    float f = 4;                 // "float"
+    bool b = 5;                  // "bool"
+    DataType type = 6;           // "type"
+    TensorShapeProto shape = 7;  // "shape"
+    TensorProto tensor = 8;      // "tensor"
+    ListValue list = 1;          // any "list(...)"
+
+    // "func" represents a function. func.name is a function's name or
+    // a primitive op's name. func.attr.first is the name of an attr
+    // defined for that function. func.attr.second is the value for
+    // that attr in the instantiation.
+    NameAttrList func = 10;
+
+    // This is a placeholder only used in nodes defined inside a
+    // function.  It indicates the attr value will be supplied when
+    // the function is instantiated.  For example, let us suppose a
+    // node "N" in function "FN". "N" has an attr "A" with value
+    // placeholder = "foo". When FN is instantiated with attr "foo"
+    // set to "bar", the instantiated node N's attr A will have been
+    // given the value "bar".
+    string placeholder = 9;
+  }
+}
+
+// A list of attr names and their values. The whole list is attached
+// with a string name.  E.g., MatMul[T=float].
+message NameAttrList {
+  // The name the attr list is attached to.
+  string name = 1;
+  // Attr name -> attr value.
+  map<string, AttrValue> attr = 2;
+}
--- a/src/tf/function.proto
+++ b/src/tf/function.proto
+syntax = "proto3";
+
+package tensorflow;
+option cc_enable_arenas = true;
+option java_outer_classname = "FunctionProtos";
+option java_multiple_files = true;
+option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
+import "attr_value.proto";
+import "node_def.proto";
+import "op_def.proto";
+
+// A library is a set of named functions.
+message FunctionDefLibrary {
+  // The functions contained in the library.
+  repeated FunctionDef function = 1;
+  // Gradient registrations; each GradientDef pairs a function name with the
+  // name of its gradient function.
+  repeated GradientDef gradient = 2;
+}
+
+// A function can be instantiated when the runtime can bind every attr
+// with a value. When a GraphDef has a call to a function, it must
+// have binding for every attr defined in the signature.
+//
+// TODO(zhifengc):
+//   * device spec, etc.
+message FunctionDef {
+  // NOTE: fields are not declared in field-number order (attr = 5 comes
+  // before node_def = 3 and ret = 4), and number 2 is reserved.
+
+  // The definition of the function's name, arguments, return values,
+  // attrs etc.
+  OpDef signature = 1;
+
+  // Attributes specific to this function definition.
+  map<string, AttrValue> attr = 5;
+
+  // NOTE: field id 2 deleted on Jan 11, 2017, GraphDef version 21.
+  reserved 2;
+
+  // In both of the following fields, there is the need to specify an
+  // output that is used as either the input to another node (in
+  // `node_def`) or as a return value of the function (in `ret`).
+  // Unlike the NodeDefs in GraphDef, we need to be able to specify a
+  // list in some cases (instead of just single outputs).  Also, we
+  // need to be able to deal with lists of unknown length (so the
+  // output index may not be known at function definition time).  So
+  // we use the following format instead:
+  // * "fun_in" where "fun_in" is the name of a function input arg in
+  //   the `signature` field above.  This represents that input, whether
+  //   it is a single tensor or a list.
+  // * "fun_in:0" gives the first element of a function input arg (a
+  //   non-list input is considered a list of length 1 for these
+  //   purposes).
+  // * "node:out" where "node" is the name of a node in `node_def` and
+  //   "out" is the name one of its op's output arguments (the name
+  //   comes from the OpDef of the node's op). This represents that
+  //   node's output, whether it is a single tensor or a list.
+  //   Note: We enforce that an op's output arguments are never
+  //   renamed in the backwards-compatibility test.
+  // * "node:out:0" gives the first element of a node output arg (a
+  //   non-list output is considered a list of length 1 for these
+  //   purposes).
+  //
+  // NOT CURRENTLY SUPPORTED (but may be in the future):
+  // * "node:out:-1" gives last element in a node output list
+  // * "node:out:1:" gives a list with all but the first element in a
+  //   node output list
+  // * "node:out::-1" gives a list with all but the last element in a
+  //   node output list
+
+  // The body of the function.  Unlike the NodeDefs in a GraphDef, attrs
+  // may have values of type `placeholder` and the `input` field uses
+  // the "output" format above.
+
+  // By convention, "op" in node_def is resolved by consulting with a
+  // user-defined library first. If not resolved, "func" is assumed to
+  // be a builtin op.
+  repeated NodeDef node_def = 3;
+
+  // A mapping from the output arg names from `signature` to the
+  // outputs from `node_def` that should be returned by the function.
+  map<string, string> ret = 4;
+}
+
+// GradientDef defines the gradient function of a function defined in
+// a function library.
+//
+// A gradient function g (specified by gradient_func) for a function f
+// (specified by function_name) must follow the following:
+//
+// The function 'f' must be a numerical function which takes N inputs
+// and produces M outputs. Its gradient function 'g', which is a
+// function taking N + M inputs and produces N outputs.
+//
+// I.e. if we have
+//    (y1, y2, ..., y_M) = f(x1, x2, ..., x_N),
+// then, g is
+//    (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N,
+//                                      dL/dy1, dL/dy2, ..., dL/dy_M),
+// where L is a scalar-value function of (x1, x2, ..., xN) (e.g., the
+// loss function). dL/dx_i is the partial derivative of L with respect
+// to x_i.
+message GradientDef {
+  // Both fields hold names only.
+  // NOTE(review): presumably resolved against the functions of the enclosing
+  // FunctionDefLibrary -- confirm.
+  string function_name = 1;  // The function name.
+  string gradient_func = 2;  // The gradient function's name.
+}
--- a/src/tf/graph.proto
+++ b/src/tf/graph.proto
+syntax = "proto3";
+
+package tensorflow;
+option cc_enable_arenas = true;
+option java_outer_classname = "GraphProtos";
+option java_multiple_files = true;
+option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
+import "node_def.proto";
+import "function.proto";
+import "versions.proto";
+
+// Represents the graph of operations
+message GraphDef {
+  // The nodes (operations) that make up the graph.
+  repeated NodeDef node = 1;
+
+  // Compatibility versions of the graph.  See core/public/version.h for version
+  // history.  The GraphDef version is distinct from the TensorFlow version, and
+  // each release of TensorFlow will support a range of GraphDef versions.
+  VersionDef versions = 4;
+
+  // Deprecated single version field; use versions above instead.  Since all
+  // GraphDef changes before "versions" was introduced were forward
+  // compatible, this field is entirely ignored.
+  int32 version = 3 [deprecated = true];
+
+  // EXPERIMENTAL. DO NOT USE OR DEPEND ON THIS YET.
+  //
+  // "library" provides user-defined functions.
+  //
+  // Naming:
+  //   * library.function.name are in a flat namespace.
+  //     NOTE: We may need to change it to be hierarchical to support
+  //     different orgs. E.g.,
+  //     { "/google/nn", { ... }},
+  //     { "/google/vision", { ... }}
+  //     { "/org_foo/module_bar", { ... }}
+  //     map<string, FunctionDefLib> named_lib;
+  //   * If node[i].op is the name of one function in "library",
+  //     node[i] is deemed as a function call. Otherwise, node[i].op
+  //     must be a primitive operation supported by the runtime.
+  //
+  //
+  // Function call semantics:
+  //
+  //   * The callee may start execution as soon as some of its inputs
+  //     are ready. The caller may want to use Tuple() mechanism to
+  //     ensure all inputs are ready in the same time.
+  //
+  //   * The consumer of return values may start executing as soon as
+  //     the return values the consumer depends on are ready.  The
+  //     consumer may want to use Tuple() mechanism to ensure the
+  //     consumer does not start until all return values of the callee
+  //     function are ready.
+  FunctionDefLibrary library = 2;
+};
--- a/src/tf/node_def.proto
+++ b/src/tf/node_def.proto
+syntax = "proto3";
+
+package tensorflow;
+option cc_enable_arenas = true;
+option java_outer_classname = "NodeProto";
+option java_multiple_files = true;
+option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
+import "attr_value.proto";
+
+// One node of a graph: a named operation together with its inputs, its
+// requested device and its attrs.
+message NodeDef {
+  // The name given to this operator. Used for naming inputs,
+  // logging, visualization, etc.  Unique within a single GraphDef.
+  // Must match the regexp "[A-Za-z0-9.][A-Za-z0-9_./]*".
+  string name = 1;
+
+  // The operation name.  There may be custom parameters in attrs.
+  // Op names starting with an underscore are reserved for internal use.
+  string op = 2;
+
+  // Each input is "node:src_output" with "node" being a string name and
+  // "src_output" indicating which output tensor to use from "node". If
+  // "src_output" is 0 the ":0" suffix can be omitted.  Regular inputs
+  // may optionally be followed by control inputs that have the format
+  // "^node".
+  repeated string input = 3;
+
+  // A (possibly partial) specification for the device on which this
+  // node should be placed.
+  // The expected syntax for this string is as follows:
+  //
+  // DEVICE_SPEC ::= PARTIAL_SPEC
+  //
+  // PARTIAL_SPEC ::= ("/" CONSTRAINT) *
+  // CONSTRAINT ::= ("job:" JOB_NAME)
+  //              | ("replica:" [1-9][0-9]*)
+  //              | ("task:" [1-9][0-9]*)
+  //              | ("device:" [A-Za-z]* ":" ([1-9][0-9]* | "*") )
+  //
+  // Valid values for this string include:
+  // * "/job:worker/replica:0/task:1/device:GPU:3"  (full specification)
+  // * "/job:worker/device:GPU:3"                   (partial specification)
+  // * ""                                    (no specification)
+  //
+  // If the constraints do not resolve to a single device (or if this
+  // field is empty or not present), the runtime will attempt to
+  // choose a device automatically.
+  string device = 4;
+
+  // Operation-specific graph-construction-time configuration.
+  // Note that this should include all attrs defined in the
+  // corresponding OpDef, including those with a value matching
+  // the default -- this allows the default to change and makes
+  // NodeDefs easier to interpret on their own.  However, if
+  // an attr with a default is not specified in this list, the
+  // default will be used.
+  // The "names" (keys) must match the regexp "[a-z][a-z0-9_]+" (and
+  // one of the names from the corresponding OpDef's attr field).
+  // The values must have a type matching the corresponding OpDef
+  // attr's type field.
+  // TODO(josh11b): Add some examples here showing best practices.
+  map<string, AttrValue> attr = 5;
+};
--- a/src/tf/op_def.proto
+++ b/src/tf/op_def.proto
+syntax = "proto3";
+
+package tensorflow;
+option cc_enable_arenas = true;
+option java_outer_classname = "OpDefProtos";
+option java_multiple_files = true;
+option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
+import "attr_value.proto";
+import "types.proto";
+
+// Defines an operation. A NodeDef in a GraphDef specifies an Op by
+// using the "op" field which should match the name of a OpDef.
+// LINT.IfChange
+message OpDef {
+  // Op names starting with an underscore are reserved for internal use.
+  // Names should be CamelCase and match the regexp "[A-Z][a-zA-Z0-9_]*".
+  string name = 1;
+
+  // For describing inputs and outputs.
+  message ArgDef {
+    // Name for the input/output.  Should match the regexp "[a-z][a-z0-9_]*".
+    string name = 1;
+
+    // Human readable description.
+    string description = 2;
+
+    // Describes the type of one or more tensors that are accepted/produced
+    // by this input/output arg.  The only legal combinations are:
+    // * For a single tensor: either the "type" field is set or the
+    //   "type_attr" field is set to the name of an attr with type "type".
+    // * For a sequence of tensors with the same type: the "number_attr"
+    //   field will be set to the name of an attr with type "int", and
+    //   either the "type" or "type_attr" field will be set as for
+    //   single tensors.
+    // * For a sequence of tensors, the "type_list_attr" field will be set
+    //   to the name of an attr with type "list(type)".
+    DataType type = 3;
+    string type_attr = 4;    // if specified, attr must have type "type"
+    string number_attr = 5;  // if specified, attr must have type "int"
+    // If specified, attr must have type "list(type)", and none of
+    // type, type_attr, and number_attr may be specified.
+    string type_list_attr = 6;
+
+    // For inputs: if true, the inputs are required to be refs.
+    //   By default, inputs can be either refs or non-refs.
+    // For outputs: if true, outputs are refs, otherwise they are not.
+    bool is_ref = 16;
+  };
+
+  // Description of the input(s).
+  repeated ArgDef input_arg = 2;
+
+  // Description of the output(s).
+  repeated ArgDef output_arg = 3;
+
+  // Description of the graph-construction-time configuration of this
+  // Op.  That is to say, this describes the attr fields that will
+  // be specified in the NodeDef.
+  message AttrDef {
+    // A descriptive name for the argument.  May be used, e.g. by the
+    // Python client, as a keyword argument name, and so should match
+    // the regexp "[a-z][a-z0-9_]+".
+    string name = 1;
+
+    // One of the type names from attr_value.proto ("string", "list(string)",
+    // "int", etc.).
+    string type = 2;
+
+    // A reasonable default for this attribute if the user does not supply
+    // a value.  If not specified, the user must supply a value.
+    AttrValue default_value = 3;
+
+    // Human-readable description.
+    string description = 4;
+
+    // TODO(josh11b): bool is_optional?
+
+    // --- Constraints ---
+    // These constraints are only in effect if specified.  Default is no
+    // constraints.
+
+    // For type == "int", this is a minimum value.  For "list(___)"
+    // types, this is the minimum length.
+    bool has_minimum = 5;
+    int64 minimum = 6;
+
+    // The set of allowed values.  Has type that is the "list" version
+    // of the "type" field above (uses the "list" field of AttrValue).
+    // If type == "type" or "list(type)" above, then the "type" field
+    // of "allowed_values.list" has the set of allowed DataTypes.
+    // If type == "string" or "list(string)", then the "s" field of
+    // "allowed_values.list" has the set of allowed strings.
+    AttrValue allowed_values = 7;
+  }
+  // The attrs of this op, one AttrDef (declared above) per attr.
+  repeated AttrDef attr = 4;
+
+  // Optional deprecation based on GraphDef versions.
+  OpDeprecation deprecation = 8;
+
+  // One-line human-readable description of what the Op does.
+  string summary = 5;
+
+  // Additional, longer human-readable description of what the Op does.
+  string description = 6;
+
+  // -------------------------------------------------------------------------
+  // Which optimizations this operation can participate in.
+
+  // True if the operation is commutative ("op(a,b) == op(b,a)" for all inputs)
+  bool is_commutative = 18;
+
+  // If is_aggregate is true, then this operation accepts N >= 2
+  // inputs and produces 1 output all of the same type.  Should be
+  // associative and commutative, and produce output with the same
+  // shape as the input.  The optimizer may replace an aggregate op
+  // taking input from multiple devices with a tree of aggregate ops
+  // that aggregate locally within each device (and possibly within
+  // groups of nearby devices) before communicating.
+  // TODO(josh11b): Implement that optimization.
+  bool is_aggregate = 16;  // for things like add
+
+  // Other optimizations go here, like
+  //   can_alias_input, rewrite_when_output_unused, partitioning_strategy, etc.
+
+  // -------------------------------------------------------------------------
+  // Optimization constraints.
+
+  // Ops are marked as stateful if their behavior depends on some state beyond
+  // their input tensors (e.g. variable reading op) or if they have
+  // a side-effect (e.g. printing or asserting ops). Equivalently, stateless ops
+  // must always produce the same output for the same input and have
+  // no side-effects.
+  //
+  // By default Ops may be moved between devices.  Stateful ops should
+  // either not be moved, or should only be moved if that state can also
+  // be moved (e.g. via some sort of save / restore).
+  // Stateful ops are guaranteed to never be optimized away by Common
+  // Subexpression Elimination (CSE).
+  bool is_stateful = 17;  // for things like variables, queue
+
+  // -------------------------------------------------------------------------
+  // Non-standard options.
+
+  // By default, all inputs to an Op must be initialized Tensors.  Ops
+  // that may initialize tensors for the first time should set this
+  // field to true, to allow the Op to take an uninitialized Tensor as
+  // input.
+  bool allows_uninitialized_input = 19;  // for Assign, etc.
+};
+// LINT.ThenChange(
+//     https://www.tensorflow.org/code/tensorflow/core/framework/op_def_util.cc)
+
+// Describes the version-dependent deprecation of an op (the type of OpDef's "deprecation" field).
+message OpDeprecation {
+  // First GraphDef version at which the op is disallowed.
+  int32 version = 1;
+
+  // Explanation of why the op was deprecated and what to use instead.
+  string explanation = 2;
+};
+
+// A collection of OpDef messages, stored in the repeated "op" field.
+message OpList {
+  repeated OpDef op = 1;
+};
--- a/src/tf/perf_tf.cpp
+++ b/src/tf/perf_tf.cpp
+#include <migraphx/tf.hpp>
+
+#include <migraphx/gpu/target.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/verify.hpp>
+
+/// Builds a parameter map holding one generated argument for every entry
+/// returned by `p.get_parameter_shapes()`.
+///
+/// @param p   Program whose parameter shapes are enumerated.
+/// @param gpu When true, each generated argument is passed through
+///            migraphx::gpu::to_gpu before being stored; when false the
+///            generated argument is stored as-is.
+/// @return    Map keyed by parameter name (the `first` of each entry).
+migraphx::program::parameter_map create_param_map(const migraphx::program& p, bool gpu = true)
+{
+    migraphx::program::parameter_map params;
+    for(auto&& entry : p.get_parameter_shapes())
+    {
+        if(!gpu)
+        {
+            params[entry.first] = migraphx::generate_argument(entry.second);
+            continue;
+        }
+        params[entry.first] = migraphx::gpu::to_gpu(migraphx::generate_argument(entry.second));
+    }
+    return params;
+}
+
+/// Performance driver: parses a TensorFlow model, compiles it for the GPU
+/// target and prints a performance report to std::cout.
+///
+/// Usage: perf_tf <tf-model-file> [nhwc|nchw] [iterations]
+///   argv[1]  file name handed to migraphx::parse_tf.
+///   argv[2]  optional layout selector; only the exact string "nchw" clears
+///            the NHWC flag, any other value leaves it set.
+///   argv[3]  optional run count forwarded to perf_report (default 50). It is
+///            parsed with std::stoul, which throws on non-numeric text.
+///
+/// @return 0 on success, 1 (after printing a usage line) when no model file
+///         was supplied.
+int main(int argc, char const* argv[])
+{
+    if(argc < 2)
+    {
+        // argv[0] is not guaranteed to be usable when argc == 0.
+        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "perf_tf")
+                  << " <tf-model-file> [nhwc|nchw] [iterations]" << std::endl;
+        return 1;
+    }
+
+    // Compare through std::string rather than strcmp: this file does not
+    // include <cstring> itself, while std::string is already used below.
+    const bool is_nhwc     = !(argc > 2 && std::string(argv[2]) == "nchw");
+    const std::string file = argv[1];
+    const std::size_t n    = argc > 3 ? std::stoul(argv[3]) : 50;
+
+    auto p = migraphx::parse_tf(file, is_nhwc);
+    std::cout << "Compiling ... " << std::endl;
+    p.compile(migraphx::gpu::target{});
+    std::cout << "Allocating params ... " << std::endl;
+    auto m = create_param_map(p);
+    std::cout << "Running performance report ... " << std::endl;
+    p.perf_report(std::cout, n, m);
+    return 0;
+}
--- a/src/tf/read_tf.cpp
+++ b/src/tf/read_tf.cpp
+#include <migraphx/tf.hpp>
+
+/// Reads a TensorFlow model and prints the resulting program to std::cout.
+///
+/// Usage: read_tf <tf-model-file> [nhwc|nchw]
+///   argv[1]  file name handed to migraphx::parse_tf.
+///   argv[2]  optional layout selector; only the exact string "nchw" clears
+///            the NHWC flag, any other value leaves it set.
+///
+/// @return 0 on success, 1 (after printing a usage line) when no model file
+///         was supplied.
+int main(int argc, char const* argv[])
+{
+    if(argc < 2)
+    {
+        // argv[0] is not guaranteed to be usable when argc == 0.
+        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "read_tf")
+                  << " <tf-model-file> [nhwc|nchw]" << std::endl;
+        return 1;
+    }
+
+    // Compare through std::string rather than strcmp: this file does not
+    // include <cstring> itself, while std::string is already used below.
+    const bool is_nhwc     = !(argc > 2 && std::string(argv[2]) == "nchw");
+    const std::string file = argv[1];
+
+    auto prog = migraphx::parse_tf(file, is_nhwc);
+    std::cout << prog << std::endl;
+    return 0;
+}
--- a/src/tf/resource_handle.proto
+++ b/src/tf/resource_handle.proto
+syntax = "proto3";
+
+package tensorflow;
+option cc_enable_arenas = true;
+option java_outer_classname = "ResourceHandle";
+option java_multiple_files = true;
+option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
+
+// Protocol buffer representing a handle to a TensorFlow resource. Handles are
+// not valid across executions, but can be serialized back and forth within
+// a single run.
+message ResourceHandleProto {
+  // Unique name for the device containing the resource.
+  string device = 1;
+
+  // Container in which this resource is placed.
+  string container = 2;
+
+  // Unique name of this resource.
+  string name = 3;
+
+  // Hash code for the type of the resource. It is only valid on the same
+  // device and within the same execution.
+  uint64 hash_code = 4;
+
+  // For debugging only: the name of the type pointed to by this handle, if
+  // available.
+  string maybe_type_name = 5;
+};