Commit 213930d8 authored by Paul

Merge branch 'develop' into scheduler

parents 20d11e64 3499ec7d
syntax = "proto3";
package tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "OpDefProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
import "attr_value.proto";
import "types.proto";
// Defines an operation. A NodeDef in a GraphDef specifies an Op by
// using the "op" field which should match the name of an OpDef.
// LINT.IfChange
message OpDef {
// Op names starting with an underscore are reserved for internal use.
// Names should be CamelCase and match the regexp "[A-Z][a-zA-Z0-9_]*".
string name = 1;
// For describing inputs and outputs.
message ArgDef {
// Name for the input/output. Should match the regexp "[a-z][a-z0-9_]*".
string name = 1;
// Human readable description.
string description = 2;
// Describes the type of one or more tensors that are accepted/produced
// by this input/output arg. The only legal combinations are:
// * For a single tensor: either the "type" field is set or the
// "type_attr" field is set to the name of an attr with type "type".
// * For a sequence of tensors with the same type: the "number_attr"
// field will be set to the name of an attr with type "int", and
// either the "type" or "type_attr" field will be set as for
// single tensors.
// * For a sequence of tensors, the "type_list_attr" field will be set
// to the name of an attr with type "list(type)".
DataType type = 3;
string type_attr = 4; // if specified, attr must have type "type"
string number_attr = 5; // if specified, attr must have type "int"
// If specified, attr must have type "list(type)", and none of
// type, type_attr, and number_attr may be specified.
string type_list_attr = 6;
// For inputs: if true, the inputs are required to be refs.
// By default, inputs can be either refs or non-refs.
// For outputs: if true, outputs are refs, otherwise they are not.
bool is_ref = 16;
};
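// Illustrative textproto sketch (not part of this file) of the three legal
// combinations described above, assuming attrs "T" (type), "N" (int), and
// "Ts" (list(type)) are declared on the op:
//   input_arg { name: "x" type: DT_FLOAT }                   // single tensor, fixed type
//   input_arg { name: "y" type_attr: "T" number_attr: "N" }  // N tensors, all of type T
//   input_arg { name: "z" type_list_attr: "Ts" }             // tensors with types given by Ts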
// Description of the input(s).
repeated ArgDef input_arg = 2;
// Description of the output(s).
repeated ArgDef output_arg = 3;
// Description of the graph-construction-time configuration of this
// Op. That is to say, this describes the attr fields that will
// be specified in the NodeDef.
message AttrDef {
// A descriptive name for the argument. May be used, e.g. by the
// Python client, as a keyword argument name, and so should match
// the regexp "[a-z][a-z0-9_]+".
string name = 1;
// One of the type names from attr_value.proto ("string", "list(string)",
// "int", etc.).
string type = 2;
// A reasonable default for this attribute if the user does not supply
// a value. If not specified, the user must supply a value.
AttrValue default_value = 3;
// Human-readable description.
string description = 4;
// TODO(josh11b): bool is_optional?
// --- Constraints ---
// These constraints are only in effect if specified. Default is no
// constraints.
// For type == "int", this is a minimum value. For "list(___)"
// types, this is the minimum length.
bool has_minimum = 5;
int64 minimum = 6;
// The set of allowed values. Has type that is the "list" version
// of the "type" field above (uses the "list" field of AttrValue).
// If type == "type" or "list(type)" above, then the "type" field
// of "allowed_values.list" has the set of allowed DataTypes.
// If type == "string" or "list(string)", then the "s" field of
// "allowed_values.list" has the set of allowed strings.
AttrValue allowed_values = 7;
}
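// Illustrative textproto sketch (not part of this file): an attr "T" of type
// "type" restricted to two DataTypes via allowed_values:
//   attr {
//     name: "T"
//     type: "type"
//     allowed_values { list { type: [DT_FLOAT, DT_INT32] } }
//   }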
repeated AttrDef attr = 4;
// Optional deprecation based on GraphDef versions.
OpDeprecation deprecation = 8;
// One-line human-readable description of what the Op does.
string summary = 5;
// Additional, longer human-readable description of what the Op does.
string description = 6;
// -------------------------------------------------------------------------
// Which optimizations this operation can participate in.
// True if the operation is commutative ("op(a,b) == op(b,a)" for all inputs)
bool is_commutative = 18;
// If is_aggregate is true, then this operation accepts N >= 2
// inputs and produces 1 output all of the same type. Should be
// associative and commutative, and produce output with the same
// shape as the input. The optimizer may replace an aggregate op
// taking input from multiple devices with a tree of aggregate ops
// that aggregate locally within each device (and possibly within
// groups of nearby devices) before communicating.
// TODO(josh11b): Implement that optimization.
bool is_aggregate = 16; // for things like add
// Other optimizations go here, like
// can_alias_input, rewrite_when_output_unused, partitioning_strategy, etc.
// -------------------------------------------------------------------------
// Optimization constraints.
// Ops are marked as stateful if their behavior depends on some state beyond
// their input tensors (e.g. variable reading op) or if they have
// a side-effect (e.g. printing or asserting ops). Equivalently, stateless ops
// must always produce the same output for the same input and have
// no side-effects.
//
// By default Ops may be moved between devices. Stateful ops should
// either not be moved, or should only be moved if that state can also
// be moved (e.g. via some sort of save / restore).
// Stateful ops are guaranteed to never be optimized away by Common
// Subexpression Elimination (CSE).
bool is_stateful = 17; // for things like variables, queue
// -------------------------------------------------------------------------
// Non-standard options.
// By default, all inputs to an Op must be initialized Tensors. Ops
// that may initialize tensors for the first time should set this
// field to true, to allow the Op to take an uninitialized Tensor as
// input.
bool allows_uninitialized_input = 19; // for Assign, etc.
};
// LINT.ThenChange(
// https://www.tensorflow.org/code/tensorflow/core/framework/op_def_util.cc)
// Information about version-dependent deprecation of an op
message OpDeprecation {
// First GraphDef version at which the op is disallowed.
int32 version = 1;
// Explanation of why it was deprecated and what to use instead.
string explanation = 2;
};
// A collection of OpDefs
message OpList {
repeated OpDef op = 1;
};
#include <migraphx/tf.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/verify.hpp>
#include <cstring>
#include <iostream>
#include <string>
migraphx::program::parameter_map create_param_map(const migraphx::program& p, bool gpu = true)
{
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
if(gpu)
m[x.first] = migraphx::gpu::to_gpu(migraphx::generate_argument(x.second));
else
m[x.first] = migraphx::generate_argument(x.second);
}
return m;
}
int main(int argc, char const* argv[])
{
if(argc > 1)
{
bool is_nhwc = true;
if(argc > 2)
{
if(strcmp(argv[2], "nchw") == 0)
is_nhwc = false;
}
std::string file = argv[1];
std::size_t n = argc > 3 ? std::stoul(argv[3]) : 50;
auto p = migraphx::parse_tf(file, is_nhwc);
std::cout << "Compiling ... " << std::endl;
p.compile(migraphx::gpu::target{});
std::cout << "Allocating params ... " << std::endl;
auto m = create_param_map(p);
std::cout << "Running performance report ... " << std::endl;
p.perf_report(std::cout, n, m);
}
}
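// Example invocation (hypothetical binary and file names):
//   ./tf_perf frozen_graph.pb nhwc 100
// parses the TF graph, compiles it for the GPU target, and reports timings
// over 100 runs (default 50).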
#include <migraphx/tf.hpp>
#include <cstring>
#include <iostream>
#include <string>
int main(int argc, char const* argv[])
{
if(argc > 1)
{
bool is_nhwc = true;
if(argc > 2)
{
if(strcmp(argv[2], "nchw") == 0)
is_nhwc = false;
}
std::string file = argv[1];
auto prog = migraphx::parse_tf(file, is_nhwc);
std::cout << prog << std::endl;
}
}
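// Example invocation (hypothetical binary and file names):
//   ./tf_print frozen_graph.pb nchw
// prints the parsed migraphx::program to stdout without compiling it.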
syntax = "proto3";
package tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "ResourceHandle";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
// Protocol buffer representing a handle to a tensorflow resource. Handles are
// not valid across executions, but can be serialized back and forth from within
// a single run.
message ResourceHandleProto {
// Unique name for the device containing the resource.
string device = 1;
// Container in which this resource is placed.
string container = 2;
// Unique name of this resource.
string name = 3;
// Hash code for the type of the resource. Is only valid in the same device
// and in the same execution.
uint64 hash_code = 4;
// For debug-only, the name of the type pointed to by this handle, if
// available.
string maybe_type_name = 5;
};
syntax = "proto3";
package tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "TensorProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
import "resource_handle.proto";
import "tensor_shape.proto";
import "types.proto";
// Protocol buffer representing a tensor.
message TensorProto {
DataType dtype = 1;
// Shape of the tensor. TODO(touts): sort out the 0-rank issues.
TensorShapeProto tensor_shape = 2;
// Only one of the representations below is set, one of "tensor_content" and
// the "xxx_val" attributes. We are not using oneof because oneofs cannot
// contain repeated fields, so it would require another extra set of messages.
// Version number.
//
// In version 0, if the "repeated xxx" representations contain only one
// element, that element is repeated to fill the shape. This makes it easy
// to represent a constant Tensor with a single value.
int32 version_number = 3;
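// Illustrative textproto (not part of this file): under version 0, a 2x3
// DT_FLOAT tensor filled entirely with 1.5 can be encoded with a single value:
//   dtype: DT_FLOAT
//   tensor_shape { dim { size: 2 } dim { size: 3 } }
//   float_val: 1.5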
// Serialized raw tensor content from either Tensor::AsProtoTensorContent or
// memcpy in tensorflow::grpc::EncodeTensorToByteBuffer. This representation
// can be used for all tensor types. The purpose of this representation is to
// reduce serialization overhead during RPC call by avoiding serialization of
// many repeated small items.
bytes tensor_content = 4;
// Type specific representations that make it easy to create tensor protos in
// all languages. Only the representation corresponding to "dtype" can
// be set. The values hold the flattened representation of the tensor in
// row major order.
// DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll
// have some pointless zero padding for each value here.
repeated int32 half_val = 13 [packed = true];
// DT_FLOAT.
repeated float float_val = 5 [packed = true];
// DT_DOUBLE.
repeated double double_val = 6 [packed = true];
// DT_INT32, DT_INT16, DT_INT8, DT_UINT8.
repeated int32 int_val = 7 [packed = true];
// DT_STRING
repeated bytes string_val = 8;
// DT_COMPLEX64. scomplex_val(2*i) and scomplex_val(2*i+1) are real
// and imaginary parts of i-th single precision complex.
repeated float scomplex_val = 9 [packed = true];
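// e.g. the values (1+2i, 3+4i) are stored as scomplex_val: [1, 2, 3, 4].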
// DT_INT64
repeated int64 int64_val = 10 [packed = true];
// DT_BOOL
repeated bool bool_val = 11 [packed = true];
// DT_COMPLEX128. dcomplex_val(2*i) and dcomplex_val(2*i+1) are real
// and imaginary parts of i-th double precision complex.
repeated double dcomplex_val = 12 [packed = true];
// DT_RESOURCE
repeated ResourceHandleProto resource_handle_val = 14;
// DT_VARIANT
repeated VariantTensorDataProto variant_val = 15;
// DT_UINT32
repeated uint32 uint32_val = 16 [packed = true];
// DT_UINT64
repeated uint64 uint64_val = 17 [packed = true];
};
// Protocol buffer representing the serialization format of DT_VARIANT tensors.
message VariantTensorDataProto {
// Name of the type of objects being serialized.
string type_name = 1;
// Portions of the object that are not Tensors.
bytes metadata = 2;
// Tensors contained within objects being serialized.
repeated TensorProto tensors = 3;
}
// Protocol buffer representing the shape of tensors.
syntax = "proto3";
option cc_enable_arenas = true;
option java_outer_classname = "TensorShapeProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
package tensorflow;
// Dimensions of a tensor.
message TensorShapeProto {
// One dimension of the tensor.
message Dim {
// Size of the tensor in that dimension.
// This value must be >= -1, where -1 means "unknown" dimension. Certain
// wrappers that work with TensorShapeProto may fail at runtime when
// deserializing a TensorShapeProto containing a dim value of -1.
int64 size = 1;
// Optional name of the tensor dimension.
string name = 2;
};
// Dimensions of the tensor, such as {"input", 30}, {"output", 40}
// for a 30 x 40 2D tensor. If an entry has size -1, this
// corresponds to a dimension of unknown size. The names are
// optional.
//
// The order of entries in "dim" matters: It indicates the layout of the
// values in the tensor in-memory representation.
//
// The first entry in "dim" is the outermost dimension used to layout the
// values, the last entry is the innermost dimension. This matches the
// in-memory layout of RowMajor Eigen tensors.
//
// If "dim.size()" > 0, "unknown_rank" must be false.
repeated Dim dim = 2;
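// Illustrative textproto (not part of this file) for the 30 x 40 example above:
//   dim { size: 30 name: "input" }
//   dim { size: 40 name: "output" }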
// If true, the number of dimensions in the shape is unknown.
//
// If true, "dim.size()" must be 0.
bool unknown_rank = 3;
};
syntax = "proto3";
package tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "TypesProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
// LINT.IfChange
enum DataType {
// Not a legal value for DataType. Used to indicate a DataType field
// has not been set.
DT_INVALID = 0;
// Data types that all computation devices are expected to be
// capable to support.
DT_FLOAT = 1;
DT_DOUBLE = 2;
DT_INT32 = 3;
DT_UINT8 = 4;
DT_INT16 = 5;
DT_INT8 = 6;
DT_STRING = 7;
DT_COMPLEX64 = 8; // Single-precision complex
DT_INT64 = 9;
DT_BOOL = 10;
DT_QINT8 = 11; // Quantized int8
DT_QUINT8 = 12; // Quantized uint8
DT_QINT32 = 13; // Quantized int32
DT_BFLOAT16 = 14; // Float32 truncated to 16 bits. Only for cast ops.
DT_QINT16 = 15; // Quantized int16
DT_QUINT16 = 16; // Quantized uint16
DT_UINT16 = 17;
DT_COMPLEX128 = 18; // Double-precision complex
DT_HALF = 19;
DT_RESOURCE = 20;
DT_VARIANT = 21; // Arbitrary C++ data types
DT_UINT32 = 22;
DT_UINT64 = 23;
// Do not use! These are only for parameters. Every enum above
// should have a corresponding value below (verified by types_test).
DT_FLOAT_REF = 101;
DT_DOUBLE_REF = 102;
DT_INT32_REF = 103;
DT_UINT8_REF = 104;
DT_INT16_REF = 105;
DT_INT8_REF = 106;
DT_STRING_REF = 107;
DT_COMPLEX64_REF = 108;
DT_INT64_REF = 109;
DT_BOOL_REF = 110;
DT_QINT8_REF = 111;
DT_QUINT8_REF = 112;
DT_QINT32_REF = 113;
DT_BFLOAT16_REF = 114;
DT_QINT16_REF = 115;
DT_QUINT16_REF = 116;
DT_UINT16_REF = 117;
DT_COMPLEX128_REF = 118;
DT_HALF_REF = 119;
DT_RESOURCE_REF = 120;
DT_VARIANT_REF = 121;
DT_UINT32_REF = 122;
DT_UINT64_REF = 123;
}
// LINT.ThenChange(
// https://www.tensorflow.org/code/tensorflow/c/c_api.h,
// https://www.tensorflow.org/code/tensorflow/go/tensor.go,
// https://www.tensorflow.org/code/tensorflow/core/framework/tensor.cc,
// https://www.tensorflow.org/code/tensorflow/core/framework/types.h,
// https://www.tensorflow.org/code/tensorflow/core/framework/types.cc,
// https://www.tensorflow.org/code/tensorflow/python/framework/dtypes.py,
// https://www.tensorflow.org/code/tensorflow/python/framework/function.py)
#include <migraphx/tf.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/verify_args.hpp>
#include <migraphx/instruction.hpp>
#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
template <class T>
auto get_hash(const T& x)
{
return std::hash<T>{}(x);
}
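// Hashing each parameter's name seeds generate_argument deterministically, so
// run_cpu and run_gpu below see identical input data for the same parameter.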
template <class F>
migraphx::argument run_cpu(F f)
{
auto p = f();
p.compile(migraphx::cpu::target{});
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
m[x.first] = migraphx::generate_argument(x.second, get_hash(x.first));
}
auto out = p.eval(m);
std::cout << p << std::endl;
return out;
}
template <class F>
migraphx::argument run_gpu(F f)
{
auto p = f();
p.compile(migraphx::gpu::target{});
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
m[x.first] =
migraphx::gpu::to_gpu(migraphx::generate_argument(x.second, get_hash(x.first)));
}
auto out = p.eval(m);
std::cout << p << std::endl;
return migraphx::gpu::from_gpu(out);
}
template <class F>
void verify_program(const std::string& name, F f, double tolerance = 100)
{
auto x = run_cpu(f);
auto y = run_gpu(f);
migraphx::verify_args(name, x, y, tolerance);
// std::cout << "cpu: " << x << std::endl;
// std::cout << "gpu: " << y << std::endl;
}
void verify_instructions(const migraphx::program& prog, double tolerance = 80)
{
for(auto&& ins : prog)
{
if(ins.name().front() == '@')
continue;
if(ins.name() == "broadcast")
continue;
if(ins.name() == "transpose")
continue;
if(ins.name() == "reshape")
continue;
auto create_program = [&] {
migraphx::program p;
std::vector<migraphx::instruction_ref> inputs;
for(auto&& arg : ins.inputs())
{
if(arg->name() == "@literal")
inputs.push_back(p.add_literal(arg->get_literal()));
else
inputs.push_back(
p.add_parameter(std::to_string(inputs.size()), arg->get_shape()));
}
p.add_instruction(ins.get_operator(), inputs);
return p;
};
try
{
std::cout << "Verify: " << ins.name() << std::endl;
std::cout << create_program() << std::endl;
verify_program(ins.name(), create_program, tolerance);
}
catch(...)
{
std::cout << "Instruction " << ins.name() << " threw an exception." << std::endl;
throw;
}
}
}
template <class F>
void verify_reduced(F f, int n, double tolerance = 80)
{
auto create_program = [&] {
migraphx::program p = f();
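// Drop the last n + 1 instructions so only a prefix of the graph is run;
// sweeping n upward narrows down where CPU and GPU results diverge.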
auto last = std::prev(p.end(), n + 1);
p.remove_instructions(last, p.end());
return p;
};
std::cout << "Verify: " << std::endl;
std::cout << create_program() << std::endl;
verify_program(std::to_string(n), create_program, tolerance);
}
template <class F>
void verify_reduced_program(F f, double tolerance = 80)
{
migraphx::program p = f();
auto n = std::distance(p.begin(), p.end());
for(std::size_t i = 0; i < n; i++)
{
verify_reduced(f, i, tolerance);
}
}
int main(int argc, char const* argv[])
{
std::vector<std::string> args(argv + 1, argv + argc);
if(not args.empty())
{
bool is_nhwc = true;
if(std::any_of(args.begin(), args.end(), [](const auto& s) { return s == "nchw"; }))
{
is_nhwc = false;
}
std::string file = args.front();
auto p = migraphx::parse_tf(file, is_nhwc);
std::cout << p << std::endl;
if(std::any_of(args.begin(), args.end(), [](const auto& s) { return s == "-i"; }))
{
verify_instructions(p);
}
else if(std::any_of(args.begin(), args.end(), [](const auto& s) { return s == "-r"; }))
{
verify_reduced_program([&] { return migraphx::parse_tf(file, is_nhwc); });
}
else
{
verify_program(file, [&] { return migraphx::parse_tf(file, is_nhwc); });
}
}
}
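// Example invocations (hypothetical binary and file names):
//   ./tf_verify frozen_graph.pb        verify the whole program, CPU vs GPU
//   ./tf_verify frozen_graph.pb -i     verify each instruction in isolation
//   ./tf_verify frozen_graph.pb -r     verify successively truncated programs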
syntax = "proto3";
package tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "VersionsProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
// Version information for a piece of serialized data
//
// There are different types of versions for each type of data
// (GraphDef, etc.), but they all have the same common shape
// described here.
//
// Each consumer has "consumer" and "min_producer" versions (specified
// elsewhere). A consumer is allowed to consume this data if
//
// producer >= min_producer
// consumer >= min_consumer
// consumer not in bad_consumers
//
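// For example, data recorded with producer = 27 and min_consumer = 12 may be
// read by a consumer at version 24, provided the consumer's min_producer is
// <= 27 and 24 is not listed in the data's bad_consumers.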
message VersionDef {
// The version of the code that produced this data.
int32 producer = 1;
// Any consumer below this version is not allowed to consume this data.
int32 min_consumer = 2;
// Specific consumer versions which are disallowed (e.g. due to bugs).
repeated int32 bad_consumers = 3;
};
@@ -126,6 +126,15 @@ foreach(ONNX_TEST ${ONNX_TESTS})
add_dependencies(check ${TEST_NAME})
endforeach()
# tf test
add_executable(test_tf tf/tf_test.cpp)
rocm_clang_tidy_check(test_tf)
target_link_libraries(test_tf migraphx_tf)
target_include_directories(test_tf PUBLIC include)
add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tf)
add_dependencies(tests test_tf)
add_dependencies(check test_tf)
if(MIGRAPHX_ENABLE_PYTHON)
add_subdirectory(py)
endif()
@@ -766,6 +766,18 @@ struct test_gemm : verify_program<test_gemm>
}
};
struct test_gemm_ex : verify_program<test_gemm_ex>
{
migraphx::program create_program() const
{
migraphx::program p;
auto a = p.add_parameter("a", migraphx::shape{migraphx::shape::float_type, {1, 1, 4, 5}});
auto b = p.add_parameter("b", migraphx::shape{migraphx::shape::float_type, {1, 1, 5, 3}});
p.add_instruction(migraphx::op::dot{}, a, b);
return p;
}
};
struct test_gemm_half : verify_program<test_gemm_half>
{
migraphx::program create_program() const
@@ -805,6 +817,19 @@ struct test_gemm_transposeb : verify_program<test_gemm_transposeb>
}
};
struct test_gemm_transposeb_ex : verify_program<test_gemm_transposeb_ex>
{
migraphx::program create_program() const
{
migraphx::program p;
auto a = p.add_parameter("a", migraphx::shape{migraphx::shape::float_type, {1, 4, 5}});
auto b = p.add_parameter("b", migraphx::shape{migraphx::shape::float_type, {1, 3, 5}});
auto bt = p.add_instruction(migraphx::op::transpose{{0, 2, 1}}, b);
p.add_instruction(migraphx::op::dot{}, a, bt);
return p;
}
};
struct test_gemm_transposea : verify_program<test_gemm_transposea>
{
migraphx::program create_program() const
@@ -818,6 +843,19 @@ struct test_gemm_transposea : verify_program<test_gemm_transposea>
}
};
struct test_gemm_transposea_ex : verify_program<test_gemm_transposea_ex>
{
migraphx::program create_program() const
{
migraphx::program p;
auto a = p.add_parameter("a", migraphx::shape{migraphx::shape::float_type, {1, 1, 5, 4}});
auto b = p.add_parameter("b", migraphx::shape{migraphx::shape::float_type, {1, 1, 5, 3}});
auto at = p.add_instruction(migraphx::op::transpose{{0, 1, 3, 2}}, a);
p.add_instruction(migraphx::op::dot{}, at, b);
return p;
}
};
struct test_gemm_transposeab : verify_program<test_gemm_transposeab>
{
migraphx::program create_program() const
@@ -832,6 +870,38 @@ struct test_gemm_transposeab : verify_program<test_gemm_transposeab>
}
};
struct gemm_mutli_dim_2 : verify_program<gemm_mutli_dim_2>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape m1_shape{migraphx::shape::float_type, {2, 2, 3}};
migraphx::shape m2_shape{migraphx::shape::float_type, {2, 3, 4}};
auto l1 = p.add_parameter("1", m1_shape);
auto l2 = p.add_parameter("2", m2_shape);
p.add_instruction(migraphx::op::dot{}, l1, l2);
return p;
}
};
struct gemm_mutli_dim_2_3 : verify_program<gemm_mutli_dim_2_3>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape m1_shape{migraphx::shape::float_type, {2, 3, 2, 3}};
migraphx::shape m2_shape{migraphx::shape::float_type, {2, 3, 3, 2}};
auto l1 = p.add_parameter("1", m1_shape);
auto l2 = p.add_parameter("2", m2_shape);
p.add_instruction(migraphx::op::dot{}, l1, l2);
return p;
}
};
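// In the multi-dimensional cases above, dot{} treats every leading dimension
// as a batch dimension and multiplies the trailing two as matrices, e.g.
// {2, 2, 3} x {2, 3, 4} -> {2, 2, 4}.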
struct test_contiguous : verify_program<test_contiguous>
{
migraphx::program create_program() const
@@ -2907,4 +2977,41 @@ struct test_lstm_bidirct_default_actv2 : verify_program<test_lstm_bidirct_defaul
}
};
template <int Axis>
struct test_logsoftmax : verify_program<test_logsoftmax<Axis>>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3, 4, 5, 6}};
auto param = p.add_parameter("0", s);
p.add_instruction(migraphx::op::logsoftmax{Axis}, param);
return p;
}
};
template struct test_logsoftmax<0>;
template struct test_logsoftmax<1>;
template struct test_logsoftmax<2>;
template struct test_logsoftmax<3>;
template struct test_logsoftmax<4>;
template <int Axis>
struct test_logsoftmax_1 : verify_program<test_logsoftmax_1<Axis>>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3}};
auto param = p.add_parameter("0", s);
p.add_instruction(migraphx::op::logsoftmax{Axis}, param);
return p;
}
};
template struct test_logsoftmax_1<0>;
template struct test_logsoftmax_1<1>;
int main(int argc, const char* argv[]) { test::run(argc, argv); }
(Binary ONNX test fixtures added; the protobuf bytes are not human-readable and
are omitted. Recoverable names: "shape-gather-example" and
"constant-scalar-example", each holding a "const_tensor" Constant, and
"logsoftmax-example", which defines a LogSoftmax node with an "axis" attribute,
input "x", and output "y".)
@@ -470,8 +470,8 @@ TEST_CASE(flatten_test)
{
migraphx::program p;
auto l0 = p.add_parameter("0", migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}});
p.add_instruction(migraphx::op::flatten{1}, l0);
p.add_instruction(migraphx::op::flatten{2}, l0);
p.add_instruction(migraphx::op::flatten{1}, l0);
auto prog = migraphx::parse_onnx("flatten_test.onnx");
EXPECT(p == prog);
@@ -524,7 +524,7 @@ TEST_CASE(constant_test)
TEST_CASE(constant_test_scalar)
{
migraphx::program p;
p.add_literal(migraphx::literal{migraphx::shape{migraphx::shape::int32_type}, {1}});
p.add_literal(migraphx::literal{migraphx::shape{migraphx::shape::int32_type, {1}}, {1}});
auto prog = migraphx::parse_onnx("constant_scalar.onnx");
EXPECT(p == prog);
@@ -572,6 +572,27 @@ TEST_CASE(gemm_test)
EXPECT(p == prog);
}
TEST_CASE(gemm_ex)
{
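// ONNX Gemm with alpha, beta, and transA is expressed by the parser as an
// explicit transpose, an alpha-scaled dot, and a broadcast beta * C addition.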
migraphx::program p;
auto l0 = p.add_parameter("1", migraphx::shape{migraphx::shape::float_type, {1, 1, 5, 6}});
auto l1 = p.add_parameter("2", migraphx::shape{migraphx::shape::float_type, {1, 1, 5, 7}});
auto l2 = p.add_parameter("3", migraphx::shape{migraphx::shape::float_type, {1, 1, 6, 7}});
auto t0 = p.add_instruction(migraphx::op::transpose{{0, 1, 3, 2}}, l0);
auto alpha = 0.5f;
auto res_ab = p.add_instruction(migraphx::op::dot{alpha}, t0, l1);
auto beta = 0.8f;
auto l_beta = p.add_literal(beta);
auto brcst_beta = p.add_instruction(migraphx::op::scalar{l2->get_shape()}, l_beta);
auto res_c = p.add_instruction(migraphx::op::mul{}, l2, brcst_beta);
p.add_instruction(migraphx::op::add{}, res_ab, res_c);
auto prog = migraphx::parse_onnx("gemm_test_ex.onnx");
EXPECT(p == prog);
}
TEST_CASE(add_scalar_test)
{
migraphx::program p;
@@ -651,4 +672,15 @@ TEST_CASE(add_fp16_test)
EXPECT(p == prog);
}
TEST_CASE(logsoftmax)
{
migraphx::program p;
auto l0 = p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {3, 4, 5, 6}});
int axis = 1;
p.add_instruction(migraphx::op::logsoftmax{axis}, l0);
auto prog = migraphx::parse_onnx("logsoftmax_test.onnx");
EXPECT(p == prog);
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
(Binary ONNX test fixture diffs; the protobuf bytes are not human-readable and
are omitted. "sum-example": the graph containing the "Sum" node is renamed from
"test-dropout" to "test-sum". "unknown-example": the "Unknown" node in graph
"test-unknown" is modified.)
@@ -316,6 +316,135 @@ TEST_CASE(gather)
}
}
TEST_CASE(logsoftmax)
{
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 0;
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}},
migraphx::op::logsoftmax{axis},
input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 1;
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}},
migraphx::op::logsoftmax{axis},
input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 2;
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}},
migraphx::op::logsoftmax{axis},
input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 3;
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}},
migraphx::op::logsoftmax{axis},
input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 4;
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}},
migraphx::op::logsoftmax{axis},
input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = 5;
throws_shape(migraphx::op::logsoftmax{axis}, input);
}
{
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
int axis = -1;
throws_shape(migraphx::op::logsoftmax{axis}, input);
}
}
TEST_CASE(dot)
{
{
migraphx::shape s_m1{migraphx::shape::float_type, {4, 5}};
migraphx::shape s_m2{migraphx::shape::float_type, {5, 8}};
expect_shape(
migraphx::shape{migraphx::shape::float_type, {4, 8}}, migraphx::op::dot{}, s_m1, s_m2);
}
{
migraphx::shape s_m1{migraphx::shape::float_type, {4, 6}};
migraphx::shape s_m2{migraphx::shape::float_type, {5, 8}};
throws_shape(migraphx::op::dot{}, s_m1, s_m2);
}
{
migraphx::shape s_m1{migraphx::shape::float_type, {1, 1}};
migraphx::shape s_m2{migraphx::shape::float_type, {1, 1}};
expect_shape(
migraphx::shape{migraphx::shape::float_type, {1, 1}}, migraphx::op::dot{}, s_m1, s_m2);
}
{
migraphx::shape s_m1{migraphx::shape::float_type, {1, 4, 5}};
migraphx::shape s_m2{migraphx::shape::float_type, {1, 5, 7}};
expect_shape(migraphx::shape{migraphx::shape::float_type, {1, 4, 7}},
migraphx::op::dot{},
s_m1,
s_m2);
}
{
migraphx::shape s_m1{migraphx::shape::float_type, {2, 3, 4, 5}};
migraphx::shape s_m2{migraphx::shape::float_type, {2, 3, 5, 7}};
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 7}},
migraphx::op::dot{},
s_m1,
s_m2);
}
{
migraphx::shape s_m1{migraphx::shape::float_type, {1, 1, 4, 5}};
migraphx::shape s_m2{migraphx::shape::float_type, {1, 1, 5, 7}};
expect_shape(migraphx::shape{migraphx::shape::float_type, {1, 1, 4, 7}},
migraphx::op::dot{},
s_m1,
s_m2);
}
{
migraphx::shape s_m1{migraphx::shape::float_type, {3, 1, 4, 6}};
migraphx::shape s_m2{migraphx::shape::float_type, {3, 1, 5, 7}};
throws_shape(migraphx::op::dot{}, s_m1, s_m2);
}
{
migraphx::shape s_m1{migraphx::shape::float_type, {2, 2, 4, 5}};
migraphx::shape s_m2{migraphx::shape::float_type, {3, 2, 5, 7}};
throws_shape(migraphx::op::dot{}, s_m1, s_m2);
}
{
migraphx::shape s_m1{migraphx::shape::float_type, {1, 1, 4, 5}};
migraphx::shape s_m2{migraphx::shape::float_type, {1, 2, 5, 7}};
throws_shape(migraphx::op::dot{}, s_m1, s_m2);
}
{
migraphx::shape s_m1{migraphx::shape::float_type, {1, 2, 4, 5}};
migraphx::shape s_m2{migraphx::shape::float_type, {2, 1, 5, 7}};
throws_shape(migraphx::op::dot{}, s_m1, s_m2);
}
}
TEST_CASE(rnn)
{
{