Commit c0154dca authored by Shucai Xiao's avatar Shucai Xiao
Browse files

merge changes from the develop branch

parents ca170b5c b93f5320
......@@ -25,11 +25,21 @@ namespace gpu {
// GPU wrapper for the logsoftmax operator.
struct hip_logsoftmax
{
    op::logsoftmax op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        // Delegate to the wrapped operator's reflection so serialization
        // and comparison stay in sync with the reference op.
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::logsoftmax"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    // The last input is the preallocated output buffer; the result aliases it.
    // (The duplicate int-returning overload from the diff pre-image is removed;
    // std::ptrdiff_t is the signed type expected for alias indices.)
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
} // namespace gpu
......
......@@ -13,11 +13,21 @@ struct context;
// GPU LRN operator backed by an MIOpen LRN descriptor.
struct miopen_lrn
{
    shared<lrn_descriptor> ldesc;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        // Reflect by reading the parameters back out of the MIOpen descriptor.
        return gpu::reflect(self.ldesc.get(), f);
    }

    std::string name() const { return "gpu::lrn"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    // The last input is the preallocated output buffer; the result aliases it.
    // Keep only the std::ptrdiff_t overload — the int-returning duplicate from
    // the diff pre-image would be an ill-formed overload on return type alone.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
} // namespace gpu
......
......@@ -162,6 +162,38 @@ inline fused_operator_args make_fused_args()
return make_obj<fused_operator_args>(&miopenCreateOperatorArgs);
}
// Reflect an MIOpen activation descriptor: reads mode/alpha/beta/gamma back
// from the descriptor and packs them as named fields for generic
// serialization/comparison of gpu ops.
template <class F>
auto reflect(miopenActivationDescriptor_t ad, F f)
{
assert(ad != nullptr);
// Defaults are overwritten by the Get call below; PASTHRU is MIOpen's
// identity activation (note: spelled without the double 'S' in the API).
miopenActivationMode_t mode = miopenActivationPASTHRU;
double alpha = 0.0;
double beta = 0.0;
double gamma = 0.0;
miopenGetActivationDescriptor(ad, &mode, &alpha, &beta, &gamma);
return pack(f(std::move(mode), "mode"), // NOLINT
f(std::move(alpha), "alpha"), // NOLINT
f(std::move(beta), "beta"), // NOLINT
f(std::move(gamma), "gamma")); // NOLINT
}
// Reflect an MIOpen LRN descriptor: reads mode/n/alpha/beta/k back from the
// descriptor and packs them as named fields for generic serialization.
template <class F>
auto reflect(miopenLRNDescriptor_t lrnd, F f)
{
assert(lrnd != nullptr);
// Defaults are overwritten by the Get call below.
miopenLRNMode_t mode = miopenLRNWithinChannel;
unsigned int n = 0;
double alpha = 0.0;
double beta = 0.0;
double k = 0.0;
miopenGetLRNDescriptor(lrnd, &mode, &n, &alpha, &beta, &k);
return pack(f(std::move(mode), "mode"), // NOLINT
f(std::move(n), "n"), // NOLINT
f(std::move(alpha), "alpha"), // NOLINT
f(std::move(beta), "beta"), // NOLINT
f(std::move(k), "k")); // NOLINT
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
......@@ -45,7 +45,15 @@ struct unary_device : oper<Derived>
// Validate inputs and derive the output shape for a unary device op.
// Expects two shapes: the input tensor and the preallocated output buffer.
shape compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(2);
    const auto& in = inputs.at(0);
    // Reuse the input shape when it is already packed; otherwise normalize
    // to a packed shape with the same type and lengths.
    return in.packed() ? in : shape{in.type(), in.lens()};
}
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
......@@ -54,7 +62,10 @@ struct unary_device : oper<Derived>
return args[1];
}
// The last input shape is the output buffer; the result aliases it.
// Only the std::ptrdiff_t overload is kept — the int-returning duplicate
// from the diff pre-image differed only in return type, which is ill-formed.
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
    return shapes.size() - 1;
}
};
template <class Derived, void (*F)(hipStream_t, const argument&, const argument&, const argument&)>
......@@ -63,7 +74,16 @@ struct binary_device : oper<Derived>
// Validate inputs and derive the output shape for a binary device op.
// Expects three shapes: both operands and the preallocated output buffer.
shape compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(3);
    const auto& lhs = inputs.at(0);
    const auto& rhs = inputs.at(1);
    // Reuse the first operand's shape only when both operands agree and it
    // is packed; otherwise normalize to a packed shape.
    if(lhs == rhs and lhs.packed())
        return lhs;
    return {lhs.type(), lhs.lens()};
}
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
......@@ -72,7 +92,10 @@ struct binary_device : oper<Derived>
return args[2];
}
// The last input shape is the output buffer; the result aliases it.
// Only the std::ptrdiff_t overload is kept — the int-returning duplicate
// from the diff pre-image differed only in return type, which is ill-formed.
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
    return shapes.size() - 1;
}
};
} // namespace gpu
......
......@@ -14,11 +14,20 @@ struct hip_pad
{
    op::pad op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        // Delegate to the wrapped pad operator's reflection.
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::pad"; }
    shape compute_shape(std::vector<shape> inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    // The last input is the preallocated output buffer; the result aliases it.
    // The int-returning duplicate overload from the diff pre-image is removed.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
} // namespace gpu
......
......@@ -16,11 +16,20 @@ struct miopen_pooling
op::pooling op;
shared<pooling_descriptor> pd;

template <class Self, class F>
static auto reflect(Self& self, F f)
{
    // Only the reference operator is reflected; the MIOpen descriptor (pd)
    // is derived state and is rebuilt from the op's parameters.
    return migraphx::reflect(self.op, f);
}

std::string name() const { return "gpu::pooling"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
// The last input is the preallocated output buffer; the result aliases it.
// The int-returning duplicate overload from the diff pre-image is removed.
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
    return shapes.size() - 1;
}
};
} // namespace gpu
......
......@@ -13,11 +13,21 @@ struct context;
// GPU ReLU operator backed by an MIOpen activation descriptor.
struct miopen_relu
{
    shared<activation_descriptor> ad;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        // Reflect by reading the parameters back out of the MIOpen descriptor.
        return gpu::reflect(self.ad.get(), f);
    }

    std::string name() const { return "gpu::relu"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    // The last input is the preallocated output buffer; the result aliases it.
    // The int-returning duplicate overload from the diff pre-image is removed.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
} // namespace gpu
......
......@@ -13,11 +13,21 @@ struct context;
// GPU sigmoid operator backed by an MIOpen activation descriptor.
struct miopen_sigmoid
{
    shared<activation_descriptor> ad;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        // Reflect by reading the parameters back out of the MIOpen descriptor.
        return gpu::reflect(self.ad.get(), f);
    }

    std::string name() const { return "gpu::sigmoid"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    // The last input is the preallocated output buffer; the result aliases it.
    // The int-returning duplicate overload from the diff pre-image is removed.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
} // namespace gpu
......
......@@ -13,11 +13,21 @@ struct context;
// GPU softmax operator wrapping the reference softmax op.
struct miopen_softmax
{
    op::softmax op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        // Delegate to the wrapped operator's reflection.
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::softmax"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    // The last input is the preallocated output buffer; the result aliases it.
    // The int-returning duplicate overload from the diff pre-image is removed.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
} // namespace gpu
......
......@@ -13,11 +13,21 @@ struct context;
// GPU tanh operator backed by an MIOpen activation descriptor.
struct miopen_tanh
{
    shared<activation_descriptor> ad;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        // Reflect by reading the parameters back out of the MIOpen descriptor.
        return gpu::reflect(self.ad.get(), f);
    }

    std::string name() const { return "gpu::tanh"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    // The last input is the preallocated output buffer; the result aliases it.
    // The int-returning duplicate overload from the diff pre-image is removed.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
} // namespace gpu
......
......@@ -45,6 +45,7 @@
#include <migraphx/gpu/pad.hpp>
#include <migraphx/gpu/gather.hpp>
#include <migraphx/gpu/lrn.hpp>
#include <migraphx/gpu/clip.hpp>
#include <utility>
#include <functional>
#include <algorithm>
......@@ -101,6 +102,7 @@ struct miopen_apply
add_extend_op<hip_logsoftmax, op::logsoftmax>("logsoftmax");
add_extend_op<hip_gather, op::gather>("gather");
add_extend_op<hip_pad, op::pad>("pad");
add_extend_op<hip_clip, op::clip>("clip");
add_lrn_op();
add_convolution_op();
......
......@@ -7,8 +7,8 @@ namespace gpu {
// Validate inputs and derive the output shape for gpu::tanh.
// Expects the input tensor plus the preallocated output buffer; MIOpen
// activations require packed tensors, hence the .packed() check.
// (The pre-image lines — not_broadcasted() + return inputs.at(1) — left a
// duplicate check and an unreachable return; only the post-image is kept.)
shape miopen_tanh::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(2).packed();
    return inputs.at(0);
}
argument miopen_tanh::compute(context& ctx,
......
......@@ -11,7 +11,7 @@
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/constant_propagate.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/eliminate_contiguous.hpp>
#include <migraphx/common_subexpression_elimination.hpp>
#include <migraphx/fwd_conv_batchnorm_rewrite.hpp>
......@@ -20,6 +20,7 @@
#include <migraphx/eliminate_identity.hpp>
#include <migraphx/gpu/concat_gpu_opt.hpp>
#include <migraphx/gpu/schedule_model.hpp>
#include <migraphx/gpu/adjust_allocation.hpp>
#include <migraphx/eliminate_pad.hpp>
#include <migraphx/schedule.hpp>
......@@ -47,7 +48,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const
//dead_code_elimination{},
simplify_algebra{},
dead_code_elimination{},
constant_propagate{},
propagate_constant{},
dead_code_elimination{},
auto_contiguous{},
//simplify_reshapes{},
......@@ -57,6 +58,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const
dead_code_elimination{},
eliminate_contiguous{},
dead_code_elimination{},
adjust_allocation{},
dead_code_elimination{},
fuse_ops{&ctx},
dead_code_elimination{},
write_literals{&ctx},
......
......@@ -14,6 +14,13 @@ struct hip_load_literal
{
shape s;
std::size_t n = 0;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.s, "shape"), f(self.n, "id"));
}
std::string name() const { return "hip::load_literal"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
......
......@@ -108,15 +108,19 @@ struct tf_parser
{
add_generic_op("Identity", op::identity{});
add_generic_op("Relu", op::relu{});
add_generic_op("Relu6", op::clip{6.0, 0.0});
add_binary_op("Add", op::add{});
add_binary_op("Mul", op::mul{});
add_mem_op("AvgPool", &tf_parser::parse_pooling);
add_mem_op("BiasAdd", &tf_parser::parse_biasadd);
add_mem_op("ConcatV2", &tf_parser::parse_concat);
add_mem_op("Const", &tf_parser::parse_constant);
add_mem_op("Conv2D", &tf_parser::parse_conv);
add_mem_op("DepthwiseConv2dNative", &tf_parser::parse_depthwiseconv);
add_mem_op("FusedBatchNorm", &tf_parser::parse_batchnorm);
add_mem_op("MatMul", &tf_parser::parse_matmul);
add_mem_op("MaxPool", &tf_parser::parse_pooling);
add_mem_op("Mean", &tf_parser::parse_mean);
add_mem_op("Pack", &tf_parser::parse_pack);
......@@ -124,6 +128,7 @@ struct tf_parser
add_mem_op("Reshape", &tf_parser::parse_reshape);
add_mem_op("Softmax", &tf_parser::parse_softmax);
add_mem_op("Squeeze", &tf_parser::parse_squeeze);
add_mem_op("StridedSlice", &tf_parser::parse_stridedslice);
}
template <class F>
......@@ -150,7 +155,7 @@ struct tf_parser
template <class T>
void add_binary_op(std::string name, T x)
{
add_op(name, [this, x](attribute_map attributes, std::vector<instruction_ref> args) {
add_op(name, [this, x](const attribute_map& attributes, std::vector<instruction_ref> args) {
if(args.size() != 2)
MIGRAPHX_THROW("binary operators should have 2 operands");
auto l0 = args[1];
......@@ -212,7 +217,7 @@ struct tf_parser
// Register a TF op whose attributes are ignored: the parsed instruction is
// built directly from the migraphx operator `x` and the node's inputs.
// (The diff pre-image's by-value `attribute_map` lambda line is removed —
// keeping both open-lambda lines would be ill-formed.)
template <class T>
void add_generic_op(std::string name, T x)
{
    add_op(name, [this, x](const attribute_map&, std::vector<instruction_ref> args) {
        return prog.add_instruction(x, args);
    });
}
......@@ -235,7 +240,7 @@ struct tf_parser
parse_biasadd(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
{
    // BiasAdd adds a 1-D bias over the channel axis; the previous layer is
    // assumed to produce NCHW, so broadcast on axis 1.
    uint64_t axis = 1;
    // broadcast takes the target dims (.lens()), not the shape object — the
    // pre-image line passing get_shape() directly is removed.
    auto l0 = prog.add_instruction(op::broadcast{axis, args[0]->get_shape().lens()}, args[1]);
    return prog.add_instruction(op::add{}, args[0], l0);
}
......@@ -336,6 +341,88 @@ struct tf_parser
return prog.add_instruction(op, {args[0], weights});
}
// Parse TF DepthwiseConv2dNative as a grouped convolution: group count equals
// the input channel count, and the weights are reshaped so each group has a
// single input channel.
instruction_ref parse_depthwiseconv(const std::string&,
attribute_map attributes,
std::vector<instruction_ref> args)
{
op::convolution op;
// Input is NCHW at this point, so lens()[1] is the channel count.
size_t num_channels = args[0]->get_shape().lens()[1];
op.group = num_channels;
if(contains(attributes, "padding"))
{
const std::string& pad_mode = attributes.at("padding").s();
// TF encodes padding as a string attribute; anything containing "SAME"
// maps to same-padding, otherwise the default (valid) is kept.
if(pad_mode.find("SAME") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::same;
}
}
if(contains(attributes, "strides"))
{
std::vector<size_t> stride;
copy(attributes.at("strides").list().i(), std::back_inserter(stride));
// reorder_data permutes NHWC-ordered values into NCHW order, so the
// spatial strides end up at indices 2 and 3.
reorder_data(stride);
if(stride.size() != 4)
{
MIGRAPHX_THROW("strides should have 4 values");
}
op.stride[0] = stride[2];
op.stride[1] = stride[3];
}
auto weights = args[1];
// check if weights are from a constant
if(weights->name() != "@param")
{
// Transpose TF weight layout into migraphx's expected layout; the
// permutation differs depending on whether the graph is NHWC or NCHW.
if(is_nhwc)
{
weights = prog.add_instruction(op::transpose{{1, 3, 0, 2}}, args[1]);
}
else
{
weights = prog.add_instruction(op::transpose{{3, 2, 0, 1}}, args[1]);
}
}
std::vector<int64_t> new_weights_shape;
copy(weights->get_shape().lens(), std::back_inserter(new_weights_shape));
// weight format is (out_channels, in_channels, h, w), but in depthwise_conv,
// out_channels is equal to the multiplier. Adjust by inserting a reshape and
// setting in_channels to 1
int64_t multiplier = new_weights_shape[0];
int64_t out_channels = num_channels * multiplier;
new_weights_shape[0] = out_channels;
new_weights_shape[1] = 1;
auto new_weights = prog.add_instruction(op::reshape{new_weights_shape}, weights);
return prog.add_instruction(op, {args[0], new_weights});
}
// Parse TF MatMul: optionally transposes either operand (per the
// transpose_a / transpose_b attributes) before emitting a dot product.
instruction_ref
parse_matmul(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
    bool transa = false;
    bool transb = false;
    if(contains(attributes, "transpose_a"))
    {
        transa = attributes.at("transpose_a").b();
    }
    if(contains(attributes, "transpose_b"))
    {
        // BUGFIX: previously read "transpose_a" here, so transpose_b was
        // silently ignored and MatMul(B^T) graphs parsed incorrectly.
        transb = attributes.at("transpose_b").b();
    }

    // Identity permutation with the last two axes swapped = 2-D transpose.
    std::vector<int64_t> perm(args[0]->get_shape().lens().size());
    std::iota(perm.begin(), perm.end(), int64_t{0});
    std::iter_swap(perm.end() - 1, perm.end() - 2);

    auto l1 = (transa) ? prog.add_instruction(op::transpose{perm}, args[0]) : args[0];
    auto l2 = (transb) ? prog.add_instruction(op::transpose{perm}, args[1]) : args[1];
    return prog.add_instruction(op::dot{}, l1, l2);
}
instruction_ref
parse_mean(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
......@@ -508,6 +595,46 @@ struct tf_parser
return prog.add_instruction(op, args[0]);
}
// Parse TF StridedSlice as a migraphx slice over all axes, followed by a
// squeeze of any axes flagged in shrink_axis_mask. Step/stride attributes
// are not handled here — only begin/end and shrink_axis_mask.
instruction_ref parse_stridedslice(const std::string&,
const attribute_map& attributes,
std::vector<instruction_ref> args)
{
op::slice op;
// Begin/end indices arrive as constant tensors (args[1], args[2]) and must
// be evaluable at parse time.
auto starts = args[1]->eval().get<int32_t>().to_vector();
auto ends = args[2]->eval().get<int32_t>().to_vector();
size_t num_axes = args[0]->get_shape().lens().size();
if(num_axes >= 4)
{
// 4-D+ tensors are NHWC on the TF side; permute indices into NCHW order.
reorder_data(starts);
reorder_data(ends);
}
op.starts = std::vector<int64_t>(starts.begin(), starts.end());
op.ends = std::vector<int64_t>(ends.begin(), ends.end());
// Slice every axis: axes = [0, 1, ..., num_axes-1].
op.axes = std::vector<int64_t>(num_axes);
std::iota(op.axes.begin(), op.axes.end(), 0);
uint32_t shrink_axis_mask = 0;
uint32_t bitwise_compare = 1;
std::vector<int64_t> squeeze_axes;
if(contains(attributes, "shrink_axis_mask"))
shrink_axis_mask = static_cast<uint32_t>(attributes.at("shrink_axis_mask").i());
for(size_t i = 0; i < num_axes; i++)
{
// the LSB corresponds to axis 0 when determining which axes to squeeze
if(((shrink_axis_mask >> i) & bitwise_compare) == 1)
squeeze_axes.push_back(i);
}
if(num_axes >= 4)
{
// Map the squeeze axes through the same NHWC->NCHW reordering.
squeeze_axes = parse_axes(squeeze_axes);
}
// NOTE(review): when shrink_axis_mask is 0 this still emits a squeeze with
// empty axes — presumably a no-op; confirm op::squeeze{} semantics.
auto l0 = prog.add_instruction(op, args[0]);
return prog.add_instruction(op::squeeze{squeeze_axes}, l0);
}
void parse_graph(const tensorflow::GraphDef& graph)
{
nodes = get_nodes(graph, input_nodes);
......@@ -672,10 +799,6 @@ struct tf_parser
static literal parse_tensor(const tensorflow::TensorProto& t)
{
std::vector<size_t> dims = parse_dims(t.tensor_shape());
if(dims.empty())
{
dims = {1};
}
size_t shape_size = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<size_t>());
if(!t.tensor_content().empty()) // has raw data
{
......@@ -686,17 +809,17 @@ struct tf_parser
case tensorflow::DataType::DT_FLOAT:
return literal{{shape::float_type, dims}, s.data()};
case tensorflow::DataType::DT_UINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_INT8: return literal{{shape::int32_type, dims}, s.data()};
case tensorflow::DataType::DT_INT8: return literal{{shape::int8_type, dims}, s.data()};
case tensorflow::DataType::DT_UINT16:
return literal{{shape::int32_type, dims}, s.data()};
return literal{{shape::uint16_type, dims}, s.data()};
case tensorflow::DataType::DT_INT16:
return literal{{shape::int32_type, dims}, s.data()};
return literal{{shape::int16_type, dims}, s.data()};
case tensorflow::DataType::DT_INT32:
return literal{{shape::int32_type, dims}, s.data()};
case tensorflow::DataType::DT_INT64:
return literal{{shape::int64_type, dims}, s.data()};
case tensorflow::DataType::DT_STRING: throw std::runtime_error("");
case tensorflow::DataType::DT_BOOL: return literal{{shape::int32_type, dims}, s.data()};
case tensorflow::DataType::DT_BOOL: return literal{{shape::int8_type, dims}, s.data()};
case tensorflow::DataType::DT_HALF: return literal{{shape::half_type, dims}, s.data()};
case tensorflow::DataType::DT_DOUBLE:
return literal{{shape::double_type, dims}, s.data()};
......@@ -746,21 +869,23 @@ struct tf_parser
{
case tensorflow::DataType::DT_INVALID: throw std::runtime_error("");
case tensorflow::DataType::DT_FLOAT:
return literal{{shape::float_type, dims}, get_data_vals(t.float_val(), shape_size)};
return create_literal(
shape::float_type, dims, get_data_vals(t.float_val(), shape_size));
case tensorflow::DataType::DT_UINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_INT8:
return literal{{shape::int32_type, dims}, get_data_vals(t.int_val(), shape_size)};
return create_literal(shape::int8_type, dims, get_data_vals(t.int_val(), shape_size));
case tensorflow::DataType::DT_UINT16:
return literal{{shape::int32_type, dims}, get_data_vals(t.int_val(), shape_size)};
return create_literal(shape::uint16_type, dims, get_data_vals(t.int_val(), shape_size));
case tensorflow::DataType::DT_INT16:
return literal{{shape::int32_type, dims}, get_data_vals(t.int_val(), shape_size)};
return create_literal(shape::int16_type, dims, get_data_vals(t.int_val(), shape_size));
case tensorflow::DataType::DT_INT32:
return literal{{shape::int32_type, dims}, get_data_vals(t.int_val(), shape_size)};
return create_literal(shape::int32_type, dims, get_data_vals(t.int_val(), shape_size));
case tensorflow::DataType::DT_INT64:
return literal{{shape::int64_type, dims}, get_data_vals(t.int64_val(), shape_size)};
return create_literal(
shape::int64_type, dims, get_data_vals(t.int64_val(), shape_size));
case tensorflow::DataType::DT_STRING: throw std::runtime_error("");
case tensorflow::DataType::DT_BOOL:
return literal{{shape::int32_type, dims}, get_data_vals(t.bool_val(), shape_size)};
return create_literal(shape::int32_type, dims, get_data_vals(t.bool_val(), shape_size));
case tensorflow::DataType::DT_HALF:
{
std::vector<int> data_int32 = get_data_vals(t.half_val(), shape_size);
......@@ -770,7 +895,7 @@ struct tf_parser
data_uint16.end(),
std::back_inserter(data_half),
[](uint16_t raw_val) { return *reinterpret_cast<half*>(&raw_val); });
return literal{{shape::half_type, dims}, data_half};
return create_literal(shape::half_type, dims, data_half);
}
case tensorflow::DataType::DT_DOUBLE:
return literal{{shape::double_type, dims}, get_data_vals(t.double_val(), shape_size)};
......@@ -839,9 +964,19 @@ struct tf_parser
std::transform(input_dims.begin(),
input_dims.end(),
std::back_inserter(dims),
[](tensorflow::TensorShapeProto_Dim dim) { return dim.size(); });
[](const tensorflow::TensorShapeProto_Dim& dim) { return dim.size(); });
return dims;
}
// Build a literal from a TF tensor proto's explicit values. Dims of {} or
// {1} are treated as scalars (unshaped literal), per the assumption that an
// explicit value with dim size <= 1 in the protobuf denotes a scalar.
template <class T>
static literal
create_literal(shape::type_t shape_type, const std::vector<size_t>& dims, std::vector<T> data)
{
    const bool scalar_like = dims.empty() or (dims.size() == 1 and dims.front() == 1);
    if(scalar_like)
        return literal{{shape_type}, data};
    return literal{{shape_type, dims}, data};
}
};
program parse_tf(const std::string& name, bool is_nhwc)
......
......@@ -60,7 +60,7 @@ TEST_CASE(after_literal_broadcast)
auto l2 = p.add_literal(get_2());
EXPECT(p.get_shape().standard());
EXPECT(not p.get_shape().broadcasted());
auto b = p.add_instruction(migraphx::op::broadcast{0, l1->get_shape()}, l2);
auto b = p.add_instruction(migraphx::op::broadcast{0, l1->get_shape().lens()}, l2);
p.add_instruction(pass_op{}, b);
EXPECT(not p.get_shape().standard());
EXPECT(p.get_shape().broadcasted());
......@@ -91,7 +91,7 @@ TEST_CASE(after_param_broadcast)
auto l2 = p.add_parameter("2", {migraphx::shape::float_type, {2}});
EXPECT(p.get_shape().standard());
EXPECT(not p.get_shape().broadcasted());
auto b = p.add_instruction(migraphx::op::broadcast{0, l1->get_shape()}, l2);
auto b = p.add_instruction(migraphx::op::broadcast{0, l1->get_shape().lens()}, l2);
p.add_instruction(pass_op{}, b);
EXPECT(not p.get_shape().standard());
EXPECT(p.get_shape().broadcasted());
......
......@@ -351,7 +351,7 @@ TEST_CASE(gemm_mutli_dim1_2_3)
float beta = 0.41;
auto m12_alpha = p.add_instruction(migraphx::op::dot{alpha, beta}, l1, l2);
auto l_beta = p.add_literal(beta);
auto b_beta = p.add_instruction(migraphx::op::scalar{m12_alpha->get_shape()}, l_beta);
auto b_beta = p.add_instruction(migraphx::op::scalar{m12_alpha->get_shape().lens()}, l_beta);
auto m3_beta = p.add_instruction(migraphx::op::mul{}, b_beta, l3);
p.add_instruction(migraphx::op::add{}, m3_beta, m12_alpha);
p.compile(migraphx::cpu::target{});
......
......@@ -651,7 +651,7 @@ TEST_CASE(broadcast_test)
uint64_t axis = 0;
auto l1 = p.add_literal(migraphx::literal{a_shape, a_data});
auto l2 = p.add_literal(migraphx::literal{b_shape, b_data});
p.add_instruction(migraphx::op::broadcast{axis, l1->get_shape()}, l2);
p.add_instruction(migraphx::op::broadcast{axis, l1->get_shape().lens()}, l2);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
auto output = result.get<int32_t>();
......@@ -671,7 +671,7 @@ TEST_CASE(add_broadcast_test)
uint64_t axis = 0;
auto l1 = p.add_literal(migraphx::literal{a_shape, a_data});
auto l2 = p.add_literal(migraphx::literal{b_shape, b_data});
auto l3 = p.add_instruction(migraphx::op::broadcast{axis, l1->get_shape()}, l2);
auto l3 = p.add_instruction(migraphx::op::broadcast{axis, l1->get_shape().lens()}, l2);
p.add_instruction(migraphx::op::add{}, l1, l3);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
......@@ -809,11 +809,11 @@ TEST_CASE(imagescaler_test)
0.35,
0.45}});
auto scale_val = p.add_literal(2.f);
auto scaled_tensor = p.add_instruction(migraphx::op::scalar{s}, scale_val);
auto scaled_tensor = p.add_instruction(migraphx::op::scalar{s.lens()}, scale_val);
auto img_scaled = p.add_instruction(migraphx::op::mul{}, img, scaled_tensor);
auto bias_vals = p.add_literal(
migraphx::literal{migraphx::shape{migraphx::shape::float_type, {3}}, {0.01, 0.02, 0.03}});
auto bias_bcast = p.add_instruction(migraphx::op::broadcast{1, s}, bias_vals);
auto bias_bcast = p.add_instruction(migraphx::op::broadcast{1, s.lens()}, bias_vals);
p.add_instruction(migraphx::op::add{}, img_scaled, bias_bcast);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
......@@ -1557,4 +1557,21 @@ TEST_CASE(fp16_test)
EXPECT(migraphx::verify_range(results_vector, gold));
}
// Elementwise clip on the CPU target: values are clamped into [0, 6].
TEST_CASE(clip_test)
{
    migraphx::program p;
    migraphx::shape s{migraphx::shape::float_type, {3}};
    auto input = p.add_literal(migraphx::literal{s, {-1.0, 0.0, 10.0}});

    migraphx::op::clip op;
    op.min_val = 0.0;
    op.max_val = 6.0;
    p.add_instruction(op, input);

    p.compile(migraphx::cpu::target{});
    auto result = p.eval({});

    std::vector<float> results_vector(3);
    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });

    // -1 clamps up to 0, 0 is unchanged, 10 clamps down to 6.
    std::vector<float> gold = {0.0, 0.0, 6.0};
    EXPECT(migraphx::verify_range(results_vector, gold));
}
// Test driver entry point: runs every registered TEST_CASE.
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -20,6 +20,13 @@ struct eliminate_allocation_target
struct allocate
{
migraphx::shape s{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::pack(f(self.s, "shape"));
}
std::string name() const { return "allocate"; }
migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
{
......
......@@ -10,6 +10,13 @@ struct concat
{
concat(std::size_t axis) { op.axis = axis; }
migraphx::op::concat op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "eliminate_concat::concat"; }
migraphx::shape compute_shape(std::vector<migraphx::shape> inputs) const
{
......@@ -51,6 +58,13 @@ struct eliminate_concat_target
struct allocate
{
migraphx::shape s{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::pack(f(self.s, "shape"));
}
std::string name() const { return "allocate"; }
migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment