Commit b8090620 authored by Shucai Xiao

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into rnn_optimization

parents c2db3b96 3540f1b9
......@@ -162,6 +162,38 @@ inline fused_operator_args make_fused_args()
return make_obj<fused_operator_args>(&miopenCreateOperatorArgs);
}
template <class F>
auto reflect(miopenActivationDescriptor_t ad, F f)
{
assert(ad != nullptr);
miopenActivationMode_t mode = miopenActivationPASTHRU;
double alpha = 0.0;
double beta = 0.0;
double gamma = 0.0;
miopenGetActivationDescriptor(ad, &mode, &alpha, &beta, &gamma);
return pack(f(std::move(mode), "mode"), // NOLINT
f(std::move(alpha), "alpha"), // NOLINT
f(std::move(beta), "beta"), // NOLINT
f(std::move(gamma), "gamma")); // NOLINT
}
template <class F>
auto reflect(miopenLRNDescriptor_t lrnd, F f)
{
assert(lrnd != nullptr);
miopenLRNMode_t mode = miopenLRNWithinChannel;
unsigned int n = 0;
double alpha = 0.0;
double beta = 0.0;
double k = 0.0;
miopenGetLRNDescriptor(lrnd, &mode, &n, &alpha, &beta, &k);
return pack(f(std::move(mode), "mode"), // NOLINT
f(std::move(n), "n"), // NOLINT
f(std::move(alpha), "alpha"), // NOLINT
f(std::move(beta), "beta"), // NOLINT
f(std::move(k), "k")); // NOLINT
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
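These overloads give raw MIOpen descriptors the same field-visiting interface that `migraphx::reflect` provides for operators: the current values are read back through the MIOpen getter and packed together with their names. A minimal consumer of that interface, assuming `f` is invoked once per (value, name) pair; the `dump_activation` helper below is hypothetical, not part of this patch:

```cpp
#include <iostream>
#include <migraphx/gpu/miopen.hpp>

// Hypothetical helper: print every reflected field of an activation descriptor.
void dump_activation(miopenActivationDescriptor_t ad)
{
    migraphx::gpu::reflect(ad, [](auto value, const char* name) {
        std::cout << name << " = " << value << '\n'; // e.g. "alpha = 0"
        return value; // pack() collects whatever the visitor returns
    });
}
```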
......@@ -14,6 +14,12 @@ struct hip_pad
{
op::pad op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::pad"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
......
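Each GPU wrapper in this commit gains the same two-line `reflect` that forwards to the wrapped reference op, so the wrapper prints, hashes, and compares exactly like the op it wraps. A self-contained miniature of why one static `reflect` is enough to derive equality (illustrative names, plain C++, not MIGraphX code):

```cpp
#include <iostream>
#include <tuple>

struct pad_attrs
{
    int before = 0;
    int after  = 0;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return std::make_tuple(f(self.before, "before"), f(self.after, "after"));
    }
};

// Generic field-wise equality derived purely from reflect().
template <class T>
bool reflect_equal(const T& x, const T& y)
{
    auto fields = [](const T& v) {
        return T::reflect(v, [](auto& field, const char*) { return field; });
    };
    return fields(x) == fields(y); // tuple-wise comparison
}

int main()
{
    pad_attrs a{1, 2}, b{1, 2};
    std::cout << std::boolalpha << reflect_equal(a, b) << '\n'; // true
}
```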
......@@ -16,6 +16,12 @@ struct miopen_pooling
op::pooling op;
shared<pooling_descriptor> pd;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::pooling"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
......
......@@ -13,6 +13,13 @@ struct context;
struct miopen_relu
{
shared<activation_descriptor> ad;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return gpu::reflect(self.ad.get(), f);
}
std::string name() const { return "gpu::relu"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
......
......@@ -13,6 +13,13 @@ struct context;
struct miopen_sigmoid
{
shared<activation_descriptor> ad;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return gpu::reflect(self.ad.get(), f);
}
std::string name() const { return "gpu::sigmoid"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
......
#ifndef MIGRAPHX_GUARD_RTGLIB_SOFTMAX_HPP
#define MIGRAPHX_GUARD_RTGLIB_SOFTMAX_HPP
#include <migraphx/shape.hpp>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/op/softmax.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -13,6 +27,33 @@ struct context;
struct miopen_softmax
{
op::softmax op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::softmax"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
struct hip_softmax
{
op::softmax op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::softmax"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
......
......@@ -13,6 +13,13 @@ struct context;
struct miopen_tanh
{
shared<activation_descriptor> ad;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return gpu::reflect(self.ad.get(), f);
}
std::string name() const { return "gpu::tanh"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
......
......@@ -45,6 +45,8 @@
#include <migraphx/gpu/pad.hpp>
#include <migraphx/gpu/gather.hpp>
#include <migraphx/gpu/lrn.hpp>
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/clip.hpp>
#include <utility>
#include <functional>
#include <algorithm>
......@@ -97,10 +99,12 @@ struct miopen_apply
add_extend_op<miopen_gemm, op::dot>("dot");
add_extend_op<miopen_contiguous, op::contiguous>("contiguous");
add_extend_op<hip_concat, op::concat>("concat");
add_extend_op<hip_softmax, op::softmax>("softmax");
add_extend_op<hip_logsoftmax, op::logsoftmax>("logsoftmax");
add_extend_op<hip_gather, op::gather>("gather");
add_extend_op<hip_pad, op::pad>("pad");
add_extend_op<hip_convert, op::convert>("convert");
add_extend_op<hip_clip, op::clip>("clip");
add_lrn_op();
add_convolution_op();
......
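With `convert` and `clip` registered, a graph built from reference ops lowers to the GPU wrappers automatically. A usage sketch of the net effect for the new clip registration (assumes a ROCm device is available; mirrors the test programs later in this commit):

```cpp
#include <migraphx/gpu/target.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/program.hpp>

int main()
{
    migraphx::program p;
    auto x = p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {3}});
    p.add_instruction(migraphx::op::clip{6.0, 0.0}, x); // max_val 6, min_val 0
    p.compile(migraphx::gpu::target{}); // lowering rewrites clip -> gpu::clip
}
```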
#include <migraphx/gpu/softmax.hpp>
#include <migraphx/gpu/device/softmax.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
......@@ -30,6 +31,19 @@ argument miopen_softmax::compute(context& ctx,
return args[1];
}
shape hip_softmax::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).standard();
return op.compute_shape({inputs.at(0)});
}
argument hip_softmax::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
return device::softmax(ctx.get_stream().get(), output_shape, args, op.axis);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
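The `has(2)` check encodes the lowered calling convention: `hip_softmax` receives its input plus a preallocated output buffer, the device kernel writes into the buffer, and the logical shape is computed from the input alone. A standalone analogue of that convention (illustrative, not library code):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// inputs = {data, output_buffer}; the result aliases the trailing buffer.
std::ptrdiff_t output_alias(const std::vector<int>& inputs)
{
    return static_cast<std::ptrdiff_t>(inputs.size()) - 1;
}

int main()
{
    std::vector<int> inputs{0, 1};     // one data argument + one output buffer
    assert(inputs.size() == 2);        // mirrors check_shapes{...}.has(2)
    assert(output_alias(inputs) == 1); // compute() returns the last argument
}
```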
......@@ -51,7 +51,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const
propagate_constant{},
dead_code_elimination{},
auto_contiguous{},
simplify_reshapes{},
dead_code_elimination{},
lowering{ctx},
eliminate_concat{concat_gpu_optimization{}},
......
......@@ -14,6 +14,13 @@ struct hip_load_literal
{
shape s;
std::size_t n = 0;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.s, "shape"), f(self.n, "id"));
}
std::string name() const { return "hip::load_literal"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
......
......@@ -17,6 +17,7 @@
#include <migraphx/instruction.hpp>
#include <migraphx/config.hpp>
#include <migraphx/tf.hpp>
#include <migraphx/pad_calc.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -24,7 +25,7 @@ inline namespace MIGRAPHX_INLINE_NS {
struct tf_parser
{
using attribute_map = std::unordered_map<std::string, tensorflow::AttrValue>;
using node_map = std::map<std::string, tensorflow::NodeDef>;
// using input_node_map = std::unordered_map<std::string, std::unordered_set<std::string>>;
using op_func = std::function<instruction_ref(attribute_map, std::vector<instruction_ref>)>;
......@@ -53,15 +54,16 @@ struct tf_parser
template <class T>
std::vector<T> parse_axes(std::vector<T> axes) const
{
if(is_nhwc)
{
std::vector<T> new_axes;
std::transform(axes.begin(),
axes.end(),
std::back_inserter(new_axes),
[&](size_t axis) { return parse_axis(axis); });
return new_axes;
}
return axes;
}
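The corrected `parse_axes` declares `new_axes` inside the NHWC branch and returns the axes untouched otherwise, so non-NHWC graphs no longer get an empty vector back. For NHWC input imported as NCHW, the remap is the permutation sketched below (assumption: `parse_axis` applies the NHWC-to-NCHW mapping; parser internals may differ):

```cpp
#include <cassert>
#include <cstddef>

// Illustrative NHWC -> NCHW axis remap: N->0, H->2, W->3, C->1.
std::size_t nhwc_to_nchw_axis(std::size_t axis)
{
    constexpr std::size_t perm[4] = {0, 2, 3, 1};
    return perm[axis];
}

int main()
{
    assert(nhwc_to_nchw_axis(0) == 0); // batch stays first
    assert(nhwc_to_nchw_axis(1) == 2); // height
    assert(nhwc_to_nchw_axis(2) == 3); // width
    assert(nhwc_to_nchw_axis(3) == 1); // channels move to slot 1
}
```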
// tf stores certain attributes such as strides, dilations, as a 4D input.
......@@ -108,6 +110,7 @@ struct tf_parser
{
add_generic_op("Identity", op::identity{});
add_generic_op("Relu", op::relu{});
add_generic_op("Relu6", op::clip{6.0, 0.0});
add_binary_op("Add", op::add{});
add_binary_op("Mul", op::mul{});
......@@ -117,6 +120,7 @@ struct tf_parser
add_mem_op("ConcatV2", &tf_parser::parse_concat);
add_mem_op("Const", &tf_parser::parse_constant);
add_mem_op("Conv2D", &tf_parser::parse_conv);
add_mem_op("DepthwiseConv2dNative", &tf_parser::parse_depthwiseconv);
add_mem_op("FusedBatchNorm", &tf_parser::parse_batchnorm);
add_mem_op("MatMul", &tf_parser::parse_matmul);
add_mem_op("MaxPool", &tf_parser::parse_pooling);
......@@ -274,12 +278,60 @@ struct tf_parser
parse_conv(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
op::convolution op;
if(contains(attributes, "strides"))
{
std::vector<size_t> stride;
copy(attributes.at("strides").list().i(), std::back_inserter(stride));
reorder_data(stride);
if(stride.size() != 4)
{
MIGRAPHX_THROW("strides should have 4 values");
}
op.stride[0] = stride[2];
op.stride[1] = stride[3];
}
if(contains(attributes, "dilations"))
{
std::vector<size_t> dilation;
copy(attributes.at("dilations").list().i(), std::back_inserter(dilation));
reorder_data(dilation);
if(dilation.size() != 4)
{
MIGRAPHX_THROW("dilation should have 4 values");
}
op.dilation[0] = dilation[2];
op.dilation[1] = dilation[3];
}
auto weights = args[1];
// check if weights are from a constant
if(weights->name() != "@param")
{
if(is_nhwc)
{
weights = prog.add_instruction(op::transpose{{1, 3, 0, 2}}, args[1]);
}
else
{
weights = prog.add_instruction(op::transpose{{3, 2, 0, 1}}, args[1]);
}
}
if(contains(attributes, "padding"))
{
const std::string& pad_mode = attributes.at("padding").s();
if(pad_mode.find("SAME") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::same;
std::vector<size_t> weight_dims = weights->get_shape().lens();
size_t weight_h = weight_dims[2];
size_t weight_w = weight_dims[3];
op.padding[0] = calculate_padding(weight_h, op.dilation[0]);
op.padding[1] = calculate_padding(weight_w, op.dilation[1]);
}
else if(pad_mode.find("VALID") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::valid;
}
else if(pad_mode.find("EXPLICIT") != std::string::npos)
{
......@@ -297,6 +349,18 @@ struct tf_parser
op.padding[1] = padding[1];
}
}
return prog.add_instruction(op, {args[0], weights});
}
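SAME padding now gets explicit values derived from the kernel extent and dilation rather than just a mode flag. A plausible reading of `calculate_padding` from pad_calc.hpp (assumption: it pads by half the dilated kernel extent; the library's exact rounding may differ):

```cpp
#include <cassert>
#include <cstddef>

// Assumed formula: half of the dilated kernel's reach, so SAME preserves
// spatial size at stride 1. A sketch, not the library source.
std::size_t same_padding(std::size_t kernel, std::size_t dilation)
{
    const std::size_t effective = dilation * (kernel - 1) + 1;
    return (effective - 1) / 2;
}

int main()
{
    assert(same_padding(3, 1) == 1); // 3x3 kernel -> pad 1
    assert(same_padding(5, 1) == 2); // 5x5 kernel -> pad 2
    assert(same_padding(3, 2) == 2); // dilated 3x3 spans 5 -> pad 2
}
```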
instruction_ref parse_depthwiseconv(const std::string&,
attribute_map attributes,
std::vector<instruction_ref> args)
{
op::convolution op;
size_t num_channels = args[0]->get_shape().lens()[1];
op.group = num_channels;
if(contains(attributes, "strides"))
{
std::vector<size_t> stride;
......@@ -321,9 +385,9 @@ struct tf_parser
op.dilation[0] = dilation[2];
op.dilation[1] = dilation[3];
}
auto weights = args[1];
// check if weights are from a constant
if(weights->name() != "@param")
{
if(is_nhwc)
......@@ -336,7 +400,39 @@ struct tf_parser
}
}
if(contains(attributes, "padding"))
{
const std::string& pad_mode = attributes.at("padding").s();
std::vector<size_t> weight_dims = weights->get_shape().lens();
size_t weight_h = weight_dims[2];
size_t weight_w = weight_dims[3];
if(pad_mode.find("SAME") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::same;
op.padding[0] = calculate_padding(weight_h, op.dilation[0]);
op.padding[1] = calculate_padding(weight_w, op.dilation[1]);
}
else if(pad_mode.find("VALID") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::valid;
}
}
std::vector<int64_t> new_weights_shape;
copy(weights->get_shape().lens(), std::back_inserter(new_weights_shape));
// weight format is (out_channels, in_channels, h, w), but in depthwise_conv,
// out_channels is equal to the multiplier. Adjust by inserting a reshape and
// setting in_channels to 1
int64_t multiplier = new_weights_shape[0];
int64_t out_channels = num_channels * multiplier;
new_weights_shape[0] = out_channels;
new_weights_shape[1] = 1;
// Make sure weights are contiguous before doing reshape
auto cweights = prog.add_instruction(op::contiguous{}, weights);
auto new_weights = prog.add_instruction(op::reshape{new_weights_shape}, cweights);
return prog.add_instruction(op, {args[0], new_weights});
}
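Concretely: with 3 input channels and channel multiplier 2, the (transposed) TF weights have shape (2, 3, h, w); the reshape produces (6, 1, h, w), and `group = 3` turns the convolution into three independent per-channel convolutions with two filters each. A shape-only check of that bookkeeping (illustrative, not parser code):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

int main()
{
    const std::int64_t num_channels = 3; // input channels, also op.group
    std::vector<std::int64_t> w = {2, num_channels, 3, 3}; // (multiplier, C, h, w)

    const std::int64_t multiplier = w[0];
    w[0] = num_channels * multiplier; // out_channels = C * multiplier = 6
    w[1] = 1;                         // each group sees one input channel

    assert((w == std::vector<std::int64_t>{6, 1, 3, 3}));
}
```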
instruction_ref
......@@ -368,17 +464,21 @@ struct tf_parser
instruction_ref
parse_mean(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
auto axes = parse_axes(args[1]->eval().get<int32_t>().to_vector());
bool keep_dims = attributes.at("keep_dims").b();
std::vector<int32_t> hw_axes{2, 3};
// check if conditions for GlobalAvgPool are met
auto lens = args[0]->get_shape().lens();
if(axes == hw_axes and lens.size() == 4)
{
op::pooling op{"average"};
op.lengths[0] = lens[2];
op.lengths[1] = lens[3];
auto l0 = prog.add_instruction(op, args.front());
if(keep_dims)
return l0;
return prog.add_instruction(
op::squeeze{std::vector<int64_t>(hw_axes.begin(), hw_axes.end())}, l0);
}
MIGRAPHX_THROW("MIGraphX does not support mean outside of GlobalAvgPool transformation");
}
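The rewrite relies on the identity that reducing an NCHW tensor over axes {2, 3} is exactly an average pool whose window spans the whole H×W plane; `keep_dims = false` is now handled by squeezing the pooled axes instead of throwing. A shape-level sketch of the two cases (illustrative):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    // NCHW input lens; global average pooling uses kernel lengths {H, W}.
    std::vector<std::size_t> lens{2, 3, 7, 7};
    std::vector<std::size_t> pooled{lens[0], lens[1], 1, 1}; // keep_dims == true

    // keep_dims == false corresponds to squeezing axes {2, 3}:
    std::vector<std::size_t> squeezed{pooled[0], pooled[1]};
    assert((squeezed == std::vector<std::size_t>{2, 3}));
}
```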
......@@ -443,18 +543,6 @@ struct tf_parser
{
op::pooling op{starts_with(name, "Max") ? "max" : "average"};
if(contains(attributes, "strides"))
{
std::vector<size_t> stride;
......@@ -479,6 +567,20 @@ struct tf_parser
op.lengths[0] = ksize[2];
op.lengths[1] = ksize[3];
}
if(contains(attributes, "padding"))
{
const std::string& pad_mode = attributes.at("padding").s();
if(pad_mode.find("SAME") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::same;
op.padding[0] = calculate_padding(op.lengths[0], 1);
op.padding[1] = calculate_padding(op.lengths[1], 1);
}
else if(pad_mode.find("VALID") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::valid;
}
}
return prog.add_instruction(op, args[0]);
}
......
......@@ -3,6 +3,7 @@
#include <migraphx/literal.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/verify.hpp>
#include <migraphx/onnx.hpp>
......@@ -928,6 +929,24 @@ TEST_CASE(maxpool_test)
EXPECT(migraphx::verify_range(results_vector, c));
}
TEST_CASE(softmax_simple_test)
{
migraphx::program p;
std::vector<float> a = {0.25, 0.75};
std::vector<float> s = {0.377541, 0.622459};
migraphx::shape a_shape{migraphx::shape::float_type, {1, 2}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
p.add_instruction(migraphx::op::softmax{1}, al);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> results_vector(2);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s));
}
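The expected values check out by hand: softmax([0.25, 0.75]) = [e^0.25, e^0.75] / (e^0.25 + e^0.75) ≈ [0.377541, 0.622459]. The same verification, standalone:

```cpp
#include <cassert>
#include <cmath>

int main()
{
    const double e25 = std::exp(0.25), e75 = std::exp(0.75);
    const double sum = e25 + e75;
    assert(std::abs(e25 / sum - 0.377541) < 1e-6);
    assert(std::abs(e75 / sum - 0.622459) < 1e-6);
}
```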
TEST_CASE(softmax_test)
{
migraphx::program p;
......@@ -1001,14 +1020,13 @@ TEST_CASE(logsoftmax_test_axis_0)
-0.99628491, 1.04314606, -1.22943315, 0.76930403, 0.31106618};
std::vector<float> s = {
-0.135261, -2.843968, -0.659995, -0.488413, -1.051857, -2.812936, -0.250956, -0.353985,
-1.155980, -0.603651, -0.211969, -0.175371, -1.336552, -3.885010, -1.871544, -0.837083,
-0.887745, -0.433338, -1.158864, -4.911197, -1.147972, -0.666711, -0.996874, -0.981418,
-0.851145, -0.853988, -0.858112, -2.067420, -0.059956, -0.727436, -0.950881, -0.429689,
-0.061906, -1.505332, -1.210277, -0.377970, -0.791448, -1.655428, -1.827253, -0.304828,
-0.020762, -0.167101, -0.567346, -0.530319, -1.045094, -0.376648, -0.007391, -0.381670,
-0.720302, -0.460499, -0.469651, -0.556740, -0.554628, -0.551582};
migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
......@@ -1035,14 +1053,13 @@ TEST_CASE(logsoftmax_test_axis_1)
-0.99628491, 1.04314606, -1.22943315, 0.76930403, 0.31106618};
std::vector<float> s = {
-0.550468, -2.132973, -1.549746, -0.650533, -1.051529, -2.248570, -0.141017, -2.028357,
-1.947730, -1.511324, -0.166597, -0.379726, -1.965689, -1.172109, -1.475721, -2.700831,
-1.537011, -0.658754, -1.596017, -3.353137, -2.266743, -1.084197, -1.076214, -0.406712,
-2.743019, -0.425526, -1.079083, -2.139486, -1.270584, -1.024088, -1.154231, -3.201762,
-0.888957, -0.532855, -3.103583, -1.221339, -1.355980, -3.531678, -1.438510, -0.975194,
-0.080261, -1.162697, -1.568557, -1.398519, -1.322129, -0.470660, -0.370953, -0.907343,
-1.179017, -3.312239, -1.286363, -1.586076, -0.345100, -0.824173};
migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
......@@ -1069,14 +1086,13 @@ TEST_CASE(logsoftmax_test_axis_2)
-0.99628491, 1.04314606, -1.22943315, 0.76930403, 0.31106618};
std::vector<float> s = {
-0.495957, -1.031212, -0.245531, -2.013726, -1.339125, -2.465619, -1.356652, -0.964037,
-2.019250, -0.214522, -0.289569, -0.234392, -2.086591, -2.684439, -2.851651, -2.674176,
-1.697424, -1.889155, -0.401029, -3.064586, -1.173030, -1.306912, -2.177020, -0.834262,
-2.818177, -0.174415, -1.361105, -1.024571, -0.106766, -1.167645, -1.072650, -2.576522,
-0.569261, -1.207483, -3.679894, -2.095913, -0.504264, -3.039291, -1.290559, -1.156812,
-0.126453, -0.551493, -2.506384, -2.646261, -1.905195, -0.206994, -0.191369, -0.959754,
-1.948685, -3.671233, -0.875521, -3.111952, -1.905644, -1.6076011};
migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
......@@ -1103,14 +1119,13 @@ TEST_CASE(logsoftmax_test_axis_3)
-0.99628491, 1.04314606, -1.22943315, 0.76930403, 0.31106618};
std::vector<float> s = {
-0.336904, -3.475825, -1.366154, -0.279366, -2.208430, -2.010934, -0.225511, -2.436562,
-2.167785, -1.572415, -1.784104, -0.470789, -1.067459, -1.801948, -0.711023, -2.307197,
-1.467087, -0.400681, -0.426983, -3.740518, -1.127681, -1.078919, -2.599005, -0.534965,
-2.561400, -0.567617, -1.033025, -2.097713, -0.520463, -1.262245, -1.763230, -2.607658,
-0.281299, -0.814243, -2.627210, -0.724131, -0.655704, -2.123055, -1.018163, -2.480634,
-0.382599, -1.451479, -1.843102, -0.915303, -0.818078, -1.316929, -0.508875, -2.033541,
-1.487672, -2.417791, -0.378360, -2.568531, -0.569794, -1.028032};
migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
......@@ -1123,40 +1138,6 @@ TEST_CASE(logsoftmax_test_axis_3)
EXPECT(migraphx::verify_range(results_vector, s));
}
TEST_CASE(logsoftmax_test_axis_4)
{
migraphx::program p;
std::vector<float> a = {
1.93885877, -1.20006269, 0.90960855, 0.42108916, -1.50797544, -1.31047913, 1.07816336,
-1.13288733, -0.86411064, 0.97800238, 0.76631385, 2.07962834, -0.8940665, -1.62855592,
-0.53763057, -1.48165117, -0.64154112, 0.42486547, 0.89330917, -2.42022666, 0.192611,
-0.01257413, -1.5326607, 0.53137897, -1.52383859, 0.46994381, 0.00453619, 0.0066996,
1.58394908, 0.84216752, -0.04137941, -0.88580789, 1.44055158, -0.17621241, -1.98917923,
-0.08610038, 0.79020567, -0.67714548, 0.42774631, 0.1376574, 2.23569227, 1.16681234,
-1.21191456, -0.28411502, -0.18688975, 1.67552548, 2.48357974, 0.95891282, -0.06616535,
-0.99628491, 1.04314606, -1.22943315, 0.76930403, 0.31106618};
std::vector<float> s = {0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000};
migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
int axis = 4;
p.add_instruction(migraphx::op::logsoftmax{axis}, al);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s));
}
TEST_CASE(conv2d_test)
{
migraphx::program p;
......@@ -1557,4 +1538,49 @@ TEST_CASE(fp16_test)
EXPECT(migraphx::verify_range(results_vector, gold));
}
TEST_CASE(fp32_fp16_test)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
std::vector<float> data(2 * 3);
std::iota(data.begin(), data.end(), 1.0f);
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_literal(migraphx::literal(s, data));
p.add_instruction(migraphx::op::add{}, l1, l2);
return p;
};
auto test_case = [&](std::vector<std::string>&& op_names) {
std::vector<float> gold_res = {2.0, 4.0, 6.0, 8.0, 10.0, 12.0};
auto p = create_program();
migraphx::quantize(p, op_names);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> res;
result.visit([&](auto output) { res.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(res, gold_res));
};
test_case({"all"});
test_case({"add"});
}
TEST_CASE(clip_test)
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3}};
auto l = p.add_literal(migraphx::literal{s, {-1.0, 0.0, 10.0}});
migraphx::op::clip op;
op.max_val = 6.0;
op.min_val = 0.0;
p.add_instruction(op, l);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> results_vector(3);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
std::vector<float> gold = {0.0, 0.0, 6.0};
EXPECT(migraphx::verify_range(results_vector, gold));
}
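`op::clip{6.0, 0.0}` clamps elementwise to [min_val, max_val] = [0, 6], which is why the gold vector maps {-1, 0, 10} to {0, 0, 6}. The semantics in one line of standard C++ (reference sketch, not library code):

```cpp
#include <algorithm>
#include <cassert>

int main()
{
    auto clip = [](float x, float lo, float hi) { return std::min(std::max(x, lo), hi); };
    assert(clip(-1.f, 0.f, 6.f) == 0.f);
    assert(clip( 0.f, 0.f, 6.f) == 0.f);
    assert(clip(10.f, 0.f, 6.f) == 6.f);
}
```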
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -20,6 +20,13 @@ struct eliminate_allocation_target
struct allocate
{
migraphx::shape s{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::pack(f(self.s, "shape"));
}
std::string name() const { return "allocate"; }
migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
{
......
......@@ -10,6 +10,13 @@ struct concat
{
concat(std::size_t axis) { op.axis = axis; }
migraphx::op::concat op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "eliminate_concat::concat"; }
migraphx::shape compute_shape(std::vector<migraphx::shape> inputs) const
{
......@@ -51,6 +58,13 @@ struct eliminate_concat_target
struct allocate
{
migraphx::shape s{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::pack(f(self.s, "shape"));
}
std::string name() const { return "allocate"; }
migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
{
......
......@@ -58,7 +58,7 @@ TEST_CASE(tanh_shape)
if(ins->name() == "hip::allocate")
{
migraphx::shape new_s{migraphx::shape::float_type, {3, 2}, {1, 3}};
ins->replace(migraphx::gpu::hip_allocate{new_s});
}
}
EXPECT(p1 != p2);
......
......@@ -10,6 +10,7 @@
#include <migraphx/type_name.hpp>
#include <migraphx/verify_args.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/quantization.hpp>
#include <miopen/miopen.h>
......@@ -568,13 +569,13 @@ struct test_sub2 : verify_program<test_sub2>
}
};
struct test_softmax1 : verify_program<test_softmax1>
{
migraphx::program create_program() const
{
migraphx::program p;
auto x = p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {5, 3, 3, 4}});
p.add_instruction(migraphx::op::softmax{0}, x);
return p;
}
};
......@@ -591,6 +592,25 @@ struct test_softmax2 : verify_program<test_softmax2>
}
};
template <int Axis>
struct test_softmax : verify_program<test_softmax<Axis>>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3, 4, 5, 6}};
auto param = p.add_parameter("0", s);
p.add_instruction(migraphx::op::softmax{Axis}, param);
return p;
}
};
template struct test_softmax<0>;
template struct test_softmax<1>;
template struct test_softmax<2>;
template struct test_softmax<3>;
struct test_conv : verify_program<test_conv>
{
migraphx::program create_program() const
......@@ -1250,22 +1270,6 @@ struct test_contiguous : verify_program<test_contiguous>
}
};
struct test_eliminate_contiguous : verify_program<test_eliminate_contiguous>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3, 4, 5}};
auto seq = p.add_parameter("seq", s);
std::vector<int64_t> perm{0, 2, 1, 3};
auto tran_seq = p.add_instruction(migraphx::op::transpose{perm}, seq);
std::vector<int64_t> out_shape{0, 0, -1};
p.add_instruction(migraphx::op::reshape{out_shape}, tran_seq);
return p;
}
};
struct test_transpose : verify_program<test_transpose>
{
migraphx::program create_program() const
......@@ -1326,6 +1330,17 @@ struct test_batchnorm_inference : verify_program<test_batchnorm_inference>
}
};
struct test_clip : verify_program<test_clip>
{
migraphx::program create_program() const
{
migraphx::program p;
auto x = p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {3}});
p.add_instruction(migraphx::op::clip{6.0, 0.0}, x);
return p;
}
};
struct test_conv_bn : verify_program<test_conv_bn>
{
migraphx::program create_program() const
......@@ -3330,7 +3345,6 @@ template struct test_logsoftmax<0>;
template struct test_logsoftmax<1>;
template struct test_logsoftmax<2>;
template struct test_logsoftmax<3>;
template struct test_logsoftmax<4>;
template <int Axis>
struct test_logsoftmax_1 : verify_program<test_logsoftmax_1<Axis>>
......@@ -3347,6 +3361,71 @@ struct test_logsoftmax_1 : verify_program<test_logsoftmax_1<Axis>>
};
template struct test_logsoftmax_1<0>;
template struct test_logsoftmax_1<1>;
struct test_fp32_fp16_lall : verify_program<test_fp32_fp16_lall>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
std::vector<float> data(2 * 3);
std::iota(data.begin(), data.end(), 1.0f);
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_parameter("p2", s);
p.add_instruction(migraphx::op::add{}, l1, l2);
migraphx::quantize(p, {"all"});
return p;
};
};
struct test_fp32_fp16_ladd : verify_program<test_fp32_fp16_ladd>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
std::vector<float> data(2 * 3);
std::iota(data.begin(), data.end(), 1.0f);
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_parameter("p2", s);
p.add_instruction(migraphx::op::add{}, l1, l2);
migraphx::quantize(p, {"add"});
return p;
};
};
struct test_fp32_fp16_add : verify_program<test_fp32_fp16_add>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
p.add_instruction(migraphx::op::add{}, diff, p1);
migraphx::quantize(p, {"add"});
return p;
};
};
struct test_fp32_fp16_sub : verify_program<test_fp32_fp16_sub>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
p.add_instruction(migraphx::op::add{}, diff, p1);
migraphx::quantize(p, {"sub"});
return p;
};
};
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -18,6 +18,13 @@ struct memory_coloring_target
struct allocate
{
migraphx::shape s{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::pack(f(self.s, "shape"));
}
std::string name() const { return "allocate"; }
migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
{
......
......@@ -794,4 +794,14 @@ TEST_CASE(no_pad_test)
EXPECT(p == prog);
}
TEST_CASE(clip_test)
{
migraphx::program p;
auto l0 = p.add_parameter("0", migraphx::shape{migraphx::shape::float_type, {3}});
p.add_instruction(migraphx::op::clip{6.0, 0.0}, l0);
auto prog = migraphx::parse_onnx("clip_test.onnx");
EXPECT(p == prog);
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }