Merge branch 'ins_fp32_fp16' into int8_quantize

5656cc4e · Shucai Xiao · b93f5320 · 5f77b1e3 · 5656cc4e · 5656cc4e
Commit 5656cc4e authored May 13, 2019 by Shucai Xiao
18 changed files
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -18,6 +18,7 @@ add_library(migraphx
    generate.cpp
    instruction.cpp
    program.cpp
+    quantization.cpp
    shape.cpp
    schedule.cpp
    pass_manager.cpp

--- a/src/include/migraphx/op/binary.hpp
+++ b/src/include/migraphx/op/binary.hpp
@@ -24,11 +24,12 @@ struct binary : op_name<Derived>
            return {s0.type(), s0.lens()};
        }
    }
+
    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
-            if(input1.get_shape().standard() and input2.get_shape().standard())
+            if(input1.get_shape().packed() and input2.get_shape().packed())
            {
                std::transform(input1.begin(),
                               input1.end(),
@@ -44,6 +45,7 @@ struct binary : op_name<Derived>
                });
            }
        });
+
        return result;
    }
 };

--- a/src/include/migraphx/op/convert.hpp
+++ b/src/include/migraphx/op/convert.hpp
+#ifndef MIGRAPHX_GUARD_OPERATORS_CONVERT_HPP
+#define MIGRAPHX_GUARD_OPERATORS_CONVERT_HPP
+
+#include <array>
+#include <migraphx/op/unary.hpp>
+#include <migraphx/operation.hpp>
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/stringutils.hpp>
+#include <migraphx/streamutils.hpp>
+#include <migraphx/literal.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <cmath>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+
+struct convert : unary<convert>
+{
+    shape::type_t target_type = shape::half_type;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.target_type, "target_type"));
+    }
+
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this}.has(1);
+        return {target_type, inputs.at(0).lens(), inputs.at(0).strides()};
+    }
+
+    auto apply() const
+    {
+        return [](auto x) { return x; };
+    }
+
+    convert(shape::type_t t) : target_type{t} {}
+    convert() {}
+};
+
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/include/migraphx/op/unary.hpp
+++ b/src/include/migraphx/op/unary.hpp
@@ -23,25 +23,31 @@ struct unary : op_name<Derived>
            return {s.type(), s.lens()};
        }
    }
+
    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
-        visit_all(result, args[0])([&](auto output, auto input) {
-            if(input.get_shape().standard())
-            {
-                std::transform(input.begin(),
-                               input.end(),
-                               output.begin(),
-                               static_cast<const Derived&>(*this).apply());
-            }
-            else
-            {
+        result.visit([&](auto output) {
+            args[0].visit([&](auto input) {
+                if(input.get_shape().packed())
+                {
+                    std::transform(input.begin(),
+                                   input.end(),
+                                   output.begin(),
+                                   static_cast<const Derived&>(*this).apply());
+
+                    return result;
+                }
+
                shape_for_each(output.get_shape(), [&](const auto& idx) {
                    output(idx.begin(), idx.end()) =
                        static_cast<const Derived&>(*this).apply()(input(idx.begin(), idx.end()));
                });
-            }
+
+                return result;
+            });
        });
+
        return result;
    }
 };

--- a/src/include/migraphx/operators.hpp
+++ b/src/include/migraphx/operators.hpp
@@ -15,6 +15,7 @@
 #include <migraphx/op/common.hpp>
 #include <migraphx/op/concat.hpp>
 #include <migraphx/op/contiguous.hpp>
+#include <migraphx/op/convert.hpp>
 #include <migraphx/op/convolution.hpp>
 #include <migraphx/op/cosh.hpp>
 #include <migraphx/op/cos.hpp>

--- a/src/include/migraphx/quantization.hpp
+++ b/src/include/migraphx/quantization.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_QUANTIZATION_HPP
+#define MIGRAPHX_GUARD_RTGLIB_QUANTIZATION_HPP
+
+#include <string>
+#include <vector>
+#include <migraphx/instruction_ref.hpp>
+#include <migraphx/operation.hpp>
+#include <migraphx/config.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+
+struct program;
+
+void quantize(program& prog, const std::vector<std::string>& ins_names);
+void quantize(program& prog);
+
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/opt/memory_coloring_impl.cpp
+++ b/src/opt/memory_coloring_impl.cpp
@@ -177,7 +177,7 @@ void memory_coloring_impl::build()
 void memory_coloring_impl::rewrite()
 {
    std::vector<std::size_t> dims;
-    dims.push_back(required_bytes / sizeof(float));
+    dims.push_back((required_bytes + sizeof(float) - 1) / sizeof(float));
    shape s                       = {shape::float_type, dims};
    instruction_ref scratch_param = p_program->add_parameter("scratch", s);
    for(auto ins : iterator_for(*p_program))

--- a/src/py/migraphx_py.cpp
+++ b/src/py/migraphx_py.cpp
@@ -2,6 +2,7 @@
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include <migraphx/program.hpp>
+#include <migraphx/quantization.hpp>
 #include <migraphx/generate.hpp>
 #include <migraphx/cpu/target.hpp>
 #include <migraphx/stringutils.hpp>
@@ -181,6 +182,10 @@ PYBIND11_MODULE(migraphx, m)
    });

    m.def("generate_argument", &migraphx::generate_argument, py::arg("s"), py::arg("seed") = 0);
+    m.def("quantize", [](migraphx::program& p, std::vector<std::string>& ins_names) {
+        migraphx::quantize(p, ins_names);
+    });
+    m.def("quantize", [](migraphx::program& p) { migraphx::quantize(p, {"all"}); });

 #ifdef HAVE_GPU
    m.def("allocate_gpu", &migraphx::gpu::allocate_gpu, py::arg("s"), py::arg("host") = false);

--- a/src/quantization.cpp
+++ b/src/quantization.cpp
+#include <migraphx/quantization.hpp>
+#include <migraphx/program.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/op/convert.hpp>
+#include <migraphx/stringutils.hpp>
+#include <migraphx/ranges.hpp>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+
+instruction_ref insert_fp16(program& prog,
+                            instruction_ref& ins,
+                            shape::type_t type,
+                            std::unordered_map<instruction_ref, instruction_ref>& map_fp16)
+{
+    if(map_fp16.count(ins) > 0)
+    {
+        return map_fp16[ins];
+    }
+
+    assert(ins->get_shape().type() == shape::float_type ||
+           ins->get_shape().type() == shape::double_type);
+    instruction_ref ins_fp16{};
+    ins_fp16      = prog.insert_instruction(std::next(ins), op::convert{type}, ins);
+    map_fp16[ins] = ins_fp16;
+
+    return ins_fp16;
+}
+
+void quantize(program& prog, const std::vector<std::string>& ins_names)
+{
+    std::unordered_map<instruction_ref, instruction_ref> map_fp16;
+    for(auto ins : iterator_for(prog))
+    {
+        // all indicates every instruction is converted
+        if((not contains(ins_names, "all")) and (not contains(ins_names, ins->name())))
+        {
+            continue;
+        }
+
+        shape::type_t orig_type = ins->get_shape().type();
+        // process all inputs, if input is a fp32 or fp64, convert it
+        // to a fp16 by adding a convert operator.
+        auto inputs = ins->inputs();
+        std::vector<instruction_ref> converted_inputs;
+        for(auto input : inputs)
+        {
+            auto s = input->get_shape();
+            if(s.type() == shape::float_type || s.type() == shape::double_type)
+            {
+                // if the input is a convert operator, uses its input
+                // as its current input
+                instruction_ref input_fp16{};
+                if(input->name() == "convert")
+                {
+                    input_fp16 = input->inputs().front();
+                }
+                else
+                {
+                    input_fp16 = insert_fp16(prog, input, shape::half_type, map_fp16);
+                }
+                converted_inputs.push_back(input_fp16);
+            }
+            else
+            {
+                converted_inputs.push_back(input);
+            }
+        }
+
+        // no change for the input, go to the next instruction
+        if(inputs == converted_inputs)
+        {
+            continue;
+        }
+
+        auto op        = ins->get_operator();
+        auto ins_shape = compute_shape(op, converted_inputs);
+        if(ins_shape.type() != orig_type)
+        {
+            // insert another convert instruction to convert it back
+            if(ins == std::prev(prog.end()))
+            {
+                prog.add_instruction(op::convert{orig_type}, ins);
+            }
+            else
+            {
+                // check the dead code case to avoid assert
+                bool output_empty = ins->outputs().empty();
+                auto ins_orig_type =
+                    prog.insert_instruction(std::next(ins), op::convert{orig_type}, ins);
+                if(!output_empty)
+                {
+                    prog.replace_instruction(ins, ins_orig_type);
+                }
+            }
+        }
+
+        prog.replace_instruction(ins, op, converted_inputs);
+    }
+}
+
+void quantize(program& prog) { quantize(prog, {"all"}); }
+
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -27,6 +27,7 @@ add_library(migraphx_device
    device/add_relu.cpp
    device/contiguous.cpp
    device/logsoftmax.cpp
+    device/convert.cpp
    device/mul.cpp
    device/concat.cpp
    device/pad.cpp

--- a/src/targets/gpu/adjust_allocation.cpp
+++ b/src/targets/gpu/adjust_allocation.cpp
@@ -2,7 +2,6 @@
 #include <migraphx/instruction.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/iterator_for.hpp>
-#include <algorithm>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/device/convert.cpp
+++ b/src/targets/gpu/device/convert.cpp
+#include <migraphx/gpu/device/convert.hpp>
+#include <migraphx/gpu/device/nary.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+
+void convert(hipStream_t stream, const argument& result, const argument& arg)
+{
+    result.visit([&](auto output) {
+        arg.visit([&](auto input) {
+            const auto* input_ptr = device_cast(input.data());
+            auto* output_ptr      = device_cast(output.data());
+            gs_launch(stream,
+                      result.get_shape().elements())([=](auto i) { output_ptr[i] = input_ptr[i]; });
+        });
+    });
+}
+
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/include/migraphx/gpu/convert.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/convert.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_CONVERT_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CONVERT_HPP
+
+#include <migraphx/shape.hpp>
+#include <migraphx/op/convert.hpp>
+#include <migraphx/gpu/oper.hpp>
+#include <migraphx/gpu/device/convert.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+struct context;
+
+struct hip_convert : unary_device<hip_convert, device::convert>
+{
+    op::convert op;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
+    hip_convert(op::convert oper) : op(oper) {}
+
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        inputs.pop_back();
+        check_shapes{inputs}.packed();
+        return op.compute_shape(inputs);
+    }
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/device/convert.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/convert.hpp
+
+#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_CONVERT_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DEVICE_CONVERT_HPP
+
+#include <migraphx/argument.hpp>
+#include <migraphx/config.hpp>
+#include <hip/hip_runtime_api.h>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+
+void convert(hipStream_t stream, const argument& result, const argument& arg);
+
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/gpu/lowering.cpp
+++ b/src/targets/gpu/lowering.cpp
@@ -45,6 +45,7 @@
 #include <migraphx/gpu/pad.hpp>
 #include <migraphx/gpu/gather.hpp>
 #include <migraphx/gpu/lrn.hpp>
+#include <migraphx/gpu/convert.hpp>
 #include <migraphx/gpu/clip.hpp>
 #include <utility>
 #include <functional>
@@ -102,6 +103,7 @@ struct miopen_apply
        add_extend_op<hip_logsoftmax, op::logsoftmax>("logsoftmax");
        add_extend_op<hip_gather, op::gather>("gather");
        add_extend_op<hip_pad, op::pad>("pad");
+        add_extend_op<hip_convert, op::convert>("convert");
        add_extend_op<hip_clip, op::clip>("clip");

        add_lrn_op();

--- a/test/cpu_ops_test.cpp
+++ b/test/cpu_ops_test.cpp
@@ -3,6 +3,7 @@
 #include <migraphx/literal.hpp>
 #include <migraphx/operators.hpp>
 #include <migraphx/instruction.hpp>
+#include <migraphx/quantization.hpp>
 #include <migraphx/cpu/target.hpp>
 #include <migraphx/verify.hpp>
 #include <migraphx/onnx.hpp>
@@ -1557,6 +1558,34 @@ TEST_CASE(fp16_test)
    EXPECT(migraphx::verify_range(results_vector, gold));
 }

+TEST_CASE(fp32_fp16_test)
+{
+    auto create_program = [] {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        std::vector<float> data(2 * 3);
+        std::iota(data.begin(), data.end(), 1.0f);
+        auto l1 = p.add_literal(migraphx::literal(s, data));
+        auto l2 = p.add_literal(migraphx::literal(s, data));
+        p.add_instruction(migraphx::op::add{}, l1, l2);
+        return p;
+    };
+
+    auto test_case = [&](std::vector<std::string>&& op_names) {
+        std::vector<float> gold_res = {2.0, 4.0, 6.0, 8.0, 10.0, 12.0};
+        auto p                      = create_program();
+        migraphx::quantize(p, op_names);
+        p.compile(migraphx::cpu::target{});
+        auto result = p.eval({});
+        std::vector<float> res;
+        result.visit([&](auto output) { res.assign(output.begin(), output.end()); });
+        EXPECT(migraphx::verify_range(res, gold_res));
+    };
+
+    test_case({"all"});
+    test_case({"add"});
+}
+
 TEST_CASE(clip_test)
 {
    migraphx::program p;

--- a/test/gpu/miopen.cpp
+++ b/test/gpu/miopen.cpp
@@ -10,6 +10,7 @@
 #include <migraphx/type_name.hpp>
 #include <migraphx/verify_args.hpp>
 #include <migraphx/instruction.hpp>
+#include <migraphx/quantization.hpp>

 #include <miopen/miopen.h>

@@ -3360,4 +3361,70 @@ struct test_logsoftmax_1 : verify_program<test_logsoftmax_1<Axis>>
 template struct test_logsoftmax_1<0>;
 template struct test_logsoftmax_1<1>;

+struct test_fp32_fp16_lall : verify_program<test_fp32_fp16_lall>
+{
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        std::vector<float> data(2 * 3);
+        std::iota(data.begin(), data.end(), 1.0f);
+        auto l1 = p.add_literal(migraphx::literal(s, data));
+        auto l2 = p.add_parameter("p2", s);
+        p.add_instruction(migraphx::op::add{}, l1, l2);
+        migraphx::quantize(p, {"all"});
+        return p;
+    };
+};
+
+struct test_fp32_fp16_ladd : verify_program<test_fp32_fp16_ladd>
+{
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        std::vector<float> data(2 * 3);
+        std::iota(data.begin(), data.end(), 1.0f);
+        auto l1 = p.add_literal(migraphx::literal(s, data));
+        auto l2 = p.add_parameter("p2", s);
+        p.add_instruction(migraphx::op::add{}, l1, l2);
+        migraphx::quantize(p, {"add"});
+        return p;
+    };
+};
+
+struct test_fp32_fp16_add : verify_program<test_fp32_fp16_add>
+{
+    migraphx::program create_program()
+    {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        auto p1   = p.add_parameter("x", s);
+        auto p2   = p.add_parameter("y", s);
+        auto sum  = p.add_instruction(migraphx::op::add{}, p1, p2);
+        auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
+        p.add_instruction(migraphx::op::add{}, diff, p1);
+        migraphx::quantize(p, {"add"});
+
+        return p;
+    };
+};
+
+struct test_fp32_fp16_sub : verify_program<test_fp32_fp16_sub>
+{
+    migraphx::program create_program()
+    {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        auto p1   = p.add_parameter("x", s);
+        auto p2   = p.add_parameter("y", s);
+        auto sum  = p.add_instruction(migraphx::op::add{}, p1, p2);
+        auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
+        p.add_instruction(migraphx::op::add{}, diff, p1);
+        migraphx::quantize(p, {"sub"});
+
+        return p;
+    };
+};
+
 int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/type_conversion.cpp
+++ b/test/type_conversion.cpp
+#include <iostream>
+#include <vector>
+#include <migraphx/literal.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/cpu/target.hpp>
+#include <migraphx/verify.hpp>
+#include <migraphx/quantization.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/propagate_constant.hpp>
+#include <migraphx/pass_manager.hpp>
+#include <migraphx/onnx.hpp>
+#include "test.hpp"
+#include <migraphx/half.hpp>
+
+TEST_CASE(param_add)
+{
+    auto create_program_float = [] {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        auto p1 = p.add_parameter("x", s);
+        auto p2 = p.add_parameter("y", s);
+        p.add_instruction(migraphx::op::add{}, p1, p2);
+
+        return p;
+    };
+
+    auto create_program_half = [] {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        auto p1  = p.add_parameter("x", s);
+        auto hp1 = p.insert_instruction(std::next(p1), migraphx::op::convert{}, p1);
+        auto p2  = p.add_parameter("y", s);
+        auto hp2 = p.insert_instruction(std::next(p2), migraphx::op::convert{}, p2);
+        auto hs  = p.add_instruction(migraphx::op::add{}, hp1, hp2);
+        p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);
+
+        return p;
+    };
+
+    {
+        auto p1 = create_program_float();
+        auto p2 = create_program_half();
+
+        migraphx::quantize(p1);
+        EXPECT(p1 == p2);
+    }
+
+    {
+        auto p1 = create_program_float();
+        auto p2 = create_program_half();
+
+        migraphx::quantize(p1, {"add"});
+        EXPECT(p1 == p2);
+    }
+}
+
+TEST_CASE(param_add_sub)
+{
+    auto create_program_float = [] {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        auto p1   = p.add_parameter("x", s);
+        auto p2   = p.add_parameter("y", s);
+        auto sum  = p.add_instruction(migraphx::op::add{}, p1, p2);
+        auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
+        p.add_instruction(migraphx::op::add{}, diff, p1);
+
+        return p;
+    };
+
+    auto create_program_half_add = [] {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        auto p1  = p.add_parameter("x", s);
+        auto hp1 = p.insert_instruction(
+            std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
+        auto p2  = p.add_parameter("y", s);
+        auto hp2 = p.insert_instruction(
+            std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
+        auto hsum  = p.add_instruction(migraphx::op::add{}, hp1, hp2);
+        auto sum   = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hsum);
+        auto diff  = p.add_instruction(migraphx::op::sub{}, sum, p2);
+        auto hdiff = p.add_instruction(
+            migraphx::op::convert{migraphx::op::convert{migraphx::shape::half_type}}, diff);
+        auto res = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
+        p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, res);
+
+        return p;
+    };
+
+    auto create_program_half_sub = [] {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        auto p1  = p.add_parameter("x", s);
+        auto p2  = p.add_parameter("y", s);
+        auto hp2 = p.insert_instruction(
+            std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
+        auto sum   = p.add_instruction(migraphx::op::add{}, p1, p2);
+        auto hsum  = p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, sum);
+        auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
+        auto diff  = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hdiff);
+        p.add_instruction(migraphx::op::add{}, diff, p1);
+
+        return p;
+    };
+
+    auto create_program_half_all = [] {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        auto p1  = p.add_parameter("x", s);
+        auto hp1 = p.insert_instruction(
+            std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
+        auto p2  = p.add_parameter("y", s);
+        auto hp2 = p.insert_instruction(
+            std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
+        auto hsum  = p.add_instruction(migraphx::op::add{}, hp1, hp2);
+        auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
+        auto hres  = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
+        p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hres);
+
+        return p;
+    };
+
+    {
+        auto p1 = create_program_float();
+        auto p2 = create_program_half_add();
+
+        migraphx::quantize(p1, {"add"});
+        EXPECT(p1 == p2);
+    }
+
+    {
+        auto p1 = create_program_float();
+        auto p2 = create_program_half_sub();
+
+        migraphx::quantize(p1, {"sub"});
+        EXPECT(p1 == p2);
+    }
+
+    {
+        auto p1 = create_program_float();
+        auto p2 = create_program_half_all();
+
+        migraphx::quantize(p1);
+        migraphx::run_passes(p1, {migraphx::dead_code_elimination{}});
+
+        EXPECT(p1 == p2);
+    }
+}
+
+TEST_CASE(literal_add)
+{
+    auto create_program_float = [] {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+        std::vector<float> data(2 * 3);
+        std::iota(data.begin(), data.end(), 1.0f);
+        auto l1 = p.add_literal(migraphx::literal(s, data));
+        auto l2 = p.add_literal(migraphx::literal(s, data));
+        p.add_instruction(migraphx::op::add{}, l1, l2);
+
+        return p;
+    };
+
+    auto create_program_half = [] {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::half_type, {2, 3}};
+        std::vector<migraphx::half> data(2 * 3);
+        std::iota(data.begin(), data.end(), 1.0f);
+        auto l1 = p.add_literal(migraphx::literal(s, data));
+        auto l2 = p.add_literal(migraphx::literal(s, data));
+        auto hs = p.add_instruction(migraphx::op::add{}, l1, l2);
+        p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);
+
+        return p;
+    };
+
+    {
+        auto p1 = create_program_float();
+        auto p2 = create_program_half();
+
+        migraphx::quantize(p1, {"all"});
+        migraphx::run_passes(p1,
+                             {migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
+        migraphx::run_passes(p2,
+                             {migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
+
+        EXPECT(p1 == p2);
+    }
+
+    {
+        auto p1 = create_program_float();
+        auto p2 = create_program_half();
+
+        migraphx::quantize(p1, {"add"});
+        migraphx::run_passes(p1,
+                             {migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
+        migraphx::run_passes(p2,
+                             {migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
+        EXPECT(p1 == p2);
+    }
+}
+
+int main(int argc, const char* argv[]) { test::run(argc, argv); }