Unverified Commit debed772 authored by mvermeulen, committed by GitHub

Merge pull request #348 from ROCmSoftwarePlatform/int8_quantize

Int8 quantize
parents b5ba22ae a8e33f9f
......@@ -6,23 +6,43 @@
#include <migraphx/instruction_ref.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/config.hpp>
#include <migraphx/target.hpp>
#include <migraphx/program.hpp>
#include <migraphx/env.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct program;
void quantize(program& prog, const std::vector<std::string>& ins_names);
void quantize(program& prog);
void quantize_fp16(program& prog, const std::vector<std::string>& ins_names = {"all"});
// insert the capture operator for the inputs of each operator to be quantized
// to int8
std::size_t capture_arguments(program& prog,
const std::vector<std::string>& ins_names,
const std::function<void(std::size_t, std::vector<argument>)>& func);
std::shared_ptr<std::vector<std::pair<float, float>>>
capture_arguments_impl(program& prog, const target& t, const std::vector<std::string>& ins_names);
template <class T>
std::shared_ptr<std::vector<std::pair<float, float>>>
capture_arguments(program& prog, T&& t, const std::vector<std::string>& ins_names)
{
static_assert(std::is_same<std::remove_cv_t<std::remove_reference_t<T>>, target>{} &&
std::is_lvalue_reference<T>{},
"Dangling reference to target!");
return capture_arguments_impl(prog, t, ins_names);
}
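// Usage sketch (it mirrors the op_capture test added in this change): the
// target must be passed as a named lvalue of type migraphx::target, otherwise
// the static_assert above rejects the call at compile time.
//
//   migraphx::target t = migraphx::cpu::target{};
//   auto quant_params  = migraphx::capture_arguments(prog, t, {"dot", "convolution"});
//   // capture_arguments(prog, migraphx::cpu::target{}, ...) would not compile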
void quantize_int8(program& prog,
const target& t,
std::vector<program::parameter_map>& calibration,
const std::vector<std::string>& ins_names = {"dot", "convolution"});
void quantize_int8_impl(program& prog,
const std::vector<std::pair<float, float>>& quant_params,
const std::vector<std::string>& ins_names);
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
......@@ -11,6 +11,8 @@
#include <migraphx/context.hpp>
#include <migraphx/pass.hpp>
#include <migraphx/config.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/rank.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -34,10 +36,86 @@ struct target
* @return The context to be used during compilation and execution.
*/
context get_context() const;
/**
* @brief copy an argument to the current target.
*
* @param arg Input argument to be copied to the target
* @return Argument in the target.
*/
argument copy_to(const argument& arg) const;
/**
* @brief copy an argument from the current target.
*
* @param arg Input argument to be copied from the target
* @return Argument in the host.
*/
argument copy_from(const argument& arg) const;
/**
* @brief Allocate an argument based on the input shape
*
* @param s Shape of the argument to be allocated in the target
* @return Allocated argument in the target.
*/
argument allocate(const shape& s) const;
};
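// A concrete target now also supplies the three members above. For reference,
// the cpu target in this change implements copy_to/copy_from as pass-throughs
// and allocate as a zero-filled argument:
//
//   argument copy_to(const argument& arg) const { return arg; }
//   argument copy_from(const argument& arg) const { return arg; }
//   argument allocate(const shape& s) const { return fill_argument(s, 0); }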
#else
template <class T>
auto target_allocate(rank<1>, T& x, const shape& s) -> decltype(x.allocate(s))
{
return x.allocate(s);
}
template <class T>
argument target_allocate(rank<0>, T& x, const shape&)
{
std::string name = x.name();
MIGRAPHX_THROW("Not computable: " + name);
}
template <class T>
argument target_allocate(T& x, const shape& s)
{
return target_allocate(rank<1>{}, x, s);
}
template <class T>
auto copy_to_target(rank<1>, T& x, const argument& arg) -> decltype(x.copy_to(arg))
{
return x.copy_to(arg);
}
template <class T>
argument copy_to_target(rank<0>, T&, const argument& arg)
{
return arg;
}
template <class T>
argument copy_to_target(T& x, const argument& arg)
{
return copy_to_target(rank<1>{}, x, arg);
}
template <class T>
auto copy_from_target(rank<1>, T& x, const argument& arg) -> decltype(x.copy_from(arg))
{
return x.copy_from(arg);
}
template <class T>
argument copy_from_target(rank<0>, T&, const argument& arg)
{
return arg;
}
template <class T>
argument copy_from_target(T& x, const argument& arg)
{
return copy_from_target(rank<1>{}, x, arg);
}
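// The three helper families above share one tag-dispatch idiom: rank<1> derives
// from rank<0>, so the rank<1> overload wins whenever its trailing decltype is
// well formed, and the call silently falls back otherwise. A stand-alone sketch
// of the same idiom, with hypothetical names, could look like:
//
//   template <class T>
//   auto describe(rank<1>, const T& x) -> decltype(x.name())
//   {
//       return x.name(); // chosen when T has a name() member
//   }
//   template <class T>
//   std::string describe(rank<0>, const T&)
//   {
//       return "<unnamed>"; // fallback overload
//   }
//   template <class T>
//   std::string describe(const T& x)
//   {
//       return describe(rank<1>{}, x); // rank<1>{} converts to rank<0> if needed
//   }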
/*
* Type-erased interface for:
*
......@@ -46,6 +124,9 @@ struct target
* std::string name() const;
* std::vector<pass> get_passes(context& ctx) const;
* context get_context() const;
* argument copy_to(const argument& input) const;
* argument copy_from(const argument& input) const;
* argument allocate(const shape& s) const;
* };
*
*/
......@@ -125,6 +206,24 @@ struct target
return (*this).private_detail_te_get_handle().get_context();
}
argument copy_to(const argument& input) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().copy_to(input);
}
argument copy_from(const argument& input) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().copy_from(input);
}
argument allocate(const shape& s) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().allocate(s);
}
friend bool is_shared(const target& private_detail_x, const target& private_detail_y)
{
return private_detail_x.private_detail_te_handle_mem_var ==
......@@ -141,6 +240,9 @@ struct target
virtual std::string name() const = 0;
virtual std::vector<pass> get_passes(context& ctx) const = 0;
virtual context get_context() const = 0;
virtual argument copy_to(const argument& input) const = 0;
virtual argument copy_from(const argument& input) const = 0;
virtual argument allocate(const shape& s) const = 0;
};
template <typename PrivateDetailTypeErasedT>
......@@ -181,6 +283,24 @@ struct target
context get_context() const override { return private_detail_te_value.get_context(); }
argument copy_to(const argument& input) const override
{
return copy_to_target(private_detail_te_value, input);
}
argument copy_from(const argument& input) const override
{
return copy_from_target(private_detail_te_value, input);
}
argument allocate(const shape& s) const override
{
return target_allocate(private_detail_te_value, s);
}
PrivateDetailTypeErasedT private_detail_te_value;
};
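// Note: the overrides above dispatch through copy_to_target, copy_from_target
// and target_allocate instead of calling members directly, so a concrete target
// without copy_to/copy_from still compiles (the argument is passed through
// unchanged), while a missing allocate only fails at runtime with a throw.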
......
......@@ -183,10 +183,16 @@ PYBIND11_MODULE(migraphx, m)
});
m.def("generate_argument", &migraphx::generate_argument, py::arg("s"), py::arg("seed") = 0);
m.def("quantize", [](migraphx::program& p, std::vector<std::string>& ins_names) {
migraphx::quantize(p, ins_names);
});
m.def("quantize", [](migraphx::program& p) { migraphx::quantize(p, {"all"}); });
m.def("quantize_fp16",
&migraphx::quantize_fp16,
py::arg("prog"),
py::arg("ins_names") = std::vector<std::string>{"all"});
m.def("quantize_int8",
&migraphx::quantize_int8,
py::arg("prog"),
py::arg("t"),
py::arg("calibration") = std::vector<migraphx::program::parameter_map>{},
py::arg("ins_names") = std::vector<std::string>{"dot", "convolution"});
#ifdef HAVE_GPU
m.def("allocate_gpu", &migraphx::gpu::allocate_gpu, py::arg("s"), py::arg("host") = false);
......
......@@ -3,6 +3,8 @@
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/clip.hpp>
#include <migraphx/op/round.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/add.hpp>
......@@ -13,17 +15,24 @@
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/target.hpp>
#include <utility>
#include <set>
#include <iomanip>
#include <fstream>
#include <algorithm>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_INT8_QUANTIZATION_PARAMS)
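// When the MIGRAPHX_INT8_QUANTIZATION_PARAMS environment variable is enabled,
// quantize_int8_impl below prints the computed (scale, shift) pair of every
// quantized input, which helps when inspecting calibration results.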
instruction_ref insert_quant_ins(program& prog,
instruction_ref& ins,
shape::type_t type,
std::unordered_map<instruction_ref, instruction_ref>& map_ins)
std::unordered_map<instruction_ref, instruction_ref>& map_ins,
float scale = 1.0f,
float shift = 0.0f)
{
if(map_ins.count(ins) > 0)
{
......@@ -35,11 +44,52 @@ instruction_ref insert_quant_ins(program& prog,
return ins;
}
assert(ins->get_shape().type() == shape::float_type ||
ins->get_shape().type() == shape::double_type ||
ins->get_shape().type() == shape::int32_type);
assert(ins->get_shape().type() == shape::float_type or
ins->get_shape().type() == shape::double_type or
ins->get_shape().type() == shape::int32_type or
ins->get_shape().type() == shape::half_type);
instruction_ref quant_ins{};
quant_ins = prog.insert_instruction(std::next(ins), op::convert{type}, ins);
auto insert_loc = std::next(ins);
if(type == shape::int8_type)
{
auto scaled_ins = ins;
if(scale != 1.0f)
{
auto float_ins = scaled_ins;
if(scaled_ins->get_shape().type() != shape::float_type)
{
float_ins =
prog.insert_instruction(insert_loc, op::convert{shape::float_type}, scaled_ins);
}
std::vector<float> vec_scale(scaled_ins->get_shape().elements(), scale);
auto l_scale = prog.add_literal(literal(float_ins->get_shape(), vec_scale));
scaled_ins = prog.insert_instruction(insert_loc, op::mul{}, l_scale, float_ins);
}
auto shifted_ins = scaled_ins;
if(shift != 0.0f)
{
auto float_ins = shifted_ins;
if(shifted_ins->get_shape().type() != shape::float_type)
{
float_ins = prog.insert_instruction(
insert_loc, op::convert{shape::float_type}, shifted_ins);
}
std::vector<float> vec_shift(shifted_ins->get_shape().elements(), shift);
auto l_shift = prog.add_literal(literal(float_ins->get_shape(), vec_shift));
shifted_ins = prog.insert_instruction(insert_loc, op::add{}, l_shift, float_ins);
}
auto rounded_ins = prog.insert_instruction(insert_loc, op::round{}, shifted_ins);
auto clipped_ins =
prog.insert_instruction(insert_loc, op::clip{127.0f, -128.0f}, rounded_ins);
quant_ins = prog.insert_instruction(insert_loc, op::convert{type}, clipped_ins);
}
else
{
quant_ins = prog.insert_instruction(insert_loc, op::convert{type}, ins);
}
map_ins[ins] = quant_ins;
return quant_ins;
......@@ -50,7 +100,7 @@ instruction_ref insert_quant_ins(program& prog,
// For the conversion there could be cases of overflow, but that
// is very rare in deep learning, so we simply truncate the
// input to get the fp16.
void quantize_fp16(program& prog, const std::vector<std::string>& ins_names)
{
std::unordered_map<instruction_ref, instruction_ref> map_fp16;
for(auto ins : iterator_for(prog))
......@@ -115,7 +165,288 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
}
}
void quantize(program& prog) { quantize(prog, {"all"}); }
static void ins_quantize_int8(program& prog,
instruction_ref ins,
std::vector<instruction_ref>& converted_inputs,
const std::vector<std::pair<float, float>>& ins_quant_params)
{
auto orig_type = ins->get_shape().type();
auto inputs = ins->inputs();
if(ins->name() == "dot")
{
auto dot_op = any_cast<op::dot>(ins->get_operator());
float new_alpha = dot_op.alpha / (ins_quant_params[0].first * ins_quant_params[1].first);
float new_beta = dot_op.beta;
// Check the rescaled alpha and beta values: if their magnitudes are at
// least 50 (a provisional threshold), rounding them to integers keeps the
// relative rounding error small, so they can be used directly in quant_dot
float threshold = 50.0f;
if(fabs(new_alpha) >= threshold && fabs(new_beta) >= threshold)
{
int32_t quant_alpha = static_cast<int32_t>(std::round(new_alpha));
int32_t quant_beta = static_cast<int32_t>(std::round(new_beta));
if(shape::int32_type == orig_type)
{
prog.replace_instruction(
ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
}
else
{
auto quant_dot = prog.insert_instruction(
ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
prog.replace_instruction(ins, op::convert{orig_type}, quant_dot);
}
}
// otherwise, alpha or beta cannot be rounded to an integer without a
// large relative rounding error
else
{
if(converted_inputs.size() == 3)
{
converted_inputs.pop_back();
}
auto q_dot = prog.insert_instruction(ins, op::quant_dot{1, 0}, converted_inputs);
auto f_dot = prog.insert_instruction(ins, op::convert{shape::float_type}, q_dot);
auto c_shape = q_dot->get_shape();
std::vector<float> vec_alpha(c_shape.elements(), new_alpha);
auto l_alpha =
prog.add_literal(literal({shape::float_type, c_shape.lens()}, vec_alpha));
if(inputs.size() == 3 and dot_op.beta != 0.0f)
{
auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, f_dot);
std::vector<float> vec_beta(c_shape.elements(), dot_op.beta);
auto l_beta =
prog.add_literal(literal({shape::float_type, c_shape.lens()}, vec_beta));
instruction_ref beta_c{};
if(orig_type != shape::float_type)
{
auto fp32_c =
prog.insert_instruction(ins, op::convert{shape::float_type}, inputs.back());
beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, fp32_c);
}
else
{
beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
}
if(orig_type == shape::float_type)
{
prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
}
else
{
auto f_res = prog.insert_instruction(ins, op::add{}, alpha_ab, beta_c);
prog.replace_instruction(ins, op::convert{orig_type}, f_res);
}
}
else
{
if(orig_type == shape::float_type)
{
prog.replace_instruction(ins, op::mul{}, l_alpha, f_dot);
}
else
{
auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, f_dot);
prog.replace_instruction(ins, op::convert{orig_type}, alpha_ab);
}
}
}
}
else if(ins->name() == "convolution")
{
// Current MIOpen convolution does not support alpha and beta,
// so we need a separate multiply to adjust the output
auto conv_op = any_cast<op::convolution>(ins->get_operator());
auto padding = conv_op.padding;
auto stride = conv_op.stride;
auto dilation = conv_op.dilation;
auto padding_mode = conv_op.padding_mode;
auto group = conv_op.group;
auto adjust_factor =
std::round(1.0f / (ins_quant_params[0].first * ins_quant_params[1].first));
auto quant_conv = prog.insert_instruction(
ins,
op::quant_convolution{padding, stride, dilation, padding_mode, group},
converted_inputs);
float threshold = 50.0f;
std::vector<float> vec_factor(quant_conv->get_shape().elements(), adjust_factor);
if(quant_conv->get_shape().type() == orig_type and adjust_factor >= threshold)
{
auto l_factor = prog.add_literal(
literal(quant_conv->get_shape(), vec_factor.begin(), vec_factor.end()));
prog.replace_instruction(ins, op::mul{}, quant_conv, l_factor);
}
// convert the quant_conv output to float, multiply by the factor and
// convert back to the original type
else
{
auto float_conv =
prog.insert_instruction(ins, op::convert{shape::float_type}, quant_conv);
auto l_factor = prog.add_literal(literal(float_conv->get_shape(), vec_factor));
if(orig_type == shape::float_type)
{
prog.replace_instruction(ins, op::mul{}, l_factor, float_conv);
}
else
{
auto adjusted_conv = prog.insert_instruction(ins, op::mul{}, l_factor, float_conv);
prog.replace_instruction(ins, op::convert{orig_type}, adjusted_conv);
}
}
}
else
{
MIGRAPHX_THROW("QUANTIZE_INT8: does not support operator " + ins->name());
}
}
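// Worked example of the dot path above, taken from the dot_float test case in
// this change: op::dot{2.0f, 1.5f} with per-input scales of 0.1f gives
// new_alpha = 2.0 / (0.1 * 0.1) = 200 >= 50 but new_beta = 1.5 < 50, so the
// non-integer branch is used: quant_dot{1, 0}, a convert back to float, and
// explicit mul/add instructions with 200.0f and 1.5f literals.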
// int8 quantization differs from fp16 since int8 can only represent values
// in [-128, 127]. To convert a float or double to int8 we need a scale and
// a shift, and the conversion is then v_int8 = fp * scale + shift.
// To simplify the changes, the shift is taken as 0.0f for now.
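// As a concrete illustration (numbers chosen only for exposition): with
// scale = 20.0 and shift = 0.0, an fp32 value of 3.14 becomes
// round(3.14 * 20.0) = 63, while 7.0 would saturate to 127 after the clip
// step in insert_quant_ins above.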
void quantize_int8_impl(program& prog,
const std::vector<std::pair<float, float>>& quant_params,
const std::vector<std::string>& ins_names)
{
if(enabled(MIGRAPHX_INT8_QUANTIZATION_PARAMS{}))
{
for(std::size_t i = 0; i < quant_params.size(); ++i)
{
auto param = quant_params.at(i);
std::cout << "ins_index = " << i << ", scale = " << param.first
<< ", shift = " << param.second << std::endl;
}
std::cout << std::endl;
}
// For now, we only support the int8 quantization of gemm and convolution
std::set<std::string> op_names = {"convolution", "dot"};
std::set<std::string> input_ins_names(ins_names.begin(), ins_names.end());
if(!std::includes(
op_names.begin(), op_names.end(), input_ins_names.begin(), input_ins_names.end()))
{
MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
}
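// Note: std::includes requires both input ranges to be sorted; iterating the
// two std::set containers above satisfies that precondition.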
std::size_t quant_param_index = 0;
std::unordered_map<instruction_ref, instruction_ref> map_quant_ins;
std::unordered_map<instruction_ref, std::size_t> map_ins_index;
for(auto ins : iterator_for(prog))
{
if(not contains(ins_names, ins->name()))
{
continue;
}
// the dot operator can take 2 or 3 input arguments; if the 3rd
// argument is present, it is converted to int32.
std::vector<instruction_ref> converted_inputs;
// process all inputs: if an input is fp32 or fp64, convert it
// to an int8 type by adding a convert operator, then replace
// the operator with its corresponding int8 version
auto inputs = ins->inputs();
std::vector<std::pair<float, float>> ins_quant_params;
for(auto input : inputs)
{
// assign a quantization-parameter index to each input, reusing the
// index if the input has been seen before
std::size_t ins_index =
(map_ins_index.count(input) > 0) ? map_ins_index[input] : quant_param_index++;
map_ins_index[input] = ins_index;
auto param = quant_params[map_ins_index[input]];
ins_quant_params.push_back(param);
// In general, the target_type is int8, but for the dot
// operation, if it has 3 inputs, then the last one should
// be converted to int32_type
shape::type_t quant_type = shape::int8_type;
if((ins->name() == "dot") and (inputs.size() == 3) and (input == inputs.back()))
{
quant_type = shape::int32_type;
}
auto s = input->get_shape();
if((s.type() == shape::float_type or s.type() == shape::double_type or
s.type() == shape::half_type or s.type() == shape::int32_type) and
s.type() != quant_type)
{
// if the input is a convert operator whose source already has the
// quantized type, use the convert's input directly
instruction_ref quant_input{};
if(input->name() == "convert" and
input->inputs().front()->get_shape().type() == quant_type)
{
quant_input = input->inputs().front();
// the computed scale is not used in this case, so set the scale
// to 1.0f for this parameter
ins_quant_params.back() = std::pair<float, float>(1.0f, 0.0f);
}
else
{
quant_input = insert_quant_ins(
prog, input, quant_type, map_quant_ins, param.first, param.second);
}
converted_inputs.push_back(quant_input);
}
else
{
converted_inputs.push_back(input);
}
}
// no change for the input, go to the next instruction
if(inputs == converted_inputs)
{
continue;
}
ins_quantize_int8(prog, ins, converted_inputs, ins_quant_params);
}
if(quant_param_index != quant_params.size())
{
MIGRAPHX_THROW("QUANTIZE_INT8: number of scales does not match");
}
}
void quantize_int8(program& prog,
const target& t,
std::vector<program::parameter_map>& calibration,
const std::vector<std::string>& ins_names)
{
// insert capture operator
auto cap_prog = prog;
auto int8_quant_params = capture_arguments(cap_prog, t, ins_names);
cap_prog.compile(t);
// run the compiled program on all calibration data to compute the
// quantization scale and shift for each captured argument
for(auto&& arg : calibration)
{
program::parameter_map m;
for(auto&& x : cap_prog.get_parameter_shapes())
{
if(arg.count(x.first) > 0)
{
assert(x.second == arg[x.first].get_shape());
m[x.first] = t.copy_to(arg[x.first]);
}
else
{
m[x.first] = t.allocate(x.second);
}
}
cap_prog.eval(m);
}
quantize_int8_impl(prog, *int8_quant_params, ins_names);
}
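// A condensed sketch of the expected calling sequence (it follows the
// int8_quantization test added in this change; the program contents and
// calibration data are placeholders):
//
//   migraphx::program p = ...;                   // contains dot/convolution
//   migraphx::target t = migraphx::gpu::target{};
//   std::vector<migraphx::program::parameter_map> calibration = {...};
//   migraphx::quantize_int8(p, t, calibration);  // rewrites p in place
//   p.compile(t);                                // then compile and run as usual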
// For each input argument of the instructions to be quantized, we insert a
// capture operator so the scale and shift can be computed
......@@ -126,10 +457,10 @@ std::size_t capture_arguments(program& prog,
size_t num_quant_params = 0;
// the int8 quantization only supports dot and convolution
std::set<std::string> op_names = {"dot", "convolution"};
std::set<std::string> input_ins_names(ins_names.begin(), ins_names.end());
if(!std::includes(
op_names.begin(), op_names.end(), input_ins_names.begin(), input_ins_names.end()))
{
MIGRAPHX_THROW("CAPTURE_ARGUMENTS: input operator is not supported");
}
......@@ -166,26 +497,35 @@ std::size_t capture_arguments(program& prog,
}
std::shared_ptr<std::vector<std::pair<float, float>>>
capture_arguments_impl(program& prog, const target& t, const std::vector<std::string>& ins_names)
{
std::shared_ptr<std::vector<std::pair<float, float>>> int8_quant_params =
std::make_shared<std::vector<std::pair<float, float>>>();
std::shared_ptr<std::vector<float>> max_abs_vals = std::make_shared<std::vector<float>>();
auto calc_quant_params = [int8_quant_params, max_abs_vals, &t](std::size_t ins_index,
std::vector<argument> args) {
std::pair<float, float> param_pair{64.0f, 0.0f};
// scale and shift are needed only for the int8 type, and we do not
// consider shift for now, so it is set to 0
std::vector<float> vec_val;
args.front().visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
argument arg = t.copy_from(args.front());
arg.visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
auto max_val = *std::max_element(vec_val.begin(), vec_val.end());
auto min_val = *std::min_element(vec_val.begin(), vec_val.end());
auto max_abs = std::max(std::fabs(max_val), std::fabs(min_val));
max_abs_vals->at(ins_index) = std::max(max_abs_vals->at(ins_index), max_abs);
param_pair.first = 127.0f / max_abs_vals->at(ins_index);
// if all values are 0, no need to do scaling
if(max_abs_vals->at(ins_index) == 0.0f)
{
param_pair.first = 1.0f;
}
else
{
param_pair.first = 127.0f / max_abs_vals->at(ins_index);
}
int8_quant_params->at(ins_index) = param_pair;
};
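// For example (illustrative numbers): if the largest magnitude observed for an
// input across all calibration runs is 6.35, the recorded scale is
// 127.0 / 6.35 = 20.0, which maps the observed range onto the full int8 range.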
......@@ -197,11 +537,5 @@ capture_arguments(program& prog, const std::vector<std::string>& ins_names)
return int8_quant_params;
}
std::shared_ptr<std::vector<std::pair<float, float>>> capture_arguments(program& prog)
{
std::vector<std::string> ins_names = {"dot", "convolution"};
return capture_arguments(prog, ins_names);
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -15,6 +15,10 @@ struct target
std::string name() const;
std::vector<pass> get_passes(migraphx::context& ctx) const;
migraphx::context get_context() const { return context{}; }
argument copy_to(const argument& arg) const { return arg; }
argument copy_from(const argument& arg) const { return arg; }
argument allocate(const shape& s) const;
};
} // namespace cpu
......
......@@ -5,6 +5,7 @@
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/generate.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -22,6 +23,8 @@ std::vector<pass> target::get_passes(migraphx::context&) const
dead_code_elimination{}};
}
argument target::allocate(const shape& s) const { return fill_argument(s, 0); }
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -13,6 +13,10 @@ struct target
std::string name() const;
std::vector<pass> get_passes(migraphx::context& gctx) const;
migraphx::context get_context() const;
argument copy_to(const argument& arg) const;
argument copy_from(const argument& arg) const;
argument allocate(const shape& s) const;
};
} // namespace gpu
......
......@@ -85,6 +85,13 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const
std::string target::name() const { return "miopen"; }
migraphx::context target::get_context() const { return context{}; }
argument target::copy_to(const argument& arg) const { return gpu::to_gpu(arg); }
argument target::copy_from(const argument& arg) const { return gpu::from_gpu(arg); }
argument target::allocate(const shape& s) const { return gpu::allocate_gpu(s); }
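// With these three entry points the int8 calibration flow stays target
// agnostic: quantize_int8 copies calibration inputs to the device with copy_to
// and allocates the remaining parameters with allocate, while
// capture_arguments_impl reads the captured values back with copy_from.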
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -1821,7 +1821,7 @@ TEST_CASE(fp32_fp16_test)
auto test_case = [&](std::vector<std::string>&& op_names) {
std::vector<float> gold_res = {2.0, 4.0, 6.0, 8.0, 10.0, 12.0};
auto p = create_program();
migraphx::quantize(p, op_names);
migraphx::quantize_fp16(p, op_names);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> res;
......@@ -2067,7 +2067,8 @@ TEST_CASE(op_capture)
p.add_instruction(migraphx::op::dot{}, pa, ps);
migraphx::program capture_p = p;
migraphx::capture_arguments(capture_p);
migraphx::target t = migraphx::cpu::target{};
migraphx::capture_arguments(capture_p, t, {"dot"});
p.compile(migraphx::cpu::target{});
capture_p.compile(migraphx::cpu::target{});
......
......@@ -3694,7 +3694,7 @@ struct test_fp32_fp16_lall : verify_program<test_fp32_fp16_lall>
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_parameter("p2", s);
p.add_instruction(migraphx::op::add{}, l1, l2);
migraphx::quantize(p, {"all"});
migraphx::quantize_fp16(p, {"all"});
return p;
};
};
......@@ -3710,7 +3710,7 @@ struct test_fp32_fp16_ladd : verify_program<test_fp32_fp16_ladd>
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_parameter("p2", s);
p.add_instruction(migraphx::op::add{}, l1, l2);
migraphx::quantize(p, {"add"});
migraphx::quantize_fp16(p, {"add"});
return p;
};
};
......@@ -3726,7 +3726,7 @@ struct test_fp32_fp16_add : verify_program<test_fp32_fp16_add>
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
p.add_instruction(migraphx::op::add{}, diff, p1);
migraphx::quantize(p, {"add"});
migraphx::quantize_fp16(p, {"add"});
return p;
};
......@@ -3743,7 +3743,7 @@ struct test_fp32_fp16_sub : verify_program<test_fp32_fp16_sub>
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
p.add_instruction(migraphx::op::add{}, diff, p1);
migraphx::quantize(p, {"sub"});
migraphx::quantize_fp16(p, {"sub"});
return p;
};
......@@ -3851,11 +3851,12 @@ struct test_round : verify_program<test_round>
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3, 4, 6}};
auto param = p.add_parameter("x", s);
p.add_instruction(migraphx::op::round{}, param);
return p;
}
};
struct test_convert : verify_program<test_convert>
......
#include <iostream>
#include <vector>
#include <migraphx/operators.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/verify.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/onnx.hpp>
#include "test.hpp"
#include <migraphx/half.hpp>
TEST_CASE(target_copy)
{
auto run_prog = [](migraphx::program p,
const migraphx::target& t,
migraphx::program::parameter_map& m_in,
std::vector<float>& res) {
p.compile(t);
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
if(m_in.count(x.first) > 0)
{
m[x.first] = t.copy_to(m_in[x.first]);
}
else
{
m[x.first] = t.allocate(x.second);
}
}
auto result = t.copy_from(p.eval(m));
result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
};
auto create_program = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
p.add_instruction(migraphx::op::add{}, p1, p2);
return p;
};
{
auto p = create_program();
migraphx::program::parameter_map m;
migraphx::shape s{migraphx::shape::float_type, {3, 3}};
m["x"] = migraphx::generate_argument(s);
std::vector<float> cpu_result;
migraphx::target cpu_t = migraphx::cpu::target{};
run_prog(p, cpu_t, m, cpu_result);
std::vector<float> gpu_result;
migraphx::target gpu_t = migraphx::gpu::target{};
run_prog(p, gpu_t, m, gpu_result);
EXPECT(migraphx::verify_range(cpu_result, gpu_result));
}
}
TEST_CASE(int8_quantization)
{
auto run_prog = [](migraphx::program p,
const migraphx::target& t,
migraphx::program::parameter_map& m_in,
std::vector<float>& res) {
std::vector<migraphx::program::parameter_map> cali_data;
cali_data.push_back(m_in);
migraphx::quantize_int8(p, t, cali_data);
p.compile(t);
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
if(m_in.count(x.first) > 0)
{
m[x.first] = t.copy_to(m_in[x.first]);
}
else
{
m[x.first] = t.allocate(x.second);
}
}
auto result = t.copy_from(p.eval(m));
result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
};
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{}, pa, pb, pc);
return p;
};
{
auto p = create_program();
migraphx::program::parameter_map m;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
m["a"] = migraphx::generate_argument(sa);
m["c"] = migraphx::generate_argument(sc);
std::vector<float> cpu_result;
migraphx::target cpu_t = migraphx::cpu::target{};
run_prog(p, cpu_t, m, cpu_result);
std::vector<float> gpu_result;
migraphx::target gpu_t = migraphx::gpu::target{};
run_prog(p, gpu_t, m, gpu_result);
EXPECT(migraphx::verify_range(cpu_result, gpu_result));
}
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
#include <iostream>
#include <vector>
#include <migraphx/literal.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/verify.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/onnx.hpp>
#include "test.hpp"
#include <migraphx/half.hpp>
TEST_CASE(param_add)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
p.add_instruction(migraphx::op::add{}, p1, p2);
return p;
};
auto create_program_half = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(std::next(p1), migraphx::op::convert{}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(std::next(p2), migraphx::op::convert{}, p2);
auto hs = p.add_instruction(migraphx::op::add{}, hp1, hp2);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize_fp16(p1);
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize_fp16(p1, {"add"});
EXPECT(p1 == p2);
}
}
TEST_CASE(param_add_sub)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
p.add_instruction(migraphx::op::add{}, diff, p1);
return p;
};
auto create_program_half_add = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(
std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto hsum = p.add_instruction(migraphx::op::add{}, hp1, hp2);
auto sum = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hsum);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
auto hdiff = p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, diff);
auto res = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, res);
return p;
};
auto create_program_half_sub = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto hsum = p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, sum);
auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
auto diff = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hdiff);
p.add_instruction(migraphx::op::add{}, diff, p1);
return p;
};
auto create_program_half_all = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(
std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto hsum = p.add_instruction(migraphx::op::add{}, hp1, hp2);
auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
auto hres = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hres);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half_add();
migraphx::quantize_fp16(p1, {"add"});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half_sub();
migraphx::quantize_fp16(p1, {"sub"});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half_all();
migraphx::quantize_fp16(p1);
migraphx::run_passes(p1, {migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
}
TEST_CASE(literal_add)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
std::vector<float> data(2 * 3);
std::iota(data.begin(), data.end(), 1.0f);
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_literal(migraphx::literal(s, data));
p.add_instruction(migraphx::op::add{}, l1, l2);
return p;
};
auto create_program_half = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::half_type, {2, 3}};
std::vector<migraphx::half> data(2 * 3);
std::iota(data.begin(), data.end(), 1.0f);
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_literal(migraphx::literal(s, data));
auto hs = p.add_instruction(migraphx::op::add{}, l1, l2);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize_fp16(p1, {"all"});
migraphx::run_passes(p1,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
migraphx::run_passes(p2,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize_fp16(p1, {"add"});
migraphx::run_passes(p1,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
migraphx::run_passes(p2,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
}
TEST_CASE(op_capture)
{
auto test_func = [&](std::size_t ins_index, const std::vector<migraphx::argument>& args) {
(void)ins_index;
(void)args;
};
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s1{migraphx::shape::float_type, {3, 3}};
migraphx::shape s2{migraphx::shape::float_type, {3, 6}};
auto p1 = p.add_parameter("x", s1);
auto p2 = p.add_parameter("y", s1);
auto pb = p.add_parameter("b", s2);
auto pc = p.add_parameter("c", s2);
auto pa = p.add_instruction(migraphx::op::add{}, p1, p2);
auto ps = p.add_instruction(migraphx::op::dot{}, pa, pb, pc);
p.add_instruction(migraphx::op::dot{}, pa, ps);
return p;
};
auto create_program_op = [&] {
migraphx::program p;
migraphx::shape s1{migraphx::shape::float_type, {3, 3}};
migraphx::shape s2{migraphx::shape::float_type, {3, 6}};
auto p1 = p.add_parameter("x", s1);
auto p2 = p.add_parameter("y", s1);
auto pb = p.add_parameter("b", s2);
auto pc = p.add_parameter("c", s2);
auto pa = p.add_instruction(migraphx::op::add{}, p1, p2);
auto opb = p.insert_instruction(std::next(pb), migraphx::op::capture{1, test_func}, pb);
auto opc = p.insert_instruction(std::next(pc), migraphx::op::capture{2, test_func}, pc);
auto opa = p.add_instruction(migraphx::op::capture{0, test_func}, pa);
auto ps = p.add_instruction(migraphx::op::dot{}, opa, opb, opc);
auto ops = p.add_instruction(migraphx::op::capture{3, test_func}, ps);
p.add_instruction(migraphx::op::dot{}, opa, ops);
return p;
};
{
auto p = create_program_float();
auto op_capture_p = create_program_op();
migraphx::target t = migraphx::cpu::target{};
migraphx::capture_arguments(p, t, {"dot", "convolution"});
EXPECT(p == op_capture_p);
}
}
TEST_CASE(dot_float)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{2.0f, 1.5f}, pa, pb, pc);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfa(sa.elements(), 0.1f);
auto fa = p.add_literal(migraphx::literal(sa, vfa));
auto ma = p.add_instruction(migraphx::op::mul{}, fa, pa);
auto ra = p.add_instruction(migraphx::op::round{}, ma);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal(sb, vfb));
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, pb);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qb);
auto fdot = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
std::vector<float> v_alpha(fdot->get_shape().elements(), 200.0f);
auto new_alpha = p.add_literal(migraphx::literal(fdot->get_shape(), v_alpha));
auto alpha_ab = p.add_instruction(migraphx::op::mul{}, new_alpha, fdot);
std::vector<float> v_beta(pc->get_shape().elements(), 1.5f);
auto beta = p.add_literal(migraphx::literal(pc->get_shape(), v_beta));
auto beta_c = p.add_instruction(migraphx::op::mul{}, beta, pc);
p.add_instruction(migraphx::op::add{}, alpha_ab, beta_c);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{
{0.1f, 0.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
migraphx::run_passes(p, {migraphx::dead_code_elimination{}});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_double_2args)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::double_type, {2, 16}};
migraphx::shape sb{migraphx::shape::double_type, {16, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
p.add_instruction(migraphx::op::dot{2.0f, 1.5f}, pa, pb);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::double_type, {2, 16}};
migraphx::shape sb{migraphx::shape::double_type, {16, 8}};
migraphx::shape sc{migraphx::shape::double_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfa(sa.elements(), 0.1f);
auto fpa = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
auto fa = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vfa));
auto ma = p.add_instruction(migraphx::op::mul{}, fa, fpa);
auto ra = p.add_instruction(migraphx::op::round{}, ma);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
auto fpb = p.insert_instruction(
insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, fpb);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qb);
auto fdot = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
std::vector<float> v_alpha(fdot->get_shape().elements(), 200.0f);
auto new_alpha = p.add_literal(migraphx::literal(fdot->get_shape(), v_alpha));
auto alpha_ab = p.add_instruction(migraphx::op::mul{}, new_alpha, fdot);
p.add_instruction(migraphx::op::convert{migraphx::shape::double_type}, alpha_ab);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_large_alpha_beta_float)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{20.0f, 50.5f}, pa, pb, pc);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfa(sa.elements(), 0.1f);
auto fa = p.add_literal(migraphx::literal(sa, vfa));
auto ma = p.add_instruction(migraphx::op::mul{}, fa, pa);
// add the shift
std::vector<float> vsa(sa.elements(), 1.0f);
auto sfta = p.add_literal(migraphx::literal(sa, vsa));
auto msa = p.add_instruction(migraphx::op::add{}, sfta, ma);
auto ra = p.add_instruction(migraphx::op::round{}, msa);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal(sb, vfb));
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, pb);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
// quantize parameter c to int32 type
auto qc = p.insert_instruction(
std::next(pc), migraphx::op::convert{migraphx::shape::int32_type}, pc);
auto qdot = p.add_instruction(migraphx::op::quant_dot{2000, 51}, qa, qb, qc);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{
{0.1f, 1.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_large_alpha_beta_int32)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{20.0f, 50.0f}, pa, pb, pc);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfa(sa.elements(), 0.1f);
auto fa = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vfa));
auto conv_a = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
auto ma = p.add_instruction(migraphx::op::mul{}, fa, conv_a);
// add the shift
std::vector<float> vsa(sa.elements(), 1.0f);
auto sfta = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vsa));
auto msa = p.add_instruction(migraphx::op::add{}, sfta, ma);
auto ra = p.add_instruction(migraphx::op::round{}, msa);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
auto conv_b = p.insert_instruction(
insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pb);
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, conv_b);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
p.add_instruction(migraphx::op::quant_dot{2000, 50}, qa, qb, pc);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{
{0.1f, 1.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_int32_one_arg)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::int32_type, {16, 16}};
auto pa = p.add_parameter("a", s);
p.add_instruction(migraphx::op::dot{20.0f, 50.0f}, pa, pa);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::int32_type, {16, 16}};
auto pa = p.add_parameter("a", s);
// add the shift
auto fpa = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
std::vector<float> vsa(s.elements(), 1.0f);
auto sfta = p.add_literal(migraphx::literal({migraphx::shape::float_type, s.lens()}, vsa));
auto msa = p.add_instruction(migraphx::op::add{}, sfta, fpa);
auto ra = p.add_instruction(migraphx::op::round{}, msa);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
auto q_dot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qa);
auto f_dot = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, q_dot);
std::vector<float> v_alpha(f_dot->get_shape().elements(), 20.0f);
auto new_alpha = p.add_literal(migraphx::literal{f_dot->get_shape(), v_alpha});
auto alpha_ab = p.add_instruction(migraphx::op::mul{}, new_alpha, f_dot);
p.add_instruction(migraphx::op::convert{migraphx::shape::int32_type}, alpha_ab);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{{1.0f, 1.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_int32)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{2.0f, 5.5f}, pa, pb, pc);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfa(sa.elements(), 0.1f);
auto fa = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vfa));
auto conv_a = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
auto ma = p.add_instruction(migraphx::op::mul{}, fa, conv_a);
// add the shift
std::vector<float> vsa(sa.elements(), 1.0f);
auto sfta = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vsa));
auto msa = p.add_instruction(migraphx::op::add{}, sfta, ma);
auto ra = p.add_instruction(migraphx::op::round{}, msa);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
auto conv_b = p.insert_instruction(
insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pb);
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, conv_b);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qb);
auto fr = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
std::vector<float> v_alpha(fr->get_shape().elements(), 20.0f);
auto new_alpha = p.add_literal(migraphx::literal(fr->get_shape(), v_alpha));
auto alpha_ab = p.add_instruction(migraphx::op::mul{}, new_alpha, fr);
auto fc = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pc);
std::vector<float> v_beta(fc->get_shape().elements(), 5.5f);
auto beta = p.add_literal(migraphx::literal(fc->get_shape(), v_beta));
auto beta_c = p.add_instruction(migraphx::op::mul{}, beta, fc);
auto f_res = p.add_instruction(migraphx::op::add{}, alpha_ab, beta_c);
p.add_instruction(migraphx::op::convert{migraphx::shape::int32_type}, f_res);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{
{0.1f, 1.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_float_convert)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int8_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto fpa = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
p.add_instruction(migraphx::op::dot{2.0f, 5.5f}, fpa, pb);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int8_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, pb);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, pa, qb);
auto fr = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
std::vector<float> v_alpha(fr->get_shape().elements(), 10.0f);
auto new_alpha = p.add_literal(migraphx::literal(fr->get_shape(), v_alpha));
p.add_instruction(migraphx::op::mul{}, new_alpha, fr);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{{0.1f, 1.0f}, {0.1f, 0.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
migraphx::run_passes(p, {migraphx::dead_code_elimination{}});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(conv_float)
{
auto create_program = [] {
migraphx::program p;
auto input =
p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3}});
auto weights =
p.add_parameter("w", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3}});
p.add_instruction(migraphx::op::convolution{}, input, weights);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sx{migraphx::shape::float_type, {4, 3, 3, 3}};
migraphx::shape sw{migraphx::shape::float_type, {4, 3, 3, 3}};
auto px = p.add_parameter("x", sx);
auto pw = p.add_parameter("w", sw);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfx(sx.elements(), 0.1f);
auto fx = p.add_literal(migraphx::literal(sx, vfx));
auto mx = p.add_instruction(migraphx::op::mul{}, fx, px);
auto rx = p.add_instruction(migraphx::op::round{}, mx);
auto cx = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, rx);
auto qx = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, cx);
// quantize parameter b to int8 type
auto insert_loc = std::next(pw);
std::vector<float> vfw(sw.elements(), 0.1f);
auto fw = p.add_literal(migraphx::literal(sw, vfw));
auto mw = p.insert_instruction(insert_loc, migraphx::op::mul{}, fw, pw);
auto rw = p.insert_instruction(insert_loc, migraphx::op::round{}, mw);
auto cw = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rw);
auto qw =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cw);
auto q_conv = p.add_instruction(migraphx::op::quant_convolution{}, qx, qw);
auto f_conv = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, q_conv);
std::vector<float> v_adj(f_conv->get_shape().elements(), 100.0f);
auto adj = p.add_literal(migraphx::literal(f_conv->get_shape(), v_adj));
p.add_instruction(migraphx::op::mul{}, adj, f_conv);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"convolution"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(conv_int32)
{
auto create_program = [] {
migraphx::program p;
auto input =
p.add_parameter("x", migraphx::shape{migraphx::shape::int32_type, {4, 3, 3, 3}});
auto weights =
p.add_parameter("w", migraphx::shape{migraphx::shape::int32_type, {4, 3, 3, 3}});
p.add_instruction(migraphx::op::convolution{}, input, weights);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sx{migraphx::shape::int32_type, {4, 3, 3, 3}};
migraphx::shape sw{migraphx::shape::int32_type, {4, 3, 3, 3}};
auto px = p.add_parameter("x", sx);
auto pw = p.add_parameter("w", sw);
// quantize parameter a to int8 type, multiply the scale
auto fpx = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, px);
std::vector<float> vfx(sx.elements(), 0.1f);
auto fx = p.add_literal(migraphx::literal(fpx->get_shape(), vfx));
auto mx = p.add_instruction(migraphx::op::mul{}, fx, fpx);
auto rx = p.add_instruction(migraphx::op::round{}, mx);
auto cx = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, rx);
auto qx = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, cx);
// quantize parameter w to int8 type
auto insert_loc = std::next(pw);
auto fpw = p.insert_instruction(
insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pw);
std::vector<float> vfw(sw.elements(), 0.1f);
auto fw = p.add_literal(migraphx::literal(fpw->get_shape(), vfw));
auto mw = p.insert_instruction(insert_loc, migraphx::op::mul{}, fw, fpw);
auto rw = p.insert_instruction(insert_loc, migraphx::op::round{}, mw);
auto cw = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rw);
auto qw =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cw);
auto q_conv = p.add_instruction(migraphx::op::quant_convolution{}, qx, qw);
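// The original program computes in int32, so the rescale factor
// (1 / (0.1 * 0.1) = 100) is applied directly to the int32 quant_convolution
// output; no conversion back to float is needed.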
std::vector<float> v_adj(q_conv->get_shape().elements(), 100.0f);
auto adj = p.add_literal(migraphx::literal(q_conv->get_shape(), v_adj));
p.add_instruction(migraphx::op::mul{}, q_conv, adj);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>> quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"convolution"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(conv_half)
{
auto create_program = [] {
migraphx::program p;
auto input =
p.add_parameter("x", migraphx::shape{migraphx::shape::half_type, {4, 3, 3, 3}});
auto weights =
p.add_parameter("w", migraphx::shape{migraphx::shape::half_type, {4, 3, 3, 3}});
p.add_instruction(migraphx::op::convolution{}, input, weights);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sx{migraphx::shape::half_type, {4, 3, 3, 3}};
migraphx::shape sw{migraphx::shape::half_type, {4, 3, 3, 3}};
auto px = p.add_parameter("x", sx);
auto pw = p.add_parameter("w", sw);
// quantize parameter x to int8 type, multiply the scale
auto fpx = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, px);
std::vector<float> vfx(sx.elements(), 0.1f);
auto fx = p.add_literal(migraphx::literal(fpx->get_shape(), vfx));
auto mx = p.add_instruction(migraphx::op::mul{}, fx, fpx);
auto rx = p.add_instruction(migraphx::op::round{}, mx);
auto cx = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, rx);
auto qx = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, cx);
// quantize parameter w to int8 type
auto insert_loc = std::next(pw);
auto fpw = p.insert_instruction(
insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pw);
std::vector<float> vfw(sw.elements(), 0.1f);
auto fw = p.add_literal(migraphx::literal(fpw->get_shape(), vfw));
auto mw = p.insert_instruction(insert_loc, migraphx::op::mul{}, fw, fpw);
auto rw = p.insert_instruction(insert_loc, migraphx::op::round{}, mw);
auto cw = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rw);
auto qw =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cw);
auto q_conv = p.add_instruction(migraphx::op::quant_convolution{}, qx, qw);
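// The int32 result is dequantized in float and then converted back to
// half_type so the quantized program keeps the original output type.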
auto f_conv = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, q_conv);
std::vector<float> v_adj(f_conv->get_shape().elements(), 100.0f);
auto adj = p.add_literal(migraphx::literal(f_conv->get_shape(), v_adj));
auto f_res = p.add_instruction(migraphx::op::mul{}, adj, f_conv);
p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, f_res);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>> quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"convolution"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(target_copy)
{
auto run_prog = [](migraphx::program p,
const migraphx::target& t,
migraphx::program::parameter_map& m_in,
std::vector<float>& res) {
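// Copy the provided inputs to the target, allocate any remaining parameters
// (e.g. the output buffer) on the target, evaluate the program, then copy
// the result back to the host.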
p.compile(t);
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
if(m_in.count(x.first) > 0)
{
m[x.first] = t.copy_to(m_in[x.first]);
}
else
{
m[x.first] = t.allocate(x.second);
}
}
auto result = t.copy_from(p.eval(m));
result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
};
auto create_program = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
p.add_instruction(migraphx::op::add{}, p1, p2);
return p;
};
{
auto p = create_program();
migraphx::program::parameter_map m;
migraphx::shape s{migraphx::shape::float_type, {3, 3}};
m["x"] = migraphx::generate_argument(s);
std::vector<float> cpu_result;
migraphx::target cpu_t = migraphx::cpu::target{};
run_prog(p, cpu_t, m, cpu_result);
std::vector<float> orig_result;
run_prog(p, cpu_t, m, orig_result);
EXPECT(migraphx::verify_range(cpu_result, orig_result));
}
}
TEST_CASE(int8_quantization)
{
auto run_prog = [](migraphx::program p,
const migraphx::target& t,
migraphx::program::parameter_map& m_in,
std::vector<float>& res,
bool b_quantize = false) {
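// When b_quantize is set, the single parameter_map is used as calibration
// data for quantize_int8 before the program is compiled; otherwise the
// original float program runs unchanged.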
if(b_quantize)
{
std::vector<migraphx::program::parameter_map> cali_data;
cali_data.push_back(m_in);
migraphx::quantize_int8(p, t, cali_data);
}
p.compile(t);
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
if(m_in.count(x.first) > 0)
{
m[x.first] = t.copy_to(m_in[x.first]);
}
else
{
m[x.first] = t.allocate(x.second);
}
}
auto result = t.copy_from(p.eval(m));
result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
};
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{}, pa, pb, pc);
return p;
};
{
auto p = create_program();
migraphx::program::parameter_map m;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
m["a"] = migraphx::generate_argument(sa);
m["c"] = migraphx::generate_argument(sc);
std::vector<float> quant_result;
migraphx::target cpu_t = migraphx::cpu::target{};
run_prog(p, cpu_t, m, quant_result, true);
std::vector<float> no_quant_result;
run_prog(p, cpu_t, m, no_quant_result);
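// int8 quantization introduces rounding error, so the quantized and float
// results are compared with verify_range's tolerance rather than exact
// equality.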
EXPECT(migraphx::verify_range(quant_result, no_quant_result));
}
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
#include <iostream>
#include <vector>
#include <migraphx/literal.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/verify.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/onnx.hpp>
#include "test.hpp"
#include <migraphx/half.hpp>
TEST_CASE(param_add)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
p.add_instruction(migraphx::op::add{}, p1, p2);
return p;
};
auto create_program_half = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto hs = p.add_instruction(migraphx::op::add{}, hp1, hp2);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize(p1);
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize(p1, {"add"});
EXPECT(p1 == p2);
}
}
TEST_CASE(param_add_sub)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
p.add_instruction(migraphx::op::add{}, diff, p1);
return p;
};
auto create_program_half_add = [] {
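// Expected program when only "add" is quantized: the adds run in half
// precision while the sub stays in float, so converts are inserted at every
// fp16/float boundary.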
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(
std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto hsum = p.add_instruction(migraphx::op::add{}, hp1, hp2);
auto sum = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hsum);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
auto hdiff = p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, diff);
auto res = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, res);
return p;
};
auto create_program_half_sub = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto hsum = p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, sum);
auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
auto diff = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hdiff);
p.add_instruction(migraphx::op::add{}, diff, p1);
return p;
};
auto create_program_half_all = [] {
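// Expected program when every instruction is quantized: the whole chain runs
// in half precision and only the final result is converted back to float
// (intermediate converts disappear after dead_code_elimination).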
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(
std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto hsum = p.add_instruction(migraphx::op::add{}, hp1, hp2);
auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
auto hres = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hres);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half_add();
migraphx::quantize(p1, {"add"});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half_sub();
migraphx::quantize(p1, {"sub"});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half_all();
migraphx::quantize(p1);
migraphx::run_passes(p1, {migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
}
TEST_CASE(literal_add)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
std::vector<float> data(2 * 3);
std::iota(data.begin(), data.end(), 1.0f);
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_literal(migraphx::literal(s, data));
p.add_instruction(migraphx::op::add{}, l1, l2);
return p;
};
auto create_program_half = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::half_type, {2, 3}};
std::vector<migraphx::half> data(2 * 3);
std::iota(data.begin(), data.end(), 1.0f);
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_literal(migraphx::literal(s, data));
auto hs = p.add_instruction(migraphx::op::add{}, l1, l2);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize(p1, {"all"});
migraphx::run_passes(p1,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
migraphx::run_passes(p2,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize(p1, {"add"});
migraphx::run_passes(p1,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
migraphx::run_passes(p2,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
}
TEST_CASE(op_capture)
{
auto test_func = [&](std::size_t ins_index, const std::vector<migraphx::argument>& args) {
(void)ins_index;
(void)args;
};
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s1{migraphx::shape::float_type, {3, 3}};
migraphx::shape s2{migraphx::shape::float_type, {3, 6}};
auto p1 = p.add_parameter("x", s1);
auto p2 = p.add_parameter("y", s1);
auto pb = p.add_parameter("b", s2);
auto pc = p.add_parameter("c", s2);
auto pa = p.add_instruction(migraphx::op::add{}, p1, p2);
auto ps = p.add_instruction(migraphx::op::dot{}, pa, pb, pc);
p.add_instruction(migraphx::op::dot{}, pa, ps);
return p;
};
auto create_program_op = [&] {
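// Expected program after capture insertion: each input of the dot
// instructions is wrapped in a capture op, which reports the argument to
// test_func and forwards it unchanged.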
migraphx::program p;
migraphx::shape s1{migraphx::shape::float_type, {3, 3}};
migraphx::shape s2{migraphx::shape::float_type, {3, 6}};
auto p1 = p.add_parameter("x", s1);
auto p2 = p.add_parameter("y", s1);
auto pb = p.add_parameter("b", s2);
auto pc = p.add_parameter("c", s2);
auto pa = p.add_instruction(migraphx::op::add{}, p1, p2);
auto opb = p.insert_instruction(std::next(pb), migraphx::op::capture{1, test_func}, pb);
auto opc = p.insert_instruction(std::next(pc), migraphx::op::capture{2, test_func}, pc);
auto opa = p.add_instruction(migraphx::op::capture{0, test_func}, pa);
auto ps = p.add_instruction(migraphx::op::dot{}, opa, opb, opc);
auto ops = p.add_instruction(migraphx::op::capture{3, test_func}, ps);
p.add_instruction(migraphx::op::dot{}, opa, ops);
return p;
};
{
auto p = create_program_float();
auto op_capture_p = create_program_op();
migraphx::capture_arguments(p, {"dot", "convolution"}, test_func);
EXPECT(p == op_capture_p);
}
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -11,6 +11,8 @@
#include <migraphx/context.hpp>
#include <migraphx/pass.hpp>
#include <migraphx/config.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/rank.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -34,15 +36,103 @@ struct target
* @return The context to be used during compilation and execution.
*/
context get_context() const;
/**
* @brief copy an argument to the current target.
*
* @param arg Input argument to be copied to the target
* @return Argument in the target.
*/
argument copy_to(const argument& arg) const;
/**
* @brief copy an argument from the current target.
*
* @param arg Input argument to be copied from the target
* @return Argument in the host.
*/
argument copy_from(const argument& arg) const;
/**
* @brief Allocate an argument based on the input shape
*
* @param s Shape of the argument to be allocated in the target
* @return Allocated argument in the target.
*/
argument allocate(const shape& s) const;
};
#else
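// The free functions below supply default implementations for the new target
// interface members. They use rank<> tag dispatch: the rank<1> overload is
// selected when T has the corresponding member function (detected via the
// trailing decltype); otherwise overload resolution falls back to rank<0>.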
template <class T>
auto target_allocate(rank<1>, T& x, const shape& s) -> decltype(x.allocate(s))
{
return x.allocate(s);
}
template <class T>
argument target_allocate(rank<0>, T& x, const shape&)
{
std::string name = x.name();
MIGRAPHX_THROW("Not computable: " + name);
}
template <class T>
argument target_allocate(T& x, const shape& s)
{
return target_allocate(rank<1>{}, x, s);
}
template <class T>
auto copy_to_target(rank<1>, T& x, const argument& arg) -> decltype(x.copy_to(arg))
{
return x.copy_to(arg);
}
template <class T>
argument copy_to_target(rank<0>, T&, const argument& arg)
{
return arg;
}
template <class T>
argument copy_to_target(T& x, const argument& arg)
{
return copy_to_target(rank<1>{}, x, arg);
}
template <class T>
auto copy_from_target(rank<1>, T& x, const argument& arg) -> decltype(x.copy_from(arg))
{
return x.copy_from(arg);
}
template <class T>
argument copy_from_target(rank<0>, T&, const argument& arg)
{
return arg;
}
template <class T>
argument copy_from_target(T& x, const argument& arg)
{
return copy_from_target(rank<1>{}, x, arg);
}
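// The interface description below wires these helpers in as the defaults for
// copy_to, copy_from, and allocate when a concrete target does not provide
// its own overrides.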
<%
interface('target',
virtual('name', returns='std::string', const=True),
virtual('get_passes', ctx='context&', returns='std::vector<pass>', const=True),
virtual('get_context', returns='context', const=True)
virtual('name', returns='std::string', const=True),
virtual('get_passes', ctx='context&', returns='std::vector<pass>', const=True),
virtual('get_context', returns='context', const=True),
virtual('copy_to',
returns = 'argument',
input = 'const argument&',
const = True,
default = 'copy_to_target'),
virtual('copy_from',
returns = 'argument',
input = 'const argument&',
const = True,
default = 'copy_from_target'),
virtual('allocate', s='const shape&', returns='argument', const=True,
default = 'target_allocate')
)
%>
......