Unverified Commit debed772 authored by mvermeulen, committed by GitHub

Merge pull request #348 from ROCmSoftwarePlatform/int8_quantize

Int8 quantize
parents b5ba22ae a8e33f9f
......@@ -6,23 +6,43 @@
#include <migraphx/instruction_ref.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/config.hpp>
#include <migraphx/target.hpp>
#include <migraphx/program.hpp>
#include <migraphx/env.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct program;
void quantize(program& prog, const std::vector<std::string>& ins_names);
void quantize(program& prog);
void quantize_fp16(program& prog, const std::vector<std::string>& ins_names = {"all"});
// insert the capture operator for the inputs of each operator to be quantized
// to int8
std::size_t capture_arguments(program& prog,
const std::vector<std::string>& ins_names,
const std::function<void(std::size_t, std::vector<argument>)>& func);
std::shared_ptr<std::vector<std::pair<float, float>>>
capture_arguments_impl(program& prog, const target& t, const std::vector<std::string>& ins_names);
template <class T>
std::shared_ptr<std::vector<std::pair<float, float>>>
capture_arguments(program& prog, T&& t, const std::vector<std::string>& ins_names)
{
static_assert(std::is_same<std::remove_cv_t<std::remove_reference_t<T>>, target>{} &&
std::is_lvalue_reference<T>{},
"Dangling reference to target!");
return capture_arguments_impl(prog, t, ins_names);
}
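// Usage sketch (it mirrors the op_capture test added in this change): the
// target must be passed as a named lvalue of type migraphx::target, otherwise
// the static_assert above rejects the call at compile time.
//
//   migraphx::target t = migraphx::cpu::target{};
//   auto quant_params  = migraphx::capture_arguments(prog, t, {"dot", "convolution"});
//   // capture_arguments(prog, migraphx::cpu::target{}, ...) would not compile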
void quantize_int8(program& prog,
const target& t,
std::vector<program::parameter_map>& calibration,
const std::vector<std::string>& ins_names = {"dot", "convolution"});
void quantize_int8_impl(program& prog,
const std::vector<std::pair<float, float>>& quant_params,
const std::vector<std::string>& ins_names);
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
......@@ -11,6 +11,8 @@
#include <migraphx/context.hpp>
#include <migraphx/pass.hpp>
#include <migraphx/config.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/rank.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -34,10 +36,86 @@ struct target
* @return The context to be used during compilation and execution.
*/
context get_context() const;
/**
* @brief copy an argument to the current target.
*
* @param arg Input argument to be copied to the target
* @return Argument in the target.
*/
argument copy_to(const argument& arg) const;
/**
* @brief copy an argument from the current target.
*
* @param arg Input argument to be copied from the target
* @return Argument in the host.
*/
argument copy_from(const argument& arg) const;
/**
* @brief Allocate an argument based on the input shape
*
* @param s Shape of the argument to be allocated in the target
* @return Allocated argument in the target.
*/
argument allocate(const shape& s) const;
};
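// A concrete target now also supplies the three members above. For reference,
// the cpu target in this change implements copy_to/copy_from as pass-throughs
// and allocate as a zero-filled argument:
//
//   argument copy_to(const argument& arg) const { return arg; }
//   argument copy_from(const argument& arg) const { return arg; }
//   argument allocate(const shape& s) const { return fill_argument(s, 0); }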
#else
template <class T>
auto target_allocate(rank<1>, T& x, const shape& s) -> decltype(x.allocate(s))
{
return x.allocate(s);
}
template <class T>
argument target_allocate(rank<0>, T& x, const shape&)
{
std::string name = x.name();
MIGRAPHX_THROW("Not computable: " + name);
}
template <class T>
argument target_allocate(T& x, const shape& s)
{
return target_allocate(rank<1>{}, x, s);
}
template <class T>
auto copy_to_target(rank<1>, T& x, const argument& arg) -> decltype(x.copy_to(arg))
{
return x.copy_to(arg);
}
template <class T>
argument copy_to_target(rank<0>, T&, const argument& arg)
{
return arg;
}
template <class T>
argument copy_to_target(T& x, const argument& arg)
{
return copy_to_target(rank<1>{}, x, arg);
}
template <class T>
auto copy_from_target(rank<1>, T& x, const argument& arg) -> decltype(x.copy_from(arg))
{
return x.copy_from(arg);
}
template <class T>
argument copy_from_target(rank<0>, T&, const argument& arg)
{
return arg;
}
template <class T>
argument copy_from_target(T& x, const argument& arg)
{
return copy_from_target(rank<1>{}, x, arg);
}
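// The three helper families above share one tag-dispatch idiom: rank<1> derives
// from rank<0>, so the rank<1> overload wins whenever its trailing decltype is
// well formed, and the call silently falls back otherwise. A stand-alone sketch
// of the same idiom, with hypothetical names, could look like:
//
//   template <class T>
//   auto describe(rank<1>, const T& x) -> decltype(x.name())
//   {
//       return x.name(); // chosen when T has a name() member
//   }
//   template <class T>
//   std::string describe(rank<0>, const T&)
//   {
//       return "<unnamed>"; // fallback overload
//   }
//   template <class T>
//   std::string describe(const T& x)
//   {
//       return describe(rank<1>{}, x); // rank<1>{} converts to rank<0> if needed
//   }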
/*
* Type-erased interface for:
*
......@@ -46,6 +124,9 @@ struct target
* std::string name() const;
* std::vector<pass> get_passes(context& ctx) const;
* context get_context() const;
* argument copy_to(const argument& input) const;
* argument copy_from(const argument& input) const;
* argument allocate(const shape& s) const;
* };
*
*/
......@@ -125,6 +206,24 @@ struct target
return (*this).private_detail_te_get_handle().get_context();
}
argument copy_to(const argument& input) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().copy_to(input);
}
argument copy_from(const argument& input) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().copy_from(input);
}
argument allocate(const shape& s) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().allocate(s);
}
friend bool is_shared(const target& private_detail_x, const target& private_detail_y)
{
return private_detail_x.private_detail_te_handle_mem_var ==
......@@ -141,6 +240,9 @@ struct target
virtual std::string name() const = 0;
virtual std::vector<pass> get_passes(context& ctx) const = 0;
virtual context get_context() const = 0;
virtual argument copy_to(const argument& input) const = 0;
virtual argument copy_from(const argument& input) const = 0;
virtual argument allocate(const shape& s) const = 0;
};
template <typename PrivateDetailTypeErasedT>
......@@ -181,6 +283,24 @@ struct target
context get_context() const override { return private_detail_te_value.get_context(); }
argument copy_to(const argument& input) const override
{
return copy_to_target(private_detail_te_value, input);
}
argument copy_from(const argument& input) const override
{
return copy_from_target(private_detail_te_value, input);
}
argument allocate(const shape& s) const override
{
return target_allocate(private_detail_te_value, s);
}
PrivateDetailTypeErasedT private_detail_te_value;
};
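// Note: the overrides above dispatch through copy_to_target, copy_from_target
// and target_allocate instead of calling members directly, so a concrete target
// without copy_to/copy_from still compiles (the argument is passed through
// unchanged), while a missing allocate only fails at runtime with a throw.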
......
......@@ -183,10 +183,16 @@ PYBIND11_MODULE(migraphx, m)
});
m.def("generate_argument", &migraphx::generate_argument, py::arg("s"), py::arg("seed") = 0);
m.def("quantize", [](migraphx::program& p, std::vector<std::string>& ins_names) {
migraphx::quantize(p, ins_names);
});
m.def("quantize", [](migraphx::program& p) { migraphx::quantize(p, {"all"}); });
m.def("quantize_fp16",
&migraphx::quantize_fp16,
py::arg("prog"),
py::arg("ins_names") = std::vector<std::string>{"all"});
m.def("quantize_int8",
&migraphx::quantize_int8,
py::arg("prog"),
py::arg("t"),
py::arg("calibration") = std::vector<migraphx::program::parameter_map>{},
py::arg("ins_names") = std::vector<std::string>{"dot", "convolution"});
#ifdef HAVE_GPU
m.def("allocate_gpu", &migraphx::gpu::allocate_gpu, py::arg("s"), py::arg("host") = false);
......
......@@ -3,6 +3,8 @@
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/clip.hpp>
#include <migraphx/op/round.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/add.hpp>
......@@ -13,17 +15,24 @@
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/target.hpp>
#include <utility>
#include <set>
#include <iomanip>
#include <fstream>
#include <algorithm>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_INT8_QUANTIZATION_PARAMS)
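// When the MIGRAPHX_INT8_QUANTIZATION_PARAMS environment variable is enabled,
// quantize_int8_impl below prints the computed (scale, shift) pair of every
// quantized input, which helps when inspecting calibration results.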
instruction_ref insert_quant_ins(program& prog,
instruction_ref& ins,
shape::type_t type,
std::unordered_map<instruction_ref, instruction_ref>& map_ins)
std::unordered_map<instruction_ref, instruction_ref>& map_ins,
float scale = 1.0f,
float shift = 0.0f)
{
if(map_ins.count(ins) > 0)
{
......@@ -35,11 +44,52 @@ instruction_ref insert_quant_ins(program& prog,
return ins;
}
assert(ins->get_shape().type() == shape::float_type ||
ins->get_shape().type() == shape::double_type ||
ins->get_shape().type() == shape::int32_type);
assert(ins->get_shape().type() == shape::float_type or
ins->get_shape().type() == shape::double_type or
ins->get_shape().type() == shape::int32_type or
ins->get_shape().type() == shape::half_type);
instruction_ref quant_ins{};
quant_ins = prog.insert_instruction(std::next(ins), op::convert{type}, ins);
auto insert_loc = std::next(ins);
if(type == shape::int8_type)
{
auto scaled_ins = ins;
if(scale != 1.0f)
{
auto float_ins = scaled_ins;
if(scaled_ins->get_shape().type() != shape::float_type)
{
float_ins =
prog.insert_instruction(insert_loc, op::convert{shape::float_type}, scaled_ins);
}
std::vector<float> vec_scale(scaled_ins->get_shape().elements(), scale);
auto l_scale = prog.add_literal(literal(float_ins->get_shape(), vec_scale));
scaled_ins = prog.insert_instruction(insert_loc, op::mul{}, l_scale, float_ins);
}
auto shifted_ins = scaled_ins;
if(shift != 0.0f)
{
auto float_ins = shifted_ins;
if(shifted_ins->get_shape().type() != shape::float_type)
{
float_ins = prog.insert_instruction(
insert_loc, op::convert{shape::float_type}, shifted_ins);
}
std::vector<float> vec_shift(shifted_ins->get_shape().elements(), shift);
auto l_shift = prog.add_literal(literal(float_ins->get_shape(), vec_shift));
shifted_ins = prog.insert_instruction(insert_loc, op::add{}, l_shift, float_ins);
}
auto rounded_ins = prog.insert_instruction(insert_loc, op::round{}, shifted_ins);
auto clipped_ins =
prog.insert_instruction(insert_loc, op::clip{127.0f, -128.0f}, rounded_ins);
quant_ins = prog.insert_instruction(insert_loc, op::convert{type}, clipped_ins);
}
else
{
quant_ins = prog.insert_instruction(insert_loc, op::convert{type}, ins);
}
map_ins[ins] = quant_ins;
return quant_ins;
......@@ -50,7 +100,7 @@ instruction_ref insert_quant_ins(program& prog,
// For the conversion there could be cases of overflow, but that
// is very rare in deep learning, so we simply truncate the
// input to get the fp16.
void quantize_fp16(program& prog, const std::vector<std::string>& ins_names)
{
std::unordered_map<instruction_ref, instruction_ref> map_fp16;
for(auto ins : iterator_for(prog))
......@@ -115,7 +165,288 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
}
}
void quantize(program& prog) { quantize(prog, {"all"}); }
static void ins_quantize_int8(program& prog,
instruction_ref ins,
std::vector<instruction_ref>& converted_inputs,
const std::vector<std::pair<float, float>>& ins_quant_params)
{
auto orig_type = ins->get_shape().type();
auto inputs = ins->inputs();
if(ins->name() == "dot")
{
auto dot_op = any_cast<op::dot>(ins->get_operator());
float new_alpha = dot_op.alpha / (ins_quant_params[0].first * ins_quant_params[1].first);
float new_beta = dot_op.beta;
// Check the rescaled alpha and beta values: if their magnitudes are at
// least 50 (a provisional threshold), rounding them to integers keeps the
// relative rounding error small, so they can be used directly in quant_dot
float threshold = 50.0f;
if(fabs(new_alpha) >= threshold && fabs(new_beta) >= threshold)
{
int32_t quant_alpha = static_cast<int32_t>(std::round(new_alpha));
int32_t quant_beta = static_cast<int32_t>(std::round(new_beta));
if(shape::int32_type == orig_type)
{
prog.replace_instruction(
ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
}
else
{
auto quant_dot = prog.insert_instruction(
ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
prog.replace_instruction(ins, op::convert{orig_type}, quant_dot);
}
}
// otherwise, alpha or beta cannot be rounded to an integer without a
// large relative rounding error
else
{
if(converted_inputs.size() == 3)
{
converted_inputs.pop_back();
}
auto q_dot = prog.insert_instruction(ins, op::quant_dot{1, 0}, converted_inputs);
auto f_dot = prog.insert_instruction(ins, op::convert{shape::float_type}, q_dot);
auto c_shape = q_dot->get_shape();
std::vector<float> vec_alpha(c_shape.elements(), new_alpha);
auto l_alpha =
prog.add_literal(literal({shape::float_type, c_shape.lens()}, vec_alpha));
if(inputs.size() == 3 and dot_op.beta != 0.0f)
{
auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, f_dot);
std::vector<float> vec_beta(c_shape.elements(), dot_op.beta);
auto l_beta =
prog.add_literal(literal({shape::float_type, c_shape.lens()}, vec_beta));
instruction_ref beta_c{};
if(orig_type != shape::float_type)
{
auto fp32_c =
prog.insert_instruction(ins, op::convert{shape::float_type}, inputs.back());
beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, fp32_c);
}
else
{
beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
}
if(orig_type == shape::float_type)
{
prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
}
else
{
auto f_res = prog.insert_instruction(ins, op::add{}, alpha_ab, beta_c);
prog.replace_instruction(ins, op::convert{orig_type}, f_res);
}
}
else
{
if(orig_type == shape::float_type)
{
prog.replace_instruction(ins, op::mul{}, l_alpha, f_dot);
}
else
{
auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, f_dot);
prog.replace_instruction(ins, op::convert{orig_type}, alpha_ab);
}
}
}
}
else if(ins->name() == "convolution")
{
// Current MIOpen convolution does not support alpha and beta,
// so we need a separate multiply to adjust the output
auto conv_op = any_cast<op::convolution>(ins->get_operator());
auto padding = conv_op.padding;
auto stride = conv_op.stride;
auto dilation = conv_op.dilation;
auto padding_mode = conv_op.padding_mode;
auto group = conv_op.group;
auto adjust_factor =
std::round(1.0f / (ins_quant_params[0].first * ins_quant_params[1].first));
auto quant_conv = prog.insert_instruction(
ins,
op::quant_convolution{padding, stride, dilation, padding_mode, group},
converted_inputs);
float threshold = 50.0f;
std::vector<float> vec_factor(quant_conv->get_shape().elements(), adjust_factor);
if(quant_conv->get_shape().type() == orig_type and adjust_factor >= threshold)
{
auto l_factor = prog.add_literal(
literal(quant_conv->get_shape(), vec_factor.begin(), vec_factor.end()));
prog.replace_instruction(ins, op::mul{}, quant_conv, l_factor);
}
// convert the quant_conv output to float, multiply by the factor and
// convert back to the original type
else
{
auto float_conv =
prog.insert_instruction(ins, op::convert{shape::float_type}, quant_conv);
auto l_factor = prog.add_literal(literal(float_conv->get_shape(), vec_factor));
if(orig_type == shape::float_type)
{
prog.replace_instruction(ins, op::mul{}, l_factor, float_conv);
}
else
{
auto adjusted_conv = prog.insert_instruction(ins, op::mul{}, l_factor, float_conv);
prog.replace_instruction(ins, op::convert{orig_type}, adjusted_conv);
}
}
}
else
{
MIGRAPHX_THROW("QUANTIZE_INT8: does not support operator " + ins->name());
}
}
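// Worked example of the dot path above, taken from the dot_float test case in
// this change: op::dot{2.0f, 1.5f} with per-input scales of 0.1f gives
// new_alpha = 2.0 / (0.1 * 0.1) = 200 >= 50 but new_beta = 1.5 < 50, so the
// non-integer branch is used: quant_dot{1, 0}, a convert back to float, and
// explicit mul/add instructions with 200.0f and 1.5f literals.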
// int8 quantization differs from fp16 since int8 can only represent values
// in [-128, 127]. To convert a float or double to int8 we need a scale and
// a shift, and the conversion is then v_int8 = fp * scale + shift.
// To simplify the changes, the shift is taken as 0.0f for now.
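// As a concrete illustration (numbers chosen only for exposition): with
// scale = 20.0 and shift = 0.0, an fp32 value of 3.14 becomes
// round(3.14 * 20.0) = 63, while 7.0 would saturate to 127 after the clip
// step in insert_quant_ins above.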
void quantize_int8_impl(program& prog,
const std::vector<std::pair<float, float>>& quant_params,
const std::vector<std::string>& ins_names)
{
if(enabled(MIGRAPHX_INT8_QUANTIZATION_PARAMS{}))
{
for(std::size_t i = 0; i < quant_params.size(); ++i)
{
auto param = quant_params.at(i);
std::cout << "ins_index = " << i << ", scale = " << param.first
<< ", shift = " << param.second << std::endl;
}
std::cout << std::endl;
}
// For now, we only support the int8 quantization of gemm and convolution
std::set<std::string> op_names = {"convolution", "dot"};
std::set<std::string> input_ins_names(ins_names.begin(), ins_names.end());
if(!std::includes(
op_names.begin(), op_names.end(), input_ins_names.begin(), input_ins_names.end()))
{
MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
}
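// Note: std::includes requires both input ranges to be sorted; iterating the
// two std::set containers above satisfies that precondition.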
std::size_t quant_param_index = 0;
std::unordered_map<instruction_ref, instruction_ref> map_quant_ins;
std::unordered_map<instruction_ref, std::size_t> map_ins_index;
for(auto ins : iterator_for(prog))
{
if(not contains(ins_names, ins->name()))
{
continue;
}
// the dot operator can take 2 or 3 input arguments; if the 3rd
// argument is present, it is converted to int32.
std::vector<instruction_ref> converted_inputs;
// process all inputs: if an input is fp32 or fp64, convert it
// to an int8 type by adding a convert operator, then replace
// the operator with its corresponding int8 version
auto inputs = ins->inputs();
std::vector<std::pair<float, float>> ins_quant_params;
for(auto input : inputs)
{
// assign a quantization-parameter index to each input, reusing the
// index if the input has been seen before
std::size_t ins_index =
(map_ins_index.count(input) > 0) ? map_ins_index[input] : quant_param_index++;
map_ins_index[input] = ins_index;
auto param = quant_params[map_ins_index[input]];
ins_quant_params.push_back(param);
// In general, the target_type is int8, but for the dot
// operation, if it has 3 inputs, then the last one should
// be converted to int32_type
shape::type_t quant_type = shape::int8_type;
if((ins->name() == "dot") and (inputs.size() == 3) and (input == inputs.back()))
{
quant_type = shape::int32_type;
}
auto s = input->get_shape();
if((s.type() == shape::float_type or s.type() == shape::double_type or
s.type() == shape::half_type or s.type() == shape::int32_type) and
s.type() != quant_type)
{
// if the input is a convert operator whose source already has the
// quantized type, use the convert's input directly
instruction_ref quant_input{};
if(input->name() == "convert" and
input->inputs().front()->get_shape().type() == quant_type)
{
quant_input = input->inputs().front();
// the computed scale is not used in this case, so set the scale
// to 1.0f for this parameter
ins_quant_params.back() = std::pair<float, float>(1.0f, 0.0f);
}
else
{
quant_input = insert_quant_ins(
prog, input, quant_type, map_quant_ins, param.first, param.second);
}
converted_inputs.push_back(quant_input);
}
else
{
converted_inputs.push_back(input);
}
}
// no change for the input, go to the next instruction
if(inputs == converted_inputs)
{
continue;
}
ins_quantize_int8(prog, ins, converted_inputs, ins_quant_params);
}
if(quant_param_index != quant_params.size())
{
MIGRAPHX_THROW("QUANTIZE_INT8: number of scales does not match");
}
}
void quantize_int8(program& prog,
const target& t,
std::vector<program::parameter_map>& calibration,
const std::vector<std::string>& ins_names)
{
// insert capture operator
auto cap_prog = prog;
auto int8_quant_params = capture_arguments(cap_prog, t, ins_names);
cap_prog.compile(t);
// run the compiled program on all calibration data to compute the
// quantization scale and shift for each captured argument
for(auto&& arg : calibration)
{
program::parameter_map m;
for(auto&& x : cap_prog.get_parameter_shapes())
{
if(arg.count(x.first) > 0)
{
assert(x.second == arg[x.first].get_shape());
m[x.first] = t.copy_to(arg[x.first]);
}
else
{
m[x.first] = t.allocate(x.second);
}
}
cap_prog.eval(m);
}
quantize_int8_impl(prog, *int8_quant_params, ins_names);
}
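// A condensed sketch of the expected calling sequence (it follows the
// int8_quantization test added in this change; the program contents and
// calibration data are placeholders):
//
//   migraphx::program p = ...;                   // contains dot/convolution
//   migraphx::target t = migraphx::gpu::target{};
//   std::vector<migraphx::program::parameter_map> calibration = {...};
//   migraphx::quantize_int8(p, t, calibration);  // rewrites p in place
//   p.compile(t);                                // then compile and run as usual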
// For each input argument of the instructions to be quantized, we insert a
// capture operator so the scale and shift can be computed
......@@ -126,10 +457,10 @@ std::size_t capture_arguments(program& prog,
size_t num_quant_params = 0;
// the int8 quantization only supports dot and convolution
std::set<std::string> op_names = {"dot", "convolution"};
std::set<std::string> input_ins_names(ins_names.begin(), ins_names.end());
if(!std::includes(
op_names.begin(), op_names.end(), input_ins_names.begin(), input_ins_names.end()))
{
MIGRAPHX_THROW("CAPTURE_ARGUMENTS: input operator is not supported");
}
......@@ -166,26 +497,35 @@ std::size_t capture_arguments(program& prog,
}
std::shared_ptr<std::vector<std::pair<float, float>>>
capture_arguments_impl(program& prog, const target& t, const std::vector<std::string>& ins_names)
{
std::shared_ptr<std::vector<std::pair<float, float>>> int8_quant_params =
std::make_shared<std::vector<std::pair<float, float>>>();
std::shared_ptr<std::vector<float>> max_abs_vals = std::make_shared<std::vector<float>>();
auto calc_quant_params = [int8_quant_params, max_abs_vals, &t](std::size_t ins_index,
std::vector<argument> args) {
std::pair<float, float> param_pair{64.0f, 0.0f};
// scale and shift are needed only for the int8 type, and we do not
// consider shift for now, so it is set to 0
std::vector<float> vec_val;
args.front().visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
argument arg = t.copy_from(args.front());
arg.visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
auto max_val = *std::max_element(vec_val.begin(), vec_val.end());
auto min_val = *std::min_element(vec_val.begin(), vec_val.end());
auto max_abs = std::max(std::fabs(max_val), std::fabs(min_val));
max_abs_vals->at(ins_index) = std::max(max_abs_vals->at(ins_index), max_abs);
param_pair.first = 127.0f / max_abs_vals->at(ins_index);
// if all values are 0, no need to do scaling
if(max_abs_vals->at(ins_index) == 0.0f)
{
param_pair.first = 1.0f;
}
else
{
param_pair.first = 127.0f / max_abs_vals->at(ins_index);
}
int8_quant_params->at(ins_index) = param_pair;
};
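// For example (illustrative numbers): if the largest magnitude observed for an
// input across all calibration runs is 6.35, the recorded scale is
// 127.0 / 6.35 = 20.0, which maps the observed range onto the full int8 range.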
......@@ -197,11 +537,5 @@ capture_arguments(program& prog, const std::vector<std::string>& ins_names)
return int8_quant_params;
}
std::shared_ptr<std::vector<std::pair<float, float>>> capture_arguments(program& prog)
{
std::vector<std::string> ins_names = {"dot", "convolution"};
return capture_arguments(prog, ins_names);
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -15,6 +15,10 @@ struct target
std::string name() const;
std::vector<pass> get_passes(migraphx::context& ctx) const;
migraphx::context get_context() const { return context{}; }
argument copy_to(const argument& arg) const { return arg; }
argument copy_from(const argument& arg) const { return arg; }
argument allocate(const shape& s) const;
};
} // namespace cpu
......
......@@ -5,6 +5,7 @@
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/generate.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -22,6 +23,8 @@ std::vector<pass> target::get_passes(migraphx::context&) const
dead_code_elimination{}};
}
argument target::allocate(const shape& s) const { return fill_argument(s, 0); }
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -13,6 +13,10 @@ struct target
std::string name() const;
std::vector<pass> get_passes(migraphx::context& gctx) const;
migraphx::context get_context() const;
argument copy_to(const argument& arg) const;
argument copy_from(const argument& arg) const;
argument allocate(const shape& s) const;
};
} // namespace gpu
......
......@@ -85,6 +85,13 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const
std::string target::name() const { return "miopen"; }
migraphx::context target::get_context() const { return context{}; }
argument target::copy_to(const argument& arg) const { return gpu::to_gpu(arg); }
argument target::copy_from(const argument& arg) const { return gpu::from_gpu(arg); }
argument target::allocate(const shape& s) const { return gpu::allocate_gpu(s); }
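// With these three entry points the int8 calibration flow stays target
// agnostic: quantize_int8 copies calibration inputs to the device with copy_to
// and allocates the remaining parameters with allocate, while
// capture_arguments_impl reads the captured values back with copy_from.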
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -1821,7 +1821,7 @@ TEST_CASE(fp32_fp16_test)
auto test_case = [&](std::vector<std::string>&& op_names) {
std::vector<float> gold_res = {2.0, 4.0, 6.0, 8.0, 10.0, 12.0};
auto p = create_program();
migraphx::quantize(p, op_names);
migraphx::quantize_fp16(p, op_names);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> res;
......@@ -2067,7 +2067,8 @@ TEST_CASE(op_capture)
p.add_instruction(migraphx::op::dot{}, pa, ps);
migraphx::program capture_p = p;
migraphx::capture_arguments(capture_p);
migraphx::target t = migraphx::cpu::target{};
migraphx::capture_arguments(capture_p, t, {"dot"});
p.compile(migraphx::cpu::target{});
capture_p.compile(migraphx::cpu::target{});
......
......@@ -3694,7 +3694,7 @@ struct test_fp32_fp16_lall : verify_program<test_fp32_fp16_lall>
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_parameter("p2", s);
p.add_instruction(migraphx::op::add{}, l1, l2);
migraphx::quantize(p, {"all"});
migraphx::quantize_fp16(p, {"all"});
return p;
};
};
......@@ -3710,7 +3710,7 @@ struct test_fp32_fp16_ladd : verify_program<test_fp32_fp16_ladd>
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_parameter("p2", s);
p.add_instruction(migraphx::op::add{}, l1, l2);
migraphx::quantize(p, {"add"});
migraphx::quantize_fp16(p, {"add"});
return p;
};
};
......@@ -3726,7 +3726,7 @@ struct test_fp32_fp16_add : verify_program<test_fp32_fp16_add>
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
p.add_instruction(migraphx::op::add{}, diff, p1);
migraphx::quantize(p, {"add"});
migraphx::quantize_fp16(p, {"add"});
return p;
};
......@@ -3743,7 +3743,7 @@ struct test_fp32_fp16_sub : verify_program<test_fp32_fp16_sub>
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
p.add_instruction(migraphx::op::add{}, diff, p1);
migraphx::quantize(p, {"sub"});
migraphx::quantize_fp16(p, {"sub"});
return p;
};
......@@ -3851,11 +3851,12 @@ struct test_round : verify_program<test_round>
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3, 4, 6}};
auto param = p.add_parameter("x", s);
p.add_instruction(migraphx::op::round{}, param);
return p;
}
};
struct test_convert : verify_program<test_convert>
......
#include <iostream>
#include <vector>
#include <migraphx/operators.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/verify.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/onnx.hpp>
#include "test.hpp"
#include <migraphx/half.hpp>
TEST_CASE(target_copy)
{
auto run_prog = [](migraphx::program p,
const migraphx::target& t,
migraphx::program::parameter_map& m_in,
std::vector<float>& res) {
p.compile(t);
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
if(m_in.count(x.first) > 0)
{
m[x.first] = t.copy_to(m_in[x.first]);
}
else
{
m[x.first] = t.allocate(x.second);
}
}
auto result = t.copy_from(p.eval(m));
result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
};
auto create_program = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
p.add_instruction(migraphx::op::add{}, p1, p2);
return p;
};
{
auto p = create_program();
migraphx::program::parameter_map m;
migraphx::shape s{migraphx::shape::float_type, {3, 3}};
m["x"] = migraphx::generate_argument(s);
std::vector<float> cpu_result;
migraphx::target cpu_t = migraphx::cpu::target{};
run_prog(p, cpu_t, m, cpu_result);
std::vector<float> gpu_result;
migraphx::target gpu_t = migraphx::gpu::target{};
run_prog(p, gpu_t, m, gpu_result);
EXPECT(migraphx::verify_range(cpu_result, gpu_result));
}
}
TEST_CASE(int8_quantization)
{
auto run_prog = [](migraphx::program p,
const migraphx::target& t,
migraphx::program::parameter_map& m_in,
std::vector<float>& res) {
std::vector<migraphx::program::parameter_map> cali_data;
cali_data.push_back(m_in);
migraphx::quantize_int8(p, t, cali_data);
p.compile(t);
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
if(m_in.count(x.first) > 0)
{
m[x.first] = t.copy_to(m_in[x.first]);
}
else
{
m[x.first] = t.allocate(x.second);
}
}
auto result = t.copy_from(p.eval(m));
result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
};
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{}, pa, pb, pc);
return p;
};
{
auto p = create_program();
migraphx::program::parameter_map m;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
m["a"] = migraphx::generate_argument(sa);
m["c"] = migraphx::generate_argument(sc);
std::vector<float> cpu_result;
migraphx::target cpu_t = migraphx::cpu::target{};
run_prog(p, cpu_t, m, cpu_result);
std::vector<float> gpu_result;
migraphx::target gpu_t = migraphx::gpu::target{};
run_prog(p, gpu_t, m, gpu_result);
EXPECT(migraphx::verify_range(cpu_result, gpu_result));
}
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
#include <iostream>
#include <vector>
#include <migraphx/literal.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/verify.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/onnx.hpp>
#include "test.hpp"
#include <migraphx/half.hpp>
TEST_CASE(param_add)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
p.add_instruction(migraphx::op::add{}, p1, p2);
return p;
};
auto create_program_half = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(std::next(p1), migraphx::op::convert{}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(std::next(p2), migraphx::op::convert{}, p2);
auto hs = p.add_instruction(migraphx::op::add{}, hp1, hp2);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize_fp16(p1);
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize_fp16(p1, {"add"});
EXPECT(p1 == p2);
}
}
TEST_CASE(param_add_sub)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
p.add_instruction(migraphx::op::add{}, diff, p1);
return p;
};
auto create_program_half_add = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(
std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto hsum = p.add_instruction(migraphx::op::add{}, hp1, hp2);
auto sum = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hsum);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
auto hdiff = p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, diff);
auto res = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, res);
return p;
};
auto create_program_half_sub = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto hsum = p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, sum);
auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
auto diff = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hdiff);
p.add_instruction(migraphx::op::add{}, diff, p1);
return p;
};
auto create_program_half_all = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(
std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto hsum = p.add_instruction(migraphx::op::add{}, hp1, hp2);
auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
auto hres = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hres);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half_add();
migraphx::quantize_fp16(p1, {"add"});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half_sub();
migraphx::quantize_fp16(p1, {"sub"});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half_all();
migraphx::quantize_fp16(p1);
migraphx::run_passes(p1, {migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
}
TEST_CASE(literal_add)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
std::vector<float> data(2 * 3);
std::iota(data.begin(), data.end(), 1.0f);
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_literal(migraphx::literal(s, data));
p.add_instruction(migraphx::op::add{}, l1, l2);
return p;
};
auto create_program_half = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::half_type, {2, 3}};
std::vector<migraphx::half> data(2 * 3);
std::iota(data.begin(), data.end(), 1.0f);
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_literal(migraphx::literal(s, data));
auto hs = p.add_instruction(migraphx::op::add{}, l1, l2);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize_fp16(p1, {"all"});
migraphx::run_passes(p1,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
migraphx::run_passes(p2,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize_fp16(p1, {"add"});
migraphx::run_passes(p1,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
migraphx::run_passes(p2,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
}
TEST_CASE(op_capture)
{
auto test_func = [&](std::size_t ins_index, const std::vector<migraphx::argument>& args) {
(void)ins_index;
(void)args;
};
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s1{migraphx::shape::float_type, {3, 3}};
migraphx::shape s2{migraphx::shape::float_type, {3, 6}};
auto p1 = p.add_parameter("x", s1);
auto p2 = p.add_parameter("y", s1);
auto pb = p.add_parameter("b", s2);
auto pc = p.add_parameter("c", s2);
auto pa = p.add_instruction(migraphx::op::add{}, p1, p2);
auto ps = p.add_instruction(migraphx::op::dot{}, pa, pb, pc);
p.add_instruction(migraphx::op::dot{}, pa, ps);
return p;
};
auto create_program_op = [&] {
migraphx::program p;
migraphx::shape s1{migraphx::shape::float_type, {3, 3}};
migraphx::shape s2{migraphx::shape::float_type, {3, 6}};
auto p1 = p.add_parameter("x", s1);
auto p2 = p.add_parameter("y", s1);
auto pb = p.add_parameter("b", s2);
auto pc = p.add_parameter("c", s2);
auto pa = p.add_instruction(migraphx::op::add{}, p1, p2);
auto opb = p.insert_instruction(std::next(pb), migraphx::op::capture{1, test_func}, pb);
auto opc = p.insert_instruction(std::next(pc), migraphx::op::capture{2, test_func}, pc);
auto opa = p.add_instruction(migraphx::op::capture{0, test_func}, pa);
auto ps = p.add_instruction(migraphx::op::dot{}, opa, opb, opc);
auto ops = p.add_instruction(migraphx::op::capture{3, test_func}, ps);
p.add_instruction(migraphx::op::dot{}, opa, ops);
return p;
};
{
auto p = create_program_float();
auto op_capture_p = create_program_op();
migraphx::target t = migraphx::cpu::target{};
migraphx::capture_arguments(p, t, {"dot", "convolution"});
EXPECT(p == op_capture_p);
}
}
TEST_CASE(dot_float)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{2.0f, 1.5f}, pa, pb, pc);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfa(sa.elements(), 0.1f);
auto fa = p.add_literal(migraphx::literal(sa, vfa));
auto ma = p.add_instruction(migraphx::op::mul{}, fa, pa);
auto ra = p.add_instruction(migraphx::op::round{}, ma);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal(sb, vfb));
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, pb);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qb);
auto fdot = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
std::vector<float> v_alpha(fdot->get_shape().elements(), 200.0f);
auto new_alpha = p.add_literal(migraphx::literal(fdot->get_shape(), v_alpha));
auto alpha_ab = p.add_instruction(migraphx::op::mul{}, new_alpha, fdot);
std::vector<float> v_beta(pc->get_shape().elements(), 1.5f);
auto beta = p.add_literal(migraphx::literal(pc->get_shape(), v_beta));
auto beta_c = p.add_instruction(migraphx::op::mul{}, beta, pc);
p.add_instruction(migraphx::op::add{}, alpha_ab, beta_c);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{
{0.1f, 0.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
migraphx::run_passes(p, {migraphx::dead_code_elimination{}});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_double_2args)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::double_type, {2, 16}};
migraphx::shape sb{migraphx::shape::double_type, {16, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
p.add_instruction(migraphx::op::dot{2.0f, 1.5f}, pa, pb);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::double_type, {2, 16}};
migraphx::shape sb{migraphx::shape::double_type, {16, 8}};
migraphx::shape sc{migraphx::shape::double_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfa(sa.elements(), 0.1f);
auto fpa = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
auto fa = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vfa));
auto ma = p.add_instruction(migraphx::op::mul{}, fa, fpa);
auto ra = p.add_instruction(migraphx::op::round{}, ma);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
auto fpb = p.insert_instruction(
insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, fpb);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qb);
auto fdot = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
std::vector<float> v_alpha(fdot->get_shape().elements(), 200.0f);
auto new_alpha = p.add_literal(migraphx::literal(fdot->get_shape(), v_alpha));
auto alpha_ab = p.add_instruction(migraphx::op::mul{}, new_alpha, fdot);
p.add_instruction(migraphx::op::convert{migraphx::shape::double_type}, alpha_ab);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_large_alpha_beta_float)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{20.0f, 50.5f}, pa, pb, pc);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfa(sa.elements(), 0.1f);
auto fa = p.add_literal(migraphx::literal(sa, vfa));
auto ma = p.add_instruction(migraphx::op::mul{}, fa, pa);
// add the shift
std::vector<float> vsa(sa.elements(), 1.0f);
auto sfta = p.add_literal(migraphx::literal(sa, vsa));
auto msa = p.add_instruction(migraphx::op::add{}, sfta, ma);
auto ra = p.add_instruction(migraphx::op::round{}, msa);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal(sb, vfb));
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, pb);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
// quantize parameter c to int32 type
auto qc = p.insert_instruction(
std::next(pc), migraphx::op::convert{migraphx::shape::int32_type}, pc);
auto qdot = p.add_instruction(migraphx::op::quant_dot{2000, 51}, qa, qb, qc);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{
{0.1f, 1.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_large_alpha_beta_int32)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{20.0f, 50.0f}, pa, pb, pc);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfa(sa.elements(), 0.1f);
auto fa = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vfa));
auto conv_a = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
auto ma = p.add_instruction(migraphx::op::mul{}, fa, conv_a);
// add the shift
std::vector<float> vsa(sa.elements(), 1.0f);
auto sfta = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vsa));
auto msa = p.add_instruction(migraphx::op::add{}, sfta, ma);
auto ra = p.add_instruction(migraphx::op::round{}, msa);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
auto conv_b = p.insert_instruction(
insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pb);
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, conv_b);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
p.add_instruction(migraphx::op::quant_dot{2000, 50}, qa, qb, pc);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{
{0.1f, 1.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_int32_one_arg)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::int32_type, {16, 16}};
auto pa = p.add_parameter("a", s);
p.add_instruction(migraphx::op::dot{20.0f, 50.0f}, pa, pa);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::int32_type, {16, 16}};
auto pa = p.add_parameter("a", s);
// add the shift
auto fpa = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
std::vector<float> vsa(s.elements(), 1.0f);
auto sfta = p.add_literal(migraphx::literal({migraphx::shape::float_type, s.lens()}, vsa));
auto msa = p.add_instruction(migraphx::op::add{}, sfta, fpa);
auto ra = p.add_instruction(migraphx::op::round{}, msa);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
auto q_dot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qa);
auto f_dot = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, q_dot);
std::vector<float> v_alpha(f_dot->get_shape().elements(), 20.0f);
auto new_alpha = p.add_literal(migraphx::literal{f_dot->get_shape(), v_alpha});
auto alpha_ab = p.add_instruction(migraphx::op::mul{}, new_alpha, f_dot);
p.add_instruction(migraphx::op::convert{migraphx::shape::int32_type}, alpha_ab);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{{1.0f, 1.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_int32)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{2.0f, 5.5f}, pa, pb, pc);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int32_type, {2, 16}};
migraphx::shape sb{migraphx::shape::int32_type, {16, 8}};
migraphx::shape sc{migraphx::shape::int32_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfa(sa.elements(), 0.1f);
auto fa = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vfa));
auto conv_a = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
auto ma = p.add_instruction(migraphx::op::mul{}, fa, conv_a);
// add the shift
std::vector<float> vsa(sa.elements(), 1.0f);
auto sfta = p.add_literal(migraphx::literal({migraphx::shape::float_type, sa.lens()}, vsa));
auto msa = p.add_instruction(migraphx::op::add{}, sfta, ma);
auto ra = p.add_instruction(migraphx::op::round{}, msa);
auto ca = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, ra);
auto qa = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, ca);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
auto conv_b = p.insert_instruction(
insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pb);
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, conv_b);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, qa, qb);
auto fr = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
std::vector<float> v_alpha(fr->get_shape().elements(), 20.0f);
auto new_alpha = p.add_literal(migraphx::literal(fr->get_shape(), v_alpha));
auto alpha_ab = p.add_instruction(migraphx::op::mul{}, new_alpha, fr);
auto fc = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pc);
std::vector<float> v_beta(fc->get_shape().elements(), 5.5f);
auto beta = p.add_literal(migraphx::literal(fc->get_shape(), v_beta));
auto beta_c = p.add_instruction(migraphx::op::mul{}, beta, fc);
auto f_res = p.add_instruction(migraphx::op::add{}, alpha_ab, beta_c);
p.add_instruction(migraphx::op::convert{migraphx::shape::int32_type}, f_res);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{
{0.1f, 1.0f}, {0.1f, 0.0f}, {0.1f, 100.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(dot_float_convert)
{
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int8_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto fpa = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, pa);
p.add_instruction(migraphx::op::dot{2.0f, 5.5f}, fpa, pb);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::int8_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
// quantize parameter b to int8 type
auto insert_loc = std::next(pb);
std::vector<float> vfb(sb.elements(), 0.1f);
auto fb = p.add_literal(migraphx::literal({migraphx::shape::float_type, sb.lens()}, vfb));
auto mb = p.insert_instruction(insert_loc, migraphx::op::mul{}, fb, pb);
auto rb = p.insert_instruction(insert_loc, migraphx::op::round{}, mb);
auto cb = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rb);
auto qb =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cb);
auto qdot = p.add_instruction(migraphx::op::quant_dot{1, 0}, pa, qb);
auto fr = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, qdot);
std::vector<float> v_alpha(fr->get_shape().elements(), 10.0f);
auto new_alpha = p.add_literal(migraphx::literal(fr->get_shape(), v_alpha));
p.add_instruction(migraphx::op::mul{}, new_alpha, fr);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{{0.1f, 1.0f}, {0.1f, 0.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"dot"});
migraphx::run_passes(p, {migraphx::dead_code_elimination{}});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(conv_float)
{
auto create_program = [] {
migraphx::program p;
auto input =
p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3}});
auto weights =
p.add_parameter("w", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3}});
p.add_instruction(migraphx::op::convolution{}, input, weights);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sx{migraphx::shape::float_type, {4, 3, 3, 3}};
migraphx::shape sw{migraphx::shape::float_type, {4, 3, 3, 3}};
auto px = p.add_parameter("x", sx);
auto pw = p.add_parameter("w", sw);
// quantize parameter a to int8 type, multiply the scale
std::vector<float> vfx(sx.elements(), 0.1f);
auto fx = p.add_literal(migraphx::literal(sx, vfx));
auto mx = p.add_instruction(migraphx::op::mul{}, fx, px);
auto rx = p.add_instruction(migraphx::op::round{}, mx);
auto cx = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, rx);
auto qx = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, cx);
// quantize parameter b to int8 type
auto insert_loc = std::next(pw);
std::vector<float> vfw(sw.elements(), 0.1f);
auto fw = p.add_literal(migraphx::literal(sw, vfw));
auto mw = p.insert_instruction(insert_loc, migraphx::op::mul{}, fw, pw);
auto rw = p.insert_instruction(insert_loc, migraphx::op::round{}, mw);
auto cw = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rw);
auto qw =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cw);
auto q_conv = p.add_instruction(migraphx::op::quant_convolution{}, qx, qw);
auto f_conv = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, q_conv);
std::vector<float> v_adj(f_conv->get_shape().elements(), 100.0f);
auto adj = p.add_literal(migraphx::literal(f_conv->get_shape(), v_adj));
p.add_instruction(migraphx::op::mul{}, adj, f_conv);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>>& quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"convolution"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(conv_int32)
{
auto create_program = [] {
migraphx::program p;
auto input =
p.add_parameter("x", migraphx::shape{migraphx::shape::int32_type, {4, 3, 3, 3}});
auto weights =
p.add_parameter("w", migraphx::shape{migraphx::shape::int32_type, {4, 3, 3, 3}});
p.add_instruction(migraphx::op::convolution{}, input, weights);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sx{migraphx::shape::int32_type, {4, 3, 3, 3}};
migraphx::shape sw{migraphx::shape::int32_type, {4, 3, 3, 3}};
auto px = p.add_parameter("x", sx);
auto pw = p.add_parameter("w", sw);
// quantize parameter a to int8 type, multiply the scale
auto fpx = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, px);
std::vector<float> vfx(sx.elements(), 0.1f);
auto fx = p.add_literal(migraphx::literal(fpx->get_shape(), vfx));
auto mx = p.add_instruction(migraphx::op::mul{}, fx, fpx);
auto rx = p.add_instruction(migraphx::op::round{}, mx);
auto cx = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, rx);
auto qx = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, cx);
// quantize parameter w to int8 type
auto insert_loc = std::next(pw);
auto fpw = p.insert_instruction(
insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pw);
std::vector<float> vfw(sw.elements(), 0.1f);
auto fw = p.add_literal(migraphx::literal(fpw->get_shape(), vfw));
auto mw = p.insert_instruction(insert_loc, migraphx::op::mul{}, fw, fpw);
auto rw = p.insert_instruction(insert_loc, migraphx::op::round{}, mw);
auto cw = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rw);
auto qw =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cw);
auto q_conv = p.add_instruction(migraphx::op::quant_convolution{}, qx, qw);
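// The original program computes in int32, so the rescale factor
// (1 / (0.1 * 0.1) = 100) is applied directly to the int32 quant_convolution
// output; no conversion back to float is needed.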
std::vector<float> v_adj(q_conv->get_shape().elements(), 100.0f);
auto adj = p.add_literal(migraphx::literal(q_conv->get_shape(), v_adj));
p.add_instruction(migraphx::op::mul{}, q_conv, adj);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>> quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"convolution"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(conv_half)
{
auto create_program = [] {
migraphx::program p;
auto input =
p.add_parameter("x", migraphx::shape{migraphx::shape::half_type, {4, 3, 3, 3}});
auto weights =
p.add_parameter("w", migraphx::shape{migraphx::shape::half_type, {4, 3, 3, 3}});
p.add_instruction(migraphx::op::convolution{}, input, weights);
return p;
};
auto create_int8_quantized_prog = [] {
migraphx::program p;
migraphx::shape sx{migraphx::shape::half_type, {4, 3, 3, 3}};
migraphx::shape sw{migraphx::shape::half_type, {4, 3, 3, 3}};
auto px = p.add_parameter("x", sx);
auto pw = p.add_parameter("w", sw);
// quantize parameter x to int8 type, multiply the scale
auto fpx = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, px);
std::vector<float> vfx(sx.elements(), 0.1f);
auto fx = p.add_literal(migraphx::literal(fpx->get_shape(), vfx));
auto mx = p.add_instruction(migraphx::op::mul{}, fx, fpx);
auto rx = p.add_instruction(migraphx::op::round{}, mx);
auto cx = p.add_instruction(migraphx::op::clip{127.0f, -128.0f}, rx);
auto qx = p.add_instruction(migraphx::op::convert{migraphx::shape::int8_type}, cx);
// quantize parameter w to int8 type
auto insert_loc = std::next(pw);
auto fpw = p.insert_instruction(
insert_loc, migraphx::op::convert{migraphx::shape::float_type}, pw);
std::vector<float> vfw(sw.elements(), 0.1f);
auto fw = p.add_literal(migraphx::literal(fpw->get_shape(), vfw));
auto mw = p.insert_instruction(insert_loc, migraphx::op::mul{}, fw, fpw);
auto rw = p.insert_instruction(insert_loc, migraphx::op::round{}, mw);
auto cw = p.insert_instruction(insert_loc, migraphx::op::clip{127.0f, -128.0f}, rw);
auto qw =
p.insert_instruction(insert_loc, migraphx::op::convert{migraphx::shape::int8_type}, cw);
auto q_conv = p.add_instruction(migraphx::op::quant_convolution{}, qx, qw);
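// The int32 result is dequantized in float and then converted back to
// half_type so the quantized program keeps the original output type.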
auto f_conv = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, q_conv);
std::vector<float> v_adj(f_conv->get_shape().elements(), 100.0f);
auto adj = p.add_literal(migraphx::literal(f_conv->get_shape(), v_adj));
auto f_res = p.add_instruction(migraphx::op::mul{}, adj, f_conv);
p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, f_res);
return p;
};
auto p = create_program();
const std::vector<std::pair<float, float>> quant_params{{0.1f, 0.0f}, {0.1f, 0.0f}};
migraphx::quantize_int8_impl(p, quant_params, {"convolution"});
auto qp = create_int8_quantized_prog();
EXPECT(p == qp);
}
TEST_CASE(target_copy)
{
auto run_prog = [](migraphx::program p,
const migraphx::target& t,
migraphx::program::parameter_map& m_in,
std::vector<float>& res) {
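// Copy the provided inputs to the target, allocate any remaining parameters
// (e.g. the output buffer) on the target, evaluate the program, then copy
// the result back to the host.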
p.compile(t);
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
if(m_in.count(x.first) > 0)
{
m[x.first] = t.copy_to(m_in[x.first]);
}
else
{
m[x.first] = t.allocate(x.second);
}
}
auto result = t.copy_from(p.eval(m));
result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
};
auto create_program = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
p.add_instruction(migraphx::op::add{}, p1, p2);
return p;
};
{
auto p = create_program();
migraphx::program::parameter_map m;
migraphx::shape s{migraphx::shape::float_type, {3, 3}};
m["x"] = migraphx::generate_argument(s);
std::vector<float> cpu_result;
migraphx::target cpu_t = migraphx::cpu::target{};
run_prog(p, cpu_t, m, cpu_result);
std::vector<float> orig_result;
run_prog(p, cpu_t, m, orig_result);
EXPECT(migraphx::verify_range(cpu_result, orig_result));
}
}
TEST_CASE(int8_quantization)
{
auto run_prog = [](migraphx::program p,
const migraphx::target& t,
migraphx::program::parameter_map& m_in,
std::vector<float>& res,
bool b_quantize = false) {
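// When b_quantize is set, the single parameter_map is used as calibration
// data for quantize_int8 before the program is compiled; otherwise the
// original float program runs unchanged.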
if(b_quantize)
{
std::vector<migraphx::program::parameter_map> cali_data;
cali_data.push_back(m_in);
migraphx::quantize_int8(p, t, cali_data);
}
p.compile(t);
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
if(m_in.count(x.first) > 0)
{
m[x.first] = t.copy_to(m_in[x.first]);
}
else
{
m[x.first] = t.allocate(x.second);
}
}
auto result = t.copy_from(p.eval(m));
result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
};
auto create_program = [] {
migraphx::program p;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
auto pa = p.add_parameter("a", sa);
auto pb = p.add_parameter("b", sb);
auto pc = p.add_parameter("c", sc);
p.add_instruction(migraphx::op::dot{}, pa, pb, pc);
return p;
};
{
auto p = create_program();
migraphx::program::parameter_map m;
migraphx::shape sa{migraphx::shape::float_type, {2, 16}};
migraphx::shape sc{migraphx::shape::float_type, {2, 8}};
m["a"] = migraphx::generate_argument(sa);
m["c"] = migraphx::generate_argument(sc);
std::vector<float> quant_result;
migraphx::target cpu_t = migraphx::cpu::target{};
run_prog(p, cpu_t, m, quant_result, true);
std::vector<float> no_quant_result;
run_prog(p, cpu_t, m, no_quant_result);
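// int8 quantization introduces rounding error, so the quantized and float
// results are compared with verify_range's tolerance rather than exact
// equality.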
EXPECT(migraphx::verify_range(quant_result, no_quant_result));
}
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
#include <iostream>
#include <vector>
#include <migraphx/literal.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/verify.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/onnx.hpp>
#include "test.hpp"
#include <migraphx/half.hpp>
TEST_CASE(param_add)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
p.add_instruction(migraphx::op::add{}, p1, p2);
return p;
};
auto create_program_half = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto hs = p.add_instruction(migraphx::op::add{}, hp1, hp2);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize(p1);
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize(p1, {"add"});
EXPECT(p1 == p2);
}
}
TEST_CASE(param_add_sub)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
p.add_instruction(migraphx::op::add{}, diff, p1);
return p;
};
auto create_program_half_add = [] {
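// Expected program when only "add" is quantized: the adds run in half
// precision while the sub stays in float, so converts are inserted at every
// fp16/float boundary.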
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(
std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto hsum = p.add_instruction(migraphx::op::add{}, hp1, hp2);
auto sum = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hsum);
auto diff = p.add_instruction(migraphx::op::sub{}, sum, p2);
auto hdiff = p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, diff);
auto res = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, res);
return p;
};
auto create_program_half_sub = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto sum = p.add_instruction(migraphx::op::add{}, p1, p2);
auto hsum = p.add_instruction(migraphx::op::convert{migraphx::shape::half_type}, sum);
auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
auto diff = p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hdiff);
p.add_instruction(migraphx::op::add{}, diff, p1);
return p;
};
auto create_program_half_all = [] {
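// Expected program when every instruction is quantized: the whole chain runs
// in half precision and only the final result is converted back to float
// (intermediate converts disappear after dead_code_elimination).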
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
auto p1 = p.add_parameter("x", s);
auto hp1 = p.insert_instruction(
std::next(p1), migraphx::op::convert{migraphx::shape::half_type}, p1);
auto p2 = p.add_parameter("y", s);
auto hp2 = p.insert_instruction(
std::next(p2), migraphx::op::convert{migraphx::shape::half_type}, p2);
auto hsum = p.add_instruction(migraphx::op::add{}, hp1, hp2);
auto hdiff = p.add_instruction(migraphx::op::sub{}, hsum, hp2);
auto hres = p.add_instruction(migraphx::op::add{}, hdiff, hp1);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hres);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half_add();
migraphx::quantize(p1, {"add"});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half_sub();
migraphx::quantize(p1, {"sub"});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half_all();
migraphx::quantize(p1);
migraphx::run_passes(p1, {migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
}
TEST_CASE(literal_add)
{
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
std::vector<float> data(2 * 3);
std::iota(data.begin(), data.end(), 1.0f);
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_literal(migraphx::literal(s, data));
p.add_instruction(migraphx::op::add{}, l1, l2);
return p;
};
auto create_program_half = [] {
migraphx::program p;
migraphx::shape s{migraphx::shape::half_type, {2, 3}};
std::vector<migraphx::half> data(2 * 3);
std::iota(data.begin(), data.end(), 1.0f);
auto l1 = p.add_literal(migraphx::literal(s, data));
auto l2 = p.add_literal(migraphx::literal(s, data));
auto hs = p.add_instruction(migraphx::op::add{}, l1, l2);
p.add_instruction(migraphx::op::convert{migraphx::shape::float_type}, hs);
return p;
};
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize(p1, {"all"});
migraphx::run_passes(p1,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
migraphx::run_passes(p2,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
{
auto p1 = create_program_float();
auto p2 = create_program_half();
migraphx::quantize(p1, {"add"});
migraphx::run_passes(p1,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
migraphx::run_passes(p2,
{migraphx::propagate_constant{}, migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
}
TEST_CASE(op_capture)
{
auto test_func = [&](std::size_t ins_index, const std::vector<migraphx::argument>& args) {
(void)ins_index;
(void)args;
};
auto create_program_float = [] {
migraphx::program p;
migraphx::shape s1{migraphx::shape::float_type, {3, 3}};
migraphx::shape s2{migraphx::shape::float_type, {3, 6}};
auto p1 = p.add_parameter("x", s1);
auto p2 = p.add_parameter("y", s1);
auto pb = p.add_parameter("b", s2);
auto pc = p.add_parameter("c", s2);
auto pa = p.add_instruction(migraphx::op::add{}, p1, p2);
auto ps = p.add_instruction(migraphx::op::dot{}, pa, pb, pc);
p.add_instruction(migraphx::op::dot{}, pa, ps);
return p;
};
auto create_program_op = [&] {
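// Expected program after capture insertion: each input of the dot
// instructions is wrapped in a capture op, which reports the argument to
// test_func and forwards it unchanged.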
migraphx::program p;
migraphx::shape s1{migraphx::shape::float_type, {3, 3}};
migraphx::shape s2{migraphx::shape::float_type, {3, 6}};
auto p1 = p.add_parameter("x", s1);
auto p2 = p.add_parameter("y", s1);
auto pb = p.add_parameter("b", s2);
auto pc = p.add_parameter("c", s2);
auto pa = p.add_instruction(migraphx::op::add{}, p1, p2);
auto opb = p.insert_instruction(std::next(pb), migraphx::op::capture{1, test_func}, pb);
auto opc = p.insert_instruction(std::next(pc), migraphx::op::capture{2, test_func}, pc);
auto opa = p.add_instruction(migraphx::op::capture{0, test_func}, pa);
auto ps = p.add_instruction(migraphx::op::dot{}, opa, opb, opc);
auto ops = p.add_instruction(migraphx::op::capture{3, test_func}, ps);
p.add_instruction(migraphx::op::dot{}, opa, ops);
return p;
};
{
auto p = create_program_float();
auto op_capture_p = create_program_op();
migraphx::capture_arguments(p, {"dot", "convolution"}, test_func);
EXPECT(p == op_capture_p);
}
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -11,6 +11,8 @@
#include <migraphx/context.hpp>
#include <migraphx/pass.hpp>
#include <migraphx/config.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/rank.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -34,15 +36,103 @@ struct target
* @return The context to be used during compilation and execution.
*/
context get_context() const;
/**
* @brief copy an argument to the current target.
*
* @param arg Input argument to be copied to the target
* @return Argument in the target.
*/
argument copy_to(const argument& arg) const;
/**
* @brief copy an argument from the current target.
*
* @param arg Input argument to be copied from the target
* @return Argument in the host.
*/
argument copy_from(const argument& arg) const;
/**
* @brief Allocate an argument based on the input shape
*
* @param s Shape of the argument to be allocated in the target
* @return Allocated argument in the target.
*/
argument allocate(const shape& s) const;
};
#else
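// The free functions below supply default implementations for the new target
// interface members. They use rank<> tag dispatch: the rank<1> overload is
// selected when T has the corresponding member function (detected via the
// trailing decltype); otherwise overload resolution falls back to rank<0>.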
template <class T>
auto target_allocate(rank<1>, T& x, const shape& s) -> decltype(x.allocate(s))
{
return x.allocate(s);
}
template <class T>
argument target_allocate(rank<0>, T& x, const shape&)
{
std::string name = x.name();
MIGRAPHX_THROW("Not computable: " + name);
}
template <class T>
argument target_allocate(T& x, const shape& s)
{
return target_allocate(rank<1>{}, x, s);
}
template <class T>
auto copy_to_target(rank<1>, T& x, const argument& arg) -> decltype(x.copy_to(arg))
{
return x.copy_to(arg);
}
template <class T>
argument copy_to_target(rank<0>, T&, const argument& arg)
{
return arg;
}
template <class T>
argument copy_to_target(T& x, const argument& arg)
{
return copy_to_target(rank<1>{}, x, arg);
}
template <class T>
auto copy_from_target(rank<1>, T& x, const argument& arg) -> decltype(x.copy_from(arg))
{
return x.copy_from(arg);
}
template <class T>
argument copy_from_target(rank<0>, T&, const argument& arg)
{
return arg;
}
template <class T>
argument copy_from_target(T& x, const argument& arg)
{
return copy_from_target(rank<1>{}, x, arg);
}
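// The interface description below wires these helpers in as the defaults for
// copy_to, copy_from, and allocate when a concrete target does not provide
// its own overrides.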
<%
interface('target',
virtual('name', returns='std::string', const=True),
virtual('get_passes', ctx='context&', returns='std::vector<pass>', const=True),
virtual('get_context', returns='context', const=True)
virtual('name', returns='std::string', const=True),
virtual('get_passes', ctx='context&', returns='std::vector<pass>', const=True),
virtual('get_context', returns='context', const=True),
virtual('copy_to',
returns = 'argument',
input = 'const argument&',
const = True,
default = 'copy_to_target'),
virtual('copy_from',
returns = 'argument',
input = 'const argument&',
const = True,
default = 'copy_from_target'),
virtual('allocate', s='const shape&', returns='argument', const=True,
default = 'target_allocate')
)
%>
......