Commit cb555646 authored by Shucai Xiao

merge changes from int8_quantize

parents 12ccb601 4a10535c
@@ -36,7 +36,7 @@ struct argument : raw_data<argument>
     }
     /// Provides a raw pointer to the data
-    std::function<char*()> data;
+    std::function<char*()> data = nullptr;
     /// Whether data is available
     bool empty() const { return not data; }
...
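A default-constructed std::function is already empty, so the explicit `= nullptr` above mainly documents intent: `empty()` reports true until a data provider is assigned. A standalone sketch of that behavior (plain C++, not the MIGraphX type itself):

#include <cassert>
#include <functional>

int main()
{
    std::function<char*()> data = nullptr; // same state as default construction
    assert(not data);                      // "empty" in the sense of argument::empty()
    char storage[4] = {};
    data = [&] { return static_cast<char*>(storage); };
    assert(data);                          // now it provides a raw pointer
    return 0;
}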
@@ -28,23 +28,31 @@ struct binary : op_name<Derived>
     argument compute(const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
-        visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
-            if(input1.get_shape().packed() and input2.get_shape().packed())
-            {
+        auto s1 = args[0].get_shape();
+        auto s2 = args[1].get_shape();
+        if(s1 == s2 and s1.packed())
+        {
+            shape std_shape{s1.type(), s1.lens()};
+            argument std_result{std_shape, result.data()};
+            argument std_arg0{std_shape, args[0].data()};
+            argument std_arg1{std_shape, args[1].data()};
+            visit_all(std_result, std_arg0, std_arg1)([&](auto output, auto input1, auto input2) {
                 std::transform(input1.begin(),
                                input1.end(),
                                input2.begin(),
                                output.begin(),
                                static_cast<const Derived&>(*this).apply());
-            }
-            else
-            {
+            });
+        }
+        else
+        {
+            visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
                 shape_for_each(output.get_shape(), [&](const auto& idx) {
                     output(idx.begin(), idx.end()) = static_cast<const Derived&>(*this).apply()(
                         input1(idx.begin(), idx.end()), input2(idx.begin(), idx.end()));
                 });
-            }
-        });
+            });
+        }
         return result;
     }
...
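The rewrite above hoists the layout check out of the visit: when both inputs share one packed shape, the op can view all three buffers through a standard shape and do a single linear std::transform, with no per-element index math. A standalone sketch of the same split, assuming plain contiguous buffers in place of MIGraphX arguments:

#include <algorithm>
#include <cstdio>
#include <vector>

int main()
{
    std::vector<float> a = {1, 2, 3, 4};
    std::vector<float> b = {10, 20, 30, 40};
    std::vector<float> out(a.size());
    bool same_packed_layout = true; // stands in for "s1 == s2 and s1.packed()"
    if(same_packed_layout)
    {
        // fast path: one linear pass over the raw data
        std::transform(a.begin(), a.end(), b.begin(), out.begin(),
                       [](float x, float y) { return x + y; });
    }
    // (otherwise an implementation falls back to per-index addressing,
    // as shape_for_each does above)
    for(float v : out)
        std::printf("%g ", v); // 11 22 33 44
    std::printf("\n");
    return 0;
}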
@@ -23,7 +23,7 @@ struct capture
     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
-        return pack(f(self.ins_index, "instruction_index"));
+        return pack(f(self.ins_index, "ins_index"));
     }
     std::string name() const { return "capture"; }
...
@@ -20,11 +20,14 @@ namespace op {
 struct convert : unary<convert>
 {
     shape::type_t target_type = shape::half_type;
+    float scale = 1.0f;
+    float shift = 0.0f;
     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
-        return pack(f(self.target_type, "target_type"));
+        return pack(
+            f(self.target_type, "target_type"), f(self.scale, "scale"), f(self.shift, "shift"));
     }
     shape compute_shape(std::vector<shape> inputs) const
@@ -35,10 +38,22 @@ struct convert : unary<convert>
     auto apply() const
     {
-        return [](auto x) { return x; };
+        return [&](auto x) {
+            float res = scale * x + shift;
+            if(target_type == shape::int8_type)
+            {
+                int factor = (res > 0) ? 1 : -1;
+                res = res + factor * 0.5f;
+                res = res > 127.0 ? 127.0 : res;
+                res = res < -128.0 ? -128.0 : res;
+            }
+            return res;
+        };
     }
     convert(shape::type_t t) : target_type{t} {}
+    convert(shape::type_t t, float sle, float sft) : target_type{t}, scale{sle}, shift{sft} {}
     convert() {}
 };
...
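For an int8 target, the new apply() computes scale * x + shift, rounds half away from zero (the `factor * 0.5f` term), and saturates to [-128, 127]. A standalone sketch of that arithmetic:

#include <cstdint>
#include <cstdio>

int8_t to_int8(float x, float scale, float shift)
{
    float res  = scale * x + shift;
    int factor = (res > 0) ? 1 : -1; // round half away from zero
    res        = res + factor * 0.5f;
    res        = res > 127.0f ? 127.0f : res;
    res        = res < -128.0f ? -128.0f : res;
    return static_cast<int8_t>(res);
}

int main()
{
    // scale = 63.5 maps the range [-2, 2] onto roughly [-127, 127]
    std::printf("%d %d %d\n",
                to_int8(2.0f, 63.5f, 0.0f),   // 127
                to_int8(-2.0f, 63.5f, 0.0f),  // -127
                to_int8(10.0f, 63.5f, 0.0f)); // saturates at 127
    return 0;
}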
#ifndef MIGRAPHX_GUARD_OPERATORS_QUANT_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_OPERATORS_QUANT_CONVOLUTION_HPP
#include <array>
#include <migraphx/op/common.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct quant_convolution
{
std::array<std::size_t, 2> padding = {{0, 0}};
std::array<std::size_t, 2> stride = {{1, 1}};
std::array<std::size_t, 2> dilation = {{1, 1}};
padding_mode_t padding_mode = default_;
int group = 1;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.padding, "padding"),
f(self.stride, "stride"),
f(self.dilation, "dilation"),
f(self.padding_mode, "padding_mode"),
f(self.group, "group"));
}
std::string name() const { return "quant_convolution"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2).same_type().same_ndims().only_dims(4);
const shape& input = inputs.at(0);
const shape& weights = inputs.at(1);
auto t = input.type();
// all inputs must be of int8_type, and the output is float_type
if(t != shape::int8_type)
{
MIGRAPHX_THROW("QUANT_CONVOLUTION: only accepts input and weights of type int8_t");
}
}
t = shape::float_type;
if(padding_mode == default_)
{
return {t,
{
input.lens()[0],
weights.lens()[0],
std::size_t(std::max<std::ptrdiff_t>(
1,
(input.lens()[2] - (1 + dilation[0] * (weights.lens()[2] - 1)) +
2 * padding[0]) /
stride[0] +
1)),
std::size_t(std::max<std::ptrdiff_t>(
1,
(input.lens()[3] - (1 + dilation[1] * (weights.lens()[3] - 1)) +
2 * padding[1]) /
stride[1] +
1)),
}};
}
else if(padding_mode == same)
{
return {t,
{input.lens()[0],
weights.lens()[0],
static_cast<std::size_t>(
std::ceil(static_cast<double>(input.lens()[2]) / stride[0])),
static_cast<std::size_t>(
std::ceil(static_cast<double>(input.lens()[3]) / stride[1]))}};
}
else if(padding_mode == valid)
{
return {
t,
{input.lens()[0],
weights.lens()[0],
static_cast<std::size_t>(std::ceil(
static_cast<double>(input.lens()[2] - weights.lens()[2] + 1) / stride[0])),
static_cast<std::size_t>(std::ceil(
static_cast<double>(input.lens()[3] - weights.lens()[3] + 1) / stride[1]))}};
}
else
{
MIGRAPHX_THROW("QUANT_CONVOLUTION: invalid padding mode");
}
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
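For the default_ padding mode, the branch above is the usual convolution size formula out = (in - effective_kernel + 2 * pad) / stride + 1, with effective_kernel = 1 + dilation * (k - 1), clamped to at least 1. A standalone check with illustrative values:

#include <algorithm>
#include <cstdio>

int main()
{
    long in = 224, k = 3, pad = 1, stride = 2, dilation = 1;
    long eff_k = 1 + dilation * (k - 1); // dilated kernel extent
    long out   = std::max<long>(1, (in - eff_k + 2 * pad) / stride + 1);
    std::printf("out = %ld\n", out); // 112 for these values
    return 0;
}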
#ifndef MIGRAPHX_GUARD_OPERATORS_QUANT_DOT_HPP
#define MIGRAPHX_GUARD_OPERATORS_QUANT_DOT_HPP
#include <array>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct quant_dot
{
int32_t alpha = 1;
int32_t beta = 1;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(as_number(self.alpha), "alpha"), f(as_number(self.beta), "beta"));
}
std::string name() const { return "quant_dot"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{{inputs.at(0), inputs.at(1)}, *this}.same_type();
const shape& a = inputs.at(0);
const shape& b = inputs.at(1);
auto t = a.type();
if(t != shape::int8_type)
{
MIGRAPHX_THROW("QUANT_DOT: only support data type int8_t");
}
if(!std::all_of(inputs.begin(), inputs.end(), [](auto s) { return s.lens().size() >= 2; }))
{
MIGRAPHX_THROW("QUANT_DOT: dot only accept 2 or more dims operands");
}
// only handle the case that the batch size of a and b are the same
if(!std::equal(
a.lens().rbegin() + 2, a.lens().rend(), b.lens().rbegin() + 2, b.lens().rend()))
{
MIGRAPHX_THROW("QUANT_DOT: batch size of A and B mismatch: {" +
to_string_range(a.lens()) + "} x {" + to_string_range(b.lens()) + "}");
}
std::size_t dim_0 = a.lens().size() - 2;
std::size_t dim_1 = a.lens().size() - 1;
if(a.lens()[dim_1] != b.lens()[dim_0])
{
MIGRAPHX_THROW("QUANT_DOT: inner dimensions do not match: {" +
to_string_range(a.lens()) + "} x {" + to_string_range(b.lens()) + "}");
}
// k must be a multiple of 4
if((a.lens()[dim_1] % 4) != 0)
{
MIGRAPHX_THROW("QUANT_DOT: size of A {" + to_string_range(a.lens()) + "} and B {" +
to_string_range(b.lens()) + "} must be a multiple of 4 for int8 type");
}
auto out_lens = a.lens();
out_lens[dim_1] = b.lens()[dim_1];
if(inputs.size() == 3 && out_lens != inputs.at(2).lens())
{
MIGRAPHX_THROW("QUANT_DOT: dimension mismatch, operand C: {" +
to_string_range(inputs.at(2).lens()) +
"}, cannot add to operand A * B: {" + to_string_range(out_lens) + "}");
}
if(inputs.size() == 3 && inputs.at(2).type() != shape::int32_type)
{
MIGRAPHX_THROW("QUANT_DOT: operand C type must be int32");
}
return {shape::int32_type, out_lens};
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
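compute_shape returns int32_type (and requires any C operand to already be int32) because an int8 dot product overflows int8 almost immediately; the accumulator must be wider. A standalone sketch:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
    std::vector<int8_t> a = {127, 127, 127, 127}; // k = 4, a multiple of 4
    std::vector<int8_t> b = {127, 127, 127, 127};
    int32_t acc = 0; // int8 accumulation would overflow on the first product
    for(std::size_t i = 0; i < a.size(); ++i)
        acc += int32_t{a[i]} * int32_t{b[i]};
    std::printf("%d\n", acc); // 64516, far outside [-128, 127]
    return 0;
}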
@@ -27,26 +27,34 @@ struct unary : op_name<Derived>
     argument compute(const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
-        result.visit([&](auto output) {
-            args[0].visit([&](auto input) {
-                if(input.get_shape().packed())
-                {
+        auto in_shape = args[0].get_shape();
+        if(in_shape.packed())
+        {
+            shape std_in_shape{in_shape.type(), in_shape.lens()};
+            shape std_out_shape{output_shape.type(), output_shape.lens()};
+            argument arg_in{std_in_shape, args[0].data()};
+            argument arg_out{std_out_shape, result.data()};
+            arg_out.visit([&](auto output) {
+                arg_in.visit([&](auto input) {
                     std::transform(input.begin(),
                                    input.end(),
                                    output.begin(),
                                    static_cast<const Derived&>(*this).apply());
-                    return result;
-                }
-                shape_for_each(output.get_shape(), [&](const auto& idx) {
-                    output(idx.begin(), idx.end()) =
-                        static_cast<const Derived&>(*this).apply()(input(idx.begin(), idx.end()));
-                });
-                return result;
-            });
-        });
+                });
+            });
+        }
+        else
+        {
+            result.visit([&](auto output) {
+                args[0].visit([&](auto input) {
+                    shape_for_each(output.get_shape(), [&](const auto& idx) {
+                        output(idx.begin(), idx.end()) = static_cast<const Derived&>(*this).apply()(
+                            input(idx.begin(), idx.end()));
+                    });
+                });
+            });
+        }
         return result;
     }
...
@@ -43,6 +43,8 @@
 #include <migraphx/op/outline.hpp>
 #include <migraphx/op/pad.hpp>
 #include <migraphx/op/pooling.hpp>
+#include <migraphx/op/quant_convolution.hpp>
+#include <migraphx/op/quant_dot.hpp>
 #include <migraphx/op/relu.hpp>
 #include <migraphx/op/reshape.hpp>
 #include <migraphx/op/rnn.hpp>
...
@@ -21,6 +21,9 @@ void capture_arguments(program& prog,
                        const std::vector<std::string>& ins_names,
                        std::size_t& num_quant_params,
                        std::function<void(std::size_t, std::vector<argument> args)> func);
+void quantize_int8(program& prog,
+                   const std::vector<std::string>& ins_names,
+                   std::vector<std::pair<float, float>>& int8_quant_params);
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
...
@@ -186,6 +186,12 @@ PYBIND11_MODULE(migraphx, m)
         migraphx::quantize(p, ins_names);
     });
     m.def("quantize", [](migraphx::program& p) { migraphx::quantize(p, {"all"}); });
+    m.def("quantize_int8",
+          [](migraphx::program& p,
+             std::vector<std::string>& ins_names,
+             std::vector<std::pair<float, float>>& quant_params) {
+              migraphx::quantize_int8(p, ins_names, quant_params);
+          });
 #ifdef HAVE_GPU
     m.def("allocate_gpu", &migraphx::gpu::allocate_gpu, py::arg("s"), py::arg("host") = false);
...
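From C++, the new overload is called the same way the binding wraps it. A hedged usage sketch (quantize_example is illustrative, and assumes a program `p` built elsewhere; one (scale, shift) pair is consumed per quantized input, in instruction order):

#include <migraphx/program.hpp>
#include <migraphx/quantization.hpp>
#include <string>
#include <utility>
#include <vector>

void quantize_example(migraphx::program& p)
{
    std::vector<std::string> ins_names = {"dot", "convolution"};
    std::vector<std::pair<float, float>> params = {{63.5f, 0.0f}, {63.5f, 0.0f}};
    migraphx::quantize_int8(p, ins_names, params);
}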
@@ -2,12 +2,15 @@
 #include <migraphx/program.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/iterator_for.hpp>
+#include <migraphx/op/convert.hpp>
 #include <migraphx/op/dot.hpp>
 #include <migraphx/op/mul.hpp>
 #include <migraphx/op/add.hpp>
+#include <migraphx/op/quant_dot.hpp>
+#include <migraphx/op/capture.hpp>
 #include <migraphx/op/convolution.hpp>
+#include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/multibroadcast.hpp>
-#include <migraphx/op/capture.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/ranges.hpp>
 #include <utility>
@@ -113,6 +116,305 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
void quantize(program& prog) { quantize(prog, {"all"}); }
// int8 quantization is different from fp16 since int8 can only represent
// values in the range -128 ~ 127. To convert a float or double value to int8,
// we need a scale and a shift; the conversion is then v_int8 = fp * scale + shift.
// To simplify the changes, we treat the shift as 0.0f for now.
void quantize_int8(program& prog,
const std::vector<std::string>& ins_names,
std::vector<std::pair<float, float>>& int8_quant_params)
{
// // For debugging
// auto print_gemm_res = [&](std::size_t ins_index, std::vector<migraphx::argument> args) {
// // scale and shift are needed only for the int8 type, and we do not
// // consider shift, so it is set to 0
// std::vector<float> vec_val;
// args.front().visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
// std::cout << "quant_gemm = " << std::endl;
// for (size_t i = 0; i < 20; i++)
// {
// std::cout << vec_val[i] << "\t";
// }
// std::cout << std::endl;
// };
// // For debugging
// auto print_conv_res = [&](std::size_t ins_index, std::vector<migraphx::argument> args) {
// // scale and shift are needed only for the int8 type, and we do not
// // consider shift, so it is set to 0
// std::vector<float> vec_val;
// args.front().visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
// std::cout << "quant_conv = " << std::endl;
// for (size_t i = 0; i < 20; i++)
// {
// std::cout << vec_val[i] << "\t";
// }
// std::cout << std::endl;
// };
// For now, we only support the int8 quantization of gemm and convolution
std::vector<std::string> op_names = {"dot", "convolution"};
if(!std::all_of(ins_names.begin(), ins_names.end(), [&](auto name) {
return (std::find(op_names.begin(), op_names.end(), name) != op_names.end());
}))
{
MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
}
std::size_t quant_param_index = 0;
std::unordered_map<instruction_ref, instruction_ref> map_quant_ins;
for(auto ins : iterator_for(prog))
{
if(not contains(ins_names, ins->name()))
{
continue;
}
shape::type_t orig_type = ins->get_shape().type();
// for the dot operator, there could be 2 or 3 input arguments
// if the 3rd argument is available, convert it to an int32.
std::vector<instruction_ref> converted_inputs;
// process all inputs: if an input is fp32 or fp64, convert it
// to int8 by adding a convert operator, then replace
// the operator with the corresponding int8 version
auto inputs = ins->inputs();
std::vector<std::pair<float, float>> ins_quant_params;
for(auto input : inputs)
{
// In general, the target_type is int8, but for the dot
// operation, if it has 3 inputs, then the last one should
// be converted to int32_type
shape::type_t quant_type = shape::int8_type;
auto param = int8_quant_params[quant_param_index++];
ins_quant_params.push_back(param);
if(ins->name() == "dot" and inputs.size() == 3 and input == inputs.back())
{
quant_type = shape::int32_type;
}
auto s = input->get_shape();
if((s.type() == shape::float_type || s.type() == shape::double_type ||
s.type() == shape::int32_type) &&
s.type() != quant_type)
{
// if the input is itself a convert operator whose input already
// has the target type, reuse that input directly
instruction_ref quant_input{};
if(input->name() == "convert")
{
auto tmp_ins = input->inputs().front();
if(tmp_ins->get_shape().type() == quant_type)
{
quant_input = input->inputs().front();
}
else
{
quant_input = insert_quant_ins(
prog, input, quant_type, map_quant_ins, param.first, param.second);
}
}
else
{
quant_input = insert_quant_ins(
prog, input, quant_type, map_quant_ins, param.first, param.second);
}
converted_inputs.push_back(quant_input);
}
else
{
converted_inputs.push_back(input);
}
}
// no change for the input, go to the next instruction
if(inputs == converted_inputs)
{
continue;
}
// When converting from other types to int8_type, the scale and
// shift(.0f) parameters generate results different from
// the original ones. To adjust the output to be "correct (approximately
// equal)", we need additional calculation for the adjustment
if(ins->name() == "dot")
{
auto dot_op = any_cast<op::dot>(ins->get_operator());
float new_alpha =
dot_op.alpha / (ins_quant_params[0].first * ins_quant_params[1].first);
float new_beta = dot_op.beta;
// We need an additional check on the new_alpha value. If
// abs(new_alpha) >= 50 (a temporary threshold), we can round
// it to an integer and use it as the alpha of the quant_dot
float threshold = 50.0f;
if(fabs(new_alpha) >= threshold && fabs(new_beta) >= threshold)
{
int32_t quant_alpha = static_cast<int32_t>(new_alpha);
int32_t quant_beta = static_cast<int32_t>(new_beta);
shape quant_shape = compute_shape(op::quant_dot{1, 0}, converted_inputs);
if(quant_shape.type() == orig_type)
{
prog.replace_instruction(
ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
}
else
{
auto quant_dot = prog.insert_instruction(
ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
prog.replace_instruction(ins, op::convert{orig_type}, quant_dot);
}
}
// only alpha can be quantized; quantizing beta would cause
// a large error, so we have to do the multiplication and
// addition manually
else if(fabs(new_alpha) >= threshold)
{
int32_t quant_alpha = static_cast<int32_t>(new_alpha);
int32_t quant_beta = 0;
if(orig_type == shape::int32_type)
{
if(inputs.size() == 2 or dot_op.beta == 0.0f)
{
prog.replace_instruction(
ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
}
// if there are 3 inputs, we need to consider the third argument
else
{
auto q_dot = prog.insert_instruction(
ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
auto l_beta = prog.add_literal(literal{orig_type, vec_beta});
auto beta_c =
prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
prog.replace_instruction(ins, op::add{}, q_dot, beta_c);
}
}
else
{
if(inputs.size() == 2 or dot_op.beta == 0.0f)
{
auto q_dot = prog.insert_instruction(
ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
prog.replace_instruction(ins, op::convert{orig_type}, q_dot);
}
// if there are 3 inputs, we need to consider the third argument
else
{
auto q_dot = prog.insert_instruction(
ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
auto oq_dot = prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
auto l_beta = prog.add_literal(literal{oq_dot->get_shape(), vec_beta});
auto beta_c =
prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
prog.replace_instruction(ins, op::add{}, q_dot, beta_c);
}
}
}
else
{
auto q_dot = prog.insert_instruction(ins, op::quant_dot{1, 0}, converted_inputs);
std::vector<float> vec_alpha(q_dot->get_shape().elements(), new_alpha);
if(orig_type == shape::int32_type)
{
auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
{
prog.replace_instruction(ins, op::mul{}, l_alpha, q_dot);
}
// case of 3 arguments
else
{
std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
auto l_beta = prog.add_literal(literal(ins->get_shape(), vec_beta));
auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, q_dot);
auto beta_c =
prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
}
}
else
{
auto oq_dot = prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
{
prog.replace_instruction(ins, op::mul{}, l_alpha, oq_dot);
}
// case of 3 arguments
else
{
std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
auto l_beta = prog.add_literal(literal(ins->get_shape(), vec_beta));
auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, oq_dot);
auto beta_c =
prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
}
}
}
}
else if(ins->name() == "convolution")
{
// Current MIOpen convolution does not support alpha and beta,
// so we need a separate multiply to adjust the output
auto conv_op = any_cast<op::convolution>(ins->get_operator());
auto padding = conv_op.padding;
auto stride = conv_op.stride;
auto dilation = conv_op.dilation;
auto padding_mode = conv_op.padding_mode;
auto group = conv_op.group;
auto adjust_factor = 1.0 / (ins_quant_params[0].first * ins_quant_params[1].first);
shape quant_shape =
compute_shape(op::quant_convolution{padding, stride, dilation, padding_mode, group},
converted_inputs);
std::vector<float> vec_factor(quant_shape.elements(), adjust_factor);
auto fl = prog.add_literal(literal{{orig_type, quant_shape.lens()}, vec_factor});
if(quant_shape.type() == orig_type)
{
if(adjust_factor == 1.0f)
{
prog.replace_instruction(
ins,
op::quant_convolution{padding, stride, dilation, padding_mode, group},
converted_inputs);
}
else
{
auto quant_conv = prog.insert_instruction(
ins,
op::quant_convolution{padding, stride, dilation, padding_mode, group},
converted_inputs);
prog.replace_instruction(ins, op::mul{}, quant_conv, fl);
}
}
else
{
auto quant_conv = prog.insert_instruction(
ins,
op::quant_convolution{padding, stride, dilation, padding_mode, group},
converted_inputs);
if(adjust_factor == 1.0f)
{
prog.replace_instruction(ins, op::convert{orig_type}, quant_conv);
}
else
{
auto oq_conv = prog.insert_instruction(ins, op::convert{orig_type}, quant_conv);
prog.replace_instruction(ins, op::mul{}, oq_conv, fl);
}
}
}
else
{
MIGRAPHX_THROW("INT8_QUANTIZE: does not support operator" + ins->name());
}
}
}
// For each input argument, we need to insert a
// capture operator to compute the scale and shift
void capture_arguments(program& prog,
...
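The pass above consumes one (scale, shift) pair per quantized input from int8_quant_params; the capture pass is what gathers the data needed to derive them. One common derivation, shown here purely as an assumption rather than anything this commit implements, is scale = 127 / max|x| with the shift left at 0:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <utility>
#include <vector>

std::pair<float, float> make_quant_param(const std::vector<float>& vals)
{
    float max_abs = 0.0f;
    for(float v : vals)
        max_abs = std::max(max_abs, std::fabs(v));
    // guard against an all-zero tensor; shift stays 0.0f as noted above
    float scale = (max_abs == 0.0f) ? 1.0f : 127.0f / max_abs;
    return {scale, 0.0f};
}

int main()
{
    auto p = make_quant_param({-0.5f, 0.25f, 2.0f});
    std::printf("scale = %g, shift = %g\n", p.first, p.second); // scale = 63.5
    return 0;
}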
@@ -44,13 +44,9 @@ struct is_fast_gemm_type<float> : std::true_type
 {
 };
-template <class T>
-void migemm_impl(tensor_view<T> cmat,
-                 tensor_view<T> amat,
-                 tensor_view<T> bmat,
-                 float alpha,
-                 float beta,
-                 std::true_type)
+template <class T, class F>
+void migemm_impl(
+    tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, F alpha, F beta, std::true_type)
 {
     visit_mat(amat, [&](const auto& a) {
         visit_mat(bmat, [&](const auto& b) {
@@ -66,13 +62,9 @@ void migemm_impl(tensor_view<T> cmat,
     });
 }
-template <class T>
-void migemm_impl(tensor_view<T> cmat,
-                 tensor_view<T> amat,
-                 tensor_view<T> bmat,
-                 float alpha,
-                 float beta,
-                 std::false_type)
+template <class T, class F>
+void migemm_impl(
+    tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, F alpha, F beta, std::false_type)
 {
     std::size_t n_dims = cmat.get_shape().lens().size();
     std::size_t dim_0 = n_dims - 2;
@@ -95,9 +87,8 @@ void migemm_impl(tensor_view<T> cmat,
     });
 }
-template <class T>
-void migemm_impl(
-    tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, float alpha, float beta)
+template <class T, class F>
+void migemm_impl(tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, F alpha, F beta)
 {
     auto lens = amat.get_shape().lens();
     bool batch_mul =
@@ -113,13 +104,29 @@ void migemm_impl(
     }
 }
-void migemm(
-    const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta)
+template <class F>
+void migemm_tpl(
+    const argument& c_arg, const argument& a_arg, const argument& b_arg, F alpha, F beta)
 {
     visit_all(c_arg, a_arg, b_arg)(
         [&](auto cmat, auto amat, auto bmat) { migemm_impl(cmat, amat, bmat, alpha, beta); });
 }
+void migemm(
+    const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta)
+{
+    migemm_tpl(c_arg, a_arg, b_arg, alpha, beta);
+}
+void migemm(const argument& c_arg,
+            const argument& a_arg,
+            const argument& b_arg,
+            int32_t alpha,
+            int32_t beta)
+{
+    migemm_tpl(c_arg, a_arg, b_arg, alpha, beta);
+}
 } // namespace cpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
@@ -10,6 +10,11 @@ namespace cpu {
 void migemm(
     const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta);
+void migemm(const argument& c_arg,
+            const argument& a_arg,
+            const argument& b_arg,
+            int32_t alpha,
+            int32_t beta);
 } // namespace cpu
 } // namespace MIGRAPHX_INLINE_NS
...
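The scalar type now follows the data: op::dot passes float alpha/beta, op::quant_dot passes int32_t, and both overloads forward to the same templated implementation. A standalone sketch of that overload-plus-template pattern (gemm_demo and gemm_tpl_demo are illustrative names only):

#include <cstdint>
#include <cstdio>

template <class F>
void gemm_tpl_demo(F alpha, F beta) // stands in for migemm_tpl
{
    std::printf("alpha=%g beta=%g\n", double(alpha), double(beta));
}

void gemm_demo(float alpha, float beta) { gemm_tpl_demo(alpha, beta); }     // op::dot path
void gemm_demo(int32_t alpha, int32_t beta) { gemm_tpl_demo(alpha, beta); } // op::quant_dot path

int main()
{
    gemm_demo(1.0f, 0.5f);
    gemm_demo(int32_t{2}, int32_t{0});
    return 0;
}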
@@ -204,6 +204,61 @@ struct cpu_convolution
     }
 };
struct cpu_quant_convolution
{
op::quant_convolution op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::quant_convolution"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
result.visit([&](auto output) {
visit_all(args[0], args[1])([&](auto input, auto weights) {
auto in = input.get_shape().lens();
auto in_h = in[2];
auto in_w = in[3];
auto wei = weights.get_shape().lens();
auto wei_n = wei[0];
auto wei_c = wei[1];
auto wei_h = wei[2];
auto wei_w = wei[3];
par_dfor(output_shape.lens()[0],
output_shape.lens()[1],
output_shape.lens()[2],
output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
const int start_x = i * op.stride[0] - op.padding[0];
const int start_y = j * op.stride[1] - op.padding[1];
const int group_id = w / (wei_n / op.group);
float acc = 0;
dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
const int in_x = start_x + x;
const int in_y = start_y + y;
const int in_ch = group_id * wei_c + k;
if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
{
acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
}
});
output(o, w, i, j) = acc;
});
});
});
return result;
}
};
struct cpu_im2col struct cpu_im2col
{ {
op::im2col op; op::im2col op;
@@ -421,7 +476,7 @@ struct cpu_gemm
     {
         argument result{output_shape};
         // 3 inputs, it is alpha * A * B + beta * C, then
-        // A and B are matrics, and C is broadcastable to A * B
+        // A and B are matrices, and C is of the same shape as A * B
         if(args.size() == 3)
         {
             // no need to consider the value of args[2]
@@ -448,6 +503,73 @@ struct cpu_gemm
     }
 };
struct cpu_quant_gemm
{
op::quant_dot op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::quant_dot"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
if(inputs.size() == 3)
{
auto c_shape = inputs.at(2);
check_shapes{{c_shape}}.not_broadcasted();
}
return op.compute_shape(inputs);
}
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
// 3 inputs, it is alpha * A * B + beta * C, then
// A and B are matrices, and C is of the same shape as A * B
// first, convert the args[0] and args[1] from int8_t to int32_t
argument arg_0{{shape::int32_type, {args.at(0).get_shape().lens()}}};
argument arg_1{{shape::int32_type, {args.at(1).get_shape().lens()}}};
arg_0.visit([&](auto output) {
args.at(0).visit(
[&](auto input) { std::copy(input.begin(), input.end(), output.begin()); });
});
arg_1.visit([&](auto output) {
args.at(1).visit(
[&](auto input) { std::copy(input.begin(), input.end(), output.begin()); });
});
if(args.size() == 3)
{
// no need to consider the value of args[2]
if(op.beta == 0)
{
result.visit([&](auto output) { std::fill(output.begin(), output.end(), 0); });
}
else
{
visit_all(result, args[2])([&](auto output, auto input) {
std::copy(input.begin(), input.end(), output.begin());
});
}
migemm(result, arg_0, arg_1, op.alpha, op.beta);
return result;
}
// 2 input arguments
int32_t beta = 0;
migemm(result, arg_0, arg_1, op.alpha, beta);
return result;
}
};
struct leaky_relu_op struct leaky_relu_op
{ {
op::leaky_relu op; op::leaky_relu op;
@@ -652,15 +774,17 @@ struct cpu_apply
     {
         apply_map["batch_norm_inference"] =
             extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
         apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
         apply_map["dot"]         = extend_op<cpu_gemm, op::dot>();
+        apply_map["quant_dot"]   = extend_op<cpu_quant_gemm, op::quant_dot>();
+        apply_map["quant_convolution"] = extend_op<cpu_quant_convolution, op::quant_convolution>();
         apply_map["elu"]        = extend_op<cpu_unary<elu_op>, op::elu>();
         apply_map["im2col"]     = extend_op<cpu_im2col, op::im2col>();
         apply_map["leaky_relu"] = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
         apply_map["logsoftmax"] = extend_op<cpu_logsoftmax, op::logsoftmax>();
         apply_map["lrn"]        = extend_op<cpu_lrn, op::lrn>();
         apply_map["pad"]        = extend_op<cpu_pad, op::pad>();
         apply_map["softmax"]    = simple_op<softmax2d>();
     }
     void apply()
...
@@ -33,6 +33,7 @@ add_library(migraphx_device
     device/pad.cpp
     device/gather.cpp
     device/sub.cpp
+    device/pack.cpp
     device/clip.cpp
 )
 set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
@@ -48,8 +49,10 @@ add_library(migraphx_gpu
     target.cpp
     lowering.cpp
     gemm.cpp
+    quant_gemm.cpp
     pooling.cpp
     convolution.cpp
+    quant_convolution.cpp
     softmax.cpp
     logsoftmax.cpp
     contiguous.cpp
@@ -65,6 +68,7 @@ add_library(migraphx_gpu
     elu.cpp
     pad.cpp
     gather.cpp
+    convert.cpp
     lrn.cpp
     schedule_model.cpp
     adjust_allocation.cpp
...
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_convert::compute_shape(std::vector<shape> inputs) const
{
inputs.pop_back();
check_shapes{inputs}.packed();
return op.compute_shape(inputs);
}
argument hip_convert::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::convert(ctx.get_stream().get(), args[1], args[0], op.scale, op.shift, op.target_type);
return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -6,14 +6,31 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-void convert(hipStream_t stream, const argument& result, const argument& arg)
+void convert(hipStream_t stream,
+             const argument& result,
+             const argument& arg,
+             float scale,
+             float shift,
+             shape::type_t target_type)
 {
     result.visit([&](auto output) {
         arg.visit([&](auto input) {
             const auto* input_ptr = device_cast(input.data());
             auto* output_ptr = device_cast(output.data());
-            gs_launch(stream,
-                      result.get_shape().elements())([=](auto i) { output_ptr[i] = input_ptr[i]; });
+            if(target_type == shape::int8_type)
+            {
+                gs_launch(stream, result.get_shape().elements())([=](auto i) {
+                    float res = input_ptr[i] * scale + shift;
+                    int factor = (res > 0) ? 1 : -1;
+                    output_ptr[i] =
+                        std::min<int8_t>(std::max<float>(-128, res + factor * 0.5), 127);
+                });
+            }
+            else
+            {
+                gs_launch(stream, result.get_shape().elements())(
+                    [=](auto i) { output_ptr[i] = input_ptr[i] * scale + shift; });
+            }
         });
     });
 }
...
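gs_launch applies the given lambda once per element index on the stream. A CPU-side stand-in for the same pattern, assuming gs_launch behaves as a parallel for over element indices (launch_all is an illustrative name, not a MIGraphX API):

#include <cstddef>
#include <cstdio>
#include <vector>

// CPU-side stand-in for gs_launch: apply f to every element index
// (the GPU version parallelizes this loop across the grid).
template <class F>
void launch_all(std::size_t nelements, F f)
{
    for(std::size_t i = 0; i < nelements; ++i)
        f(i);
}

int main()
{
    std::vector<float> in = {1, 2, 3}, out(3);
    float scale = 2.0f, shift = 0.5f;
    launch_all(out.size(), [&](std::size_t i) { out[i] = in[i] * scale + shift; });
    for(float v : out)
        std::printf("%g ", v); // 2.5 4.5 6.5
    std::printf("\n");
    return 0;
}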
@@ -67,13 +67,16 @@ struct hip_tensor_descriptor
 {
     hip_index<NDim> result{};
     size_t tidx = idx;
     for(size_t is = 0; is < NDim; is++)
     {
         result[is] = tidx / strides[is];
         tidx = tidx % strides[is];
     }
     return result;
 }
 __device__ __host__ size_t linear(hip_index<NDim> s) const
 {
     size_t idx = 0;
...
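multi() decodes a linear element index into per-dimension coordinates by dividing by each (row-major) stride in turn; linear() is the inverse dot product with the strides. A standalone sketch for a 2x3 tensor:

#include <array>
#include <cstddef>
#include <cstdio>

int main()
{
    std::array<std::size_t, 2> strides = {3, 1}; // row-major 2x3 tensor
    std::size_t idx = 4;                         // element (1, 1)
    std::array<std::size_t, 2> result{};
    std::size_t tidx = idx;
    for(std::size_t is = 0; is < 2; is++)
    {
        result[is] = tidx / strides[is];
        tidx       = tidx % strides[is];
    }
    std::printf("(%zu, %zu)\n", result[0], result[1]); // (1, 1)
    // linear() inverts this: 1 * 3 + 1 * 1 == 4
    return 0;
}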
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/pack.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void pack_a(hipStream_t stream, const argument& result, const argument& arg)
{
auto output_shape = result.get_shape();
auto out_lens = output_shape.lens();
auto dim_0 = out_lens.size() - 2;
auto dim_1 = out_lens.size() - 1;
std::size_t lda = output_shape.strides()[dim_0];
std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
visit_all(result, arg)([&](auto output, auto input) {
std::size_t nelements = output_shape.elements();
auto* out_ptr = device_cast(output.data());
auto* in_ptr = device_cast(input.data());
visit_tensor_size(out_lens.size(), [&](auto out_dim) {
hip_tensor_descriptor<out_dim> desc(output_shape);
gs_launch(stream, nelements)([=](auto ii) {
const size_t nb = 4;
auto idx = desc.multi(ii);
std::size_t i_m = idx[dim_1];
std::size_t i_k = idx[dim_0];
std::size_t offset = ii / m_size * m_size;
out_ptr[i_k % nb + (i_m + (i_k / nb) * lda) * nb + offset] =
in_ptr[i_m + i_k * lda + offset];
});
});
});
}
void pack_b(hipStream_t stream, const argument& result, const argument& arg)
{
auto trans_shape = result.get_shape();
auto out_lens = trans_shape.lens();
auto dim_0 = trans_shape.lens().size() - 2;
auto dim_1 = trans_shape.lens().size() - 1;
std::size_t ldb = trans_shape.strides()[dim_1];
auto wrap_lens = out_lens;
std::swap(wrap_lens[dim_0], wrap_lens[dim_1]);
shape output_shape{trans_shape.type(), wrap_lens};
std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
visit_all(result, arg)([&](auto output, auto input) {
std::size_t nelements = output_shape.elements();
auto* out_ptr = device_cast(output.data());
auto* in_ptr = device_cast(input.data());
visit_tensor_size(out_lens.size(), [&](auto out_dim) {
hip_tensor_descriptor<out_dim> desc(output_shape);
gs_launch(stream, nelements)([=](auto ii) {
const size_t nb = 4;
auto idx = desc.multi(ii);
std::size_t i_n = idx[dim_1];
std::size_t i_k = idx[dim_0];
std::size_t offset = ii / m_size * m_size;
out_ptr[i_k % nb + (i_n + (i_k / nb) * ldb) * nb + offset] =
in_ptr[i_n + i_k * ldb + offset];
});
});
});
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
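pack_a rewrites an int8 matrix into the nb = 4 blocked layout used by the int8 GEMM kernels: for a fixed column position, four consecutive k-values become contiguous. A standalone sketch of the same index mapping on a tiny k x m matrix (row-major, lda = m; the per-batch offset term is omitted):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
    const std::size_t nb = 4, k = 4, m = 2, lda = m;
    std::vector<int8_t> in(k * m), out(k * m);
    for(std::size_t i = 0; i < in.size(); ++i)
        in[i] = static_cast<int8_t>(i); // element (i_k, i_m) lives at in[i_m + i_k * lda]
    for(std::size_t i_k = 0; i_k < k; ++i_k)
        for(std::size_t i_m = 0; i_m < m; ++i_m)
            out[i_k % nb + (i_m + (i_k / nb) * lda) * nb] = in[i_m + i_k * lda];
    for(int8_t v : out)
        std::printf("%d ", v); // 0 2 4 6 1 3 5 7
    std::printf("\n");
    return 0;
}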