Unverified Commit f47e0b5b authored by turneram's avatar turneram Committed by GitHub
Browse files

CK GEMM Int8 Bug Fixes (#2229)

Adds workarounds to avoid passing capture ops and scalar literals from quantization as arguments to ck_gemm.
parent b8b4630b
......@@ -33,6 +33,8 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK_WORKAROUNDS);
void apply_quantizelinear(module& m, instruction_ref ins)
{
assert(ins->name() == "quantizelinear");
......@@ -62,9 +64,22 @@ void apply_quantizelinear(module& m, instruction_ref ins)
max_quant = qt.max();
min_quant = qt.min();
});
auto s = add_zero_point->get_shape();
auto min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}});
auto max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}});
auto s = add_zero_point->get_shape();
instruction_ref min_arg;
instruction_ref max_arg;
if(enabled(MIGRAPHX_ENABLE_CK_WORKAROUNDS{}))
{
std::vector<int> min_data(s.elements(), min_quant);
std::vector<int> max_data(s.elements(), max_quant);
min_arg = m.add_literal(literal(s, min_data));
max_arg = m.add_literal(literal(s, max_data));
}
else
{
min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}});
max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}});
}
auto saturate = insert_common_op(m, ins, make_op("clip"), {add_zero_point, min_arg, max_arg});
m.replace_instruction(
ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate);
......
......@@ -26,6 +26,7 @@
#include <migraphx/matcher.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/device_name.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -92,6 +93,8 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
auto m = a.lens()[a.lens().size() - 2];
auto n = b.lens().back();
auto k = a.lens().back();
auto batch_size = std::accumulate(
a.lens().rbegin() + 2, a.lens().rend(), std::size_t{1}, std::multiplies<std::size_t>());
// Integer gemms must be divisible by 4 in ck
if(contains({shape::int8_type, shape::int32_type}, ins->get_shape().type()))
{
......@@ -102,9 +105,17 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
if(k % 4 != 0)
return false;
}
// Skipping GEMMs with a K dimension greater than 2048 is a course-grained strategy
// to avoid poor-performing GEMM kernels from CK
// To-do: Investigate a more precise strategy
auto device_name = trim(split_string(get_device_name(), ':').front());
if(device_name == "gfx940")
{
if(ins->get_shape().type() == shape::half_type)
{
if(batch_size >= 64)
return m < 2048 or k <= 64 or n <= 384 or n >= 2048;
return true;
}
return true;
}
return k <= 2048;
}
......@@ -140,6 +151,10 @@ struct find_ck_gemm_pointwise
return not input->inputs().empty() and input->inputs().front()->name() == "capture";
}))
return;
if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto input) {
return not input->inputs().empty() and input->inputs().front()->name() == "capture";
}))
return;
assert(gemm_it != inputs.end());
if(gemm_idx != 0)
{
......
......@@ -42,11 +42,14 @@
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/unknown.hpp>
#include <migraphx/env.hpp>
#include <migraphx/serialize.hpp>
#include "test.hpp"
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK_WORKAROUNDS);
migraphx::program optimize_onnx(const std::string& name, bool run_passes = false)
{
migraphx::onnx_options options;
......@@ -5540,6 +5543,31 @@ TEST_CASE(qlinearmatmul_2D_test)
EXPECT(p.sort() == prog.sort());
}
migraphx::instruction_ref insert_quantizelinear_clip(migraphx::module& m,
const migraphx::instruction_ref ins,
const migraphx::instruction_ref round,
const migraphx::shape s,
const int64_t min_quant,
const int64_t max_quant)
{
migraphx::instruction_ref min_arg;
migraphx::instruction_ref max_arg;
if(migraphx::enabled(MIGRAPHX_ENABLE_CK_WORKAROUNDS{}))
{
std::vector<int> min_data(s.elements(), min_quant);
std::vector<int> max_data(s.elements(), max_quant);
min_arg = m.add_literal(migraphx::literal(s, min_data));
max_arg = m.add_literal(migraphx::literal(s, max_data));
}
else
{
min_arg = m.add_literal(migraphx::literal{migraphx::shape{s.type()}, {min_quant}});
max_arg = m.add_literal(migraphx::literal{migraphx::shape{s.type()}, {max_quant}});
}
return migraphx::insert_common_op(m, ins, migraphx::make_op("clip"), {round, min_arg, max_arg});
}
TEST_CASE(quantizelinear_test)
{
migraphx::program p;
......@@ -5548,16 +5576,10 @@ TEST_CASE(quantizelinear_test)
auto l1 = mm->add_parameter("1", {migraphx::shape::float_type, {1}});
auto l1_mbcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {5}}}), l1);
auto div = mm->add_instruction(migraphx::make_op("div"), l0, l1_mbcast);
auto round = mm->add_instruction(migraphx::make_op("round"), div);
auto s = round->get_shape();
auto min_arg = mm->add_literal(migraphx::literal{migraphx::shape{s.type()}, {0}});
auto max_arg = mm->add_literal(migraphx::literal{migraphx::shape{s.type()}, {255}});
auto min_mbcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg);
auto max_mbcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg);
auto clip = mm->add_instruction(migraphx::make_op("clip"), round, min_mbcast, max_mbcast);
auto div = mm->add_instruction(migraphx::make_op("div"), l0, l1_mbcast);
auto round = mm->add_instruction(migraphx::make_op("round"), div);
auto s = round->get_shape();
auto clip = insert_quantizelinear_clip(*mm, div, round, s, 0, 255);
mm->add_instruction(
migraphx::make_op("convert",
{{"target_type", migraphx::to_value(migraphx::shape::uint8_type)}}),
......@@ -5579,16 +5601,10 @@ TEST_CASE(quantizelinear_int32_test)
migraphx::make_op("convert",
{{"target_type", migraphx::to_value(migraphx::shape::float_type)}}),
l0);
auto div = mm->add_instruction(migraphx::make_op("div"), l0, l1_mbcast);
auto round = mm->add_instruction(migraphx::make_op("round"), div);
auto s = round->get_shape();
auto min_arg = mm->add_literal(migraphx::literal{migraphx::shape{s.type()}, {0}});
auto max_arg = mm->add_literal(migraphx::literal{migraphx::shape{s.type()}, {255}});
auto min_mbcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg);
auto max_mbcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg);
auto clip = mm->add_instruction(migraphx::make_op("clip"), round, min_mbcast, max_mbcast);
auto div = mm->add_instruction(migraphx::make_op("div"), l0, l1_mbcast);
auto round = mm->add_instruction(migraphx::make_op("round"), div);
auto s = round->get_shape();
auto clip = insert_quantizelinear_clip(*mm, div, round, s, 0, 255);
mm->add_instruction(
migraphx::make_op("convert",
{{"target_type", migraphx::to_value(migraphx::shape::uint8_type)}}),
......@@ -5615,15 +5631,9 @@ TEST_CASE(quantizelinear_zero_point_test)
migraphx::make_op("convert",
{{"target_type", migraphx::to_value(migraphx::shape::float_type)}}),
l2_mbcast);
auto add = mm->add_instruction(migraphx::make_op("add"), round, l2_mbcast);
auto s = round->get_shape();
auto min_arg = mm->add_literal(migraphx::literal{migraphx::shape{s.type()}, {-128}});
auto max_arg = mm->add_literal(migraphx::literal{migraphx::shape{s.type()}, {127}});
auto min_mbcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg);
auto max_mbcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg);
auto clip = mm->add_instruction(migraphx::make_op("clip"), add, min_mbcast, max_mbcast);
auto add = mm->add_instruction(migraphx::make_op("add"), round, l2_mbcast);
auto s = round->get_shape();
auto clip = insert_quantizelinear_clip(*mm, div, add, s, -128, 127);
mm->add_instruction(
migraphx::make_op("convert",
{{"target_type", migraphx::to_value(migraphx::shape::int8_type)}}),
......@@ -5654,15 +5664,9 @@ migraphx::program make_quantizelinear_axis_prog()
migraphx::make_op("convert",
{{"target_type", migraphx::to_value(migraphx::shape::float_type)}}),
l2_bcast);
auto add = mm->add_instruction(migraphx::make_op("add"), round, l2_bcast);
auto s = round->get_shape();
auto min_arg = mm->add_literal(migraphx::literal{migraphx::shape{s.type()}, {-128}});
auto max_arg = mm->add_literal(migraphx::literal{migraphx::shape{s.type()}, {127}});
auto min_mbcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg);
auto max_mbcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg);
auto clip = mm->add_instruction(migraphx::make_op("clip"), add, min_mbcast, max_mbcast);
auto add = mm->add_instruction(migraphx::make_op("add"), round, l2_bcast);
auto s = round->get_shape();
auto clip = insert_quantizelinear_clip(*mm, div, add, s, -128, 127);
mm->add_instruction(
migraphx::make_op("convert",
{{"target_type", migraphx::to_value(migraphx::shape::int8_type)}}),
......
......@@ -31,10 +31,13 @@
#include <migraphx/ranges.hpp>
#include <test.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/env.hpp>
#include <migraphx/serialize.hpp>
#include <migraphx/pass_manager.hpp>
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK_WORKAROUNDS);
bool is_quantizelinear(migraphx::instruction& ins) { return ins.name() == "quantizelinear"; }
bool is_dequantizelinear(migraphx::instruction& ins) { return ins.name() == "dequantizelinear"; }
bool is_clip_scalar(migraphx::instruction& ins)
......@@ -82,7 +85,11 @@ TEST_CASE(quantizelinear)
EXPECT(any_of(*p1.get_main_module(), &is_quantizelinear));
EXPECT(none_of(*p2.get_main_module(), &is_quantizelinear));
// ensure clip literals created in quantized program are scalar
EXPECT(any_of(*p2.get_main_module(), &is_clip_scalar));
// unless CK workarounds are enabled
if(migraphx::enabled(MIGRAPHX_ENABLE_CK_WORKAROUNDS{}))
EXPECT(none_of(*p2.get_main_module(), &is_clip_scalar));
else
EXPECT(any_of(*p2.get_main_module(), &is_clip_scalar));
}
TEST_CASE(dequantizelinear)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment