Commit 6e01c7cb authored by Shucai Xiao's avatar Shucai Xiao
Browse files

Merge branch 'int8_miopen_call' into int8_quantize

parents 13dfa914 ee157fc2
...@@ -82,6 +82,8 @@ std::vector<T> generate_tensor_data(const migraphx::shape& s, unsigned long seed ...@@ -82,6 +82,8 @@ std::vector<T> generate_tensor_data(const migraphx::shape& s, unsigned long seed
{ {
std::vector<T> result(s.elements()); std::vector<T> result(s.elements());
std::generate(result.begin(), result.end(), xorshf96_generator<T>{seed}); std::generate(result.begin(), result.end(), xorshf96_generator<T>{seed});
// divide a value to avoid integer overflow
std::transform(result.begin(), result.end(), result.begin(), [](auto i) { return i / 32; });
// std::generate(result.begin(), result.end(), [&]{ return seed % 7; }); // std::generate(result.begin(), result.end(), [&]{ return seed % 7; });
// std::generate(result.begin(), result.end(), []{ return 1; }); // std::generate(result.begin(), result.end(), []{ return 1; });
return result; return result;
......
...@@ -50,7 +50,7 @@ struct quant_convolution ...@@ -50,7 +50,7 @@ struct quant_convolution
{ {
MIGRAPHX_THROW("QUANT_CONVOLUTION: only accept input and weights of type int8_t"); MIGRAPHX_THROW("QUANT_CONVOLUTION: only accept input and weights of type int8_t");
} }
t = shape::float_type; t = shape::int32_type;
if(padding_mode == default_) if(padding_mode == default_)
{ {
......
...@@ -85,6 +85,9 @@ bool memory_coloring_impl::allocate(interval_ptr interval) ...@@ -85,6 +85,9 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
offset += (element_size - (offset % element_size)); offset += (element_size - (offset % element_size));
conflict_queue.pop(); conflict_queue.pop();
} }
// when int8 type is used, the offset could be any number
// if not 4-byte aligned, miopen int8 convolution can crash
offset = (offset + 3) / 4 * 4;
segment.offset = offset; segment.offset = offset;
MIGRAPHX_DEBUG(segment.dump()); MIGRAPHX_DEBUG(segment.dump());
required_bytes = std::max(required_bytes, offset + segment.size); required_bytes = std::max(required_bytes, offset + segment.size);
......
...@@ -219,40 +219,39 @@ struct cpu_quant_convolution ...@@ -219,40 +219,39 @@ struct cpu_quant_convolution
argument compute(context&, shape output_shape, std::vector<argument> args) const argument compute(context&, shape output_shape, std::vector<argument> args) const
{ {
argument result{output_shape}; argument result{output_shape};
result.visit([&](auto output) { auto output = result.get<int32_t>();
visit_all(args[0], args[1])([&](auto input, auto weights) { visit_all(args[0], args[1])([&](auto input, auto weights) {
auto in = input.get_shape().lens(); auto in = input.get_shape().lens();
auto in_h = in[2]; auto in_h = in[2];
auto in_w = in[3]; auto in_w = in[3];
auto wei = weights.get_shape().lens(); auto wei = weights.get_shape().lens();
auto wei_n = wei[0]; auto wei_n = wei[0];
auto wei_c = wei[1]; auto wei_c = wei[1];
auto wei_h = wei[2]; auto wei_h = wei[2];
auto wei_w = wei[3]; auto wei_w = wei[3];
par_dfor(output_shape.lens()[0], par_dfor(output_shape.lens()[0],
output_shape.lens()[1], output_shape.lens()[1],
output_shape.lens()[2], output_shape.lens()[2],
output_shape.lens()[3])( output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) { [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
const int start_x = i * op.stride[0] - op.padding[0]; const auto start_x = i * op.stride[0] - op.padding[0];
const int start_y = j * op.stride[1] - op.padding[1]; const auto start_y = j * op.stride[1] - op.padding[1];
const int group_id = w / (wei_n / op.group); const auto group_id = w / (wei_n / op.group);
float acc = 0; int32_t acc = 0;
dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) { dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
const int in_x = start_x + x; const auto in_x = start_x + x;
const int in_y = start_y + y; const auto in_y = start_y + y;
const int in_ch = group_id * wei_c + k; const auto in_ch = group_id * wei_c + k;
if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w) if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
{ {
acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y); acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
} }
});
output(o, w, i, j) = acc;
}); });
}); output(o, w, i, j) = acc;
});
}); });
return result; return result;
......
...@@ -13,18 +13,18 @@ namespace device { ...@@ -13,18 +13,18 @@ namespace device {
void pack_a(hipStream_t stream, const argument& result, const argument& arg) void pack_a(hipStream_t stream, const argument& result, const argument& arg)
{ {
auto output_shape = result.get_shape(); auto comp_shape = arg.get_shape();
auto out_lens = output_shape.lens(); auto out_lens = comp_shape.lens();
auto dim_0 = out_lens.size() - 2; auto dim_0 = out_lens.size() - 2;
auto dim_1 = out_lens.size() - 1; auto dim_1 = out_lens.size() - 1;
std::size_t lda = output_shape.strides()[dim_0]; std::size_t lda = comp_shape.strides()[dim_0];
std::size_t m_size = out_lens[dim_0] * out_lens[dim_1]; std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
visit_all(result, arg)([&](auto output, auto input) { visit_all(result, arg)([&](auto output, auto input) {
std::size_t nelements = output_shape.elements(); std::size_t nelements = comp_shape.elements();
auto* out_ptr = device_cast(output.data()); auto* out_ptr = device_cast(output.data());
auto* in_ptr = device_cast(input.data()); auto* in_ptr = device_cast(input.data());
visit_tensor_size(out_lens.size(), [&](auto out_dim) { visit_tensor_size(out_lens.size(), [&](auto out_dim) {
hip_tensor_descriptor<out_dim> desc(output_shape); hip_tensor_descriptor<out_dim> desc(comp_shape);
gs_launch(stream, nelements)([=](auto ii) { gs_launch(stream, nelements)([=](auto ii) {
const size_t nb = 4; const size_t nb = 4;
auto idx = desc.multi(ii); auto idx = desc.multi(ii);
...@@ -40,7 +40,7 @@ void pack_a(hipStream_t stream, const argument& result, const argument& arg) ...@@ -40,7 +40,7 @@ void pack_a(hipStream_t stream, const argument& result, const argument& arg)
void pack_b(hipStream_t stream, const argument& result, const argument& arg) void pack_b(hipStream_t stream, const argument& result, const argument& arg)
{ {
auto trans_shape = result.get_shape(); auto trans_shape = arg.get_shape();
auto out_lens = trans_shape.lens(); auto out_lens = trans_shape.lens();
auto dim_0 = trans_shape.lens().size() - 2; auto dim_0 = trans_shape.lens().size() - 2;
auto dim_1 = trans_shape.lens().size() - 1; auto dim_1 = trans_shape.lens().size() - 1;
...@@ -48,14 +48,14 @@ void pack_b(hipStream_t stream, const argument& result, const argument& arg) ...@@ -48,14 +48,14 @@ void pack_b(hipStream_t stream, const argument& result, const argument& arg)
auto wrap_lens = out_lens; auto wrap_lens = out_lens;
std::swap(wrap_lens[dim_0], wrap_lens[dim_1]); std::swap(wrap_lens[dim_0], wrap_lens[dim_1]);
shape output_shape{trans_shape.type(), wrap_lens}; shape comp_shape{trans_shape.type(), wrap_lens};
std::size_t m_size = out_lens[dim_0] * out_lens[dim_1]; std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
visit_all(result, arg)([&](auto output, auto input) { visit_all(result, arg)([&](auto output, auto input) {
std::size_t nelements = output_shape.elements(); std::size_t nelements = comp_shape.elements();
auto* out_ptr = device_cast(output.data()); auto* out_ptr = device_cast(output.data());
auto* in_ptr = device_cast(input.data()); auto* in_ptr = device_cast(input.data());
visit_tensor_size(out_lens.size(), [&](auto out_dim) { visit_tensor_size(out_lens.size(), [&](auto out_dim) {
hip_tensor_descriptor<out_dim> desc(output_shape); hip_tensor_descriptor<out_dim> desc(comp_shape);
gs_launch(stream, nelements)([=](auto ii) { gs_launch(stream, nelements)([=](auto ii) {
const size_t nb = 4; const size_t nb = 4;
auto idx = desc.multi(ii); auto idx = desc.multi(ii);
......
...@@ -33,7 +33,10 @@ struct miopen_quant_convolution ...@@ -33,7 +33,10 @@ struct miopen_quant_convolution
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs); shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs); void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; } std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
private: private:
shape pack_int8_shape(shape& s); shape pack_int8_shape(shape& s);
......
...@@ -13,8 +13,6 @@ struct context; ...@@ -13,8 +13,6 @@ struct context;
struct miopen_quant_gemm struct miopen_quant_gemm
{ {
op::quant_dot op; op::quant_dot op;
mutable argument arg_a{};
mutable argument arg_b{};
template <class Self, class F> template <class Self, class F>
static auto reflect(Self& self, F f) static auto reflect(Self& self, F f)
......
...@@ -99,7 +99,6 @@ struct miopen_apply ...@@ -99,7 +99,6 @@ struct miopen_apply
add_generic_op<hip_min>("min"); add_generic_op<hip_min>("min");
add_extend_op<miopen_gemm, op::dot>("dot"); add_extend_op<miopen_gemm, op::dot>("dot");
add_extend_op<miopen_quant_gemm, op::quant_dot>("quant_dot");
add_extend_op<miopen_contiguous, op::contiguous>("contiguous"); add_extend_op<miopen_contiguous, op::contiguous>("contiguous");
add_extend_op<hip_concat, op::concat>("concat"); add_extend_op<hip_concat, op::concat>("concat");
add_extend_op<miopen_softmax, op::softmax>("softmax"); add_extend_op<miopen_softmax, op::softmax>("softmax");
...@@ -112,6 +111,7 @@ struct miopen_apply ...@@ -112,6 +111,7 @@ struct miopen_apply
add_lrn_op(); add_lrn_op();
add_convolution_op(); add_convolution_op();
add_quant_convolution_op(); add_quant_convolution_op();
add_quant_dot_op();
add_pooling_op(); add_pooling_op();
add_batch_norm_inference_op(); add_batch_norm_inference_op();
} }
...@@ -167,10 +167,31 @@ struct miopen_apply ...@@ -167,10 +167,31 @@ struct miopen_apply
auto ws = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs())); auto ws = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));
auto workspace = insert_allocation(ins, ws, "workspace"); auto workspace = insert_allocation(ins, ws, "workspace");
auto output = insert_allocation(ins, ins->get_shape());
// add a temp float output to store the miopen convolution output
shape tmp_output_shape{shape::float_type, ins->get_shape().lens()};
auto tmp_output = insert_allocation(ins, tmp_output_shape, "tmp_out");
auto output = insert_allocation(ins, ins->get_shape());
return prog->replace_instruction( return prog->replace_instruction(
ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output); ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, tmp_output, output);
});
}
void add_quant_dot_op()
{
apply_map.emplace("quant_dot", [=](instruction_ref ins) {
auto&& op = any_cast<op::quant_dot>(ins->get_operator());
auto inputs = ins->inputs();
auto in_shapes = to_shapes(inputs);
auto pack_a = insert_allocation(ins, in_shapes[0], "pack_a");
auto pack_b = insert_allocation(ins, in_shapes[1], "pack_b");
auto output = insert_allocation(ins, ins->get_shape());
inputs.push_back(pack_a);
inputs.push_back(pack_b);
inputs.push_back(output);
return prog->replace_instruction(ins, miopen_quant_gemm{op}, inputs);
}); });
} }
......
#include <migraphx/gpu/quant_convolution.hpp> #include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/device/convert.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp> #include <migraphx/generate.hpp>
...@@ -8,7 +9,7 @@ namespace gpu { ...@@ -8,7 +9,7 @@ namespace gpu {
shape miopen_quant_convolution::compute_shape(const std::vector<shape>& inputs) const shape miopen_quant_convolution::compute_shape(const std::vector<shape>& inputs) const
{ {
check_shapes{inputs, *this}.has(4).standard(); check_shapes{inputs, *this}.has(5).standard();
return op.compute_shape({inputs.at(0), inputs.at(1)}); return op.compute_shape({inputs.at(0), inputs.at(1)});
} }
argument miopen_quant_convolution::compute(context& ctx, argument miopen_quant_convolution::compute(context& ctx,
...@@ -19,7 +20,8 @@ argument miopen_quant_convolution::compute(context& ctx, ...@@ -19,7 +20,8 @@ argument miopen_quant_convolution::compute(context& ctx,
auto x_desc_vec4 = make_tensor(args[0].get_shape(), true); auto x_desc_vec4 = make_tensor(args[0].get_shape(), true);
auto w_desc = make_tensor(args[1].get_shape()); auto w_desc = make_tensor(args[1].get_shape());
auto w_desc_vec4 = make_tensor(args[1].get_shape(), true); auto w_desc_vec4 = make_tensor(args[1].get_shape(), true);
auto y_desc = make_tensor(output_shape); shape tmp_output_shape{shape::float_type, output_shape.lens()};
auto y_desc = make_tensor(tmp_output_shape);
float alpha = 1; float alpha = 1;
float beta = 0; float beta = 0;
...@@ -67,7 +69,11 @@ argument miopen_quant_convolution::compute(context& ctx, ...@@ -67,7 +69,11 @@ argument miopen_quant_convolution::compute(context& ctx,
{ {
MIGRAPHX_THROW("QUANT_CONVOLUTION: run convolution forward failed"); MIGRAPHX_THROW("QUANT_CONVOLUTION: run convolution forward failed");
} }
return args[3];
// Add a conversion from float to int32_t
device::convert(ctx.get_stream().get(), args[4], args[3]);
return args[4];
} }
shape miopen_quant_convolution::compile(context& ctx, shape miopen_quant_convolution::compile(context& ctx,
...@@ -77,7 +83,8 @@ shape miopen_quant_convolution::compile(context& ctx, ...@@ -77,7 +83,8 @@ shape miopen_quant_convolution::compile(context& ctx,
shape workspace_shape{}; shape workspace_shape{};
auto x_desc = make_tensor(inputs[0], true); auto x_desc = make_tensor(inputs[0], true);
auto w_desc = make_tensor(inputs[1], true); auto w_desc = make_tensor(inputs[1], true);
auto y_desc = make_tensor(output_shape); shape tmp_output_shape{shape::float_type, output_shape.lens()};
auto y_desc = make_tensor(tmp_output_shape);
std::size_t workspace_size = 0; std::size_t workspace_size = 0;
miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(), miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
...@@ -90,7 +97,7 @@ shape miopen_quant_convolution::compile(context& ctx, ...@@ -90,7 +97,7 @@ shape miopen_quant_convolution::compile(context& ctx,
arg_vec4_x = to_gpu(generate_argument(pack_int8_shape(inputs[0]))); arg_vec4_x = to_gpu(generate_argument(pack_int8_shape(inputs[0])));
arg_vec4_w = to_gpu(generate_argument(pack_int8_shape(inputs[1]))); arg_vec4_w = to_gpu(generate_argument(pack_int8_shape(inputs[1])));
auto y = allocate_gpu(output_shape); auto y = allocate_gpu(tmp_output_shape);
auto workspace = allocate_gpu(workspace_shape); auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1; int algo_count = 1;
......
...@@ -54,10 +54,11 @@ rb_type<T>* to_rocblas_type(T* x) ...@@ -54,10 +54,11 @@ rb_type<T>* to_rocblas_type(T* x)
shape miopen_quant_gemm::compute_shape(const std::vector<shape>& inputs) const shape miopen_quant_gemm::compute_shape(const std::vector<shape>& inputs) const
{ {
std::vector<shape> input_shapes(inputs); std::vector<shape> in_shapes(inputs);
input_shapes.pop_back(); in_shapes.erase(in_shapes.begin() + in_shapes.size() - 3, in_shapes.end());
check_shapes{input_shapes}.not_broadcasted(); check_shapes{in_shapes}.not_broadcasted();
return op.compute_shape(input_shapes);
return op.compute_shape(in_shapes);
} }
argument miopen_quant_gemm::compute(context& ctx, argument miopen_quant_gemm::compute(context& ctx,
...@@ -69,31 +70,24 @@ argument miopen_quant_gemm::compute(context& ctx, ...@@ -69,31 +70,24 @@ argument miopen_quant_gemm::compute(context& ctx,
auto n_dim = output_shape.lens().size(); auto n_dim = output_shape.lens().size();
auto dim_1 = n_dim - 1; auto dim_1 = n_dim - 1;
auto dim_0 = n_dim - 2; auto dim_0 = n_dim - 2;
auto arg_num = args.size();
rocblas_int lda = args[0].get_shape().strides()[transa ? dim_1 : dim_0]; rocblas_int lda = args[0].get_shape().strides()[transa ? dim_1 : dim_0];
rocblas_int ldb = args[1].get_shape().strides()[transb ? dim_1 : dim_0]; rocblas_int ldb = args[1].get_shape().strides()[transb ? dim_1 : dim_0];
rocblas_int ldc = args[2].get_shape().strides()[dim_0]; rocblas_int ldc = args[arg_num - 1].get_shape().strides()[dim_0];
if(!transb) if(!transb)
{ {
if(arg_b.empty()) device::pack_a(ctx.get_stream().get(), args[arg_num - 2], args[1]);
{
arg_b = allocate_gpu(args[1].get_shape());
}
device::pack_a(ctx.get_stream().get(), arg_b, args[1]);
} }
// need to pack A in this scenario, use the algorithm to pack B in the // need to pack A in this scenario, use the algorithm to pack B in the
// comment of the API // comment of the API
if(transa) if(transa)
{ {
if(arg_a.empty()) device::pack_b(ctx.get_stream().get(), args[arg_num - 3], args[0]);
{
arg_a = allocate_gpu(args.at(0).get_shape());
}
device::pack_b(ctx.get_stream().get(), arg_a, args[0]);
} }
bool is_3inputs = (args.size() == 4); bool is_3inputs = (arg_num == 6);
int32_t beta = 0; int32_t beta = 0;
if(is_3inputs) if(is_3inputs)
{ {
...@@ -127,17 +121,18 @@ argument miopen_quant_gemm::compute(context& ctx, ...@@ -127,17 +121,18 @@ argument miopen_quant_gemm::compute(context& ctx,
m, m,
k, k,
&alpha_r, &alpha_r,
(!transb) ? to_pointer(arg_b) : to_pointer(args.at(1)), (!transb) ? to_pointer(args[arg_num - 2])
: to_pointer(args.at(1)),
rocblas_datatype_i8_r, rocblas_datatype_i8_r,
ldb, ldb,
transa ? to_pointer(arg_a) : to_pointer(args.at(0)), transa ? to_pointer(args[arg_num - 3]) : to_pointer(args.at(0)),
rocblas_datatype_i8_r, rocblas_datatype_i8_r,
lda, lda,
&beta_r, &beta_r,
to_pointer(args[2]), to_pointer(args[2]),
rocblas_datatype_i32_r, rocblas_datatype_i32_r,
ldc, ldc,
is_3inputs ? to_pointer(args.at(3)) : to_pointer(args[2]), to_pointer(args[arg_num - 1]),
rocblas_datatype_i32_r, rocblas_datatype_i32_r,
ldc, ldc,
rocblas_datatype_i32_r, rocblas_datatype_i32_r,
...@@ -157,11 +152,11 @@ argument miopen_quant_gemm::compute(context& ctx, ...@@ -157,11 +152,11 @@ argument miopen_quant_gemm::compute(context& ctx,
m, m,
k, k,
&alpha_r, &alpha_r,
(!transb) ? to_pointer(arg_b) : to_pointer(args.at(1)), (!transb) ? to_pointer(args[arg_num - 2]) : to_pointer(args.at(1)),
rocblas_datatype_i8_r, rocblas_datatype_i8_r,
ldb, ldb,
k * n, k * n,
transa ? to_pointer(arg_a) : to_pointer(args.at(0)), transa ? to_pointer(args[arg_num - 3]) : to_pointer(args.at(0)),
rocblas_datatype_i8_r, rocblas_datatype_i8_r,
lda, lda,
m * k, m * k,
...@@ -170,7 +165,7 @@ argument miopen_quant_gemm::compute(context& ctx, ...@@ -170,7 +165,7 @@ argument miopen_quant_gemm::compute(context& ctx,
rocblas_datatype_i32_r, rocblas_datatype_i32_r,
ldc, ldc,
m * n, m * n,
is_3inputs ? to_pointer(args.at(3)) : to_pointer(args[2]), to_pointer(args[arg_num - 1]),
rocblas_datatype_i32_r, rocblas_datatype_i32_r,
ldc, ldc,
m * n, m * n,
...@@ -184,7 +179,7 @@ argument miopen_quant_gemm::compute(context& ctx, ...@@ -184,7 +179,7 @@ argument miopen_quant_gemm::compute(context& ctx,
} }
}); });
return is_3inputs ? args.at(3) : args[2]; return args[arg_num - 1];
} }
} // namespace gpu } // namespace gpu
......
...@@ -1317,24 +1317,24 @@ TEST_CASE(quant_conv2d_test) ...@@ -1317,24 +1317,24 @@ TEST_CASE(quant_conv2d_test)
p.compile(migraphx::cpu::target{}); p.compile(migraphx::cpu::target{});
auto result = p.eval({}); auto result = p.eval({});
std::vector<float> s = {10197, std::vector<int32_t> s = {10197,
10548, 10548,
11601, 11601,
11952, 11952,
25506, 25506,
26586, 26586,
29826, 29826,
30906, 30906,
27045, 27045,
27396, 27396,
28449, 28449,
28800, 28800,
77346, 77346,
78426, 78426,
81666, 81666,
82746}; 82746};
std::vector<float> results_vector; std::vector<int32_t> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s)); EXPECT(migraphx::verify_range(results_vector, s));
} }
...@@ -1357,14 +1357,14 @@ TEST_CASE(quant_conv2d_test_default_mode) ...@@ -1357,14 +1357,14 @@ TEST_CASE(quant_conv2d_test_default_mode)
p.compile(migraphx::cpu::target{}); p.compile(migraphx::cpu::target{});
auto result = p.eval({}); auto result = p.eval({});
std::vector<float> s = { std::vector<int32_t> s = {
10197, 10548, 6939, 3420, 11601, 11952, 7839, 3852, 7383, 7590, 4953, 2421, 3480, 10197, 10548, 6939, 3420, 11601, 11952, 7839, 3852, 7383, 7590, 4953, 2421, 3480,
3570, 2316, 1125, 25506, 26586, 17874, 9009, 29826, 30906, 20718, 10413, 20505, 21198, 3570, 2316, 1125, 25506, 26586, 17874, 9009, 29826, 30906, 20718, 10413, 20505, 21198,
14187, 7119, 10527, 10860, 7257, 3636, 27045, 27396, 17739, 8604, 28449, 28800, 18639, 14187, 7119, 10527, 10860, 7257, 3636, 27045, 27396, 17739, 8604, 28449, 28800, 18639,
9036, 17319, 17526, 11289, 5445, 7800, 7890, 5052, 2421, 77346, 78426, 52002, 25857, 9036, 17319, 17526, 11289, 5445, 7800, 7890, 5052, 2421, 77346, 78426, 52002, 25857,
81666, 82746, 54846, 27261, 53769, 54462, 36075, 17919, 26511, 26844, 17769, 8820}; 81666, 82746, 54846, 27261, 53769, 54462, 36075, 17919, 26511, 26844, 17769, 8820};
std::vector<float> results_vector; std::vector<int32_t> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s)); EXPECT(migraphx::verify_range(results_vector, s));
} }
...@@ -1387,24 +1387,24 @@ TEST_CASE(quant_conv2d_test_valid_mode) ...@@ -1387,24 +1387,24 @@ TEST_CASE(quant_conv2d_test_valid_mode)
p.compile(migraphx::cpu::target{}); p.compile(migraphx::cpu::target{});
auto result = p.eval({}); auto result = p.eval({});
std::vector<float> s = {10197, std::vector<int32_t> s = {10197,
10548, 10548,
11601, 11601,
11952, 11952,
25506, 25506,
26586, 26586,
29826, 29826,
30906, 30906,
27045, 27045,
27396, 27396,
28449, 28449,
28800, 28800,
77346, 77346,
78426, 78426,
81666, 81666,
82746}; 82746};
std::vector<float> results_vector; std::vector<int32_t> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s)); EXPECT(migraphx::verify_range(results_vector, s));
} }
...@@ -1422,15 +1422,15 @@ TEST_CASE(quant_conv2d_padding_test) ...@@ -1422,15 +1422,15 @@ TEST_CASE(quant_conv2d_padding_test)
auto cl = p.add_literal(migraphx::literal{c_shape, c}); auto cl = p.add_literal(migraphx::literal{c_shape, c});
p.add_instruction(migraphx::op::quant_convolution{{{1, 1}}, {{1, 1}}}, al, cl); p.add_instruction(migraphx::op::quant_convolution{{{1, 1}}, {{1, 1}}}, al, cl);
p.compile(migraphx::cpu::target{}); p.compile(migraphx::cpu::target{});
auto result = p.eval({}); auto result = p.eval({});
std::vector<float> s = { std::vector<int32_t> s = {
4521, 6753, 7014, 4635, 6858, 10197, 10548, 6939, 7830, 11601, 11952, 7839, 5007, 4521, 6753, 7014, 4635, 6858, 10197, 10548, 6939, 7830, 11601, 11952, 7839, 5007,
7383, 7590, 4953, 10515, 15987, 16734, 11277, 16821, 25506, 26586, 17874, 19737, 29826, 7383, 7590, 4953, 10515, 15987, 16734, 11277, 16821, 25506, 26586, 17874, 19737, 29826,
30906, 20718, 13593, 20505, 21198, 14187, 13161, 19281, 19542, 12699, 18522, 27045, 27396, 30906, 20718, 13593, 20505, 21198, 14187, 13161, 19281, 19542, 12699, 18522, 27045, 27396,
17739, 19494, 28449, 28800, 18639, 11919, 17319, 17526, 11289, 34707, 51843, 52590, 34893, 17739, 19494, 28449, 28800, 18639, 11919, 17319, 17526, 11289, 34707, 51843, 52590, 34893,
51813, 77346, 78426, 52002, 54729, 81666, 82746, 54846, 36057, 53769, 54462, 36075}; 51813, 77346, 78426, 52002, 54729, 81666, 82746, 54846, 36057, 53769, 54462, 36075};
std::vector<float> results_vector; std::vector<int32_t> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s)); EXPECT(migraphx::verify_range(results_vector, s));
} }
...@@ -1450,23 +1450,23 @@ TEST_CASE(quant_conv2d_padding_stride_test) ...@@ -1450,23 +1450,23 @@ TEST_CASE(quant_conv2d_padding_stride_test)
p.compile(migraphx::cpu::target{}); p.compile(migraphx::cpu::target{});
auto result = p.eval({}); auto result = p.eval({});
std::vector<float> s = {4521, std::vector<int32_t> s = {4521,
7014, 7014,
7830, 7830,
11952, 11952,
10515, 10515,
16734, 16734,
19737, 19737,
30906, 30906,
13161, 13161,
19542, 19542,
19494, 19494,
28800, 28800,
34707, 34707,
52590, 52590,
54729, 54729,
82746}; 82746};
std::vector<float> results_vector; std::vector<int32_t> results_vector;
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s)); EXPECT(migraphx::verify_range(results_vector, s));
} }
......
...@@ -82,10 +82,6 @@ auto get_hash(const T& x) ...@@ -82,10 +82,6 @@ auto get_hash(const T& x)
return std::hash<T>{}(x); return std::hash<T>{}(x);
} }
// add an overload function for int type
// to avoid overflow in test examples
inline auto get_hash(const int& x) { return std::hash<int>{}(x) / 64; }
void compile_check(migraphx::program& p, const migraphx::target& t) void compile_check(migraphx::program& p, const migraphx::target& t)
{ {
auto name = t.name(); auto name = t.name();
......
...@@ -78,24 +78,24 @@ TEST_CASE(convolution_shape) ...@@ -78,24 +78,24 @@ TEST_CASE(convolution_shape)
TEST_CASE(quant_convolution_shape) TEST_CASE(quant_convolution_shape)
{ {
migraphx::shape output{migraphx::shape::float_type, {4, 4, 1, 1}}; migraphx::shape output{migraphx::shape::int32_type, {4, 4, 1, 1}};
migraphx::shape input{migraphx::shape::int8_type, {4, 3, 3, 3}}; migraphx::shape input{migraphx::shape::int8_type, {4, 3, 3, 3}};
migraphx::shape weights{migraphx::shape::int8_type, {4, 3, 3, 3}}; migraphx::shape weights{migraphx::shape::int8_type, {4, 3, 3, 3}};
expect_shape(output, migraphx::op::quant_convolution{}, input, weights); expect_shape(output, migraphx::op::quant_convolution{}, input, weights);
throws_shape(migraphx::op::quant_convolution{}, input); throws_shape(migraphx::op::quant_convolution{}, input);
migraphx::shape input2{migraphx::shape::float_type, {3, 3}}; migraphx::shape input2{migraphx::shape::int32_type, {3, 3}};
migraphx::shape weights2{migraphx::shape::float_type, {3, 3}}; migraphx::shape weights2{migraphx::shape::float_type, {3, 3}};
throws_shape(migraphx::op::quant_convolution{}, input2, weights2); throws_shape(migraphx::op::quant_convolution{}, input2, weights2);
throws_shape(migraphx::op::quant_convolution{}, input2, weights); throws_shape(migraphx::op::quant_convolution{}, input2, weights);
migraphx::shape input3{migraphx::shape::float_type, {4, 3, 3, 3}}; migraphx::shape input3{migraphx::shape::int32_type, {4, 3, 3, 3}};
migraphx::shape weight3{migraphx::shape::float_type, {4, 3, 3, 3}}; migraphx::shape weight3{migraphx::shape::float_type, {4, 3, 3, 3}};
throws_shape(migraphx::op::quant_convolution{}, input3, weights); throws_shape(migraphx::op::quant_convolution{}, input3, weights);
throws_shape(migraphx::op::quant_convolution{}, input, weight3); throws_shape(migraphx::op::quant_convolution{}, input, weight3);
throws_shape(migraphx::op::quant_convolution{}, input3, weight3); throws_shape(migraphx::op::quant_convolution{}, input3, weight3);
migraphx::shape output_same_mode{migraphx::shape::float_type, {4, 4, 3, 3}}; migraphx::shape output_same_mode{migraphx::shape::int32_type, {4, 4, 3, 3}};
expect_shape(output_same_mode, expect_shape(output_same_mode,
migraphx::op::quant_convolution{{{0, 0}}, {{1, 1}}, {{1, 1}}, migraphx::op::same}, migraphx::op::quant_convolution{{{0, 0}}, {{1, 1}}, {{1, 1}}, migraphx::op::same},
input, input,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment