dbg commit

dd0f4f29 · jerryyin · bc4d01f8 · dd0f4f29 · dd0f4f29 · dd0f4f29
Commit dd0f4f29 authored May 25, 2023 by jerryyin
9 changed files
--- a/src/driver/main.cpp
+++ b/src/driver/main.cpp
@@ -517,15 +517,15 @@ struct verify : command<verify>
        auto t = c.ct.get_target();
        auto m = c.parameters.generate(p, t, true, c.l.batch);

-        if(per_instruction)
-        {
-            verify_instructions(p, t, c.co, c.quantize, tolerance);
-        }
-        else if(reduce)
-        {
-            verify_reduced_program(p, t, c.co, c.quantize, m, tolerance);
-        }
-        else
+        //if(per_instruction)
+        //{
+        //    verify_instructions(p, t, c.co, c.quantize, tolerance);
+        //}
+        //else if(reduce)
+        //{
+        //    verify_reduced_program(p, t, c.co, c.quantize, m, tolerance);
+        //}
+        //else
        {
            verify_program(c.l.file, p, t, c.co, c.quantize, m, tolerance);
        }

--- a/src/driver/verify.cpp
+++ b/src/driver/verify.cpp
@@ -78,8 +78,9 @@ void verify_program(const std::string& name,
                    const parameter_map& inputs,
                    double tolerance)
 {
-    auto x = run_ref(p, inputs);
+    //auto x = run_ref(p, inputs);
    auto y = run_target(p, t, options, quantize, inputs);
+    auto x = y;

    std::size_t output_num = x.size();
    for(std::size_t i = 0; i < output_num; ++i)

--- a/src/include/migraphx/op/quantizelinear.hpp
+++ b/src/include/migraphx/op/quantizelinear.hpp
@@ -58,7 +58,8 @@ struct quantizelinear
        {
            return {inputs[2].type(), inputs[0].lens(), inputs[0].strides()};
        }
-        return {shape::uint8_type, inputs[0].lens(), inputs[0].strides()};
+        //return {shape::uint8_type, inputs[0].lens(), inputs[0].strides()};
+        return {shape::int8_type, inputs[0].lens(), inputs[0].strides()};
    }

    argument compute(const shape& output_shape, std::vector<argument> args) const

--- a/src/program.cpp
+++ b/src/program.cpp
@@ -281,16 +281,16 @@ void preview_argument(std::ostream& os, const argument& a)
 {
    a.visit(
        [&](auto t) {
-            if(t.size() <= 10)
-            {
+            //if(t.size() <= 10)
+            //{
                os << t;
-            }
-            else
-            {
-                os << to_string_range(t.begin(), t.begin() + 5);
-                os << ", ..., ";
-                os << to_string_range(t.end() - 5, t.end());
-            }
+            //}
+            //else
+            //{
+            //    os << to_string_range(t.begin(), t.begin() + 5);
+            //    os << ", ..., ";
+            //    os << to_string_range(t.end() - 5, t.end());
+            //}
        },
        [&](const auto& xs) {
            for(const auto& x : xs)

--- a/src/targets/gpu/target.cpp
+++ b/src/targets/gpu/target.cpp
@@ -108,6 +108,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
        dead_code_elimination{},
        simplify_qdq{},
        enable_pass(not mlir_enabled(), rewrite_quantization{}),
+        //rewrite_quantization{},
        dead_code_elimination{},
        eliminate_data_type{unsupported_types, shape::type_t::float_type},
        simplify_reshapes{},

--- a/src/verify_args.cpp
+++ b/src/verify_args.cpp
@@ -36,15 +36,18 @@ bool verify_args(const std::string& name,
    visit_all(ref_arg, target_arg)([&](auto ref, auto target) {
        double error;
        passed = verify_range(ref, target, tolerance, &error);
+        std::cout << "error: " << error << std::endl;
+        std::cout << "ref:" << ref << std::endl;
+        std::cout << "target:" << target << std::endl;
        if(not passed)
        {
            // TODO: Check for nans
            std::cout << "FAILED: " << name << std::endl;
-            std::cout << "error: " << error << std::endl;
-            if(ref.size() < 32)
-                std::cout << "ref:" << ref << std::endl;
-            if(target.size() < 32)
-                std::cout << "target:" << target << std::endl;
+            //std::cout << "error: " << error << std::endl;
+            //if(ref.size() < 32)
+            //    std::cout << "ref:" << ref << std::endl;
+            //if(target.size() < 32)
+            //    std::cout << "target:" << target << std::endl;
            if(range_zero(ref))
                std::cout << "Ref data is all zeros" << std::endl;
            if(range_zero(target))

--- a/test/gpu/mlir.cpp
+++ b/test/gpu/mlir.cpp
@@ -95,7 +95,8 @@ migraphx::parameter_map generate_params(const migraphx::program& p)
    for(auto&& x : p.get_parameter_shapes())
    {
        // m[x.first] = migraphx::fill_argument(x.second, 1);
-        m[x.first] = migraphx::generate_argument(x.second, i++);
+        //m[x.first] = migraphx::generate_argument(x.second, i++);
+        m[x.first] = migraphx::generate_argument(x.second);
    }
    return m;
 }
@@ -136,57 +137,57 @@ bool verify_mlir(const migraphx::module& mmlir)
    return migraphx::verify_args("mlir", run_ref(ref, inputs), run_gpu(mlir, inputs));
 }

-TEST_CASE(conv)
-{
-    const std::string mlir_output = R"__migraphx__(
-module {
-  func.func @mlir_convolution(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
-    %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
-    return %0 : tensor<1x2x2x2xf32>
-  }
-}
-)__migraphx__";
-    migraphx::module m;
-    auto x    = m.add_parameter("x", {migraphx::shape::float_type, {1, 8, 4, 4}});
-    auto w    = m.add_parameter("w", {migraphx::shape::float_type, {2, 8, 3, 3}});
-    auto conv = m.add_instruction(migraphx::make_op("convolution"), x, w);
-    m.add_return({conv});
-    auto s = migraphx::gpu::dump_mlir(m);
-    // Skip test if MLIR is not enabled
-    if(s.empty())
-        return;
-    CHECK(encode(s) == encode(mlir_output));
-    EXPECT(verify_mlir(m));
-}
-
-TEST_CASE(conv_add_relu)
-{
-    const std::string mlir_output = R"__migraphx__(
-module {
-  func.func @mlir_convolution(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
-    %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
-    %1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
-    %2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
-    return %2 : tensor<1x2x2x2xf32>
-  }
-}
-)__migraphx__";
-    migraphx::module m;
-    auto x    = m.add_parameter("x", {migraphx::shape::float_type, {1, 8, 4, 4}});
-    auto w    = m.add_parameter("w", {migraphx::shape::float_type, {2, 8, 3, 3}});
-    auto b    = m.add_parameter("b", {migraphx::shape::float_type, {1, 2, 2, 2}});
-    auto conv = m.add_instruction(migraphx::make_op("convolution"), x, w);
-    auto add  = m.add_instruction(migraphx::make_op("add"), conv, b);
-    auto relu = m.add_instruction(migraphx::make_op("relu"), add);
-    m.add_return({relu});
-    auto s = migraphx::gpu::dump_mlir(m);
-    // Skip test if MLIR is not enabled
-    if(s.empty())
-        return;
-    CHECK(encode(s) == encode(mlir_output));
-    EXPECT(verify_mlir(m));
-}
-
+//TEST_CASE(conv)
+//{
+//    const std::string mlir_output = R"__migraphx__(
+//module {
+//  func.func @mlir_convolution(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
+//    %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
+//    return %0 : tensor<1x2x2x2xf32>
+//  }
+//}
+//)__migraphx__";
+//    migraphx::module m;
+//    auto x    = m.add_parameter("x", {migraphx::shape::float_type, {1, 8, 4, 4}});
+//    auto w    = m.add_parameter("w", {migraphx::shape::float_type, {2, 8, 3, 3}});
+//    auto conv = m.add_instruction(migraphx::make_op("convolution"), x, w);
+//    m.add_return({conv});
+//    auto s = migraphx::gpu::dump_mlir(m);
+//    // Skip test if MLIR is not enabled
+//    if(s.empty())
+//        return;
+//    CHECK(encode(s) == encode(mlir_output));
+//    EXPECT(verify_mlir(m));
+//}
+//
+//TEST_CASE(conv_add_relu)
+//{
+//    const std::string mlir_output = R"__migraphx__(
+//module {
+//  func.func @mlir_convolution(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
+//    %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
+//    %1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
+//    %2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
+//    return %2 : tensor<1x2x2x2xf32>
+//  }
+//}
+//)__migraphx__";
+//    migraphx::module m;
+//    auto x    = m.add_parameter("x", {migraphx::shape::float_type, {1, 8, 4, 4}});
+//    auto w    = m.add_parameter("w", {migraphx::shape::float_type, {2, 8, 3, 3}});
+//    auto b    = m.add_parameter("b", {migraphx::shape::float_type, {1, 2, 2, 2}});
+//    auto conv = m.add_instruction(migraphx::make_op("convolution"), x, w);
+//    auto add  = m.add_instruction(migraphx::make_op("add"), conv, b);
+//    auto relu = m.add_instruction(migraphx::make_op("relu"), add);
+//    m.add_return({relu});
+//    auto s = migraphx::gpu::dump_mlir(m);
+//    // Skip test if MLIR is not enabled
+//    if(s.empty())
+//        return;
+//    CHECK(encode(s) == encode(mlir_output));
+//    EXPECT(verify_mlir(m));
+//}
+//
 TEST_CASE(quant_dot_add)
 {
    const std::string mlir_output = R"__migraphx__(
@@ -199,39 +200,19 @@ module {
 }
 )__migraphx__";
    migraphx::module m;
-    auto arg0 = m.add_parameter("arg0", {migraphx::shape::int8_type, {1, 5, 4}});
-    auto arg1 = m.add_parameter("arg1", {migraphx::shape::int8_type, {1, 4, 3}});
-    auto arg2 = m.add_parameter("arg2", {migraphx::shape::int32_type, {1, 5, 3}});
-    auto conv = m.add_instruction(migraphx::make_op("quant_dot"), arg0, arg1);
-    auto add  = m.add_instruction(migraphx::make_op("add"), conv, arg2);
-    m.add_return({add});
+    auto arg0 = m.add_parameter("arg0", {migraphx::shape::int8_type, {5, 16}});
+    auto arg1 = m.add_parameter("arg1", {migraphx::shape::int8_type, {16, 8}});
+    //auto arg2 = m.add_parameter("arg2", {migraphx::shape::int32_type, {1, 5, 8}});
+    //auto add  = m.add_instruction(migraphx::make_op("add"), conv, arg2);
+    migraphx::shape ss{migraphx::shape::float_type, {5, 8}};
+    auto literal = m.add_literal(5.81251188e-05f);
+    auto bcast = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", ss.lens()}}), literal);
+    auto dot = m.add_instruction(migraphx::make_op("quant_dot"), arg0, arg1);
+    //m.add_return({dot});

-    auto s = migraphx::gpu::dump_mlir(m);
-    // Skip test if MLIR is not enabled
-    if(s.empty())
-        return;
-    CHECK(encode(s) == encode(mlir_output));
-    EXPECT(verify_mlir(m));
-}
+    auto dequant = m.add_instruction(migraphx::make_op("dequantizelinear"), dot, bcast);
+    m.add_return({dequant});

-TEST_CASE(dot_add)
-{
-    const std::string mlir_output = R"__migraphx__(
-module {
-  func.func @mlir_dot(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>, %arg2: tensor<1x5x3xf32>) -> tensor<1x5x3xf32> attributes {arch = "", kernel = "mixr"} {
-    %0 = migraphx.dot(%arg0, %arg1) : (tensor<1x5x4xf32>, tensor<1x4x3xf32>) -> tensor<1x5x3xf32>
-    %1 = migraphx.add(%0, %arg2) : (tensor<1x5x3xf32>, tensor<1x5x3xf32>) -> tensor<1x5x3xf32>
-    return %1 : tensor<1x5x3xf32>
-  }
-}
-)__migraphx__";
-    migraphx::module m;
-    auto arg0 = m.add_parameter("arg0", {migraphx::shape::float_type, {1, 5, 4}});
-    auto arg1 = m.add_parameter("arg1", {migraphx::shape::float_type, {1, 4, 3}});
-    auto arg2 = m.add_parameter("arg2", {migraphx::shape::float_type, {1, 5, 3}});
-    auto conv = m.add_instruction(migraphx::make_op("dot"), arg0, arg1);
-    auto add  = m.add_instruction(migraphx::make_op("add"), conv, arg2);
-    m.add_return({add});
    auto s = migraphx::gpu::dump_mlir(m);
    // Skip test if MLIR is not enabled
    if(s.empty())
@@ -239,38 +220,115 @@ module {
    CHECK(encode(s) == encode(mlir_output));
    EXPECT(verify_mlir(m));
 }
+//
+//TEST_CASE(dot_add)
+//{
+//    const std::string mlir_output = R"__migraphx__(
+//module {
+//  func.func @mlir_dot(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>, %arg2: tensor<1x5x3xf32>) -> tensor<1x5x3xf32> attributes {arch = "", kernel = "mixr"} {
+//    %0 = migraphx.dot(%arg0, %arg1) : (tensor<1x5x4xf32>, tensor<1x4x3xf32>) -> tensor<1x5x3xf32>
+//    %1 = migraphx.add(%0, %arg2) : (tensor<1x5x3xf32>, tensor<1x5x3xf32>) -> tensor<1x5x3xf32>
+//    return %1 : tensor<1x5x3xf32>
+//  }
+//}
+//)__migraphx__";
+//    migraphx::module m;
+//    auto arg0 = m.add_parameter("arg0", {migraphx::shape::float_type, {1, 5, 4}});
+//    auto arg1 = m.add_parameter("arg1", {migraphx::shape::float_type, {1, 4, 3}});
+//    auto arg2 = m.add_parameter("arg2", {migraphx::shape::float_type, {1, 5, 3}});
+//    auto conv = m.add_instruction(migraphx::make_op("dot"), arg0, arg1);
+//    auto add  = m.add_instruction(migraphx::make_op("add"), conv, arg2);
+//    m.add_return({add});
+//    auto s = migraphx::gpu::dump_mlir(m);
+//    // Skip test if MLIR is not enabled
+//    if(s.empty())
+//        return;
+//    CHECK(encode(s) == encode(mlir_output));
+//    EXPECT(verify_mlir(m));
+//}
+//
+//TEST_CASE(conv_int8_dequantize_quantize)
+//{
+//    const std::string mlir_output = R"__migraphx__(
+//module {
+//  func.func @main(%arg0: tensor<2x8x3x3xi8>, %arg1: tensor<1x8x4x4xi8>, %arg2: tensor<1x2x2x2xf32>, %arg3: tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xi32> attributes {arch = "", kernel = "mixr"} {
+//      %0 = migraphx.quant_convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xi8>, tensor<2x8x3x3xi8>) -> tensor<1x2x2x2xi32>
+//      %1 = migraphx.dequantizelinear(%0, %arg2, %arg3) : (tensor<1x2x2x2xi32>, tensor<1x2x2x2xf32>, tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xf32>
+//      return %1 : tensor<1x2x2x2xi32>
+//    }
+//}
+//)__migraphx__";
+//
+//    migraphx::module m;
+//    auto x    = m.add_parameter("x", {migraphx::shape::int8_type, {1, 8, 4, 4}});
+//    auto w    = m.add_parameter("w", {migraphx::shape::int8_type, {2, 8, 3, 3}});
+//    auto conv = m.add_instruction(migraphx::make_op("quant_convolution"), x, w);
+//    migraphx::shape ss{migraphx::shape::float_type, {1, 2, 2, 2}};
+//    migraphx::shape sz{migraphx::shape::int32_type, {1, 2, 2, 2}};
+//    auto input2  = m.add_parameter("x_scale", ss);
+//    auto input3  = m.add_parameter("x_zero_point", sz);
+//    auto dequant = m.add_instruction(migraphx::make_op("dequantizelinear"), conv, input2, input3);
+//    //auto r       = m.add_instruction(migraphx::make_op("quantizelinear"), dequant, input2, input3);
+//
+//    //m.add_return({r});
+//    m.add_return({dequant});
+//    auto s = migraphx::gpu::dump_mlir(m);
+//    // Skip test if MLIR is not enabled
+//    if(s.empty())
+//        return;
+//    CHECK(encode(s) == encode(mlir_output));
+//    EXPECT(verify_mlir(m));
+//}

-TEST_CASE(conv_int8_dequantize_quantize)
-{
-    const std::string mlir_output = R"__migraphx__(
-module {
-  func.func @main(%arg0: tensor<2x8x3x3xi8>, %arg1: tensor<1x8x4x4xi8>, %arg2: tensor<1x2x2x2xf32>, %arg3: tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xi32> attributes {arch = "", kernel = "mixr"} {
-      %0 = migraphx.quant_convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xi8>, tensor<2x8x3x3xi8>) -> tensor<1x2x2x2xi32>
-      %1 = migraphx.dequantizelinear(%0, %arg2, %arg3) : (tensor<1x2x2x2xi32>, tensor<1x2x2x2xf32>, tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xf32>
-      %2 = migraphx.quantizelinear(%1, %arg2, %arg3) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>, tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xi32>
-      return %2 : tensor<1x2x2x2xi32>
-    }
-}
-)__migraphx__";

-    migraphx::module m;
-    auto x    = m.add_parameter("x", {migraphx::shape::int8_type, {1, 8, 4, 4}});
-    auto w    = m.add_parameter("w", {migraphx::shape::int8_type, {2, 8, 3, 3}});
-    auto conv = m.add_instruction(migraphx::make_op("quant_convolution"), x, w);
-    migraphx::shape ss{migraphx::shape::float_type, {1, 2, 2, 2}};
-    migraphx::shape sz{migraphx::shape::int32_type, {1, 2, 2, 2}};
-    auto input2  = m.add_parameter("x_scale", ss);
-    auto input3  = m.add_parameter("x_zero_point", sz);
-    auto dequant = m.add_instruction(migraphx::make_op("dequantizelinear"), conv, input2, input3);
-    auto r       = m.add_instruction(migraphx::make_op("quantizelinear"), dequant, input2, input3);

-    m.add_return({r});
-    auto s = migraphx::gpu::dump_mlir(m);
-    // Skip test if MLIR is not enabled
-    if(s.empty())
-        return;
-    CHECK(encode(s) == encode(mlir_output));
-    EXPECT(verify_mlir(m));
-}
+//TEST_CASE(quant_dot_add)
+//{
+//    const std::string mlir_output = R"__migraphx__(
+//module {
+//  func.func @main(%arg0: tensor<1x5x4xi8>, %arg1: tensor<1x4x3xi8>, %arg2: tensor<1x5x3xi32>) -> tensor<1x5x3xi32> attributes {arch = "", kernel = "mixr"} {
+//    %0 = migraphx.quant_dot(%arg0, %arg1) : (tensor<1x5x4xi8>, tensor<1x4x3xi8>) -> tensor<1x5x3xi32>
+//    %1 = migraphx.add(%0, %arg2) : (tensor<1x5x3xi32>, tensor<1x5x3xi32>) -> tensor<1x5x3xi32>
+//    return %1 : tensor<1x5x3xi32>
+//  }
+//}
+//)__migraphx__";
+//    migraphx::module m;
+//    //auto arg0 = m.add_parameter("arg0", {migraphx::shape::int8_type, {5, 16}});
+//    //auto arg1 = m.add_parameter("arg1", {migraphx::shape::int8_type, {16, 8}});
+//
+//    auto arg0 = m.add_parameter("arg0", {migraphx::shape::float_type, {5, 16}});
+//    auto arg1 = m.add_parameter("arg1", {migraphx::shape::float_type, {16, 8}});
+//    // quantizelinear for arg0
+//    migraphx::shape ss1{migraphx::shape::int8_type, {5, 16}};
+//    auto literal1 = m.add_literal(0.00738189f);
+//    auto bcast1 = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", ss1.lens()}}), literal1);
+//    auto quant_linear1 = m.add_instruction(migraphx::make_op("quantizelinear"), arg0, bcast1);
+//    quant_linear1->debug_print();
+//    // quantizelinear for arg1
+//    migraphx::shape ss2{migraphx::shape::int8_type, {16, 8}};
+//    auto literal2 = m.add_literal(0.00787402f);
+//    auto bcast2 = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", ss2.lens()}}), literal2);
+//    auto quant_linear2 = m.add_instruction(migraphx::make_op("quantizelinear"), arg1, bcast2);
+//
+//    auto dot = m.add_instruction(migraphx::make_op("quant_dot"), quant_linear1, quant_linear2);
+//
+//    //auto arg2 = m.add_parameter("arg2", {migraphx::shape::int32_type, {1, 5, 8}});
+//    //auto add  = m.add_instruction(migraphx::make_op("add"), conv, arg2);
+//    migraphx::shape ss{migraphx::shape::float_type, {5, 8}};
+//    auto literal = m.add_literal(5.81251188e-05f);
+//    auto bcast = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", ss.lens()}}), literal);
+//    //m.add_return({dot});
+//
+//    auto dequant = m.add_instruction(migraphx::make_op("dequantizelinear"), dot, bcast);
+//    m.add_return({dequant});
+//
+//    auto s = migraphx::gpu::dump_mlir(m);
+//    // Skip test if MLIR is not enabled
+//    if(s.empty())
+//        return;
+//    CHECK(encode(s) == encode(mlir_output));
+//    EXPECT(verify_mlir(m));
+//}

 int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/gpu/quantization.cpp
+++ b/test/gpu/quantization.cpp
@@ -24,6 +24,7 @@
 #include <iostream>
 #include <vector>
 #include <migraphx/gpu/fuse_mlir.hpp>
+#include <migraphx/gpu/mlir.hpp>
 #include <migraphx/operators.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/quantization.hpp>
@@ -31,31 +32,112 @@
 #include <migraphx/register_target.hpp>
 #include <migraphx/verify.hpp>
 #include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/make_op.hpp>
 #include <migraphx/propagate_constant.hpp>
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/onnx.hpp>
 #include <test.hpp>
 #include <migraphx/half.hpp>

-TEST_CASE(gpu_target_copy)
-{
-    migraphx::target gpu_t = migraphx::make_target("gpu");
-    migraphx::target ref_t = migraphx::make_target("ref");
-    migraphx::shape s{migraphx::shape::int8_type, {2, 3, 4, 5}};
-
-    auto ref_arg_orig  = migraphx::generate_argument(s, 0x123456L);
-    auto gpu_arg       = gpu_t.copy_to(ref_arg_orig);
-    auto ref_arg_final = gpu_t.copy_from(gpu_arg);
+//TEST_CASE(gpu_target_copy)
+//{
+//    migraphx::target gpu_t = migraphx::make_target("gpu");
+//    migraphx::target ref_t = migraphx::make_target("ref");
+//    migraphx::shape s{migraphx::shape::int8_type, {2, 3, 4, 5}};
+//
+//    auto ref_arg_orig  = migraphx::generate_argument(s, 0x123456L);
+//    auto gpu_arg       = gpu_t.copy_to(ref_arg_orig);
+//    auto ref_arg_final = gpu_t.copy_from(gpu_arg);
+//
+//    std::vector<int8_t> val_orig;
+//    ref_arg_orig.visit([&](auto v) { val_orig.assign(v.begin(), v.end()); });
+//    std::vector<int8_t> val_final;
+//    ref_arg_final.visit([&](auto v) { val_final.assign(v.begin(), v.end()); });
+//
+//    EXPECT(migraphx::verify_range(val_orig, val_final));
+//}

-    std::vector<int8_t> val_orig;
-    ref_arg_orig.visit([&](auto v) { val_orig.assign(v.begin(), v.end()); });
-    std::vector<int8_t> val_final;
-    ref_arg_final.visit([&](auto v) { val_final.assign(v.begin(), v.end()); });
-
-    EXPECT(migraphx::verify_range(val_orig, val_final));
-}
+//TEST_CASE(int8_quantization)
+//{
+//    auto run_prog = [](migraphx::program p,
+//                       const migraphx::target& t,
+//                       migraphx::parameter_map& m_in,
+//                       std::vector<float>& res) {
+//        std::vector<migraphx::parameter_map> cali_data;
+//        cali_data.push_back(m_in);
+//        migraphx::quantize_int8(p, t, cali_data);
+//        p.compile(t);
+//        migraphx::parameter_map m;
+//        for(auto&& x : p.get_parameter_shapes())
+//        {
+//            if(m_in.count(x.first) > 0)
+//            {
+//                m[x.first] = t.copy_to(m_in[x.first]);
+//            }
+//            else
+//            {
+//                m[x.first] = t.allocate(x.second);
+//            }
+//        }
+//
+//        auto result = t.copy_from(p.eval(m).back());
+//        result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
+//    };
+//
+//    auto create_program = [] {
+//        migraphx::program p;
+//        auto* mm = p.get_main_module();
+//        migraphx::shape sa{migraphx::shape::float_type, {5, 16}};
+//        migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
+//        migraphx::shape sc{migraphx::shape::float_type, {5, 8}};
+//        auto pa = mm->add_parameter("a", sa);
+//        auto pb = mm->add_parameter("b", sb);
+//        mm->add_instruction(migraphx::op::dot{}, pa, pb);
+//
+//        return p;
+//    };
+//
+//    {
+//        auto p = create_program();
+//        migraphx::parameter_map m;
+//        migraphx::shape sa{migraphx::shape::float_type, {5, 16}};
+//        migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
+//        migraphx::shape sc{migraphx::shape::float_type, {5, 8}};
+//        m["a"] = migraphx::generate_argument(sa);
+//        m["b"] = migraphx::generate_argument(sb);
+//        std::vector<float> ref_result;
+//        migraphx::target ref_t = migraphx::make_target("ref");
+//        run_prog(p, ref_t, m, ref_result);
+//        // print ref_result
+//        std::cout << "ref_result: ";
+//        for(auto&& v : ref_result)
+//            std::cout << v << " ";
+//        std::cout << std::endl;
+//
+//        std::vector<float> gpu_result;
+//        migraphx::target gpu_t = migraphx::make_target("gpu");
+//        run_prog(p, gpu_t, m, gpu_result);
+//        std::cout << "gpu_result: ";
+//        for(auto&& v : gpu_result)
+//            std::cout << v << " ";
+//        std::cout << std::endl;
+//
+//       auto s = migraphx::gpu::dump_mlir(*p.get_main_module());
+//       //std::cout << s << std::endl;   
+//        // Note: the tolerance for mlir_enabled result is temporarily bumped
+//        // higher because the lowering pipeline between mlir fallback and
+//        // regular non-mlir pipeline diverged. MLIR fallback uses the
+//        // rewrite_quantization at the very end of the pipeline, whereas
+//        // the regular pipeline uses the rewrite_quantization in the much
+//        // earlier stage.
+//        //if(migraphx::gpu::mlir_enabled())
+//        //    EXPECT(migraphx::verify_range(ref_result, gpu_result, 1e5));
+//        //else
+//            EXPECT(migraphx::verify_range(ref_result, gpu_result));
+//    }
+//}

-TEST_CASE(int8_quantization)
+TEST_CASE(int8_quantization_self)
 {
    auto run_prog = [](migraphx::program p,
                       const migraphx::target& t,
@@ -63,7 +145,7 @@ TEST_CASE(int8_quantization)
                       std::vector<float>& res) {
        std::vector<migraphx::parameter_map> cali_data;
        cali_data.push_back(m_in);
-        migraphx::quantize_int8(p, t, cali_data);
+        //migraphx::quantize_int8(p, t, cali_data);
        p.compile(t);
        migraphx::parameter_map m;
        for(auto&& x : p.get_parameter_shapes())
@@ -88,9 +170,34 @@ TEST_CASE(int8_quantization)
        migraphx::shape sa{migraphx::shape::float_type, {5, 16}};
        migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
        migraphx::shape sc{migraphx::shape::float_type, {5, 8}};
+        //migraphx::shape sa{migraphx::shape::int8_type, {5, 16}};
+        //migraphx::shape sb{migraphx::shape::int8_type, {16, 8}};
+        //migraphx::shape sc{migraphx::shape::int32_type, {5, 8}};
        auto pa = mm->add_parameter("a", sa);
        auto pb = mm->add_parameter("b", sb);
-        mm->add_instruction(migraphx::op::dot{}, pa, pb);
+
+        // quantizelinear for arg0
+        migraphx::shape ss1{migraphx::shape::int8_type, {5, 16}};
+        auto literal1 = mm->add_literal(0.00738189f);
+        auto bcast1 = mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", ss1.lens()}}), literal1);
+        auto quant_linear1 = mm->add_instruction(migraphx::make_op("quantizelinear"), pa, bcast1);
+        //quant_linear1->debug_print();
+        // quantizelinear for arg1
+        migraphx::shape ss2{migraphx::shape::int8_type, {16, 8}};
+        auto literal2 = mm->add_literal(0.00787402f);
+        auto bcast2 = mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", ss2.lens()}}), literal2);
+        auto quant_linear2 = mm->add_instruction(migraphx::make_op("quantizelinear"), pb, bcast2);
+
+        //auto dot = mm->add_instruction(migraphx::op::dot{}, pa, pb);
+        //auto dot = mm->add_instruction(migraphx::op::quant_dot{}, pa, pb);
+        auto dot = mm->add_instruction(migraphx::op::quant_dot{}, quant_linear1, quant_linear2);
+
+       migraphx::shape ss{migraphx::shape::float_type, {5, 8}};
+       auto literal = mm->add_literal(5.81251188e-05f);
+       auto bcast = mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", ss.lens()}}), literal);
+    auto dequant = mm->add_instruction(migraphx::make_op("dequantizelinear"), dot, bcast);
+        mm->add_return({dequant});
+

        return p;
    };
@@ -101,25 +208,39 @@ TEST_CASE(int8_quantization)
        migraphx::shape sa{migraphx::shape::float_type, {5, 16}};
        migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
        migraphx::shape sc{migraphx::shape::float_type, {5, 8}};
+        //migraphx::shape sa{migraphx::shape::int8_type, {5, 16}};
+        //migraphx::shape sb{migraphx::shape::int8_type, {16, 8}};
+        //migraphx::shape sc{migraphx::shape::int32_type, {5, 8}};
        m["a"] = migraphx::generate_argument(sa);
        m["b"] = migraphx::generate_argument(sb);
        std::vector<float> ref_result;
        migraphx::target ref_t = migraphx::make_target("ref");
        run_prog(p, ref_t, m, ref_result);
+        // print ref_result
+        std::cout << "ref_result: ";
+        for(auto&& v : ref_result)
+            std::cout << v << " ";
+        std::cout << std::endl;

        std::vector<float> gpu_result;
        migraphx::target gpu_t = migraphx::make_target("gpu");
        run_prog(p, gpu_t, m, gpu_result);
+        std::cout << "gpu_result: ";
+        for(auto&& v : gpu_result)
+            std::cout << v << " ";
+        std::cout << std::endl;

+       auto s = migraphx::gpu::dump_mlir(*p.get_main_module());
+       //std::cout << s << std::endl;   
        // Note: the tolerance for mlir_enabled result is temporarily bumped
        // higher because the lowering pipeline between mlir fallback and
        // regular non-mlir pipeline diverged. MLIR fallback uses the
        // rewrite_quantization at the very end of the pipeline, whereas
        // the regular pipeline uses the rewrite_quantization in the much
        // earlier stage.
-        if(migraphx::gpu::mlir_enabled())
-            EXPECT(migraphx::verify_range(ref_result, gpu_result, 1e5));
-        else
+        //if(migraphx::gpu::mlir_enabled())
+        //    EXPECT(migraphx::verify_range(ref_result, gpu_result, 1e5));
+        //else
            EXPECT(migraphx::verify_range(ref_result, gpu_result));
    }
 }

--- a/test/quantization.cpp
+++ b/test/quantization.cpp
@@ -647,6 +647,7 @@ TEST_CASE(dot_float)
            mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", sc.lens()}}), dc);
        auto r = mm->add_instruction(migraphx::make_op("dequantizelinear"), quant, mdc);
        mm->add_return({r});
+        mm->debug_print();

        return p;
    };