[mlir] Adding quantizelinear, dequantizelinear and quant_convolution support (#1675)

7b2a5ccf · Zhuoran Yin · GitHub · 551b927c · 7b2a5ccf · 7b2a5ccf
Unverified Commit 7b2a5ccf authored Apr 13, 2023 by Zhuoran Yin Committed by GitHub Apr 13, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 43 additions and 6 deletions

Dockerfile +1 -1

src/targets/gpu/mlir.cpp +9 -5

test/gpu/mlir.cpp +33 -0

No files found.
--- a/Dockerfile
+++ b/Dockerfile
@@ -110,7 +110,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
 ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
-RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@acb727b348086b58a7f261b32c0e4f0686a4c0ee -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
+RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@55c6ee66cc7502db7950693b3e845676cbf400b1 -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
 ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
 ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db

--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -197,10 +197,14 @@ struct mlir_program
                result = mlirF64TypeGet(ctx.get());
            else if(as.is_integral())
            {
-                if(as.is_signed())
-                    result = mlirIntegerTypeSignedGet(ctx.get(), as.size() * 8);
-                else
-                    result = mlirIntegerTypeGet(ctx.get(), as.size() * 8);
+                // Note: rocMLIR use signless integer type for tensors types. This
+                // will translate to signed implementation for current supported
+                // operations.
+                if(as.is_unsigned())
+                {
+                    MIGRAPHX_THROW("Unsupported type: " + std::to_string(as.type_enum()));
+                }
+                result = mlirIntegerTypeGet(ctx.get(), as.size() * 8);
            }
            else
                MIGRAPHX_THROW("Unsupported type: " + std::to_string(as.type_enum()));
@@ -483,7 +487,7 @@ struct mlir_program
    static value get_operator_value(const operation& op)
    {
        auto v = op.to_value();
-        if(op.name() == "convolution")
+        if(op.name() == "convolution" or op.name() == "quant_convolution")
        {
            // Adjust symetrical padding
            if(v.at("padding").size() == v.at("stride").size())

--- a/test/gpu/mlir.cpp
+++ b/test/gpu/mlir.cpp
@@ -213,4 +213,37 @@ module {
    EXPECT(verify_mlir(m));
 }
+TEST_CASE(conv_int8_dequantize_quantize)
+{
+    const std::string mlir_output = R"__migraphx__(
+module {
+  func.func @main(%arg0: tensor<2x8x3x3xi8>, %arg1: tensor<1x8x4x4xi8>, %arg2: tensor<1x2x2x2xf32>, %arg3: tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xi32> attributes {arch = "", kernel = "mixr"} {
+      %0 = migraphx.quant_convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xi8>, tensor<2x8x3x3xi8>) -> tensor<1x2x2x2xi32>
+      %1 = migraphx.dequantizelinear(%0, %arg2, %arg3) : (tensor<1x2x2x2xi32>, tensor<1x2x2x2xf32>, tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xf32>
+      %2 = migraphx.quantizelinear(%1, %arg2, %arg3) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>, tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xi32>
+      return %2 : tensor<1x2x2x2xi32>
+    }
+}
+)__migraphx__";
+    migraphx::module m;
+    auto x    = m.add_parameter("x", {migraphx::shape::int8_type, {1, 8, 4, 4}});
+    auto w    = m.add_parameter("w", {migraphx::shape::int8_type, {2, 8, 3, 3}});
+    auto conv = m.add_instruction(migraphx::make_op("quant_convolution"), x, w);
+    migraphx::shape ss{migraphx::shape::float_type, {1, 2, 2, 2}};
+    migraphx::shape sz{migraphx::shape::int32_type, {1, 2, 2, 2}};
+    auto input2  = m.add_parameter("x_scale", ss);
+    auto input3  = m.add_parameter("x_zero_point", sz);
+    auto dequant = m.add_instruction(migraphx::make_op("dequantizelinear"), conv, input2, input3);
+    auto r       = m.add_instruction(migraphx::make_op("quantizelinear"), dequant, input2, input3);
+    m.add_return({r});
+    auto s = migraphx::gpu::dump_mlir(m);
+    // Skip test if MLIR is not enabled
+    if(s.empty())
+        return;
+    CHECK(encode(s) == encode(mlir_output));
+    EXPECT(verify_mlir(m));
+}
 int main(int argc, const char* argv[]) { test::run(argc, argv); }