Merge branch 'develop' into add_parity_check_ci

97d4bb6c · Ted Themistokleous · GitHub · 39b097c7 · bdbc38bc · 97d4bb6c
Unverified Commit 97d4bb6c authored Jul 25, 2023 by Ted Themistokleous Committed by GitHub Jul 25, 2023
20 changed files
--- a/src/targets/gpu/fuse_mlir.cpp
+++ b/src/targets/gpu/fuse_mlir.cpp
@@ -216,6 +216,7 @@ struct find_mlir_op
                                                                "quant_dot",
                                                                "add",
                                                                "clip",
+                                                                "relu",
                                                                "sub",
                                                                "mul",
                                                                "div",

--- a/src/targets/gpu/include/migraphx/gpu/contiguous.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/contiguous.hpp
@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(2);
-        if(inputs.front().standard())
-            return inputs.front();
        auto lens = inputs.at(0).lens();
        auto t    = inputs.at(0).type();
        return {t, lens};

--- a/src/targets/gpu/include/migraphx/gpu/convolution.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/convolution.hpp
@@ -31,7 +31,7 @@
 #include <migraphx/op/identity.hpp>
 #include <migraphx/op/convolution.hpp>
 #include <migraphx/op/quant_convolution.hpp>
-#include <migraphx/op/deconvolution.hpp>
+#include <migraphx/op/convolution_backwards.hpp>
 #include <unordered_map>
 #include <migraphx/reflect.hpp>
 #include <migraphx/gpu/context.hpp>
@@ -146,7 +146,8 @@ struct miopen_convolution

    void set_conv_descriptor()
    {
-        cd = (op.name() == "deconvolution") ? make_deconv(op) : make_conv(op);
+        cd =
+            (op.name() == "convolution_backwards") ? make_convolution_backwards(op) : make_conv(op);
    }

    value compile(migraphx::context& ctx, const shape& output, const std::vector<shape>& input)
@@ -159,10 +160,31 @@ struct miopen_convolution
    shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
    {
        shape workspace_shape{};
-        auto x_desc                = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
-        auto w_desc                = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
-        auto y_desc                = make_tensor(reshape_if_1d(output_shape));
+        auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
+        auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
+        auto y_desc = make_tensor(reshape_if_1d(output_shape));
+
+        auto* miopen_stream_handle = ctx.get_stream().get_miopen();
        std::size_t workspace_size = 0;
+        auto status                = miopenConvolutionForwardGetWorkSpaceSize(miopen_stream_handle,
+                                                               w_desc.get(),
+                                                               x_desc.get(),
+                                                               cd.get(),
+                                                               y_desc.get(),
+                                                               &workspace_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
+
+        workspace_shape = shape{shape::int8_type, {workspace_size}};
+
+        auto x_shape = inputs[0];
+        auto w_shape = inputs[1];
+        if(int8_x4_format)
+        {
+            x_shape = pack_int8_shape(x_shape);
+            w_shape = pack_int8_shape(w_shape);
+        }
+
 #ifdef MIGRAPHX_HAS_FIND_2_API
        {
            auto conv_problem = make_obj<miopen_problem>(
@@ -170,13 +192,34 @@ struct miopen_convolution

            set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
            set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
+            bool preallocate = false;
+#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
+            // MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
+            preallocate = true;
+#endif
+            auto x = preallocate ? to_gpu(generate_argument(x_shape)) : inputs[0];
+            auto w = preallocate ? to_gpu(generate_argument(w_shape)) : inputs[1];
+            auto y = preallocate ? allocate_gpu(output_shape) : inputs[2];
+            auto workspace =
+                preallocate ? allocate_gpu(workspace_shape) : migraphx::argument(workspace_shape);
+
            set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);

-            auto* miopen_stream_handle = ctx.get_stream().get_miopen();
+            const miopenTensorArgument_t tensor_args[3] = {
+                {miopenTensorConvolutionX, nullptr, x.implicit()},
+                {miopenTensorConvolutionW, nullptr, w.implicit()},
+                {miopenTensorConvolutionY, nullptr, y.implicit()},
+            };
+
+            solution_ptr = find_solution(miopen_stream_handle,
+                                         3,
+                                         tensor_args,
+                                         workspace.implicit(),
+                                         workspace_size,
+                                         conv_problem.get(),
+                                         ctx.get_exhaustive_tune_flag());

-            solution_ptr = find_solution(
-                miopen_stream_handle, conv_problem.get(), ctx.get_exhaustive_tune_flag());
-            auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
+            status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
            if(status != miopenStatusSuccess)
                MIGRAPHX_THROW("MIOpen" + op.name() + " : failed to get solution's workspace size");

@@ -195,29 +238,10 @@ struct miopen_convolution
            return shape{shape::int8_type, {workspace_size}};
        }
 #else
-        auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
-                                                               w_desc.get(),
-                                                               x_desc.get(),
-                                                               cd.get(),
-                                                               y_desc.get(),
-                                                               &workspace_size);
-        if(status != miopenStatusSuccess)
-            MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
-
-        workspace_shape = shape{shape::int8_type, {workspace_size}};
-
-        auto x_shape = inputs[0];
-        auto w_shape = inputs[1];
-        if(int8_x4_format)
-        {
-            x_shape = pack_int8_shape(x_shape);
-            w_shape = pack_int8_shape(w_shape);
-        }
        auto x         = to_gpu(generate_argument(x_shape));
        auto w         = to_gpu(generate_argument(w_shape));
        auto y         = allocate_gpu(output_shape);
        auto workspace = allocate_gpu(workspace_shape);
-
        int algo_count = 1;
        miopenConvAlgoPerf_t perf;
        status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
@@ -337,6 +361,7 @@ struct miopen_convolution
        return {s.type(), lens, strides};
    }
 };
+
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/targets/gpu/include/migraphx/gpu/miopen.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/miopen.hpp
@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
 using miopen_problem      = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem);
 using miopen_solution     = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution);

-inline miopen_solution
-find_solution(miopenHandle_t handle, miopenProblem_t problem, bool tune = false)
+inline miopen_solution find_solution(miopenHandle_t handle,
+                                     size_t num_inputs,
+                                     const miopenTensorArgument_t* tensor_args,
+                                     void* workspace,
+                                     size_t workspace_size,
+                                     miopenProblem_t problem,
+                                     bool tune = false)
 {
    miopenSolution_t solution;
    size_t found           = 0;
-    miopen_find_options fo = nullptr;
+    miopen_find_options fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
    if(tune)
    {
-        fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
        miopenSetFindOptionTuning(fo.get(), 1);
    }
-    auto status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
+#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
+    for(auto i : range(num_inputs))
+    {
+        auto status = miopenSetFindOptionPreallocatedTensor(
+            fo.get(), tensor_args[i].id, tensor_args[i].buffer);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen: failed to preallocate tensors for the find process");
+    }
+    auto status = miopenSetFindOptionPreallocatedWorkspace(fo.get(), workspace, workspace_size);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen: failed to preallocate workspace for the find process");
+#else
+    miopenStatus_t status;
+    (void)(num_inputs);
+    (void)(tensor_args);
+    (void)(workspace_size);
+    (void)(workspace);
+#endif
+    status      = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
    auto result = miopen_solution{solution};
    if(status != miopenStatusSuccess or found == 0)
-        MIGRAPHX_THROW("MIOpen miopenFindSolutions failed");
+        MIGRAPHX_THROW("MIOpen: miopenFindSolutions failed");
    return result;
 }

@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
 }

 template <class T>
-inline convolution_descriptor make_deconv(const T& op)
+inline convolution_descriptor make_convolution_backwards(const T& op)
 {
    auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
    miopenConvolutionMode_t c_mode = miopenTranspose;

--- a/src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp
@@ -122,12 +122,14 @@ struct source_location_capture
 {
    T x;
    source_location loc;
-    template <class U, class = decltype(T(U{}))>
+    // declval is a workaround since default constructor for "U" is not working with rocm-5.6
+    template <class U>
+    static U&& declval();
+    template <class U, class = decltype(T(declval<U>()))>
    constexpr source_location_capture(U px, source_location ploc = source_location{})
        : x(px), loc(ploc)
    {
    }
-
    constexpr operator source_location() const { return loc; }

    constexpr operator T() const { return x; }

--- a/src/targets/gpu/lowering.cpp
+++ b/src/targets/gpu/lowering.cpp
@@ -106,7 +106,7 @@ struct miopen_apply
        add_extend_op("topk");

        add_convolution_op("convolution");
-        add_convolution_op("deconvolution");
+        add_convolution_op("convolution_backwards");
        add_convolution_op("quant_convolution");
        add_gemm_op<op::dot>("dot");
        add_gemm_op<op::quant_dot>("quant_dot");

--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -389,14 +389,20 @@ struct mlir_program
        mlir_operation_state& add_attributes(const std::vector<named_attribute_t>& named_attrs)
        {
            auto attributes = prog->name_attributes(named_attrs);
-            mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            if(not attributes.empty())
+            {
+                mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            }
            return *this;
        }

        mlir_operation_state& add_attribute_value(const value& v)
        {
            auto attributes = prog->name_attributes(v);
-            mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            if(not attributes.empty())
+            {
+                mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            }
            return *this;
        }

@@ -419,13 +425,19 @@ struct mlir_program
                return shape{r.type(), r.lens()};
            });
            auto x = prog->make_tensors(reshaped);
-            mlirOperationStateAddResults(&op_state, x.size(), x.data());
+            if(not x.empty())
+            {
+                mlirOperationStateAddResults(&op_state, x.size(), x.data());
+            }
            return *this;
        }

        mlir_operation_state& add_operands(const std::vector<MlirValue>& inputs)
        {
-            mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data());
+            if(not inputs.empty())
+            {
+                mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data());
+            }
            return *this;
        }

@@ -435,7 +447,10 @@ struct mlir_program
            std::transform(regions.begin(), regions.end(), mregions.begin(), [](const auto& r) {
                return r.get();
            });
-            mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data());
+            if(not mregions.empty())
+            {
+                mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data());
+            }
            mlir_operation op(mlirOperationCreate(&op_state));
            // Release memory since mlir_operation owns it
            for(auto& r : regions)
@@ -607,12 +622,12 @@ struct mlir_program
        mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())};
        // 1st pipeline to call
        mlirMIGraphXAddHighLevelPipeline(pm_front.get());
-        mlirPassManagerRun(pm_front.get(), mmodule.get());
+        mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get()));

        // 2nd pipeline to call
        get_module_tuned();
        mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str());
-        mlirPassManagerRun(pm_back.get(), mmodule.get());
+        mlirPassManagerRunOnOp(pm_back.get(), mlirModuleGetOperation(mmodule.get()));

        code_object_op op{};
        op.symbol_name                = sym_name;
@@ -701,6 +716,11 @@ struct mlir_program
    bool get_module_tuned() const
    {
        static mlir_tuning_table tuning_table = create_tuning_table();
+        // The tuning table as currently implemented is currently not
+        // thread safe. This will be fixed in the future. For now,
+        // stick a mutex around all tuning table interaction.
+        static std::mutex lock;
+        std::lock_guard<std::mutex> guard(lock);
        if(!mlirRockTuningSetFromTable(tuning_table.get(), mmodule.get()))
        {
            const char* prob_config = mlirRockTuningGetKey(tuning_table.get(), mmodule.get());
@@ -778,9 +798,6 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
 {
    adjust_param_shapes(m, inputs);
    const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
-    // set mutex while llvm thread support is disabled.
-    static std::mutex g_mlirc_mutex; // NOLINT
-    const std::lock_guard<std::mutex> lock(g_mlirc_mutex);

    if(trace)
        std::cout << m << std::endl;

--- a/src/targets/gpu/rocblas.cpp
+++ b/src/targets/gpu/rocblas.cpp
@@ -55,9 +55,16 @@ bool get_compute_fp32_flag()

 bool get_int8_x4_format(context& ctx)
 {
+#if ROCBLAS_VERSION_MAJOR >= 3
+    (void)(ctx);
+    return false;
+#else
+    // int8x4 packed format is only available starting from rocblas-v2.38 and it is deprecated in
+    // v3.0 and will be removed in v4.0
    rocblas_gemm_flags flag;
    rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
    return flag == rocblas_gemm_flags_pack_int8x4;
+#endif
 }
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS

--- a/src/targets/ref/lowering.cpp
+++ b/src/targets/ref/lowering.cpp
@@ -27,7 +27,7 @@
 #include <migraphx/dfor.hpp>
 #include <migraphx/op/identity.hpp>
 #include <migraphx/op/convolution.hpp>
-#include <migraphx/op/deconvolution.hpp>
+#include <migraphx/op/convolution_backwards.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/dot.hpp>
 #include <migraphx/op/quant_dot.hpp>

--- a/src/tf/parse_batchnorm.cpp
+++ b/src/tf/parse_batchnorm.cpp
@@ -52,7 +52,6 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
        auto x_type = args[0]->get_shape().type();

        // unsqueeze tensors of shape (C) to broadcast correctly
-        auto rt  = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0.5}});
        auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}});

        auto scale_unsqueeze =
@@ -64,11 +63,11 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
        auto var_unsqueeze =
            info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[4]);

-        auto numer   = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze);
-        auto var_eps = info.add_broadcastable_binary_op("add", var_unsqueeze, eps);
-        auto denom   = info.add_broadcastable_binary_op("pow", var_eps, rt);
-        auto div0    = info.add_broadcastable_binary_op("div", numer, denom);
-        auto r0      = info.add_broadcastable_binary_op("mul", div0, scale_unsqueeze);
+        auto x_sub_mean = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze);
+        auto var_eps    = info.add_broadcastable_binary_op("add", var_unsqueeze, eps);
+        auto rsqrt      = info.add_instruction(make_op("rsqrt"), var_eps);
+        auto mul0       = info.add_broadcastable_binary_op("mul", scale_unsqueeze, rsqrt);
+        auto r0         = info.add_broadcastable_binary_op("mul", x_sub_mean, mul0);
        return info.add_broadcastable_binary_op("add", r0, bias_unsqueeze);
    }
 };

--- a/src/verify_args.cpp
+++ b/src/verify_args.cpp
@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
    bool passed = true;
    visit_all(ref_arg, target_arg)([&](auto ref, auto target) {
        double error;
-        passed = verify_range(ref, target, tolerance, &error);
+        passed = verify::verify_range(ref, target, tolerance, &error);
        if(not passed)
        {
            // TODO: Check for nans
@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
                std::cout << "ref:" << ref << std::endl;
            if(target.size() < 32)
                std::cout << "target:" << target << std::endl;
-            if(range_zero(ref))
+            if(verify::range_zero(ref))
                std::cout << "Ref data is all zeros" << std::endl;
-            if(range_zero(target))
+            if(verify::range_zero(target))
                std::cout << "Target data is all zeros" << std::endl;

-            auto mxdiff = max_diff(ref, target);
+            auto mxdiff = verify::max_diff(ref, target);
            std::cout << "Max diff: " << mxdiff << std::endl;

-            auto idx = mismatch_idx(ref, target, float_equal);
-            if(idx < range_distance(ref))
+            auto idx = verify::mismatch_idx(ref, target, float_equal);
+            if(idx < verify::range_distance(ref))
            {
                std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
                          << std::endl;
            }

-            auto ref_nan_idx = find_idx(ref, not_finite);
+            auto ref_nan_idx = find_idx(ref, verify::not_finite);
            if(ref_nan_idx >= 0)
                std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
                          << ref[ref_nan_idx] << std::endl;

-            auto target_nan_idx = find_idx(target, not_finite);
+            auto target_nan_idx = find_idx(target, verify::not_finite);
            if(target_nan_idx >= 0)
                std::cout << "Non finite number found in target at " << target_nan_idx << ": "
                          << target[target_nan_idx] << std::endl;
@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
        }
        else
        {
-            if(range_zero(ref))
+            if(verify::range_zero(ref))
                std::cout << "Ref data is all zeros" << std::endl;
-            if(range_zero(target))
+            if(verify::range_zero(target))
                std::cout << "Target data is all zeros" << std::endl;

            // auto mxdiff = max_diff(ref, target);
            // std::cout << "Max diff: " << mxdiff << std::endl;

            // auto idx = mismatch_idx(ref, target, float_equal);
-            // if(idx < range_distance(ref))
+            // if(idx < verify::range_distance(ref))
            // {
            //     std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
            //               << std::endl;
            // }

-            auto ref_nan_idx = find_idx(ref, not_finite);
+            auto ref_nan_idx = find_idx(ref, verify::not_finite);
            if(ref_nan_idx >= 0)
                std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
                          << ref[ref_nan_idx] << std::endl;

-            auto target_nan_idx = find_idx(target, not_finite);
+            auto target_nan_idx = find_idx(target, verify::not_finite);
            if(target_nan_idx >= 0)
                std::cout << "Non finite number found in target at " << target_nan_idx << ": "
                          << target[target_nan_idx] << std::endl;

--- a/test/api/test_gpu.cpp
+++ b/test/api/test_gpu.cpp
@@ -34,7 +34,6 @@ TEST_CASE(load_and_run)
    auto shapes_before = p.get_output_shapes();
    migraphx::compile_options options;
    options.set_offload_copy();
-    options.set_exhaustive_tune_flag();
    p.compile(migraphx::target("gpu"), options);
    auto shapes_after = p.get_output_shapes();
    CHECK(shapes_before.size() == 1);

--- a/test/gpu/codegen_literal.cpp
+++ b/test/gpu/codegen_literal.cpp
@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test)
    migraphx::target gpu_t = migraphx::make_target("gpu");
    run_prog(p, gpu_t, m, gpu_result);

-    EXPECT(migraphx::verify_range(ref_result, gpu_result));
+    EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
 }

 int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/gpu/manage_host_buffer.cpp
+++ b/test/gpu/manage_host_buffer.cpp
@@ -64,7 +64,7 @@ TEST_CASE(host_same_buffer_copy)
    auto result = p.eval(pp).back();
    std::vector<float> results_vector(ss.elements(), -1);
    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
-    EXPECT(migraphx::verify_range(c_vec, results_vector));
+    EXPECT(migraphx::verify::verify_range(c_vec, results_vector));
 }

 TEST_CASE(arguments_lifetime)

--- a/test/gpu/quantization.cpp
+++ b/test/gpu/quantization.cpp
@@ -52,7 +52,7 @@ TEST_CASE(gpu_target_copy)
    std::vector<int8_t> val_final;
    ref_arg_final.visit([&](auto v) { val_final.assign(v.begin(), v.end()); });

-    EXPECT(migraphx::verify_range(val_orig, val_final));
+    EXPECT(migraphx::verify::verify_range(val_orig, val_final));
 }

 TEST_CASE(int8_quantization)
@@ -118,9 +118,9 @@ TEST_CASE(int8_quantization)
        // the regular pipeline uses the rewrite_quantization in the much
        // earlier stage.
        if(migraphx::gpu::mlir_enabled())
-            EXPECT(migraphx::verify_range(ref_result, gpu_result, 1e5));
+            EXPECT(migraphx::verify::verify_range(ref_result, gpu_result, 1e5));
        else
-            EXPECT(migraphx::verify_range(ref_result, gpu_result));
+            EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
    }
 }


--- a/test/multi_target/multitarget_test.cpp
+++ b/test/multi_target/multitarget_test.cpp
--- a/test/onnx/.onnxrt-commit
+++ b/test/onnx/.onnxrt-commit
-3be6eb53c8b359703cb645ed2cb1cdf106924b7c
+d3295f4329d744fe1f8419e1220e123807282b99
--- a/test/onnx/conv_transpose_auto_pad_test.onnx
+++ b/test/onnx/conv_transpose_auto_pad_test.onnx
+conv_transpose_auto_pad_test:±
+:
+x
+wyconv1"
ConvTranspose*
+auto_pad"
+SAME_UPPER conv_transpose_auto_pad_testZ
+x
+
+
+
+
+Z
+w
+
+
+
+
+b
+y
+
+
+
+
+B
\ No newline at end of file
--- a/test/onnx/deconv_bias_test.onnx
+++ b/test/onnx/deconv_bias_test.onnx
-deconv_bias_test:ž
+conv_transpose_bias_test:¦
 "
 x
 w
-byconv1"
ConvTransposedeconv_bias_testZ
+byconv1"
ConvTransposeconv_transpose_bias_testZ
 x


@@ -24,4 +24,4 @@



-B
+B
\ No newline at end of file
--- a/test/onnx/conv_transpose_dyn_asym_padding_test.onnx
+++ b/test/onnx/conv_transpose_dyn_asym_padding_test.onnx