Merge branch 'develop' into test_onnx_zoo

25e8cf0b · Ted Themistokleous · GitHub · a313a68e · 635502be · 25e8cf0b
Unverified Commit 25e8cf0b authored Jan 27, 2023 by Ted Themistokleous Committed by GitHub Jan 27, 2023
20 changed files
--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -32,7 +32,13 @@
 #include <mlir-c/Dialect/MIGraphX.h>
 #include <mlir-c/IntegerSet.h>
 #include <mlir-c/Pass.h>
-#include <mlir-c/Registration.h>
+#include <mutex>
+#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
+#warning "Incompatible version of rocMLIR library used, disabling"
+#undef MIGRAPHX_MLIR
+#else
+#include <mlir-c/RegisterRocMLIR.h>
+#endif
 #endif
 #include <migraphx/env.hpp>
@@ -50,10 +56,6 @@
 #include <deque>
 #include <variant>
-#if defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) && MLIR_MIGRAPHX_DIALECT_API_VERSION >= 2
-#define MIGRAPHX_MLIR_BARE_POINTER
-#endif
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
@@ -168,9 +170,11 @@ struct mlir_program
          location(mlirLocationUnknownGet(ctx.get())),
          mmodule(mlirModuleCreateEmpty(location))
    {
-        MlirDialectHandle mixr_handle = mlirGetDialectHandle__migraphx__();
+        MlirDialectRegistry registry = mlirDialectRegistryCreate();
-        mlirDialectHandleRegisterDialect(mixr_handle, ctx.get());
+        mlirRegisterRocMLIRDialects(registry);
-        mlirRegisterAllDialects(ctx.get());
+        mlirContextAppendDialectRegistry(ctx.get(), registry);
+        mlirContextLoadAllAvailableDialects(ctx.get());
+        mlirDialectRegistryDestroy(registry);
        mlirContextSetAllowUnregisteredDialects(ctx.get(), true /*allow*/);
    }
@@ -452,7 +456,8 @@ struct mlir_program
        auto ops = create_operation_state("func.func");
        ops.add_attributes({{"function_type", make_function_type(inputs, outputs)},
                            {"sym_name", std::string("main")},
-                            {"kernel", std::string("mixr")}});
+                            {"kernel", std::string("mixr")},
+                            {"arch", target_arch}});
        ops.add_region(std::move(region));
        insert(body, std::move(ops));
@@ -512,7 +517,8 @@ struct mlir_program
                pp =
                    problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
                // check if HW supports xdlops
-                bool xdlops       = contains(get_xdlops_archs(), target_name);
+                auto target_chip  = trim(split_string(target_arch, ':').front());
+                bool xdlops       = contains(get_xdlops_archs(), target_chip);
                std::string tuned = get_tune_params(xdlops);
                if(not tuned.empty())
                    ops.add_attributes({{"perf_config", tuned}});
@@ -540,7 +546,7 @@ struct mlir_program
        // 1st pipeline to call
        mlirMIGraphXAddHighLevelPipeline(pm.get());
        // 2nd pipeline to call
-        mlirMIGraphXAddBackendPipeline(pm.get(), target_name.c_str(), "amdgcn-amd-amdhsa", "");
+        mlirMIGraphXAddBackendPipeline(pm.get(), target_arch.c_str());
        mlirPassManagerRun(pm.get(), mmodule.get());
        code_object_op op{};
@@ -550,16 +556,7 @@ struct mlir_program
        return op;
    }
-    void find_target()
+    void find_target() { target_arch = get_device_name(); } // stores get_device_name() unmodified; the removed body below kept only the text before the first ':'
-    {
-        std::string tname = get_device_name();
-        // HACK: Since MLIR can't handle the full target name
-        target_name = trim(split_string(tname, ':').front());
-        if(tname.size() != target_name.size())
-            std::cout
-                << "*************** WARNING: MLIR may not compile the correct target features for: "
-                << tname << std::endl;
-    }
    std::pair<std::size_t, std::size_t> get_launch_params() const
    {
@@ -588,7 +585,7 @@ struct mlir_program
    mlir_module mmodule;
    problem_params pp;
    std::deque<std::string> strings{};
-    std::string target_name;
+    std::string target_arch;
 };
 std::string dump_mlir(const module& m)
@@ -650,6 +647,10 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
    const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
    if(trace)
        std::cout << m << std::endl;
+    // set mutex while llvm thread support is disabled.
+    static std::mutex g_mlirc_mutex; // NOLINT
+    const std::lock_guard<std::mutex> lock(g_mlirc_mutex);
    mlir_program mp;
    mp.find_target();
    mp.parse(m);
@@ -669,46 +670,9 @@ instruction_ref insert_mlir(module& m,
    std::vector<instruction_ref> refs;
    std::size_t last = 0;
-#ifdef MIGRAPHX_MLIR_BARE_POINTER
    refs.reserve(inputs.size());
    std::copy(inputs.begin(), inputs.end(), std::back_inserter(refs));
    last               = refs.size() - 1;
-#else
-    refs.reserve(inputs.size() * 15);
-    std::unordered_map<uint64_t, instruction_ref> literal_map{};
-    auto get_literal = [&](uint64_t value) {
-        auto fi = literal_map.find(value);
-        if(fi != literal_map.end())
-            return fi->second;
-        auto lit = m.add_literal(value);
-        literal_map.emplace(value, lit);
-        return lit;
-    };
-    for(auto input : inputs)
-    {
-        const size_t offset = 0;
-        auto s              = input->get_shape();
-        last                = refs.size();
-        refs.push_back(input);
-        refs.push_back(input);
-        refs.push_back(get_literal(offset)); // offset
-        // dim sizes
-        std::transform(s.lens().begin(),
-                       s.lens().end(),
-                       std::back_inserter(refs),
-                       [&](const auto& lval) { return get_literal(lval); });
-        // refs.push_back(get_literal(1)); // G
-        // dim strides
-        std::transform(s.strides().begin(),
-                       s.strides().end(),
-                       std::back_inserter(refs),
-                       [&](const auto& lval) { return get_literal(lval); });
-        // refs.push_back(get_literal(1)); // G
-    }
-#endif
    co.expected_inputs = to_shapes(refs);
    co.output_arg      = last;
    return m.insert_instruction(ins, co, refs);

--- a/src/targets/gpu/perfdb.cpp
+++ b/src/targets/gpu/perfdb.cpp
@@ -27,6 +27,7 @@
 #include <migraphx/stringutils.hpp>
 #include <migraphx/permutation.hpp>
 #include <fstream>
+#include <mutex>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -88,6 +89,9 @@ std::string generate_miopen_config(const problem_params& pp)
 auto query_miopen_db(const std::string& query)
 {
+    static std::mutex g_db_mutex; // NOLINT
+    const std::lock_guard<std::mutex> lock(g_db_mutex);
    // TODO: Store db as a static variable
    const auto dbpath = fs::path{"/opt"} / "rocm" / "share" / "miopen" / "db" / "miopen.db";
    // Check if db file exists.

--- a/src/targets/gpu/prefuse_ops.cpp
+++ b/src/targets/gpu/prefuse_ops.cpp
@@ -51,17 +51,20 @@ struct layernorm_base
        }
        check_shapes{inputs, static_cast<const Derived&>(*this)}.has(nargs + N);
        auto s = inputs.at(0);
+        auto t = s.type();
+        if(not mods.empty())
+            t = mods.front()->get_output_shapes().front().type();
        if(s.scalar())
        {
            return s;
        }
        else if(s.broadcasted())
        {
-            return {s.type(), s.lens()};
+            return {t, s.lens()};
        }
        else
        {
-            return s.with_lens(s.lens());
+            return s.with_lens(t, s.lens());
        }
    }
 };
@@ -97,7 +100,8 @@ struct find_add_layernorm
 {
    auto matcher() const
    {
-        return match::layernorm()(match::var("x")(match::name("add").bind("add")));
+        return match::layernorm()(
+            match::var("x")(match::name("add")(match::used_once()).bind("add")));
    }
    void apply(module& m, const match::matcher_result& r) const

--- a/src/targets/gpu/target.cpp
+++ b/src/targets/gpu/target.cpp
@@ -146,8 +146,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
        dead_code_elimination{},
        pack_int8_args{},
        dead_code_elimination{},
-        adjust_allocation{gpu_allocation_model{}},
-        dead_code_elimination{},
        fuse_ops{&ctx, options.fast_math},
        dead_code_elimination{},
        replace_allocate{gpu_allocation_model{}, options.offload_copy},

--- a/src/targets/ref/lowering.cpp
+++ b/src/targets/ref/lowering.cpp
@@ -346,10 +346,10 @@ struct ref_pad
    std::string name() const { return "ref::pad"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    argument compute(context&, const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        assert(output_shape.standard());
+        assert(dyn_out.computed_shape.standard());
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
        result.visit([&](auto output) {
            using type = typename decltype(output)::value_type;
            std::fill(output.begin(), output.end(), pad_clamp<type>(op.value));
@@ -383,9 +383,9 @@ struct ref_gemm
    std::string name() const { return "ref::dot"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    argument compute(context&, const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
        migemm(result, args[0], args[1], 1.0f, 0.0f);
        return result;
@@ -449,10 +449,10 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
    {
        return op.normalize_compute_shape(inputs);
    }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    argument compute(context&, const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
-        auto batch_lens        = output_shape.lens();
+        auto batch_lens        = dyn_out.computed_shape.lens();
        int64_t tuned_axis     = tune_axis(args[0].get_shape().lens().size(), op.axis, op.name());
        std::size_t n_dims     = batch_lens[tuned_axis];
        batch_lens[tuned_axis] = 1;
@@ -475,7 +475,7 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
                for(std::size_t j = 0; j < n_dims; ++j)
                {
                    idx[tuned_axis]   = j;
-                    std::size_t index = output_shape.index(idx);
+                    std::size_t index = dyn_out.computed_shape.index(idx);
                    output[index]     = std::exp(input[index] - batch_max[i]);
                }

--- a/test/api/test_cpu.cpp
+++ b/test/api/test_cpu.cpp
@@ -66,7 +66,7 @@ TEST_CASE(load_and_run_init_list)
 TEST_CASE(quantize_fp16)
 {
-    auto p1        = migraphx::parse_onnx("gemm_ex_test.onnx");
+    auto p1        = migraphx::parse_onnx("gemm_test.onnx");
    const auto& p2 = p1;
    const auto& p3 = p1;
    migraphx::quantize_fp16(p1);
@@ -82,7 +82,7 @@ TEST_CASE(quantize_fp16)
 TEST_CASE(quantize_int8)
 {
-    auto p1        = migraphx::parse_onnx("gemm_ex_test.onnx");
+    auto p1        = migraphx::parse_onnx("gemm_test.onnx");
    const auto& p2 = p1;
    auto t         = migraphx::target("ref");
    migraphx::quantize_int8_options options;

--- a/test/gpu/hip.cpp
+++ b/test/gpu/hip.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <test.hpp>
+#include <migraphx/argument.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/gpu/target.hpp>
+TEST_CASE(tuple_to_from_gpu) // passes two arguments through to_gpu, then from_gpu on an argument holding both, and expects the data back unchanged
+{
+    migraphx::shape s1{migraphx::shape::float_type, {2, 3}};
+    migraphx::shape s2{migraphx::shape::int32_type, {2, 4}};
+    std::vector<float> p1_data = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6}; // 6 values for the {2, 3} shape
+    std::vector<int> p2_data   = {1, 2, 3, 4, 5, 6, 7, 8}; // 8 values for the {2, 4} shape
+    auto p1                    = migraphx::argument{s1, p1_data.data()}; // constructed from the shape and the vector's data pointer
+    auto p2                    = migraphx::argument{s2, p2_data.data()};
+    auto p1_gpu                = migraphx::gpu::to_gpu(p1); // presumably a device-side copy -- confirm against migraphx/gpu/hip.hpp
+    auto p2_gpu                = migraphx::gpu::to_gpu(p2);
+    auto p_tuple               = migraphx::gpu::from_gpu(migraphx::argument({p1_gpu, p2_gpu})); // a single argument built from both to_gpu results
+    std::vector<migraphx::argument> results = p_tuple.get_sub_objects(); // indexed below as [0] and [1]
+    std::vector<float> result1;
+    results[0].visit([&](auto output) { result1.assign(output.begin(), output.end()); }); // copy the visited elements into a plain vector
+    std::vector<int> result2;
+    results[1].visit([&](auto output) { result2.assign(output.begin(), output.end()); });
+    EXPECT(result1 == p1_data); // exact equality with the original host data
+    EXPECT(result2 == p2_data);
+}
+int main(int argc, const char* argv[]) { test::run(argc, argv); } // entry point: forwards the command-line arguments to test::run
--- a/test/gpu/mlir.cpp
+++ b/test/gpu/mlir.cpp
@@ -140,7 +140,7 @@ TEST_CASE(conv)
 {
    const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
    %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
    return %0 : tensor<1x2x2x2xf32>
  }
@@ -163,7 +163,7 @@ TEST_CASE(conv_add_relu)
 {
    const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
    %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
    %1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
    %2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>

--- a/test/instruction.cpp
+++ b/test/instruction.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/instruction.hpp>
+#include <migraphx/program.hpp>
+#include <migraphx/make_op.hpp>
+#include "test.hpp"
+TEST_CASE(check_undefined) // is_undefined() is expected for an "undefined" op and the ops chained on it, but not for a literal or an op on that literal
+{
+    migraphx::module m;
+    auto und = m.add_instruction(migraphx::make_op("undefined"));
+    auto cov = m.add_instruction(
+        migraphx::make_op("convert", {{"target_type", migraphx::shape::half_type}}), und); // convert takes und as its input
+    auto abs = m.add_instruction(migraphx::make_op("abs"), cov); // abs takes cov as its input
+    migraphx::shape xs{migraphx::shape::float_type, {2, 3}};
+    std::vector<float> datax = {1, 2, 3, 4, 5, 6};
+    auto lit = m.add_literal(migraphx::literal(xs, datax));
+    auto mul = m.add_instruction(migraphx::make_op("mul"), lit, lit); // both inputs are the literal
+    EXPECT(und->is_undefined());
+    EXPECT(cov->is_undefined()); // one step downstream of und
+    EXPECT(abs->is_undefined()); // two steps downstream of und
+    EXPECT(not lit->is_undefined());
+    EXPECT(not mul->is_undefined());
+}
+int main(int argc, const char* argv[]) { test::run(argc, argv); } // entry point: forwards the command-line arguments to test::run
--- a/test/literal_test.cpp
+++ b/test/literal_test.cpp
@@ -49,6 +49,25 @@ TEST_CASE(literal_test)
    EXPECT(l4.empty());
 }
+TEST_CASE(literal_nstd_shape_vector) // builds a literal from a vector using a shape with explicit, non-default strides
+{
+    migraphx::shape nstd_shape{migraphx::shape::float_type, {1, 3, 2, 2}, {12, 1, 6, 3}}; // lens {1, 3, 2, 2}; the second list is presumably the strides (the expected buffer below matches that reading)
+    std::vector<float> data(12);
+    std::iota(data.begin(), data.end(), 0); // data = 0, 1, ..., 11
+    auto l0 = migraphx::literal{nstd_shape, data};
+    // check data buffer is read in correctly
+    std::vector<float> expected_buffer = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11};
+    const auto* start                  = reinterpret_cast<const float*>(l0.data()); // read the literal's buffer as floats
+    std::vector<float> l0_data{start, start + 12}; // first 12 floats of that buffer
+    EXPECT(l0_data == expected_buffer);
+    // check that using visit() (that uses a tensor view) gives data in correct order
+    std::vector<float> results_vector(12);
+    l0.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
+    EXPECT(results_vector == data); // iterating the view yields the original input order
+}
 TEST_CASE(literal_os1)
 {
    migraphx::literal l{1};

--- a/test/onnx/.onnxrt-commit
+++ b/test/onnx/.onnxrt-commit
+c9a53c925510a101f5ca94d5ecda0924e40a8463
--- a/test/onnx/argmax_dyn_test.onnx
+++ b/test/onnx/argmax_dyn_test.onnx
--- a/test/onnx/averagepool_dyn_asym_padding_error_test.onnx
+++ b/test/onnx/averagepool_dyn_asym_padding_error_test.onnx
--- a/test/onnx/averagepool_dyn_autopad_error_test.onnx
+++ b/test/onnx/averagepool_dyn_autopad_error_test.onnx
--- a/test/onnx/averagepool_dyn_cip_error_test.onnx
+++ b/test/onnx/averagepool_dyn_cip_error_test.onnx
--- a/test/onnx/averagepool_dyn_test.onnx
+++ b/test/onnx/averagepool_dyn_test.onnx
--- a/test/onnx/conv_dynamic_bias_test.onnx
+++ b/test/onnx/conv_dynamic_bias_test.onnx
--- a/test/onnx/external_constant_test.onnx
+++ b/test/onnx/external_constant_test.onnx
+external_constant_test:¡
+v0"Constant*g
+value*[Bconst_tensorj)
+locationexternal_constant_test.weightj
+offset48j
+length24p external_constant_testb
+0
+B
\ No newline at end of file
--- a/test/onnx/external_constant_test.weight
+++ b/test/onnx/external_constant_test.weight
--- a/test/onnx/flatten_dyn_test.onnx
+++ b/test/onnx/flatten_dyn_test.onnx