Enable threading in MLIR (#1899)

This commit removes the build options to disable threading and removes the mutex in compile_mlir. The commit being tested is a draft PR on rocMLIR that'll get merged if this passes

Enable threading in MLIR (#1899)
This commit removes the build options to disable threading and removes the mutex in compile_mlir. The commit being tested is a draft PR on rocMLIR that'll get merged if this passes
5f5356cc · Krzysztof Drewniak · GitHub · 38a62ed2 · 5f5356cc · 5f5356cc
Unverified Commit 5f5356cc authored Jul 17, 2023 by Krzysztof Drewniak Committed by GitHub Jul 17, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 31 additions and 11 deletions

Dockerfile Dockerfile +2 -1

Jenkinsfile Jenkinsfile +4 -2

src/targets/gpu/mlir.cpp src/targets/gpu/mlir.cpp +25 -8

No files found.
--- a/Dockerfile
+++ b/Dockerfile
@@ -113,7 +113,8 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR

 ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh

-RUN env PATH=/opt/cmake/bin:$PATH cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@13f81a4e0b557cedcd0bc0898290ba32bb09356b -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
+# Use the /opt/cmake install because LLVM/MLIR need cmake >= 3.20
+RUN env PATH=/opt/cmake/bin:$PATH cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@1ad9d6df32acc6d29d58e8ed6710e36746d0a4d6 -DBUILD_FAT_LIBROCKCOMPILER=On

 ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
 ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db

--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -132,8 +132,10 @@ rocmtest clang_debug: rocmnode('vega') { cmake_build ->
    stage('MLIR Debug') {
        withEnv(['MIGRAPHX_ENABLE_MLIR=1']) {
            def sanitizers = "undefined"
-            def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
-            cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
+            // Note: the -fno-sanitize= is copied from upstream LLVM_UBSAN_FLAGS.
+            def debug_flags_cxx = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr,function -fno-sanitize-recover=${sanitizers}"
+            def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr -fno-sanitize-recover=${sanitizers}"
+            cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags_cxx}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
        }
    }
 }, ck_release: rocmnode('mi100+') { cmake_build ->

--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -389,14 +389,20 @@ struct mlir_program
        mlir_operation_state& add_attributes(const std::vector<named_attribute_t>& named_attrs)
        {
            auto attributes = prog->name_attributes(named_attrs);
-            mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            if(not attributes.empty())
+            {
+                mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            }
            return *this;
        }

        mlir_operation_state& add_attribute_value(const value& v)
        {
            auto attributes = prog->name_attributes(v);
-            mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            if(not attributes.empty())
+            {
+                mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            }
            return *this;
        }

@@ -419,13 +425,19 @@ struct mlir_program
                return shape{r.type(), r.lens()};
            });
            auto x = prog->make_tensors(reshaped);
-            mlirOperationStateAddResults(&op_state, x.size(), x.data());
+            if(not x.empty())
+            {
+                mlirOperationStateAddResults(&op_state, x.size(), x.data());
+            }
            return *this;
        }

        mlir_operation_state& add_operands(const std::vector<MlirValue>& inputs)
        {
-            mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data());
+            if(not inputs.empty())
+            {
+                mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data());
+            }
            return *this;
        }

@@ -435,7 +447,10 @@ struct mlir_program
            std::transform(regions.begin(), regions.end(), mregions.begin(), [](const auto& r) {
                return r.get();
            });
-            mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data());
+            if(not mregions.empty())
+            {
+                mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data());
+            }
            mlir_operation op(mlirOperationCreate(&op_state));
            // Release memory since mlir_operation owns it
            for(auto& r : regions)
@@ -701,6 +716,11 @@ struct mlir_program
    bool get_module_tuned() const
    {
        static mlir_tuning_table tuning_table = create_tuning_table();
+        // The tuning table as currently implemented is currently not
+        // thread safe. This will be fixed in the future. For now,
+        // stick a mutex around all tuning table interaction.
+        static std::mutex lock;
+        std::lock_guard<std::mutex> guard(lock);
        if(!mlirRockTuningSetFromTable(tuning_table.get(), mmodule.get()))
        {
            const char* prob_config = mlirRockTuningGetKey(tuning_table.get(), mmodule.get());
@@ -778,9 +798,6 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
 {
    adjust_param_shapes(m, inputs);
    const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
-    // set mutex while llvm thread support is disabled.
-    static std::mutex g_mlirc_mutex; // NOLINT
-    const std::lock_guard<std::mutex> lock(g_mlirc_mutex);

    if(trace)
        std::cout << m << std::endl;