Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into nhwc_workaround

cb10ae76 · Khalique Ahmed · 498e6c9d · 75e6618c · cb10ae76 · cb10ae76
Commit cb10ae76 authored Jul 18, 2023 by Khalique Ahmed
20 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,6 +43,8 @@ else()
    endif()
 endif()

+set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "")
+
 set(CMAKE_BUILD_RPATH "${CMAKE_BINARY_DIR}/lib")

 project(migraphx LANGUAGES C CXX)
@@ -114,6 +116,7 @@ rocm_enable_clang_tidy(
        llvm-namespace-comment
        misc-*
 	-misc-confusable-identifiers
+        -misc-use-anonymous-namespace
        modernize-*
        performance-*
        readability-*

--- a/Dockerfile
+++ b/Dockerfile
@@ -7,10 +7,10 @@ RUN dpkg --add-architecture i386

 # Install rocm key
 RUN apt-get update && apt-get install -y gnupg2 --no-install-recommends curl && \
-    curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - 
+    curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -

 # Add rocm repository
-RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.5/ focal main > /etc/apt/sources.list.d/rocm.list'
+RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.6/ focal main > /etc/apt/sources.list.d/rocm.list'

 # From docs.amd.com for installing rocm. Needed to install properly
 RUN sh -c "echo 'Package: *\nPin: release o=repo.radeon.com\nPin-priority: 600' > /etc/apt/preferences.d/rocm-pin-600"
@@ -113,7 +113,8 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR

 ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh

-RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@8d25af3b3721c159bb41cc6388e9453b1018c126 -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
+# Use the /opt/cmake install because LLVM/MLIR need cmake >= 3.20
+RUN env PATH=/opt/cmake/bin:$PATH cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@1ad9d6df32acc6d29d58e8ed6710e36746d0a4d6 -DBUILD_FAT_LIBROCKCOMPILER=On

 ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
 ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db

--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -91,7 +91,7 @@ def rocmnodename(name) {
        node_name = "${rocmtest_name} && navi21";
    } else if(name == "mi100+") {
        node_name = "${rocmtest_name} && (gfx908 || gfx90a)";
-    } else if(name == "anygpu") {
+    } else if(name == "cdna") {
        node_name = "${rocmtest_name} && (gfx908 || gfx90a || vega)";
    } else if(name == "nogpu") {
        node_name = "${rocmtest_name} && nogpu";
@@ -105,35 +105,29 @@ def rocmnode(name, body) {
    }
 }

-rocmtest clang_debug: rocmnode('vega') { cmake_build ->
-    stage('Hip Clang Debug') {
+rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
+    stage('hipRTC Debug') {
        def sanitizers = "undefined"
        def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
-        cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
+        cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DMIGRAPHX_USE_HIPRTC=On", gpu_debug: true, hiprtc_workarounds: true)
    }
-}, clang_gpu_debug: rocmnode('vega') { cmake_build ->
-    stage('Hip Clang GPU Debug') {
-        cmake_build(flags: "-DCMAKE_BUILD_TYPE=release", gpu_debug: true)
-    }
-}, clang_release: rocmnode('vega') { cmake_build ->
+}, clang_release: rocmnode('cdna') { cmake_build ->
    stage('Hip Clang Release') {
        cmake_build(flags: "-DCMAKE_BUILD_TYPE=release")
        stash includes: 'build/*.deb', name: 'migraphx-package'
    }
-}, hiprtc_gpu_debug: rocmnode('vega') { cmake_build ->
-    stage('HipRTC GPU Debug') {
-        cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_USE_HIPRTC=On", gpu_debug: true, hiprtc_workarounds: true)
-    }
-}, all_targets_debug : rocmnode('vega') { cmake_build ->
+}, all_targets_debug : rocmnode('cdna') { cmake_build ->
    stage('All targets Release') {
        cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DMIGRAPHX_ENABLE_FPGA=On") 
    }
-}, mlir_debug: rocmnode('vega') { cmake_build ->
+}, mlir_debug: rocmnode('cdna') { cmake_build ->
    stage('MLIR Debug') {
        withEnv(['MIGRAPHX_ENABLE_MLIR=1']) {
            def sanitizers = "undefined"
-            def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
-            cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
+            // Note: the -fno-sanitize= is copied from upstream LLVM_UBSAN_FLAGS.
+            def debug_flags_cxx = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr,function -fno-sanitize-recover=${sanitizers}"
+            def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr -fno-sanitize-recover=${sanitizers}"
+            cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags_cxx}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
        }
    }
 }, ck_release: rocmnode('mi100+') { cmake_build ->
@@ -163,7 +157,7 @@ def onnxnode(name, body) {
    }
 }

-rocmtest onnx: onnxnode('anygpu') { cmake_build ->
+rocmtest onnx: onnxnode('cdna') { cmake_build ->
    stage("Onnx runtime") {
        sh '''
            apt install half

--- a/docs/dev_intro.rst
+++ b/docs/dev_intro.rst
@@ -131,7 +131,7 @@ In this case, we can create `argument <migraphx::argument>` objects directly fro
    std::vector<float> results_vector(64);
    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });

-    EXPECT(migraphx::verify_range(results_vector, sol));
+    EXPECT(migraphx::verify::verify_range(results_vector, sol));

 An `argument <migraphx::argument>` can handle memory buffers from either the GPU or the CPU.
 By default when running the `program <migraphx::program>`, buffers are allocated on the corresponding target.

--- a/hip-clang.docker
+++ b/hip-clang.docker
@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
 RUN dpkg --add-architecture i386

 # Add rocm repository
-RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.5/ focal main > /etc/apt/sources.list.d/rocm.list'
+RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.6/ focal main > /etc/apt/sources.list.d/rocm.list'

 # Install dependencies
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \

--- a/src/api/include/migraphx/migraphx.hpp
+++ b/src/api/include/migraphx/migraphx.hpp
@@ -1487,13 +1487,17 @@ quantize_int8(const program& prog, const target& ptarget, const quantize_int8_op

 struct experimental_custom_op_base
 {
+    experimental_custom_op_base()                                   = default;
+    experimental_custom_op_base(const experimental_custom_op_base&) = default;
+    experimental_custom_op_base& operator=(const experimental_custom_op_base&) = default;
+    virtual ~experimental_custom_op_base()                                     = default;
+
    virtual std::string name() const                                            = 0;
    virtual argument compute(context ctx, shape output, arguments inputs) const = 0;
    virtual shape compute_shape(shapes inputs) const                            = 0;
    virtual std::vector<size_t> output_alias(shapes) const { return {}; }
    // TODO: Return target string instead of bool
    virtual bool runs_on_offload_target() const = 0;
-    virtual ~experimental_custom_op_base()      = default;
 };

 struct experimental_custom_op : interface_base<MIGRAPHX_HANDLE_BASE(experimental_custom_op)>

--- a/src/include/migraphx/normalize_attributes.hpp
+++ b/src/include/migraphx/normalize_attributes.hpp
@@ -43,7 +43,7 @@ template <class T, class... Ts>
 using dependent_type = typename select_dependent_type<T, Ts...>::type;

 MIGRAPHX_EXPORT
-bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens);
+bool normalize_attributes(operation& op, const shape& input_shape);

 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/include/migraphx/operation.hpp
+++ b/src/include/migraphx/operation.hpp
@@ -143,7 +143,7 @@ auto compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
    if(inputs.empty())
        MIGRAPHX_THROW("At least one input is required for " + x.name());
    dependent_type<operation, T> y = x;
-    normalize_attributes(y, inputs[0].max_lens());
+    normalize_attributes(y, inputs[0]);
    return any_cast<T>(y).normalize_compute_shape(inputs);
 }


--- a/src/include/migraphx/verify.hpp
+++ b/src/include/migraphx/verify.hpp
@@ -35,6 +35,7 @@

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
+namespace verify {

 // Compute the value of a range
 template <class R>
@@ -196,6 +197,7 @@ bool verify_range(const R1& r1, const R2& r2, double tolerance = 80, double* out
    return error <= threshold;
 }

+} // namespace verify
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
 #endif
--- a/src/instruction.cpp
+++ b/src/instruction.cpp
@@ -467,7 +467,7 @@ operation instruction::normalized_operator() const
    if(this->need_normalization())
    {
        auto s = this->inputs().front()->get_shape();
-        if(not normalize_attributes(o, s.max_lens()))
+        if(not normalize_attributes(o, s))
            return this->get_operator();
    }
    return o;

--- a/src/normalize_attributes.cpp
+++ b/src/normalize_attributes.cpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -35,8 +35,9 @@ inline namespace MIGRAPHX_INLINE_NS {
 * vec: the vector attribute to normalize
 * axes: the operator's axes attribute if it exists, empty otherwise
 * val: the normalize_axes key and options. Ex: normalize["axes"] =
- * value::array{normalize_attribute::include_min}; lens: shape dimensions passed when calling
- * normalize_attributes(op&, lens)
+ * value::array{normalize_attribute::include_min};
+ * input_shape: input shape passed when calling
+ * normalize_attributes(op&, input_shape)
 *
 * See normalize_attribute.hpp for explaining the options.
 */
@@ -44,11 +45,11 @@ template <class Message>
 auto tune_attribute(const std::vector<int64_t>& vec,
                    const std::vector<int64_t>& axes,
                    const value& val,
-                    const std::vector<std::size_t>& lens,
+                    const shape& input_shape,
                    Message m)
 {
    std::vector<int64_t> result(vec);
-    int64_t n_rank                                 = lens.size();
+    int64_t n_rank                                 = input_shape.ndim();
    std::vector<op::normalize_attribute> vec_attrs = val.to_vector<op::normalize_attribute>();
    if(contains(vec_attrs, op::normalize_attribute::use_output))
    {
@@ -56,9 +57,28 @@ auto tune_attribute(const std::vector<int64_t>& vec,
    }

    std::vector<int64_t> max_vals(vec.size(), n_rank);
+
    if(contains(vec_attrs, op::normalize_attribute::use_len))
    {
-        std::transform(axes.begin(), axes.end(), max_vals.begin(), [&](auto i) { return lens[i]; });
+        if(input_shape.dynamic())
+        {
+            std::transform(axes.begin(), axes.end(), max_vals.begin(), [&](auto i) {
+                const auto& dd = input_shape.dyn_dims().at(i);
+                if(not dd.is_fixed())
+                {
+                    MIGRAPHX_THROW(
+                        "NORMALIZE_ATTR: 'use_lens' on a non-fixed dynamic dimension, axis=" +
+                        std::to_string(i));
+                }
+                return dd.max;
+            });
+        }
+        else
+        {
+            std::transform(axes.begin(), axes.end(), max_vals.begin(), [&](auto i) {
+                return input_shape.lens().at(i);
+            });
+        }
    }

    if(contains(vec_attrs, op::normalize_attribute::clip_max))
@@ -159,9 +179,9 @@ auto tune_pad_attribute(const value& val)
 /**
 * Assumptions:
 *  Dimensions to pad start from the third dimension (index 2).
- *  Called by compute_shape_op() with the `lens` of the first input.
+ *  Called by compute_shape_op() with the shape of the first input.
 */
-bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens)
+bool normalize_attributes(operation& op, const shape& input_shape)
 {
    bool tuned = false;
    auto attrs = op.attributes();
@@ -172,9 +192,9 @@ bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens)
        auto padding_size  = padding.size();
        auto padding_start = 2;

-        if(padding_size == 2 * (lens.size() - padding_start))
+        if(padding_size == 2 * (input_shape.ndim() - padding_start))
            tuned = true;
-        else if(padding_size != (lens.size() - padding_start))
+        else if(padding_size != (input_shape.ndim() - padding_start))
            MIGRAPHX_THROW("inconsistent padding size");
        else
        {
@@ -205,7 +225,7 @@ bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens)
                    axes = val.at("axes").without_key().to_vector<int64_t>();
                }
                auto vec    = vv.to_vector<int64_t>();
-                auto result = tune_attribute(vec, axes, rv.without_key(), lens, message);
+                auto result = tune_attribute(vec, axes, rv.without_key(), input_shape, message);
                val[key]    = result;
                op.from_value(val);
                val   = op.to_value();
@@ -214,7 +234,7 @@ bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens)
            else
            {
                auto num    = vv.to<int64_t>();
-                auto result = tune_attribute({num}, {num}, rv.without_key(), lens, message);
+                auto result = tune_attribute({num}, {num}, rv.without_key(), input_shape, message);
                val[key]    = result.front();
                op.from_value(val);
                val   = op.to_value();

--- a/src/normalize_ops.cpp
+++ b/src/normalize_ops.cpp
@@ -45,7 +45,7 @@ void normalize_ops::apply(module& m) const

        auto s                       = inputs[0]->get_shape();
        migraphx::operation tuned_op = ins->get_operator();
-        if(normalize_attributes(tuned_op, s.max_lens()))
+        if(normalize_attributes(tuned_op, s))
        {
            m.replace_instruction(ins, tuned_op, inputs);
            ins->set_normalized();

--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -188,7 +188,9 @@ if(MIGRAPHX_ENABLE_MLIR)
    find_package(rocMLIR 1.0.0 CONFIG REQUIRED)
    message(STATUS "Build with rocMLIR::rockCompiler ${rocMLIR_VERSION}")
    target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR")
-    target_link_libraries(migraphx_gpu PUBLIC rocMLIR::rockCompiler)
+    # Make this private to avoid multiple inclusions of LLVM symbols.
+    # TODO: Fix rocMLIR's library to hide LLVM internals.
+    target_link_libraries(migraphx_gpu PRIVATE rocMLIR::rockCompiler)
 endif()

 if(MIGRAPHX_USE_HIPRTC)

--- a/src/targets/gpu/include/migraphx/gpu/contiguous.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/contiguous.hpp
@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(2);
-        if(inputs.front().standard())
-            return inputs.front();
        auto lens = inputs.at(0).lens();
        auto t    = inputs.at(0).type();
        return {t, lens};

--- a/src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/debug.hpp
@@ -122,12 +122,14 @@ struct source_location_capture
 {
    T x;
    source_location loc;
-    template <class U, class = decltype(T(U{}))>
+    // declval is a workaround since default-constructing "U" does not work with rocm-5.6
+    template <class U>
+    static U&& declval();
+    template <class U, class = decltype(T(declval<U>()))>
    constexpr source_location_capture(U px, source_location ploc = source_location{})
        : x(px), loc(ploc)
    {
    }
-
    constexpr operator source_location() const { return loc; }

    constexpr operator T() const { return x; }

--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -389,14 +389,20 @@ struct mlir_program
        mlir_operation_state& add_attributes(const std::vector<named_attribute_t>& named_attrs)
        {
            auto attributes = prog->name_attributes(named_attrs);
-            mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            if(not attributes.empty())
+            {
+                mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            }
            return *this;
        }

        mlir_operation_state& add_attribute_value(const value& v)
        {
            auto attributes = prog->name_attributes(v);
-            mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            if(not attributes.empty())
+            {
+                mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+            }
            return *this;
        }

@@ -419,13 +425,19 @@ struct mlir_program
                return shape{r.type(), r.lens()};
            });
            auto x = prog->make_tensors(reshaped);
-            mlirOperationStateAddResults(&op_state, x.size(), x.data());
+            if(not x.empty())
+            {
+                mlirOperationStateAddResults(&op_state, x.size(), x.data());
+            }
            return *this;
        }

        mlir_operation_state& add_operands(const std::vector<MlirValue>& inputs)
        {
-            mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data());
+            if(not inputs.empty())
+            {
+                mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data());
+            }
            return *this;
        }

@@ -435,7 +447,10 @@ struct mlir_program
            std::transform(regions.begin(), regions.end(), mregions.begin(), [](const auto& r) {
                return r.get();
            });
-            mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data());
+            if(not mregions.empty())
+            {
+                mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data());
+            }
            mlir_operation op(mlirOperationCreate(&op_state));
            // Release memory since mlir_operation owns it
            for(auto& r : regions)
@@ -607,12 +622,12 @@ struct mlir_program
        mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())};
        // 1st pipeline to call
        mlirMIGraphXAddHighLevelPipeline(pm_front.get());
-        mlirPassManagerRun(pm_front.get(), mmodule.get());
+        mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get()));

        // 2nd pipeline to call
        get_module_tuned();
        mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str());
-        mlirPassManagerRun(pm_back.get(), mmodule.get());
+        mlirPassManagerRunOnOp(pm_back.get(), mlirModuleGetOperation(mmodule.get()));

        code_object_op op{};
        op.symbol_name                = sym_name;
@@ -701,6 +716,11 @@ struct mlir_program
    bool get_module_tuned() const
    {
        static mlir_tuning_table tuning_table = create_tuning_table();
+        // The tuning table, as currently implemented, is not thread
+        // safe. This will be fixed in the future. For now, hold a
+        // mutex around all tuning table interaction.
+        static std::mutex lock;
+        std::lock_guard<std::mutex> guard(lock);
        if(!mlirRockTuningSetFromTable(tuning_table.get(), mmodule.get()))
        {
            const char* prob_config = mlirRockTuningGetKey(tuning_table.get(), mmodule.get());
@@ -778,9 +798,6 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
 {
    adjust_param_shapes(m, inputs);
    const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
-    // set mutex while llvm thread support is disabled.
-    static std::mutex g_mlirc_mutex; // NOLINT
-    const std::lock_guard<std::mutex> lock(g_mlirc_mutex);

    if(trace)
        std::cout << m << std::endl;

--- a/src/verify_args.cpp
+++ b/src/verify_args.cpp
@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
    bool passed = true;
    visit_all(ref_arg, target_arg)([&](auto ref, auto target) {
        double error;
-        passed = verify_range(ref, target, tolerance, &error);
+        passed = verify::verify_range(ref, target, tolerance, &error);
        if(not passed)
        {
            // TODO: Check for nans
@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
                std::cout << "ref:" << ref << std::endl;
            if(target.size() < 32)
                std::cout << "target:" << target << std::endl;
-            if(range_zero(ref))
+            if(verify::range_zero(ref))
                std::cout << "Ref data is all zeros" << std::endl;
-            if(range_zero(target))
+            if(verify::range_zero(target))
                std::cout << "Target data is all zeros" << std::endl;

-            auto mxdiff = max_diff(ref, target);
+            auto mxdiff = verify::max_diff(ref, target);
            std::cout << "Max diff: " << mxdiff << std::endl;

-            auto idx = mismatch_idx(ref, target, float_equal);
-            if(idx < range_distance(ref))
+            auto idx = verify::mismatch_idx(ref, target, float_equal);
+            if(idx < verify::range_distance(ref))
            {
                std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
                          << std::endl;
            }

-            auto ref_nan_idx = find_idx(ref, not_finite);
+            auto ref_nan_idx = find_idx(ref, verify::not_finite);
            if(ref_nan_idx >= 0)
                std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
                          << ref[ref_nan_idx] << std::endl;

-            auto target_nan_idx = find_idx(target, not_finite);
+            auto target_nan_idx = find_idx(target, verify::not_finite);
            if(target_nan_idx >= 0)
                std::cout << "Non finite number found in target at " << target_nan_idx << ": "
                          << target[target_nan_idx] << std::endl;
@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
        }
        else
        {
-            if(range_zero(ref))
+            if(verify::range_zero(ref))
                std::cout << "Ref data is all zeros" << std::endl;
-            if(range_zero(target))
+            if(verify::range_zero(target))
                std::cout << "Target data is all zeros" << std::endl;

            // auto mxdiff = max_diff(ref, target);
            // std::cout << "Max diff: " << mxdiff << std::endl;

            // auto idx = mismatch_idx(ref, target, float_equal);
-            // if(idx < range_distance(ref))
+            // if(idx < verify::range_distance(ref))
            // {
            //     std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
            //               << std::endl;
            // }

-            auto ref_nan_idx = find_idx(ref, not_finite);
+            auto ref_nan_idx = find_idx(ref, verify::not_finite);
            if(ref_nan_idx >= 0)
                std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
                          << ref[ref_nan_idx] << std::endl;

-            auto target_nan_idx = find_idx(target, not_finite);
+            auto target_nan_idx = find_idx(target, verify::not_finite);
            if(target_nan_idx >= 0)
                std::cout << "Non finite number found in target at " << target_nan_idx << ": "
                          << target[target_nan_idx] << std::endl;

--- a/test/gpu/codegen_literal.cpp
+++ b/test/gpu/codegen_literal.cpp
@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test)
    migraphx::target gpu_t = migraphx::make_target("gpu");
    run_prog(p, gpu_t, m, gpu_result);

-    EXPECT(migraphx::verify_range(ref_result, gpu_result));
+    EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
 }

 int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/gpu/manage_host_buffer.cpp
+++ b/test/gpu/manage_host_buffer.cpp
@@ -64,7 +64,7 @@ TEST_CASE(host_same_buffer_copy)
    auto result = p.eval(pp).back();
    std::vector<float> results_vector(ss.elements(), -1);
    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
-    EXPECT(migraphx::verify_range(c_vec, results_vector));
+    EXPECT(migraphx::verify::verify_range(c_vec, results_vector));
 }

 TEST_CASE(arguments_lifetime)

--- a/test/gpu/quantization.cpp
+++ b/test/gpu/quantization.cpp
@@ -52,7 +52,7 @@ TEST_CASE(gpu_target_copy)
    std::vector<int8_t> val_final;
    ref_arg_final.visit([&](auto v) { val_final.assign(v.begin(), v.end()); });

-    EXPECT(migraphx::verify_range(val_orig, val_final));
+    EXPECT(migraphx::verify::verify_range(val_orig, val_final));
 }

 TEST_CASE(int8_quantization)
@@ -118,9 +118,9 @@ TEST_CASE(int8_quantization)
        // the regular pipeline uses the rewrite_quantization in the much
        // earlier stage.
        if(migraphx::gpu::mlir_enabled())
-            EXPECT(migraphx::verify_range(ref_result, gpu_result, 1e5));
+            EXPECT(migraphx::verify::verify_range(ref_result, gpu_result, 1e5));
        else
-            EXPECT(migraphx::verify_range(ref_result, gpu_result));
+            EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
    }
 }