Merge remote-tracking branch 'origin/develop' into rewrite-fast-gelu

b878f78f · turneram · 3b414cc2 · 55cb7d3a · b878f78f · b878f78f
Commit b878f78f authored Aug 12, 2022 by turneram
20 changed files
--- a/src/targets/gpu/include/migraphx/gpu/device_name.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device_name.hpp
@@ -33,6 +33,8 @@ namespace gpu {
 std::string get_device_name();
+int get_device_id();
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
@@ -24,6 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
 #define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
+#include <migraphx/argument.hpp>
 #include <migraphx/op/quant_dot.hpp>
 #include <migraphx/config.hpp>
 #include <utility>

--- a/src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
@@ -24,6 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
 #define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
+#include <migraphx/argument.hpp>
 #include <migraphx/op/quant_dot.hpp>
 #include <migraphx/config.hpp>
 #include <utility>

--- a/src/targets/gpu/include/migraphx/gpu/perfdb.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/perfdb.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_GPU_PERFDB_HPP
+#define MIGRAPHX_GUARD_GPU_PERFDB_HPP
+#include <migraphx/config.hpp>
+#include <migraphx/shape.hpp>
+#include <migraphx/operation.hpp>
+#include <string>
+#include <vector>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+struct problem_params
+{
+    operation op;
+    std::vector<shape> inputs;
+    shape output;
+};
+std::string get_mlir_perf_for_conv(const problem_params& pp);
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif // MIGRAPHX_GUARD_GPU_PERFDB_HPP
--- a/src/targets/gpu/jit/pointwise.cpp
+++ b/src/targets/gpu/jit/pointwise.cpp
@@ -138,6 +138,8 @@ struct pointwise_compiler : compiler<pointwise_compiler>
            g.add_point_op("less", "migraphx::abs(${0} < ${1})");
            g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
            g.add_point_op("not", "migraphx::abs(not ${0})");
+            g.add_point_op("mod", "migraphx::mod(${0}, ${1})");
+            g.add_point_op("fmod", "migraphx::fmod(${0}, ${1})");
            // Add explict conversions
            g.fresult([](const shape& s) {
                return "migraphx::convert<" + shape::cpp_type(s.type()) + ">";

--- a/src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
@@ -27,6 +27,7 @@
 #include <migraphx/kernels/hip.hpp>
 #include <migraphx/kernels/types.hpp>
 #include <migraphx/kernels/integral_constant.hpp>
+#include <migraphx/kernels/type_traits.hpp>
 namespace migraphx {
@@ -53,29 +54,51 @@ struct index
        return blockDim.x; // NOLINT
    }
 #endif
+    template <class N, class Stride>
+    static constexpr auto max_stride_iterations(N n, Stride stride)
+    {
+        return (n - _c<1>) / stride + _c<1>;
+    }
-    template <class F>
+    template <class F, class N, class Stride>
-    __device__ void global_stride(index_int n, F f) const
+    static constexpr void for_stride(index_int start, N n, Stride stride, F f)
    {
-        const auto stride = nglobal();
+        if constexpr(not is_integral<N>{} and not is_integral<Stride>{} and
-        for(index_int i = global; i < n; i += stride)
+                     max_stride_iterations(n, stride) == 1)
        {
-            f(i);
+            if constexpr(stride > n)
+            {
+                if(start < n)
+                    f(start);
+            }
+            else
+            {
+                f(start);
+            }
+        }
+        else
+        {
+            for(index_int i = start; i < n; i += stride)
+            {
+                f(i);
+            }
        }
    }
-    template <class F>
+    template <class F, class N>
-    __device__ void local_stride(index_int n, F f) const
+    __device__ void global_stride(N n, F f) const
    {
-        const auto stride = nlocal();
+        for_stride(global, n, nglobal(), f);
-        for(index_int i = local; i < n; i += stride)
+    }
-        {
-            f(i);
+    template <class F, class N>
-        }
+    __device__ void local_stride(N n, F f) const
+    {
+        for_stride(local, n, nlocal(), f);
    }
 };
-inline __device__ index make_index()
+inline __device__ __attribute__((const)) index make_index()
 {
    return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
 }

--- a/src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp
@@ -186,7 +186,8 @@ __device__ auto auto_preload(index idx)
 {
    return make_transform([=](auto f, auto... xs) {
        auto invoke = [=](auto... ys) {
-            __syncthreads();
+            if constexpr((Bs or ...))
+                __syncthreads();
            f(ys...);
        };
        join(invoke, preload_copy<Bs>(idx, xs)...);

--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -44,9 +44,14 @@
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/device_name.hpp>
 #include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/perfdb.hpp>
 #include <deque>
 #include <variant>
+#if defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) && MLIR_MIGRAPHX_DIALECT_API_VERSION >= 2
+#define MIGRAPHX_MLIR_BARE_POINTER
+#endif
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
@@ -145,6 +150,12 @@ std::string mlir_print(F f, T x)
    return ss.str();
 }
+const std::unordered_set<std::string>& get_xdlops_archs()
+{
+    static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
+    return supported_archs;
+}
 struct mlir_program
 {
    mlir_program()
@@ -487,6 +498,17 @@ struct mlir_program
            ops.add_attribute_value(get_operator_value(ins->get_operator()));
            if(ins->name() != "@return")
                ops.add_results({get_shape(ins)});
+            if(ins->name() == "convolution")
+            {
+                pp =
+                    problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
+                std::string tuned = get_tune_params();
+                if(!tuned.empty())
+                    ops.add_attributes({{"perf_config", tuned}});
+                // check if HW supports xdlops
+                if(contains(get_xdlops_archs(), target_name))
+                    ops.add_attributes({{"xdlopsV2", true}});
+            }
            std::vector<MlirValue> inputs;
            transform(
@@ -508,14 +530,7 @@ struct mlir_program
        // 1st pipeline to call
        mlirMIGraphXAddHighLevelPipeline(pm.get());
        // 2nd pipeline to call
-        std::string tname = get_device_name();
+        mlirMIGraphXAddBackendPipeline(pm.get(), target_name.c_str(), "amdgcn-amd-amdhsa", "");
-        // HACK: Since MLIR can't handle the full target name
-        auto hacked_tname = tname.substr(0, tname.find(':'));
-        if(tname.size() != hacked_tname.size())
-            std::cout
-                << "*************** WARNING: MLIR may not compile the correct target features for: "
-                << tname << std::endl;
-        mlirMIGraphXAddBackendPipeline(pm.get(), hacked_tname.c_str(), "amdgcn-amd-amdhsa", "");
        mlirPassManagerRun(pm.get(), mmodule.get());
        code_object_op op{};
@@ -525,6 +540,17 @@ struct mlir_program
        return op;
    }
+    void find_target()
+    {
+        std::string tname = get_device_name();
+        // HACK: Since MLIR can't handle the full target name
+        target_name = trim(split_string(tname, ':').front());
+        if(tname.size() != target_name.size())
+            std::cout
+                << "*************** WARNING: MLIR may not compile the correct target features for: "
+                << tname << std::endl;
+    }
    std::pair<std::size_t, std::size_t> get_launch_params() const
    {
        uint32_t attrs[2];
@@ -545,10 +571,14 @@ struct mlir_program
        MIGRAPHX_THROW("Failed to compile mlir program");
    }
+    std::string get_tune_params() { return get_mlir_perf_for_conv(pp); }
    mlir_context ctx;
    MlirLocation location;
    mlir_module mmodule;
+    problem_params pp;
    std::deque<std::string> strings{};
+    std::string target_name;
 };
 std::string dump_mlir(const module& m)
@@ -565,6 +595,7 @@ code_object_op compile_mlir(const context&, const module& m)
    if(trace)
        std::cout << m << std::endl;
    mlir_program mp;
+    mp.find_target();
    mp.parse(m);
    auto mod_op = mlirModuleGetOperation(mp.mmodule.get());
    if(trace)
@@ -579,9 +610,15 @@ instruction_ref insert_mlir(module& m,
                            code_object_op co,
                            const std::vector<instruction_ref>& inputs)
 {
    std::vector<instruction_ref> refs;
+    std::size_t last = 0;
+#ifdef MIGRAPHX_MLIR_BARE_POINTER
+    refs.reserve(inputs.size());
+    std::copy(inputs.begin(), inputs.end(), std::back_inserter(refs));
+    last = refs.size() - 1;
+#else
    refs.reserve(inputs.size() * 15);
    std::unordered_map<uint64_t, instruction_ref> literal_map{};
    auto get_literal = [&](uint64_t value) {
        auto fi = literal_map.find(value);
@@ -592,7 +629,6 @@ instruction_ref insert_mlir(module& m,
        return lit;
    };
-    std::size_t last = 0;
    for(auto input : inputs)
    {
        const size_t offset = 0;
@@ -616,6 +652,7 @@ instruction_ref insert_mlir(module& m,
                       [&](const auto& lval) { return get_literal(lval); });
        // refs.push_back(get_literal(1)); // G
    }
+#endif
    co.expected_inputs = to_shapes(refs);
    co.output_arg      = last;
    return m.insert_instruction(ins, co, refs);

--- a/src/targets/gpu/perfdb.cpp
+++ b/src/targets/gpu/perfdb.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/gpu/perfdb.hpp>
+#include <migraphx/value.hpp>
+#include <migraphx/sqlite.hpp>
+#include <migraphx/stringutils.hpp>
+#include <migraphx/permutation.hpp>
+#include <fstream>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace {
+std::string get_layout(const shape& s, std::string labels)
+{
+    auto result = labels;
+    auto p      = find_permutation(s);
+    std::transform(p.begin(), p.end(), result.begin(), [&](auto i) { return labels[i]; });
+    return "'" + result + "'";
+}
+std::string get_type(const shape& s)
+{
+    static const std::unordered_map<shape::type_t, std::string> m = {
+        {shape::float_type, "'FP32'"},
+        {shape::half_type, "'FP16'"},
+        {shape::double_type, "'FP64'"},
+        {shape::int8_type, "'INT8'"},
+        {shape::int32_type, "'INT32'"},
+    };
+    auto it = m.find(s.type());
+    if(it == m.end())
+        return "UNKNOWN";
+    return it->second;
+}
+std::string generate_miopen_config(const problem_params& pp)
+{
+    value v       = pp.op.to_value();
+    auto input    = pp.inputs[0].lens();
+    auto weights  = pp.inputs[1].lens();
+    auto padding  = v["padding"].to_vector<std::size_t>();
+    auto stride   = v["stride"].to_vector<std::size_t>();
+    auto dilation = v["dilation"].to_vector<std::size_t>();
+    if(padding.size() != stride.size())
+        padding.erase(padding.begin() + padding.size() / 2, padding.end());
+    return to_string_range({std::string{" C.in_channels="},       to_string(input[1]),
+                            std::string{" AND C.in_h="},          to_string(input[2]),
+                            std::string{" AND C.in_w="},          to_string(input[3]),
+                            std::string{" AND C.fil_h="},         to_string(weights[2]),
+                            std::string{" AND C.fil_w="},         to_string(weights[3]),
+                            std::string{" AND C.out_channels="},  to_string(weights[0]),
+                            std::string{" AND C.batchsize="},     to_string(input[0]),
+                            std::string{" AND C.pad_h="},         to_string(padding[0]),
+                            std::string{" AND C.pad_w="},         to_string(padding[2]),
+                            std::string{" AND C.dilation_h="},    to_string(dilation[0]),
+                            std::string{" AND C.dilation_w="},    to_string(dilation[1]),
+                            std::string{" AND C.conv_stride_h="}, to_string(stride[0]),
+                            std::string{" AND C.conv_stride_w="}, to_string(stride[1]),
+                            std::string{" AND C.layout="},        get_layout(pp.inputs[0], "NCHW"),
+                            std::string{" AND C.data_type="},     get_type(pp.inputs[0]),
+                            std::string{" AND C.direction="},     std::string{"'F'"}},
+                           " ");
+}
+auto query_miopen_db(const std::string& query)
+{
+    // TODO: Store db as a static variable
+    const auto dbpath = fs::path{"/opt"} / "rocm" / "share" / "miopen" / "db" / "miopen.db";
+    // Check if db file exists.
+    std::ifstream dbs(dbpath);
+    if(dbs.is_open())
+    {
+        dbs.close();
+    }
+    else
+    {
+        std::vector<std::unordered_map<std::string, std::string>> empty;
+        return empty;
+    }
+    auto db = sqlite::read(dbpath);
+    return db.execute(query);
+}
+} // namespace
+std::string get_mlir_perf_for_conv(const problem_params& pp)
+{
+    std::string query = "select P.* \
+                             from perf_db P, config C \
+                             where P.config = C.id AND \
+                             P.solver = 'ConvMlirIgemmFwdXdlops' AND \
+                             ${config}";
+    auto results =
+        query_miopen_db(interpolate_string(query, {{"config", generate_miopen_config(pp)}}));
+    if(results.empty())
+        return "";
+    return results.front().at("params");
+}
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/target.cpp
+++ b/src/targets/gpu/target.cpp
@@ -54,6 +54,7 @@
 #include <migraphx/gpu/compile_ops.hpp>
 #include <migraphx/gpu/concat_gpu_opt.hpp>
 #include <migraphx/gpu/context.hpp>
+#include <migraphx/gpu/device_name.hpp>
 #include <migraphx/gpu/fuse_mlir.hpp>
 #include <migraphx/gpu/fuse_ops.hpp>
 #include <migraphx/gpu/prefuse_ops.hpp>
@@ -165,7 +166,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
 std::string target::name() const { return "gpu"; }
-migraphx::context target::get_context() const { return context{}; }
+migraphx::context target::get_context() const { return context(gpu::get_device_id()); }
 argument target::copy_to(const argument& arg) const { return gpu::to_gpu(arg); }

--- a/src/targets/ref/lowering.cpp
+++ b/src/targets/ref/lowering.cpp
@@ -51,6 +51,8 @@
 #include <migraphx/register_op.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/tune_axis.hpp>
+#include <migraphx/pad_calc.hpp>
 #include <unordered_map>
 #include <utility>
 #include <iostream>
@@ -231,8 +233,31 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
    {
        return op.normalize_compute_shape(inputs);
    }
    argument compute(context&, shape output_shape, std::vector<argument> args) const
    {
+        std::vector<std::size_t> padding;
+        if(op.use_dynamic_same_auto_pad)
+        {
+            auto input_lens = args[0].get_shape().lens();
+            std::vector<std::size_t> img_lens{input_lens.begin() + 2, input_lens.end()};
+            auto weights_lens = args[1].get_shape().lens();
+            std::vector<std::size_t> k_lens{weights_lens.begin() + 2, weights_lens.end()};
+            padding = calc_dyn_auto_pad(img_lens, k_lens, op.stride, op.dilation);
+            std::cout << "[ ";
+            output_shape =
+                compute_padded_shape({args.at(0).get_shape(), args.at(1).get_shape()}, padding);
+        }
+        else
+        {
+            padding = op.padding;
+            if(output_shape.dynamic())
+            {
+                output_shape =
+                    op.normalize_compute_shape({args.at(0).get_shape(), args.at(1).get_shape()});
+            }
+        }
        argument result{output_shape};
        visit_quantize(result, args[0], args[1])([&](auto output, auto input, auto weights) {
            auto in_lens = input.get_shape().lens();
@@ -252,7 +277,7 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
                {
                    auto d_2 = dim - 2;
                    win_start.push_back(std::ptrdiff_t(idx_o[dim] * op.stride[d_2]) -
-                                        std::ptrdiff_t(op.padding[d_2]));
+                                        std::ptrdiff_t(padding[d_2]));
                }
                const auto group_id = w / (wei_n / op.group);
@@ -289,6 +314,34 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
        });
        return result;
    }
+    private:
+    /*!
+     * Used for dynamic auto padding since padding needs to be computed at evaulation time.
+     * \param inputs two fixed shape inputs [input_tensor, weights]
+     * \param padding from auto_pad calculation
+     */
+    shape compute_padded_shape(const std::vector<shape>& inputs,
+                               const std::vector<std::size_t>& padding) const
+    {
+        const shape& input            = inputs.at(0);
+        const shape& weights          = inputs.at(1);
+        const size_t num_spatial_dims = input.lens().size() - 2;
+        std::vector<size_t> output_lens{input.lens()[0], weights.lens()[0]};
+        // calculate the output shape of the convolution: ((W - K + 2P) / S) + 1
+        for(size_t i = 0; i < num_spatial_dims; i++)
+        {
+            auto padding_factor = padding[i] + padding[i + num_spatial_dims];
+            output_lens.push_back(std::size_t(std::max<std::ptrdiff_t>(
+                1,
+                (input.lens()[i + 2] - (1 + op.dilation[i] * (weights.lens()[i + 2] - 1)) +
+                 padding_factor) /
+                        op.stride[i] +
+                    1)));
+        }
+        return inputs[0].with_lens(output_lens);
+    }
 };
 struct ref_im2col

--- a/src/tf/tf_parser.cpp
+++ b/src/tf/tf_parser.cpp
@@ -216,7 +216,7 @@ static std::vector<T> get_data_vals(const google::protobuf::RepeatedField<T>& da
        std::fill(data_vals.begin(), data_vals.end(), data[0]);
    }
    else
-        copy(data.begin(), data.end(), std::back_inserter(data_vals));
+        copy(data.begin(), data.end(), data_vals.begin());
    return data_vals;
 }
@@ -329,33 +329,37 @@ void tf_parser::parse_node(const std::string& name)
        auto&& node = nodes.at(name);
        if(not is_valid_op(node))
            return;
        std::vector<instruction_ref> args;
        for(auto&& input : node.input())
        {
            // control dependencies (signified by ^ before the name) are ignored
            if(contains(input, "^"))
                continue;
-            if(nodes.count(input) > 0)
+            std::string input_name = input;
+            // if input has trailing `:0` index then remove it
+            auto multi_out_idx = input.find(':');
+            if(multi_out_idx != std::string::npos && input.substr(multi_out_idx + 1) == "0")
+            {
+                input_name = input.substr(0, multi_out_idx);
+            }
+            if(nodes.count(input_name) > 0)
            {
-                std::string iname;
                // input was from a node with multiple outputs
-                if(contains(input, ':'))
+                if(contains(input_name, ':'))
                {
-                    iname = input.substr(0, input.find(':'));
+                    input_name = input_name.substr(0, input.find(':'));
                }
                else
                {
-                    iname = get_name(nodes.at(input));
+                    input_name = get_name(nodes.at(input_name));
                }
-                assert(name != iname);
+                assert(name != input_name);
-                this->parse_node(iname);
+                this->parse_node(input_name);
-                args.push_back(instructions.at(input));
+                args.push_back(instructions.at(input_name));
            }
            else
            {
-                args.push_back(instructions.at(input));
+                args.push_back(instructions.at(input_name));
            }
        }
        std::vector<instruction_ref> result;

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -137,6 +137,22 @@ if(MIGRAPHX_ENABLE_GPU)
    endforeach()
 endif()
+if(MIGRAPHX_ENABLE_FPGA)
+    # fpga tests
+    file(GLOB FPGA_TESTS ${CONFIGURE_DEPENDS} fpga/*.cpp)
+    foreach(TEST ${FPGA_TESTS})
+        get_filename_component(BASE_NAME ${TEST} NAME_WE)
+        add_test_executable(test_fpga_${BASE_NAME} ${TEST})
+        rocm_clang_tidy_check(test_fpga_${BASE_NAME})
+        set_tests_properties(test_fpga_${BASE_NAME} PROPERTIES 
+            COST 10 
+            RESOURCE_LOCK fpga
+        )
+        target_link_libraries(test_fpga_${BASE_NAME} migraphx_fpga)
+    endforeach()
+endif()
 # Onnx test
 set(TEST_ONNX_DIR ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
 file (GLOB ONNX_TESTS ${TEST_ONNX_DIR}/*.cpp)

--- a/test/api/CMakeLists.txt
+++ b/test/api/CMakeLists.txt
@@ -32,6 +32,17 @@ function(add_api_test TEST_NAME TEST_SRC TEST_DIR)
    add_dependencies(check ${NAME})
 endfunction()
+# Workaround: C file dont work with clang-tidy right now, need a fix in rocm-cmake
+function(add_c_api_test TEST_NAME TEST_SRC TEST_DIR)
+    set(NAME test_api_${TEST_NAME})
+    add_executable(${NAME} EXCLUDE_FROM_ALL ${TEST_SRC})
+    target_link_libraries(${NAME} migraphx_c migraphx)
+    target_include_directories(${NAME} PUBLIC ../include)
+    add_test(NAME ${NAME} COMMAND $<TARGET_FILE:${NAME}> WORKING_DIRECTORY ${TEST_DIR}) 
+    add_dependencies(tests ${NAME})
+    add_dependencies(check ${NAME})
+endfunction()
 add_api_test(array_base test_array_base.cpp ${TEST_ONNX_DIR})
 add_api_test(assign test_assign.cpp ${TEST_ONNX_DIR})
 add_api_test(compile_options test_compile_options.cpp ${TEST_ONNX_DIR})
@@ -40,6 +51,7 @@ add_api_test(module_construct test_module_construct.cpp ${TEST_ONNX_DIR})
 add_api_test(ref test_cpu.cpp ${TEST_ONNX_DIR})
 add_api_test(save_load test_save_load.cpp ${TEST_ONNX_DIR})
 add_api_test(op test_op_construct.cpp ${TEST_ONNX_DIR})
+add_c_api_test(c_op test_c_op_construct.c ${TEST_ONNX_DIR})
 add_api_test(custom_op test_custom_op.cpp ${TEST_ONNX_DIR})
 add_api_test(tf_parser test_tf_parser.cpp ${TEST_TF_DIR})
 # GPU-based tests

--- a/test/api/test_c_op_construct.c
+++ b/test/api/test_c_op_construct.c
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/migraphx.h>
+#include <string.h>
+void expect_equal(const char* x, const char* y)
+{
+    if(strcmp(x, y) != 0)
+        abort();
+}
+int main()
+{
+    char name[1024];
+    migraphx_operation_t op;
+    migraphx_operation_create(&op, "add", 0);
+    migraphx_operation_name(name, 1024, op);
+    migraphx_operation_destroy(op);
+    expect_equal(name, "add");
+}
--- a/test/api/test_custom_op.cpp
+++ b/test/api/test_custom_op.cpp
@@ -23,8 +23,10 @@
 */
 #include <algorithm>
 #include <cmath>
+#include <exception>
 #include <migraphx/migraphx.h>
 #include <migraphx/migraphx.hpp>
+#include <stdexcept>
 #include "test.hpp"
 struct sigmoid_custom_op final : migraphx::experimental_custom_op_base
@@ -43,10 +45,22 @@ struct sigmoid_custom_op final : migraphx::experimental_custom_op_base
    virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
    {
-        CHECK(inputs.size() == 2);
+        if(inputs.size() != 2)
-        CHECK(inputs[0].lengths().size() == 1);
+        {
-        CHECK(inputs[0].type() == migraphx_shape_float_type);
+            throw std::runtime_error("op must have two inputs");
-        CHECK(bool{inputs[0] == inputs[1]});
+        }
+        if(inputs[0].lengths().size() != 1)
+        {
+            throw std::runtime_error("input arg must be a vector or scalar");
+        }
+        if(inputs[0].type() != migraphx_shape_float_type)
+        {
+            throw std::runtime_error("input arg must be of type float");
+        }
+        if(inputs[0] != inputs[1])
+        {
+            throw std::runtime_error("input arg and buffer allocation must be of same shape");
+        }
        return inputs.back();
    }
 };
@@ -83,4 +97,18 @@ TEST_CASE(run_sigmoid_custom_op)
    EXPECT(bool{result == migraphx::argument(s, expected_result.data())});
 }
+extern "C" void migraphx_test_private_disable_exception_catch(bool b);
+TEST_CASE(run_sigmoid_with_incorrect_shape)
+{
+    migraphx::program p;
+    migraphx::shape s{migraphx_shape_float_type, {12}};
+    migraphx::module m = p.get_main_module();
+    auto x             = m.add_parameter("x", s);
+    migraphx_test_private_disable_exception_catch(true);
+    EXPECT(test::throws<std::exception>(
+        [&] { m.add_instruction(migraphx::operation("sigmoid_custom_op"), {x}); },
+        "Error in compute_shape of: sigmoid_custom_op: op must have two inputs"));
+}
 int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/api/test_custom_op_gpu.cpp
+++ b/test/api/test_custom_op_gpu.cpp
@@ -24,6 +24,7 @@
 #include <hip/hip_runtime_api.h>
 #include <migraphx/migraphx.h>
 #include <migraphx/migraphx.hpp>
+#include <stdexcept>
 #include "test.hpp"
 #define MIGRAPHX_HIP_ASSERT(x) (EXPECT(x == hipSuccess))
@@ -54,6 +55,14 @@ struct simple_custom_op final : migraphx::experimental_custom_op_base
    virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
    {
+        if(!inputs[0].standard())
+        {
+            throw std::runtime_error("first arg must be standard shaped");
+        }
+        if(inputs.size() != 2)
+        {
+            throw std::runtime_error("number of inputs must be 2");
+        }
        return inputs.back();
    }
 };
@@ -64,12 +73,17 @@ TEST_CASE(run_simple_custom_op)
    migraphx::register_experimental_custom_op(simple_op);
    migraphx::program p;
    migraphx::shape s{migraphx_shape_int32_type, {4, 3}};
+    migraphx::shape trans_shape{migraphx_shape_int32_type, {3, 4}};
    migraphx::module m = p.get_main_module();
    auto x             = m.add_parameter("x", s);
    auto neg           = m.add_instruction(migraphx::operation("neg"), x);
-    auto alloc         = m.add_allocation(s);
+    auto alloc         = m.add_allocation(trans_shape);
-    auto custom_kernel = m.add_instruction(migraphx::operation("simple_custom_op"), {neg, alloc});
+    auto neg_trans =
-    auto relu          = m.add_instruction(migraphx::operation("relu"), custom_kernel);
+        m.add_instruction(migraphx::operation("transpose", "{permutation: [1, 0]}"), {neg});
+    auto neg_cont = m.add_instruction(migraphx::operation("contiguous"), {neg_trans});
+    auto custom_kernel =
+        m.add_instruction(migraphx::operation("simple_custom_op"), {neg_cont, alloc});
+    auto relu = m.add_instruction(migraphx::operation("relu"), custom_kernel);
    m.add_return({relu});
    migraphx::compile_options options;
    options.set_offload_copy();
@@ -82,7 +96,7 @@ TEST_CASE(run_simple_custom_op)
    auto result_vec = result.as_vector<int>();
    std::vector<int> expected_result(12, 0);
    std::fill(expected_result.begin() + 6, expected_result.end(), 3);
-    EXPECT(bool{result == migraphx::argument(s, expected_result.data())});
+    EXPECT(bool{result == migraphx::argument(trans_shape, expected_result.data())});
 }
 int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/check_shapes_test.cpp
+++ b/test/check_shapes_test.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "test.hpp"
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/make_op.hpp>
+/*!
+ * Tests for check_shapes object handling dynamic shapes
+ */
+using migraphx::shape;
+bool create_shapes(bool dynamic_allowed)
+{
+    try
+    {
+        shape a{shape::int64_type, {3}};
+        shape b{shape::float_type, {{3, 6, 0}, {4, 4, 0}}};
+        auto op = migraphx::make_op("add");
+        migraphx::check_shapes{{a, b}, op, dynamic_allowed}.has(2);
+        return true;
+    }
+    catch(...)
+    {
+        return false;
+    }
+}
+TEST_CASE(allow_dynamic_shape) { EXPECT(create_shapes(true)); }
+TEST_CASE(fail_dynamic_shape) { EXPECT(!create_shapes(false)); }
+int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/fpga/test_compile.cpp
+++ b/test/fpga/test_compile.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "test.hpp"
+#include <migraphx/make_op.hpp>
+#include <migraphx/program.hpp>
+#include <migraphx/register_target.hpp>
+#include <migraphx/fpga/target.hpp>
+#include <migraphx/target_assignments.hpp>
+migraphx::program create_program()
+{
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+    migraphx::shape s{migraphx::shape::float_type, {3}};
+    auto x     = mm->add_parameter("x", s);
+    auto y     = mm->add_parameter("y", s);
+    auto z     = mm->add_parameter("z", s);
+    auto sum   = mm->add_instruction(migraphx::make_op("add"), x, y);
+    auto sum_2 = mm->add_instruction(migraphx::make_op("add"), sum, z);
+    mm->add_return({sum_2});
+    return p;
+}
+TEST_CASE(compile)
+{
+    auto p = create_program();
+    auto t = migraphx::make_target("fpga");
+    p.compile(t);
+    EXPECT(p.is_compiled());
+}
+int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/gpu/manage_host_buffer.cpp
+++ b/test/gpu/manage_host_buffer.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <iostream>
+#include <vector>
+#include <hip/hip_runtime_api.h>
+#include <migraphx/gpu/target.hpp>
+#include <migraphx/verify.hpp>
+#include <test.hpp>
+#include <basic_ops.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/make_op.hpp>
+#define MIGRAPHX_HIP_ASSERT(x) (EXPECT(x == hipSuccess))
+TEST_CASE(host_same_buffer_copy)
+{
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+    migraphx::shape ss{migraphx::shape::float_type, {4, 2}};
+    auto a           = mm->add_parameter("a", ss);
+    auto b           = mm->add_parameter("b", ss);
+    auto aa          = mm->add_instruction(migraphx::make_op("add"), a, a);
+    auto gpu_out     = mm->add_instruction(migraphx::make_op("hip::copy_from_gpu"), aa);
+    auto stream_sync = mm->add_instruction(migraphx::make_op("hip::sync_stream"), gpu_out);
+    auto pass        = mm->add_instruction(unary_pass_op{}, stream_sync);
+    auto alloc       = mm->add_instruction(
+        migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ss)}}));
+    auto gpu_in = mm->add_instruction(migraphx::make_op("hip::copy_to_gpu"), pass, alloc);
+    auto aab    = mm->add_instruction(migraphx::make_op("add"), gpu_in, b);
+    mm->add_return({aab});
+    migraphx::parameter_map pp;
+    std::vector<float> a_vec(ss.elements(), -1);
+    std::vector<float> b_vec(ss.elements(), 2);
+    std::vector<float> c_vec(ss.elements(), 0);
+    pp["a"] = migraphx::argument(ss, a_vec.data());
+    pp["b"] = migraphx::argument(ss, b_vec.data());
+    std::vector<float> gpu_result;
+    migraphx::target gpu_t = migraphx::gpu::target{};
+    migraphx::compile_options options;
+    options.offload_copy = true;
+    p.compile(gpu_t, options);
+    auto result = p.eval(pp).back();
+    std::vector<float> results_vector(ss.elements(), -1);
+    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
+    EXPECT(migraphx::verify_range(c_vec, results_vector));
+}
+TEST_CASE(arguments_lifetime)
+{
+    auto use_on_gpu = [](const migraphx::argument& arg, int c) {
+        auto* arg_ptr = arg.data();
+        MIGRAPHX_HIP_ASSERT(hipSetDevice(0));
+        MIGRAPHX_HIP_ASSERT(hipMemset(arg_ptr, c, arg.get_shape().bytes()));
+        MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
+        return;
+    };
+    auto f = [use_on_gpu](const migraphx::argument& input) {
+        auto a = migraphx::gpu::register_on_gpu(input);
+        auto s = a.get_shape();
+        {
+            auto b = migraphx::gpu::register_on_gpu(input);
+            use_on_gpu(b, 0);
+            std::vector<float> expected_b(s.elements(), 0);
+            auto gold = migraphx::argument(s, expected_b.data());
+        }
+        use_on_gpu(a, 1);
+        return true;
+    };
+    migraphx::shape ss{migraphx::shape::float_type, {4, 2}};
+    std::vector<float> x_data(ss.elements(), -1);
+    migraphx::argument x{ss, x_data.data()};
+    EXPECT(f(x));
+}
+int main(int argc, const char* argv[]) { test::run(argc, argv); }