Unverified Commit 40fbef9b authored by Ted Themistokleous, committed by GitHub

Merge branch 'develop' into threaded_nms

parents d164b151 aeb9f78c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_MIGRAPHX_PY_HPP
#define MIGRAPHX_GUARD_MIGRAPHX_PY_HPP
#include <migraphx/config.hpp>
#include <migraphx/program.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
program load_py(const std::string& filename);
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_MIGRAPHX_PY_HPP
......@@ -95,6 +95,10 @@ void visit_py(T x, F f)
{
f(x.template cast<std::string>());
}
else if(py::isinstance<migraphx::shape::dynamic_dimension>(x))
{
f(migraphx::to_value(x.template cast<migraphx::shape::dynamic_dimension>()));
}
else
{
MIGRAPHX_THROW("VISIT_PY: Unsupported data type!");
......@@ -165,7 +169,10 @@ template <class T>
py::buffer_info to_buffer_info(T& x)
{
migraphx::shape s = x.get_shape();
assert(s.type() != migraphx::shape::tuple_type);
if(s.dynamic())
MIGRAPHX_THROW("MIGRAPHX PYTHON: dynamic shape argument passed to to_buffer_info");
auto strides = s.strides();
std::transform(
strides.begin(), strides.end(), strides.begin(), [&](auto i) { return i * s.type_size(); });
py::buffer_info b;
......@@ -177,7 +184,7 @@ py::buffer_info to_buffer_info(T& x)
b = py::buffer_info(x.data(),
as.size(),
py::format_descriptor<bool>::format(),
s.ndim(),
s.lens(),
strides);
}
......@@ -186,7 +193,7 @@ py::buffer_info to_buffer_info(T& x)
b = py::buffer_info(x.data(),
as.size(),
py::format_descriptor<decltype(as())>::format(),
s.ndim(),
s.lens(),
strides);
}
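The buffer_info built above is what backs the Python buffer protocol on migraphx.argument, so arguments round-trip with numpy. A minimal sketch of that use, assuming a built migraphx Python module (fill_argument is the binding registered later in this file):

import numpy as np
import migraphx

a = migraphx.fill_argument(migraphx.shape(type="float", lens=[2, 3]), 1)
print(np.asarray(a).shape)  # (2, 3); strides are reported in bytes, as computed above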
......@@ -239,8 +246,15 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
py::class_<migraphx::shape> shape_cls(m, "shape");
shape_cls
.def(py::init([](py::kwargs kwargs) {
auto v = migraphx::to_value(kwargs);
auto t = migraphx::shape::parse_type(v.get("type", "float"));
if(v.contains("dyn_dims"))
{
auto dyn_dims =
migraphx::from_value<std::vector<migraphx::shape::dynamic_dimension>>(
v.at("dyn_dims"));
return migraphx::shape(t, dyn_dims);
}
auto lens = v.get<std::size_t>("lens", {1});
if(v.contains("strides"))
return migraphx::shape(t, lens, v.at("strides").to_vector<std::size_t>());
......@@ -250,15 +264,18 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
.def("type", &migraphx::shape::type)
.def("lens", &migraphx::shape::lens)
.def("strides", &migraphx::shape::strides)
.def("ndim", &migraphx::shape::ndim)
.def("elements", &migraphx::shape::elements)
.def("bytes", &migraphx::shape::bytes)
.def("type_string", &migraphx::shape::type_string)
.def("type_size", &migraphx::shape::type_size)
.def("dyn_dims", &migraphx::shape::dyn_dims)
.def("packed", &migraphx::shape::packed)
.def("transposed", &migraphx::shape::transposed)
.def("broadcasted", &migraphx::shape::broadcasted)
.def("standard", &migraphx::shape::standard)
.def("scalar", &migraphx::shape::scalar)
.def("dynamic", &migraphx::shape::dynamic)
.def("__eq__", std::equal_to<migraphx::shape>{})
.def("__ne__", std::not_equal_to<migraphx::shape>{})
.def("__repr__", [](const migraphx::shape& s) { return migraphx::to_string(s); });
......@@ -266,6 +283,15 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
py::enum_<migraphx::shape::type_t>(shape_cls, "type_t")
MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_PYTHON_GENERATE_SHAPE_ENUM);
py::class_<migraphx::shape::dynamic_dimension>(shape_cls, "dynamic_dimension")
.def(py::init<>())
.def(py::init<std::size_t, std::size_t>())
.def(py::init<std::size_t, std::size_t, std::set<std::size_t>>())
.def_readwrite("min", &migraphx::shape::dynamic_dimension::min)
.def_readwrite("max", &migraphx::shape::dynamic_dimension::max)
.def_readwrite("optimals", &migraphx::shape::dynamic_dimension::optimals)
.def("is_fixed", &migraphx::shape::dynamic_dimension::is_fixed);
py::class_<migraphx::argument>(m, "argument", py::buffer_protocol())
.def_buffer([](migraphx::argument& x) -> py::buffer_info { return to_buffer_info(x); })
.def(py::init([](py::buffer b) {
......@@ -287,7 +313,9 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
py::class_<migraphx::target>(m, "target");
py::class_<migraphx::instruction_ref>(m, "instruction_ref")
.def("shape", [](migraphx::instruction_ref i) { return i->get_shape(); })
.def("op", [](migraphx::instruction_ref i) { return i->get_operator(); });
py::class_<migraphx::module, std::unique_ptr<migraphx::module, py::nodelete>>(m, "module")
.def("print", [](const migraphx::module& mm) { std::cout << mm << std::endl; })
......@@ -438,13 +466,18 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
"parse_onnx",
[](const std::string& filename,
unsigned int default_dim_value,
migraphx::shape::dynamic_dimension default_dyn_dim_value,
std::unordered_map<std::string, std::vector<std::size_t>> map_input_dims,
std::unordered_map<std::string, std::vector<migraphx::shape::dynamic_dimension>>
map_dyn_input_dims,
bool skip_unknown_operators,
bool print_program_on_error,
int64_t max_loop_iterations) {
migraphx::onnx_options options;
options.default_dim_value = default_dim_value;
options.default_dyn_dim_value = default_dyn_dim_value;
options.map_input_dims = map_input_dims;
options.map_dyn_input_dims = map_dyn_input_dims;
options.skip_unknown_operators = skip_unknown_operators;
options.print_program_on_error = print_program_on_error;
options.max_loop_iterations = max_loop_iterations;
......@@ -452,8 +485,11 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
},
"Parse onnx file",
py::arg("filename"),
py::arg("default_dim_value") = 1,
py::arg("map_input_dims") = std::unordered_map<std::string, std::vector<std::size_t>>(),
py::arg("default_dim_value") = 0,
py::arg("default_dyn_dim_value") = migraphx::shape::dynamic_dimension{1, 1},
py::arg("map_input_dims") = std::unordered_map<std::string, std::vector<std::size_t>>(),
py::arg("map_dyn_input_dims") =
std::unordered_map<std::string, std::vector<migraphx::shape::dynamic_dimension>>(),
py::arg("skip_unknown_operators") = false,
py::arg("print_program_on_error") = false,
py::arg("max_loop_iterations") = 10);
......@@ -462,20 +498,28 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
"parse_onnx_buffer",
[](const std::string& onnx_buffer,
unsigned int default_dim_value,
migraphx::shape::dynamic_dimension default_dyn_dim_value,
std::unordered_map<std::string, std::vector<std::size_t>> map_input_dims,
std::unordered_map<std::string, std::vector<migraphx::shape::dynamic_dimension>>
map_dyn_input_dims,
bool skip_unknown_operators,
bool print_program_on_error) {
migraphx::onnx_options options;
options.default_dim_value = default_dim_value;
options.default_dyn_dim_value = default_dyn_dim_value;
options.map_input_dims = map_input_dims;
options.map_dyn_input_dims = map_dyn_input_dims;
options.skip_unknown_operators = skip_unknown_operators;
options.print_program_on_error = print_program_on_error;
return migraphx::parse_onnx_buffer(onnx_buffer, options);
},
"Parse onnx file",
py::arg("filename"),
py::arg("default_dim_value") = 1,
py::arg("map_input_dims") = std::unordered_map<std::string, std::vector<std::size_t>>(),
py::arg("default_dim_value") = 0,
py::arg("default_dyn_dim_value") = migraphx::shape::dynamic_dimension{1, 1},
py::arg("map_input_dims") = std::unordered_map<std::string, std::vector<std::size_t>>(),
py::arg("map_dyn_input_dims") =
std::unordered_map<std::string, std::vector<migraphx::shape::dynamic_dimension>>(),
py::arg("skip_unknown_operators") = false,
py::arg("print_program_on_error") = false);
......@@ -503,6 +547,13 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
py::arg("format") = "msgpack");
m.def("get_target", &migraphx::make_target);
m.def("create_argument", [](const migraphx::shape& s, const std::vector<double>& values) {
if(values.size() != s.elements())
MIGRAPHX_THROW("Values and shape elements do not match");
migraphx::argument a{s};
a.fill(values.begin(), values.end());
return a;
});
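From Python this looks like the following sketch; the binding throws if the value count does not match the shape's element count:

s = migraphx.shape(type="float", lens=[2, 2])
a = migraphx.create_argument(s, [1.0, 2.0, 3.0, 4.0])  # exactly s.elements() values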
m.def("generate_argument", &migraphx::generate_argument, py::arg("s"), py::arg("seed") = 0);
m.def("fill_argument", &migraphx::fill_argument, py::arg("s"), py::arg("value"));
m.def("quantize_fp16",
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/config.hpp>
#include <migraphx/program.hpp>
#include <migraphx/dynamic_loader.hpp>
#include <migraphx/file_buffer.hpp>
#include <pybind11/embed.h>
namespace py = pybind11;
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreturn-type-c-linkage"
#endif
// extern "C" is used to disable name mangling, but the function will still be called from C++
extern "C" program migraphx_load_py(const std::string& filename);
#ifdef __clang__
#pragma clang diagnostic pop
#endif
const std::string& python_path()
{
static const auto path = dynamic_loader::path(&migraphx_load_py).parent_path().string();
return path;
}
static py::dict run_file(const std::string& file)
{
py::object scope = py::module_::import("__main__").attr("__dict__");
std::string buffer;
buffer.append("import sys\n");
buffer.append("sys.path.insert(0, '" + python_path() + "')\n");
buffer.append("import migraphx\n");
buffer.append(read_string(file));
py::exec(buffer, scope);
return scope.cast<py::dict>();
}
extern "C" program migraphx_load_py(const std::string& filename)
{
py::scoped_interpreter guard{};
py::dict vars = run_file(filename);
auto it = std::find_if(vars.begin(), vars.end(), [](const auto& p) {
return py::isinstance<migraphx::program>(p.second);
});
if(it == vars.end())
MIGRAPHX_THROW("No program variable found");
return it->second.cast<migraphx::program>();
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
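For reference, a minimal script that migraphx_load_py could consume: any top-level variable holding a migraphx.program satisfies the lookup above. The file name and builder calls below are assumptions based on the Python API, not part of this diff:

# example_net.py (hypothetical); load_py picks up the program variable `p`
import migraphx

p = migraphx.program()
mm = p.get_main_module()
x = mm.add_parameter("x", migraphx.shape(type="float", lens=[2, 2]))
mm.add_return([mm.add_instruction(migraphx.op("relu"), [x])])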
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/py.hpp>
#include <migraphx/dynamic_loader.hpp>
#include <migraphx/process.hpp>
#include <migraphx/ranges.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
static std::vector<fs::path> find_available_python_versions()
{
std::vector<fs::path> result;
auto path = dynamic_loader::path(&load_py).parent_path();
for(const auto& entry : fs::directory_iterator{path})
{
auto p = entry.path();
if(not fs::is_regular_file(p))
continue;
if(not contains(p.stem().string(), "migraphx_py_"))
continue;
result.push_back(p);
}
std::sort(result.begin(), result.end(), std::greater<>{});
return result;
}
static dynamic_loader load_py_lib()
{
auto libs = find_available_python_versions();
for(const auto& lib : libs)
{
auto result = dynamic_loader::try_load(lib);
if(result.has_value())
return *result;
}
MIGRAPHX_THROW("Cant find a viable version of python");
}
static dynamic_loader py_lib()
{
static dynamic_loader lib = load_py_lib();
return lib;
}
program load_py(const std::string& filename)
{
static auto f = py_lib().get_function<program(const std::string&)>("migraphx_load_py");
return f(filename);
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -29,6 +29,7 @@
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/optimize_module.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
......@@ -48,19 +49,12 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_INT8_QUANTIZATION_PARAMS)
// This function is to convert any instructions specified in the input
// from double or float to float16 by inserting a convert operator.
// For the conversion, there could be cases of overflow or underflow, but they
// are uncommon. Run optimize_module() before converting to fp16 so constants are
// evaluated and folded in FP32, avoiding loss of precision.
void quantize_fp16(program& prog, const std::vector<std::string>& ins_names)
{
run_passes(prog, {optimize_module{}, quantize_fp16_pass{ins_names}, optimize_module{}});
}
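The pass is exposed through the Python binding registered earlier, so typical use looks like this sketch ("model.onnx" and the target name are placeholders):

import migraphx

p = migraphx.parse_onnx("model.onnx")
migraphx.quantize_fp16(p)               # optimize, convert to fp16, optimize again
p.compile(migraphx.get_target("gpu"))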
void quantize_int8(program& prog,
......
......@@ -52,14 +52,6 @@ static void quantize_module(module& m, const std::vector<std::string>& ins_names
auto mod_inputs = ins->module_inputs();
auto s = ins->get_shape();
// Convert each of the inputs that are floating point to fp16
auto inputs = ins->inputs();
std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
......@@ -70,8 +62,17 @@ static void quantize_module(module& m, const std::vector<std::string>& ins_names
ins, make_op("convert", {{"target_type", shape::half_type}}), input);
});
// Insert quantized ins
auto converted_ins = m.insert_instruction(ins, ins->get_operator(), inputs, mod_inputs);
// Convert back to original type after quantizing
if(mod_inputs.empty())
{
converted_ins = m.insert_instruction(
ins, make_op("convert", {{"target_type", s.type()}}), converted_ins);
}
// Replace original instruction
m.replace_instruction(ins, converted_ins);
}
}
......
......@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/pass_manager.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
......@@ -84,10 +85,11 @@ void insert_submod_allocations(instruction_ref ins, module& mod, const allocatio
mod.replace_instruction(ins, ins->get_operator(), inputs, mod_args);
}
void replace_allocate::apply(module_pass_manager& mpm) const
{
module& m = mpm.get_module();
auto mod_output_names = create_output_names(m);
bool root_offload_copy = (*mpm.get_root_module() == m) ? this->offload_copy : false;
for(auto ins : iterator_for(m))
{
auto op = ins->get_operator();
......@@ -104,7 +106,7 @@ void replace_allocate::apply(module& m) const
continue;
auto s = ins->get_shape();
if(not root_offload_copy and model.needs_out_params() and contains(mod_output_names, ins))
{
auto out_param = m.add_parameter(mod_output_names[ins], s);
m.replace_instruction(ins, out_param);
......
......@@ -40,15 +40,18 @@ void apply_quantizelinear(module& m, instruction_ref ins)
if(x->get_shape().type() != y_scale->get_shape().type())
{
x = m.insert_instruction(ins, make_op("convert", {{"target_type", shape::float_type}}), x);
x = m.insert_instruction(
ins, make_op("convert", {{"target_type", y_scale->get_shape().type()}}), x);
}
auto div = m.insert_instruction(ins, make_op("div"), x, y_scale);
auto add_zero_point = m.insert_instruction(ins, make_op("round"), div);
if(ins->inputs().size() == 3)
{
auto zero_point =
m.insert_instruction(ins,
make_op("convert", {{"target_type", y_scale->get_shape().type()}}),
ins->inputs()[2]);
add_zero_point = m.insert_instruction(ins, make_op("add"), add_zero_point, zero_point);
}
......@@ -72,14 +75,16 @@ void apply_quantizelinear(module& m, instruction_ref ins)
void apply_dequantizelinear(module& m, instruction_ref ins)
{
assert(ins->name() == "dequantizelinear");
auto x_scale = ins->inputs()[1];
auto x = m.insert_instruction(
ins, make_op("convert", {{"target_type", x_scale->get_shape().type()}}), ins->inputs()[0]);
if(ins->inputs().size() == 3)
{
auto x_zero_point =
m.insert_instruction(ins,
make_op("convert", {{"target_type", x_scale->get_shape().type()}}),
ins->inputs()[2]);
x = m.insert_instruction(ins, make_op("sub"), x, x_zero_point);
}
......
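For reference, the arithmetic these rewrites lower to follows ONNX QuantizeLinear/DequantizeLinear semantics; a numpy sketch with the int8 saturation bounds shown explicitly:

import numpy as np

x = np.array([-1.5, 0.0, 2.49], dtype=np.float32)
scale, zero_point = np.float32(0.02), np.int8(1)
y = np.clip(np.round(x / scale) + zero_point, -128, 127).astype(np.int8)   # quantize
x_hat = (y.astype(np.float32) - np.float32(zero_point)) * scale            # dequantize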
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -273,9 +273,23 @@ shape shape::from_permutation(type_t t,
shape::type_t shape::type() const { return impl->m_type; }
const std::vector<std::size_t>& shape::lens() const
{
if(this->dynamic())
{
MIGRAPHX_THROW("SHAPE: lens() called on a dynamic shape");
}
return impl->m_lens;
}
const std::vector<std::size_t>& shape::strides() const
{
if(this->dynamic())
{
MIGRAPHX_THROW("SHAPE: strides() called on a dynamic shape");
}
return impl->m_strides;
}
std::size_t shape::ndim() const
{
......@@ -361,29 +375,26 @@ std::size_t shape::index(std::size_t i) const
}
}
std::vector<std::size_t> shape::multi(std::size_t idx) const
{
assert(idx < elements());
std::vector<std::size_t> indices(lens().size());
multi_copy(idx, indices.data(), indices.data() + lens().size());
return indices;
}
void shape::multi_copy(std::size_t idx, std::size_t* start, const std::size_t* end) const
{
size_t tidx = idx;
(void)end;
assert(idx < elements());
assert(lens().size() <= (end - start));
for(size_t ii = lens().size() - 1; ii > 0; ii--)
{
*(start + ii) = tidx % lens()[ii];
tidx = tidx / lens()[ii];
}
*start = tidx;
}
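The loop above derives a row-major multi-index from lens alone by repeated mod/div; the same computation as a short Python sketch:

def multi(idx, lens):
    out = [0] * len(lens)
    for i in range(len(lens) - 1, 0, -1):  # last axis first, as in multi_copy
        out[i] = idx % lens[i]
        idx //= lens[i]
    out[0] = idx
    return out

assert multi(7, [2, 3, 4]) == [0, 1, 3]    # 7 == 0*12 + 1*4 + 3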
bool shape::packed() const
......@@ -538,7 +549,14 @@ bool shape::any_of_dynamic() const
});
}
const std::vector<shape::dynamic_dimension>& shape::dyn_dims() const
{
if(not this->dynamic())
{
MIGRAPHX_THROW("SHAPE: dyn_dims() called on a static shape");
}
return impl->m_dyn_dims;
}
std::vector<std::size_t> shape::min_lens() const
{
......@@ -682,12 +700,22 @@ const std::vector<shape>& shape::sub_shapes() const { return impl->m_shapes; }
void migraphx_to_value(value& v, const shape& s)
{
value result;
result["type"] = migraphx::to_value(s.type_string());
result["lens"] = migraphx::to_value(s.lens());
result["strides"] = migraphx::to_value(s.strides());
result["sub_shapes"] = migraphx::to_value(s.sub_shapes());
result["dynamic_dimensions"] = migraphx::to_value(s.dyn_dims());
v = result;
result["type"] = migraphx::to_value(s.type_string());
result["sub_shapes"] = migraphx::to_value(s.sub_shapes());
// avoid calling functions that will throw
if(s.dynamic())
{
result["lens"] = {};
result["strides"] = {};
result["dynamic_dimensions"] = migraphx::to_value(s.dyn_dims());
}
else
{
result["lens"] = migraphx::to_value(s.lens());
result["strides"] = migraphx::to_value(s.strides());
result["dynamic_dimensions"] = {};
}
v = result;
}
void migraphx_from_value(const value& v, shape& s)
......@@ -709,14 +737,10 @@ void migraphx_from_value(const value& v, shape& s)
{
auto v_dd = v.at("dynamic_dimensions");
std::vector<shape::dynamic_dimension> dyn_dims(v.at("dynamic_dimensions").size());
std::transform(
v_dd.begin(), v_dd.end(), dyn_dims.begin(), [](const migraphx::value& x) {
return from_value<shape::dynamic_dimension>(x);
});
s = shape{shape::parse_type(t), dyn_dims};
}
......
......@@ -204,6 +204,131 @@ struct find_mul_slice_conv
}
};
struct find_mul_dot
{
auto matcher() const
{
auto is_dot_const_inputs =
match::name("dot")(match::any_of[match::inputs()](match::is_constant()));
return match::name("mul")(match::either_arg(0, 1)(
is_dot_const_inputs.bind("dot"), match::name("broadcast", "multibroadcast").bind("c")));
}
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
auto dot_ins = r.instructions["dot"];
auto a_ins = dot_ins->inputs()[0];
auto b_ins = dot_ins->inputs()[1];
auto c_ins = r.instructions["c"];
const auto& c_strides = c_ins->get_shape().strides();
// There should only be one stride that is not zero
if(std::count_if(c_strides.begin(), c_strides.end(), [](auto s) { return s != 0; }) > 1)
return;
auto add_mul_const = [&](instruction_ref x_ins) {
if(not x_ins->can_eval())
return m.end();
auto broadcast_v = c_ins->get_operator().to_value();
broadcast_v["out_lens"] = x_ins->get_shape().lens();
auto cb_ins =
m.insert_instruction(ins, make_op(c_ins->name(), broadcast_v), c_ins->inputs());
return m.insert_instruction(ins, make_op("mul"), x_ins, cb_ins);
};
if(c_strides.back() == 1)
{
b_ins = add_mul_const(b_ins);
}
else if(c_strides[c_strides.size() - 2] == 1)
{
a_ins = add_mul_const(a_ins);
}
else if(c_ins->get_shape().scalar())
{
if(a_ins->can_eval())
a_ins = add_mul_const(a_ins);
else
b_ins = add_mul_const(b_ins);
}
else
{
return;
}
if(contains({a_ins, b_ins}, m.end()))
return;
m.replace_instruction(ins, make_op("dot"), a_ins, b_ins);
}
};
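The rewrite folds a broadcast scale into whichever dot operand shares its single non-zero-stride axis; a numpy check of both cases:

import numpy as np

A, B = np.random.rand(2, 3), np.random.rand(3, 4)
c_col = np.random.rand(4)      # non-zero stride only on the last axis
assert np.allclose((A @ B) * c_col, A @ (B * c_col))
c_row = np.random.rand(2, 1)   # non-zero stride only on the second-to-last axis
assert np.allclose((A @ B) * c_row, (A * c_row) @ B)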
struct find_dot_mul
{
auto matcher() const
{
auto const_broadcast = match::name("broadcast", "multibroadcast")(match::is_constant());
auto mul = match::name("mul")(
match::used_once(),
match::either_arg(0, 1)(const_broadcast.bind("d"),
match::none_of(match::is_constant()).bind("z")));
return match::name("dot")(match::either_arg(0, 1)(mul, match::is_constant().bind("c")));
}
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
auto a_ins = ins->inputs()[0];
auto b_ins = ins->inputs()[1];
auto d_ins = r.instructions["d"];
auto c_ins = r.instructions["c"];
auto z_ins = r.instructions["z"];
const auto& d_strides = d_ins->get_shape().strides();
// There should only be one stride that is not zero
if(std::count_if(d_strides.begin(), d_strides.end(), [](auto s) { return s != 0; }) > 1)
return;
if(not d_ins->get_shape().scalar())
{
if(d_strides.back() == 1 and not b_ins->can_eval())
return;
if(d_strides[d_strides.size() - 2] == 1 and not a_ins->can_eval())
return;
}
auto broadcast_v = d_ins->get_operator().to_value();
auto c_lens = c_ins->get_shape().lens();
std::vector<int64_t> permutation(c_lens.size());
std::iota(permutation.begin(), permutation.end(), 0);
std::swap(permutation.back(), permutation[permutation.size() - 2]);
c_lens = reorder_dims(c_lens, permutation);
broadcast_v["out_lens"] = c_lens;
auto db_ins =
m.insert_instruction(ins, make_op(d_ins->name(), broadcast_v), d_ins->inputs());
auto db_transpose_ins =
m.insert_instruction(ins, make_op("transpose", {{"permutation", permutation}}), db_ins);
auto cd_ins = m.insert_instruction(ins, make_op("mul"), c_ins, db_transpose_ins);
if(c_ins == b_ins)
{
a_ins = z_ins;
b_ins = cd_ins;
}
else
{
a_ins = cd_ins;
b_ins = z_ins;
}
m.replace_instruction(ins, make_op("dot"), a_ins, b_ins);
}
};
// ******************************
// a * (x + b) => a * x + a * b
// ******************************
......@@ -361,30 +486,123 @@ struct find_inner_broadcast
{
auto matcher() const { return pointwise(match::all_of[match::inputs()](match::broadcast())); }
static auto non_scalar_op(const std::string& name)
{
return [=](instruction_ref ins) {
if(ins->get_shape().scalar())
return false;
return ins->name() == name;
};
}
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
auto broadcasts = ins->inputs();
if(broadcasts.empty())
return;
// Skip if different data types are used
if(any_of(broadcasts, [&](auto i) {
return i->get_shape().type() != broadcasts.front()->get_shape().type();
}))
return;
bool mixed_broadcasts = any_of(broadcasts, non_scalar_op("broadcast")) and
any_of(broadcasts, non_scalar_op("multibroadcast"));
// If the broadcast is not a single dimension, then dont perform inner_broadcast
if(mixed_broadcasts and any_of(broadcasts, [&](instruction_ref i) {
if(i->get_shape().scalar())
return false;
if(i->name() == "multibroadcast")
return false;
auto input = i->inputs().at(0);
const auto& lens = input->get_shape().lens();
return std::count_if(lens.begin(), lens.end(), [&](std::size_t d) {
return d == 1;
}) < (lens.size() - 1);
}))
return;
std::vector<instruction_ref> inputs;
std::transform(broadcasts.begin(),
broadcasts.end(),
std::back_inserter(inputs),
[&](instruction_ref i) {
auto input = i->inputs().front();
if(mixed_broadcasts and not i->get_shape().scalar() and
i->get_shape().lens().size() > 1)
return m.insert_instruction(i, make_op("squeeze"), input);
return input;
});
if(std::any_of(inputs.begin(), inputs.end(), [&](auto i) {
return i->get_shape() != inputs.front()->get_shape() and
i->get_shape().elements() != 1;
}))
return;
std::sort(broadcasts.begin(), broadcasts.end(), by(std::less<>{}, [](instruction_ref i) {
if(i->get_shape().scalar())
return 2;
else if(i->name() == "broadcast")
return 0;
if(i->name() == "multibroadcast")
return 1;
return 3;
}));
auto op = insert_common_op(m, ins, ins->get_operator(), inputs);
m.replace_instruction(ins, broadcasts.front()->get_operator(), op);
}
};
struct find_dot_broadcast
{
auto matcher() const
{
return match::name("dot")(match::all_of[match::inputs()](match::broadcast()));
}
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
auto a = ins->inputs()[0];
auto b = ins->inputs()[1];
if(a->get_operator().name() != b->get_operator().name())
return;
if(ins->get_shape().lens().size() < 3)
return;
auto nbatch_axes = ins->get_shape().lens().size() - 2;
const auto& a_strides = a->get_shape().strides();
const auto& b_strides = b->get_shape().strides();
// Find leading batch axes that are broadcasted
auto p =
std::mismatch(a_strides.begin(),
a_strides.begin() + nbatch_axes,
b_strides.begin(),
b_strides.begin() + nbatch_axes,
[](auto astride, auto bstride) { return astride == 0 and bstride == 0; });
auto naxes = p.first - a_strides.begin();
assert(naxes <= nbatch_axes);
std::vector<std::size_t> axes(naxes);
std::iota(axes.begin(), axes.end(), 0);
auto insert_broadcast = [&](instruction_ref b_ins) -> instruction_ref {
auto input = b_ins->inputs()[0];
std::vector<std::size_t> lens(b_ins->get_shape().lens().begin() + naxes,
b_ins->get_shape().lens().end());
if(b_ins->name() == "multibroadcast")
{
return m.insert_instruction(
ins, make_op("multibroadcast", {{"out_lens", lens}}), input);
}
else if(b_ins->name() == "broadcast")
{
auto v = b_ins->get_operator().to_value();
auto axis = v.at("axis").to<std::size_t>() - naxes;
return m.insert_instruction(
ins, make_op("broadcast", {{"axis", axis}, {"out_lens", lens}}), input);
}
assert(false);
return m.end();
};
auto a1 = insert_broadcast(a);
auto b1 = insert_broadcast(b);
auto dot = m.insert_instruction(ins, make_op("dot"), a1, b1);
auto broadcast = m.insert_instruction(
ins, make_op("multibroadcast", {{"out_lens", ins->get_shape().lens()}}), dot);
m.replace_instruction(ins, broadcast);
}
};
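A numpy check of the equivalence find_dot_broadcast exploits: when both operands broadcast the same leading batch axes (stride 0), the dot can be computed once and broadcast afterwards:

import numpy as np

a, b = np.random.rand(3, 4), np.random.rand(4, 5)
A = np.broadcast_to(a, (8, 3, 4))  # batch axis has stride 0
B = np.broadcast_to(b, (8, 4, 5))
assert np.allclose(A @ B, np.broadcast_to(a @ b, (8, 3, 5)))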
......@@ -393,7 +611,8 @@ struct find_concat_op
auto matcher() const
{
return match::name("concat")(match::any_of[match::inputs()](
match::any_of(match::pointwise(), match::name("broadcast")), match::used_once()));
match::any_of(match::pointwise(), match::name("broadcast", "multibroadcast")),
match::used_once()));
}
template <class Iterator>
......@@ -412,7 +631,8 @@ struct find_concat_op
static bool is_valid_op(const operation& op)
{
return op.name() == "broadcast" or op.attributes().contains("pointwise");
return contains({"broadcast", "multibroadcast"}, op.name()) or
op.attributes().contains("pointwise");
}
void apply(module& m, const match::matcher_result& r) const
......@@ -440,6 +660,16 @@ struct find_concat_op
op = b;
iaxis = 0;
}
else if(op.name() == "multibroadcast")
{
shape bshape = (*start)->get_shape();
auto input = (*start)->inputs()[0];
if(iaxis >= bshape.strides().size() or bshape.strides()[iaxis] == 0)
return {start, last};
op.from_value({{"out_lens", get_output_lens(start, last, iaxis)}});
auto delta = bshape.lens().size() - input->get_shape().lens().size();
iaxis -= delta;
}
std::vector<instruction_ref> concats;
for(std::size_t i = 0; i < x->inputs().size(); i++)
......@@ -1260,12 +1490,15 @@ void simplify_algebra::apply(module& m) const
{
match::find_matches(m,
find_inner_broadcast{},
find_dot_broadcast{},
find_double_add_lit_broadcast{},
find_add_lit_broadcast{},
find_add_convs{},
find_conv_dot_horiz_fusion{},
find_mul_conv{},
find_mul_slice_conv{},
find_mul_dot{},
find_dot_mul{},
find_mul_add{},
find_unit_ops{},
find_neg_unit_ops{},
......
......@@ -89,38 +89,23 @@ struct find_reshaper
{
auto matcher() const
{
auto reshaper = match::name(reshaper_names());
auto contiguous = match::name("contiguous");
auto no_output_reshape = match::none_of[match::outputs()](reshaper);
auto input_reshape = match::arg(0)(match::skip(contiguous)(reshaper));
auto input = match::skip(reshaper, contiguous)(match::any().bind("x"));
return reshaper(no_output_reshape, input_reshape, input);
}
void apply(module& m, const match::matcher_result& mr) const
{
auto ins = mr.result;
auto input = mr.instructions["x"];
auto dims = ins->get_shape().lens();
if(not input->get_shape().standard())
input = m.insert_instruction(ins, make_op("contiguous"), input);
m.replace_instruction(ins, make_op("reshape", {{"dims", dims}}), input);
}
};
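The rewritten matcher collapses a chain of reshape-like ops into a single reshape of the chain's original input, which is safe because reshapes compose; a numpy analogue:

import numpy as np

x = np.arange(24).reshape(2, 3, 4)
chained = x.reshape(6, 4).reshape(2, 12).reshape(24)
assert np.array_equal(chained, x.reshape(24))  # one reshape of the input suffices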
......@@ -804,9 +789,9 @@ void simplify_reshapes::apply(module& m) const
match::find_matches(m,
find_where_op{},
find_resize{},
find_nop_reshapes{},
find_reshaper{},
find_reshape_cont{},
find_transpose{},
find_concat_transpose{},
find_concat_multibroadcasts{},
......
......@@ -100,10 +100,10 @@ struct find_static_2in_broadcasts
} // namespace
/**
* Makes all the shapes in the dynamic_dimension range. Probably won't work for `if`
* and `loop` instructions, depending on how the submodules for those
* work. Inserts select_module instruction to the top. Replaces return, bypassing other
* instructions. Skips if the dynamic parameter outputs to a select_module operator.
*/
void split_single_dyn_dim::apply(module_pass_manager& mpm) const
{
......@@ -111,7 +111,13 @@ void split_single_dyn_dim::apply(module_pass_manager& mpm) const
auto param_names = mm->get_parameter_names();
auto param_shapes = mm->get_parameter_shapes();
optional<dynamic_dimensions_check> dd_check = has_one_dyn_dim(param_shapes);
auto any_sm_next = [&](auto ddc) {
auto p_outputs = mm->get_parameter(ddc->dyn_param_str)->outputs();
return std::any_of(p_outputs.cbegin(), p_outputs.cend(), [](auto ins) {
return ins->name() == "select_module";
});
};
if(dd_check.has_value() and not any_sm_next(dd_check))
{
const auto& dyn_param = mm->get_parameter(dd_check->dyn_param_str);
auto dyn_param_shape = mm->get_parameter_shape(dd_check->dyn_param_str);
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/target.hpp>
#include <migraphx/register_target.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
void migraphx_to_value(value& v, const target& t) { v["name"] = t.name(); }
void migraphx_from_value(const value& v, target& t)
{
t = make_target(v.at("name").to<std::string>());
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -78,6 +78,8 @@ else()
endif()
target_link_libraries(migraphx_cpu PRIVATE migraphx)
migraphx_generate_export_header(migraphx_cpu)
find_package(OpenMP)
target_link_libraries(migraphx_cpu PUBLIC OpenMP::OpenMP_CXX)
# Add library path to rpath to work around issues with our broken packages
......@@ -88,8 +90,6 @@ foreach(LIBRARY ${OpenMP_CXX_LIBRARIES})
endif()
endforeach()
target_link_libraries(migraphx_all_targets INTERFACE migraphx_cpu)
rocm_install_targets(
TARGETS migraphx_cpu
INCLUDE
......
......@@ -23,14 +23,14 @@
*/
#include <migraphx/config.hpp>
#include <migraphx/cpu/dnnl.hpp>
#include <migraphx/op/convolution_backwards.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
struct dnnl_deconvolution
: dnnl_extend_op<dnnl_deconvolution, dnnl::deconvolution_forward, op::convolution_backwards>
{
std::vector<int> arg_map(int) const
{
......
......@@ -28,6 +28,7 @@
#include <migraphx/cpu/dnnl.hpp>
#include <migraphx/cpu/parallel.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/cpu/export.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......
......@@ -24,8 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#include <string>
#include <migraphx/config.hpp>
#include <migraphx/cpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -34,7 +33,7 @@ struct module;
namespace cpu {
struct MIGRAPHX_CPU_EXPORT lowering
{
std::string name() const { return "cpu::lowering"; }
void apply(module& m) const;
......
......@@ -28,14 +28,13 @@
#include <migraphx/register_target.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/cpu/context.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct pass;
namespace cpu {
struct MIGRAPHX_CPU_EXPORT target
{
std::string name() const;
std::vector<pass> get_passes(migraphx::context& gctx, const compile_options&) const;
......
......@@ -27,7 +27,7 @@
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution_backwards.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
......@@ -345,7 +345,7 @@ struct cpu_apply
extend_op("contiguous", "dnnl::reorder");
extend_op("convolution", "dnnl::convolution");
#ifndef MIGRAPHX_ENABLE_ZENDNN
extend_op("deconvolution", "dnnl::deconvolution");
extend_op("convolution_backwards", "dnnl::convolution_backwards");
extend_op("dot", "dnnl::dot");
#endif
extend_op("erf", "cpu::erf");
......
......@@ -41,7 +41,7 @@ class x_model
void set_shape(migraphx::shape);
};
x_model create_xmodel(migraphx::const_module_ref mod);
migraphx::argument execute(const x_model& xmodel,
const migraphx::shape& output_shape,
......