Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into dyn_dim_onnx_parser

5f215b71 · charlie · 42601741 · adbafc06 · 5f215b71 · 5f215b71
Commit 5f215b71 authored Jul 08, 2022 by charlie
20 changed files
--- a/Dockerfile
+++ b/Dockerfile
@@ -86,7 +86,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR

 ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh

-RUN PATH=/opt/cmake/bin:$PATH cget -p /usr/local install ROCmSoftwarePlatform/llvm-project-mlir@02078ce236ad90e3aec04c0c770ef5bfc99e49c2
+RUN cget -p /usr/local install ROCmSoftwarePlatform/llvm-project-mlir@26a4b3cfc0a1a15181490f24ae461608fef1b04e -DBUILD_MIXR_TARGET=On

 ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
 ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db

--- a/examples/migraphx/custom_op_hip_kernel/CMakeLists.txt
+++ b/examples/migraphx/custom_op_hip_kernel/CMakeLists.txt
+#####################################################################################
+# The MIT License (MIT)
+#
+# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#####################################################################################
+cmake_minimum_required(VERSION 3.5)
+project (custom_hip_kernel)
+
+# Build the example as C++14 and fail at configure time if the compiler cannot provide it.
+set (CMAKE_CXX_STANDARD 14)
+set (CMAKE_CXX_STANDARD_REQUIRED ON)
+set (EXAMPLE custom_op_hip_kernel)
+
+# Search for the MIGraphX and HIP package configs under the default ROCm install prefix.
+list (APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)
+find_package (migraphx REQUIRED)
+find_package (hip REQUIRED)
+
+message(STATUS "source file: ${EXAMPLE}.cpp ---> bin: ${EXAMPLE}")
+add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
+
+# Keyword signature: the dependencies are only needed to build the example itself.
+target_link_libraries(${EXAMPLE} PRIVATE migraphx::c hip::device)
--- a/examples/migraphx/custom_op_hip_kernel/README.md
+++ b/examples/migraphx/custom_op_hip_kernel/README.md
+# Custom Kernel using MIGraphX API. 
+This is an example of a custom operator implementation using MIGraphX's C/C++ APIs. It also demonstrates how to use this custom op in conjunction with the rest of the MIGraphX operators to build and run a MIGraphX program on the GPU.
+
+Kernels can be written in HIP, or by using the MIOpen or rocBLAS libraries. This particular example uses **HIP**.
+
+ To build the example, ensure ROCm is installed at `/opt/rocm`. 
+ 1.  `export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH`
+ 2.  `cd $MIGRAPHX_SRC/examples/migraphx/custom_op_hip_kernel/`
+ 3.  `mkdir build && cd build`
+ 4.  `CXX=/opt/rocm/llvm/bin/clang++ cmake ..  && make`
+ 5.  `./custom_op_hip_kernel`
\ No newline at end of file
--- a/examples/migraphx/custom_op_hip_kernel/custom_op_hip_kernel.cpp
+++ b/examples/migraphx/custom_op_hip_kernel/custom_op_hip_kernel.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <algorithm>
+#include <hip/hip_runtime.h>
+#include <migraphx/migraphx.hpp> // MIGraphX's C++ API
+#include <numeric>
+
+// Evaluates the HIP call `x` unconditionally and throws std::runtime_error when it does not
+// return hipSuccess. The call is deliberately not wrapped in assert(): when NDEBUG is defined,
+// assert() discards its argument, so the HIP call itself would never be executed.
+#define MIGRAPHX_HIP_ASSERT(x) \
+    (((x) == hipSuccess) ? static_cast<void>(0) : throw std::runtime_error("HIP call failed: " #x))
+/*
+ * Element-wise square: stores A_d[i] * A_d[i] into C_d[i] for every index i below N.
+ */
+template <typename T>
+__global__ void vector_square(T* C_d, const T* A_d, size_t N)
+{
+    // A thread starts at its global index and then advances by the total number of
+    // launched threads until the index reaches N.
+    const size_t first = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    const size_t step  = hipBlockDim_x * hipGridDim_x;
+
+    size_t idx = first;
+    while(idx < N)
+    {
+        C_d[idx] = A_d[idx] * A_d[idx];
+        idx += step;
+    }
+}
+
+// Custom operator that squares every element of its first input and writes the result into
+// its second input, which acts as the output buffer.
+struct square_custom_op final : migraphx::experimental_custom_op_base
+{
+    virtual std::string name() const override { return "square_custom_op"; }
+    // Launches vector_square on the queue obtained from `ctx` and returns the output argument
+    // (the last element of `inputs`).
+    virtual migraphx::argument
+    compute(migraphx::context ctx, migraphx::shape, migraphx::arguments inputs) const override
+    {
+        // if compile options has offload_copy = true then, parameters and outputs will be
+        // automatically copied to and from GPUs' memory. Here assume that `inputs` arguments are
+        // already in the GPU, so no need to do Malloc, Free or Memcpy. Last element in the `inputs`
+        // is output argument, so it should be returned from compute method.
+        auto* input_buffer  = reinterpret_cast<float*>(inputs[0].data());
+        auto* output_buffer = reinterpret_cast<float*>(inputs[1].data());
+        // Both buffers are accessed as float, so the element count is the byte size divided by
+        // sizeof(float). sizeof(shape.type()) would measure the type tag, not an element.
+        size_t n_elements = inputs[0].get_shape().bytes() / sizeof(float);
+        MIGRAPHX_HIP_ASSERT(hipSetDevice(0));
+        const unsigned blocks            = 512;
+        const unsigned threads_per_block = 256;
+        // cppcheck-suppress UseDeviceLaunch
+        hipLaunchKernelGGL(vector_square,
+                           dim3(blocks),
+                           dim3(threads_per_block),
+                           0,
+                           ctx.get_queue<hipStream_t>(),
+                           output_buffer,
+                           input_buffer,
+                           n_elements);
+        return inputs[1];
+    }
+    // Validates the operator's arguments and returns the shape of the output buffer.
+    // Throws std::runtime_error when there are not exactly two arguments, when their shapes
+    // differ, or when the data is not float (compute() reinterprets the buffers as float).
+    virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
+    {
+        if(inputs.size() != 2)
+        {
+            throw std::runtime_error("square_custom_op must have 2 arguments");
+        }
+        if(inputs[0] != inputs[1])
+        {
+            throw std::runtime_error("Inputs to the square_custom_op must have same Shape");
+        }
+        if(inputs[0].type() != migraphx_shape_float_type)
+        {
+            throw std::runtime_error("square_custom_op only supports float inputs");
+        }
+        return inputs.back();
+    }
+};
+
+// Builds the program relu(square_custom_op(neg(x))), runs it on the "gpu" target and compares
+// the output with x * x computed on the host. Returns 0 when they match and 1 otherwise.
+int main(int argc, const char* argv[])
+{
+    square_custom_op square_op;
+    migraphx::register_experimental_custom_op(square_op);
+    migraphx::program p;
+    migraphx::shape s{migraphx_shape_float_type, {32, 256}};
+    migraphx::module m = p.get_main_module();
+    auto x             = m.add_parameter("x", s);
+    auto neg_ins       = m.add_instruction(migraphx::operation("neg"), x);
+    // add allocation for the custom_kernel's output buffer
+    auto alloc = m.add_allocation(s);
+    auto custom_kernel =
+        m.add_instruction(migraphx::operation("square_custom_op"), {neg_ins, alloc});
+    auto relu_ins = m.add_instruction(migraphx::operation("relu"), {custom_kernel});
+    m.add_return({relu_ins});
+    migraphx::compile_options options;
+    // set offload copy to true for GPUs
+    options.set_offload_copy();
+    p.compile(migraphx::target("gpu"), options);
+    migraphx::program_parameters pp;
+    // The shape is declared as float, so the element count is bytes / sizeof(float).
+    std::vector<float> x_data(s.bytes() / sizeof(float));
+    std::iota(x_data.begin(), x_data.end(), 0);
+    pp.add("x", migraphx::argument(s, x_data.data()));
+    auto results                       = p.eval(pp);
+    auto result                        = results[0];
+    std::vector<float> expected_result = x_data;
+    // Square on the host with a plain float multiply, mirroring what the kernel computes.
+    std::transform(expected_result.begin(),
+                   expected_result.end(),
+                   expected_result.begin(),
+                   [](auto i) { return i * i; });
+    const bool passed = bool{result == migraphx::argument(s, expected_result.data())};
+    if(passed)
+    {
+        std::cout << "Successfully executed custom HIP kernel example\n";
+    }
+    else
+    {
+        std::cout << "Custom HIP kernel example failed\n";
+    }
+    // Report a mismatch through the exit status so scripts running the example can detect it.
+    return passed ? 0 : 1;
+}
--- a/examples/migraphx/custom_op_miopen_kernel/CMakeLists.txt
+++ b/examples/migraphx/custom_op_miopen_kernel/CMakeLists.txt
+#####################################################################################
+# The MIT License (MIT)
+#
+# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#####################################################################################
+cmake_minimum_required(VERSION 3.5)
+project (custom_miopen_kernel)
+
+# Build the example as C++14 and fail at configure time if the compiler cannot provide it.
+set (CMAKE_CXX_STANDARD 14)
+set (CMAKE_CXX_STANDARD_REQUIRED ON)
+set (EXAMPLE custom_op_miopen_kernel)
+
+# Search for the MIGraphX and MIOpen package configs under the default ROCm install prefix.
+list (APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)
+find_package (migraphx REQUIRED)
+find_package (miopen REQUIRED)
+
+message(STATUS "source file: ${EXAMPLE}.cpp ---> bin: ${EXAMPLE}")
+add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
+
+# Keyword signature: the dependencies are only needed to build the example itself.
+target_link_libraries(${EXAMPLE} PRIVATE migraphx::c MIOpen)
--- a/examples/migraphx/custom_op_miopen_kernel/README.md
+++ b/examples/migraphx/custom_op_miopen_kernel/README.md
+# Custom MIOpen Kernel using MIGraphX API. 
+ This is an example of a custom operator implementation using MIGraphX's C/C++ APIs. It also demonstrates how to use this custom op in conjunction with the rest of the MIGraphX operators to build and run a MIGraphX program on the GPU.
+ Kernels can be written in HIP, or by using the MIOpen or rocBLAS libraries. This particular example uses **MIOpen** library calls.
+
+ To build and run example, ensure ROCm is installed at `/opt/rocm`. 
+ 1.  `export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH`
+ 2.  `cd $MIGRAPHX_SRC/examples/migraphx/custom_op_miopen_kernel/`
+ 3.  `mkdir build && cd build`
+ 4.  `cmake ..  && make`
+ 5.  `./custom_op_miopen_kernel`
--- a/examples/migraphx/custom_op_miopen_kernel/custom_op_miopen_kernel.cpp
+++ b/examples/migraphx/custom_op_miopen_kernel/custom_op_miopen_kernel.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <algorithm>
+#include <hip/hip_runtime.h>
+#include <migraphx/migraphx.h>
+#include <miopen/miopen.h>
+#include <migraphx/migraphx.hpp> // MIGraphX's C++ API
+#include <numeric>
+#include <stdexcept>
+
+// Status-checking helpers. Each macro evaluates the call `x` unconditionally and throws
+// std::runtime_error when the call does not report success. The call is deliberately not
+// wrapped in assert(): when NDEBUG is defined, assert() discards its argument, so the
+// MIOpen/HIP call itself would never be executed.
+#define MIGRAPHX_MIOPEN_ASSERT(x)                      \
+    (((x) == miopenStatusSuccess) ? static_cast<void>(0) \
+                                  : throw std::runtime_error("MIOpen call failed: " #x))
+#define MIGRAPHX_HIP_ASSERT(x) \
+    (((x) == hipSuccess) ? static_cast<void>(0) : throw std::runtime_error("HIP call failed: " #x))
+
+// Creates a MIOpen tensor descriptor carrying the lengths, strides and data type of `s`.
+// When `pack` is true and the type is int8, the descriptor uses miopenInt8x4 and the second
+// dimension is rounded up to a multiple of 4 (with the first stride adjusted to match).
+// Throws std::runtime_error for an unsupported data type or when a MIOpen call fails.
+// The returned descriptor is not released here; the caller is responsible for it.
+inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s, bool pack = false)
+{
+    // Convert to ints
+    auto s_lens = s.lengths();
+    std::vector<int> lens(s_lens.begin(), s_lens.end());
+    auto s_strides = s.strides();
+    std::vector<int> strides(s_strides.begin(), s_strides.end());
+    // Resolve the data type before creating the descriptor so that an unsupported type
+    // cannot leave a created-but-unreturned descriptor behind.
+    miopenDataType_t d;
+    if(s.type() == migraphx_shape_float_type)
+        d = miopenFloat;
+    else if(s.type() == migraphx_shape_half_type)
+        d = miopenHalf;
+    else if(s.type() == migraphx_shape_int32_type)
+        d = miopenInt32;
+    else if(s.type() == migraphx_shape_int8_type)
+    {
+        if(pack)
+        {
+            // update the lens and corresponding strides
+            d          = miopenInt8x4;
+            lens[1]    = ((lens[1] + 3) / 4) * 4;
+            strides[0] = strides[1] * lens[1];
+        }
+        else
+        {
+            d = miopenInt8;
+        }
+    }
+    else
+    {
+        // Throw a real exception type; a bare string literal cannot be caught as std::exception.
+        throw std::runtime_error("MAKE_TENSOR: unsupported type");
+    }
+    miopenTensorDescriptor_t t;
+    if(miopenCreateTensorDescriptor(&t) != miopenStatusSuccess)
+    {
+        throw std::runtime_error("MAKE_TENSOR: failed to create tensor descriptor");
+    }
+    if(miopenSetTensorDescriptor(t, d, s_lens.size(), lens.data(), strides.data()) !=
+       miopenStatusSuccess)
+    {
+        // Release the descriptor created above before reporting the failure.
+        miopenDestroyTensorDescriptor(t);
+        throw std::runtime_error("MAKE_TENSOR: failed to set tensor descriptor");
+    }
+    return t;
+}
+
+// Selects device 0 and creates a MIOpen handle on the queue obtained from `ctx`.
+// The handle is returned to the caller and is not released here.
+inline auto make_miopen_handle(migraphx::context& ctx)
+{
+    MIGRAPHX_HIP_ASSERT(hipSetDevice(0));
+    auto* queue = ctx.get_queue<hipStream_t>();
+    miopenHandle_t handle;
+    MIGRAPHX_MIOPEN_ASSERT(miopenCreateWithStream(&handle, queue));
+    return handle;
+}
+
+// Creates a MIOpen activation descriptor and configures it with the given mode and the
+// alpha/beta/gamma coefficients (all three default to 0). The descriptor is returned to the
+// caller and is not released here.
+inline auto make_activation_descriptor(miopenActivationMode_t mode,
+                                       double alpha = 0,
+                                       double beta  = 0,
+                                       double gamma = 0)
+{
+    miopenActivationDescriptor_t descriptor;
+    MIGRAPHX_MIOPEN_ASSERT(miopenCreateActivationDescriptor(&descriptor));
+    miopenSetActivationDescriptor(descriptor, mode, alpha, beta, gamma);
+    return descriptor;
+}
+
+// Custom operator that applies MIOpen's ABS activation to its first argument and writes the
+// result into its second argument, which acts as the output buffer.
+struct abs_custom_op final : migraphx::experimental_custom_op_base
+{
+    virtual std::string name() const override { return "abs_custom_op"; }
+    // Runs miopenActivationForward with the ABS mode on args[0] and returns args[1].
+    // Throws std::runtime_error when the MIOpen call does not report success.
+    virtual migraphx::argument compute(migraphx::context ctx,
+                                       migraphx::shape output_shape,
+                                       migraphx::arguments args) const override
+    {
+        float alpha = 1;
+        float beta  = 0;
+        // MIOpen kernel call takes raw buffer pointers for the TensorData. These Buffer pointers
+        // must be accompanied with Tensor Description e.g. shape, type, strides, dimensionality.
+        // Following `make_miopen_tensor` makes such tensor descriptors to pass as parameter to
+        // MIOpen kernel call.
+        auto y_desc = make_miopen_tensor(output_shape);
+        auto x_desc = make_miopen_tensor(args[0].get_shape());
+        // create MIOpen stream handle
+        // NOTE(review): this handle is never released with miopenDestroy(); confirm whether it
+        // can be destroyed safely while work is still queued on the stream.
+        auto miopen_handle = make_miopen_handle(ctx);
+        // MIOpen has generic kernel for many different kinds of activation functions.
+        // Each such generic call must be accompanied with description of what kind of activation
+        // computation to perform
+        auto ad = make_activation_descriptor(miopenActivationABS, 0, 0, 0);
+        const auto status = miopenActivationForward(
+            miopen_handle, ad, &alpha, x_desc, args[0].data(), &beta, y_desc, args[1].data());
+        // Release the descriptors created for this call so repeated evaluations do not
+        // accumulate them.
+        miopenDestroyActivationDescriptor(ad);
+        miopenDestroyTensorDescriptor(x_desc);
+        miopenDestroyTensorDescriptor(y_desc);
+        if(status != miopenStatusSuccess)
+        {
+            throw std::runtime_error("abs_custom_op: miopenActivationForward failed");
+        }
+        return args[1];
+    }
+
+    // Validates the operator's arguments and returns the shape of the output buffer.
+    // Throws std::runtime_error unless there are exactly two arguments with equal shapes.
+    virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
+    {
+        if(inputs.size() != 2)
+        {
+            throw std::runtime_error("abs_custom_op must have two input arguments");
+        }
+        if(inputs[0] != inputs[1])
+        {
+            throw std::runtime_error("Input arguments to abs_custom_op must have same shape");
+        }
+        return inputs.back();
+    }
+};
+
+// Builds the program relu(abs_custom_op(neg(x))), runs it on the "gpu" target and compares
+// the output with abs(x) computed on the host. Returns 0 when they match and 1 otherwise.
+int main(int argc, const char* argv[])
+{
+    abs_custom_op abs_op;
+    migraphx::register_experimental_custom_op(abs_op);
+    migraphx::program p;
+    migraphx::shape s{migraphx_shape_float_type, {32, 256}};
+    migraphx::module m = p.get_main_module();
+    auto x             = m.add_parameter("x", s);
+    auto neg_ins       = m.add_instruction(migraphx::operation("neg"), {x});
+    // add allocation for the custom_kernel's output buffer
+    auto alloc         = m.add_allocation(s);
+    auto custom_kernel = m.add_instruction(migraphx::operation("abs_custom_op"), {neg_ins, alloc});
+    auto relu_ins      = m.add_instruction(migraphx::operation("relu"), {custom_kernel});
+    m.add_return({relu_ins});
+
+    migraphx::compile_options options;
+    // set offload copy to true for GPUs
+    options.set_offload_copy();
+    p.compile(migraphx::target("gpu"), options);
+    migraphx::program_parameters prog_params;
+    // The shape is declared as float, so the element count is bytes / sizeof(float).
+    std::vector<float> x_data(s.bytes() / sizeof(float));
+    std::iota(x_data.begin(), x_data.end(), 0);
+    prog_params.add("x", migraphx::argument(s, x_data.data()));
+    auto results                       = p.eval(prog_params);
+    auto result                        = results[0];
+    std::vector<float> expected_result = x_data;
+    std::transform(expected_result.begin(),
+                   expected_result.end(),
+                   expected_result.begin(),
+                   [](auto i) { return std::abs(i); });
+    const bool passed = bool{result == migraphx::argument(s, expected_result.data())};
+    if(passed)
+    {
+        std::cout << "Successfully executed custom MIOpen kernel example with MIGraphX\n";
+    }
+    else
+    {
+        std::cout << "Custom MIOpen kernel example failed\n";
+    }
+    // Report a mismatch through the exit status so scripts running the example can detect it.
+    return passed ? 0 : 1;
+}
--- a/examples/migraphx/custom_op_rocblas_kernel/CMakeLists.txt
+++ b/examples/migraphx/custom_op_rocblas_kernel/CMakeLists.txt
+#####################################################################################
+# The MIT License (MIT)
+#
+# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#####################################################################################
+cmake_minimum_required(VERSION 3.5)
+project (custom_rocblas_kernel)
+
+# Build the example as C++14 and fail at configure time if the compiler cannot provide it.
+set (CMAKE_CXX_STANDARD 14)
+set (CMAKE_CXX_STANDARD_REQUIRED ON)
+set (EXAMPLE custom_op_rocblas_kernel)
+
+
+# Search for the MIGraphX and rocBLAS package configs under the default ROCm install prefix.
+list (APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)
+find_package (migraphx REQUIRED)
+find_package (rocblas REQUIRED)
+
+message(STATUS "source file: ${EXAMPLE}.cpp ---> bin: ${EXAMPLE}")
+add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
+
+# Keyword signature: the dependencies are only needed to build the example itself.
+target_link_libraries(${EXAMPLE} PRIVATE migraphx::c roc::rocblas)
--- a/examples/migraphx/custom_op_rocblas_kernel/README.md
+++ b/examples/migraphx/custom_op_rocblas_kernel/README.md
+# Custom rocBLAS Kernel using MIGraphX API. 
+ This is an example of a custom operator implementation using MIGraphX's C/C++ APIs. It also demonstrates how to use this custom op in conjunction with the rest of the MIGraphX operators to build and run a MIGraphX program on the GPU.
+
+ Kernels can be written in HIP, or by using the MIOpen or rocBLAS libraries. This particular example uses **rocBLAS** library calls.
+
+ To build and run the example, ensure ROCm is installed at `/opt/rocm`. 
+ 1.  `export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH`
+ 2.  `cd $MIGRAPHX_SRC/examples/migraphx/custom_op_rocblas_kernel/`
+ 3.  `mkdir build && cd build`
+ 4.  `cmake ..  && make`
+ 5.  `./custom_op_rocblas_kernel`
--- a/examples/migraphx/custom_op_rocblas_kernel/custom_op_rocblas_kernel.cpp
+++ b/examples/migraphx/custom_op_rocblas_kernel/custom_op_rocblas_kernel.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <algorithm>
+#include <hip/hip_runtime.h>
+#include <rocblas.h>
+#include <migraphx/migraphx.h>
+#include <migraphx/migraphx.hpp> // MIGraphX's C++ API
+#include <numeric>
+#include <stdexcept>
+
+// Status-checking helpers. Each macro evaluates the call `x` unconditionally and throws
+// std::runtime_error when the call does not report success. The call is deliberately not
+// wrapped in assert(): when NDEBUG is defined, assert() discards its argument, so the
+// rocBLAS/HIP call itself would never be executed.
+#define MIGRAPHX_ROCBLAS_ASSERT(x)                                       \
+    (((x) == rocblas_status::rocblas_status_success) ? static_cast<void>(0) \
+                                                     : throw std::runtime_error("rocBLAS call failed: " #x))
+#define MIGRAPHX_HIP_ASSERT(x) \
+    (((x) == hipSuccess) ? static_cast<void>(0) : throw std::runtime_error("HIP call failed: " #x))
+
+// Creates a new rocBLAS handle and returns it; the handle is not released here.
+rocblas_handle create_rocblas_handle_ptr()
+{
+    rocblas_handle created;
+    MIGRAPHX_ROCBLAS_ASSERT(rocblas_create_handle(&created));
+    return created;
+}
+
+// Selects device 0, creates a rocBLAS handle and attaches the queue obtained from `ctx`
+// to it as the handle's stream. The handle is returned to the caller and not released here.
+rocblas_handle create_rocblas_handle_ptr(migraphx::context& ctx)
+{
+    MIGRAPHX_HIP_ASSERT(hipSetDevice(0));
+    rocblas_handle bound = create_rocblas_handle_ptr();
+    auto* queue          = ctx.get_queue<hipStream_t>();
+    MIGRAPHX_ROCBLAS_ASSERT(rocblas_set_stream(bound, queue));
+    return bound;
+}
+
+// Custom operator that scales the vector in its second argument in place by the scalar in
+// its first argument using rocblas_sscal, and returns the second argument.
+struct sscal_custom_op final : migraphx::experimental_custom_op_base
+{
+    virtual std::string name() const override { return "sscal_custom_op"; }
+    // Calls rocblas_sscal on args[1] with the scale taken from args[0] and returns args[1].
+    // Throws std::runtime_error when a rocBLAS call does not report success.
+    virtual migraphx::argument compute(migraphx::context ctx,
+                                       migraphx::shape output_shape,
+                                       migraphx::arguments args) const override
+    {
+        // create rocblas stream handle
+        auto handle    = create_rocblas_handle_ptr(ctx);
+        rocblas_int n  = args[1].get_shape().lengths()[0];
+        float* alpha   = reinterpret_cast<float*>(args[0].data());
+        float* vec_ptr = reinterpret_cast<float*>(args[1].data());
+        // NOTE(review): alpha points into args[0]; confirm that the handle's pointer mode
+        // matches the memory in which that buffer lives.
+        // The rocBLAS calls are made outside of any assert() so that they are still executed
+        // when NDEBUG is defined.
+        const rocblas_status scal_status = rocblas_sscal(handle, n, alpha, vec_ptr, 1);
+        // Release the handle created for this call so repeated evaluations do not leak one
+        // handle each.
+        const rocblas_status destroy_status = rocblas_destroy_handle(handle);
+        if(scal_status != rocblas_status::rocblas_status_success)
+        {
+            throw std::runtime_error("sscal_custom_op: rocblas_sscal failed");
+        }
+        if(destroy_status != rocblas_status::rocblas_status_success)
+        {
+            throw std::runtime_error("sscal_custom_op: rocblas_destroy_handle failed");
+        }
+        return args[1];
+    }
+
+    // Validates the operator's arguments and returns the shape of the last one.
+    // Throws std::runtime_error unless there are exactly two arguments, the first being a
+    // one-dimensional shape of length 1 and the second being one-dimensional.
+    virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
+    {
+        if(inputs.size() != 2)
+        {
+            throw std::runtime_error("sscal_custom_op must have 2 input arguments");
+        }
+        if(inputs[0].lengths().size() != 1 || inputs[0].lengths()[0] != 1)
+        {
+            throw std::runtime_error("first input argument to sscal_custom_op must be a scalar");
+        }
+        if(inputs[1].lengths().size() != 1)
+        {
+            throw std::runtime_error(
+                "second input argument to sscal_custom_op must be a vector with dimension one");
+        }
+        return inputs.back();
+    }
+};
+
+// Builds the program relu(sscal_custom_op(scale, neg(x))), runs it on the "gpu" target with
+// scale = -1 and compares the output with x. Returns 0 when they match and 1 otherwise.
+int main(int argc, const char* argv[])
+{
+    // computes ReLU(neg(x) * scale)
+    sscal_custom_op sscal_op;
+    migraphx::register_experimental_custom_op(sscal_op);
+    migraphx::program p;
+    migraphx::shape x_shape{migraphx_shape_float_type, {8192}};
+    migraphx::shape scale_shape{migraphx_shape_float_type, {1}};
+    migraphx::module m = p.get_main_module();
+    auto x             = m.add_parameter("x", x_shape);
+    auto scale         = m.add_parameter("scale", scale_shape);
+    auto neg_ins       = m.add_instruction(migraphx::operation("neg"), {x});
+    auto custom_kernel =
+        m.add_instruction(migraphx::operation("sscal_custom_op"), {scale, neg_ins});
+    auto relu_ins = m.add_instruction(migraphx::operation("relu"), {custom_kernel});
+    m.add_return({relu_ins});
+
+    migraphx::compile_options options;
+    // set offload copy to true for GPUs
+    options.set_offload_copy();
+    p.compile(migraphx::target("gpu"), options);
+    migraphx::program_parameters pp;
+    // The shape is declared as float, so the element count is bytes / sizeof(float).
+    std::vector<float> x_data(x_shape.bytes() / sizeof(float));
+    std::vector<float> scale_data{-1};
+    std::iota(x_data.begin(), x_data.end(), 0);
+    pp.add("x", migraphx::argument(x_shape, x_data.data()));
+    pp.add("scale", migraphx::argument(scale_shape, scale_data.data()));
+    auto results = p.eval(pp);
+    auto result  = results[0];
+    // x is non-negative and scale is -1, so ReLU(neg(x) * scale) is x itself.
+    std::vector<float> expected_result = x_data;
+    const bool passed = bool{result == migraphx::argument(x_shape, expected_result.data())};
+    if(passed)
+    {
+        std::cout << "Successfully executed custom rocBLAS kernel example\n";
+    }
+    else
+    {
+        std::cout << "Custom rocBLAS kernel example failed\n";
+    }
+    // Report a mismatch through the exit status so scripts running the example can detect it.
+    return passed ? 0 : 1;
+}
--- a/examples/vision/python_yolov4/yolov4_inference.ipynb
+++ b/examples/vision/python_yolov4/yolov4_inference.ipynb
@@ -80,7 +80,7 @@
   "outputs": [],
   "source": [
    "if not os.path.exists(\"yolov4_fp16.mxr\"):\n",
-    "    !/opt/rocm/bin/migraphx-driver compile ./utilities/yolov4.onnx --gpu --enable-offload-copy --fp16ref --binary -o yolov4_fp16.mxr\n",
+    "    !/opt/rocm/bin/migraphx-driver compile ./utilities/yolov4.onnx --gpu --enable-offload-copy --fp16 --binary -o yolov4_fp16.mxr\n",
    "if not os.path.exists(\"yolov4.mxr\"):\n",
    "    !/opt/rocm/bin/migraphx-driver compile ./utilities/yolov4.onnx --gpu --enable-offload-copy --binary -o yolov4.mxr"
   ]

--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -88,6 +88,7 @@ add_library(migraphx
    shape.cpp
    simplify_algebra.cpp
    simplify_reshapes.cpp
+    target_assignments.cpp
    tmp_dir.cpp
    value.cpp
    verify_args.cpp

--- a/src/driver/alexnet.cpp
+++ b/src/driver/alexnet.cpp
+
 /*
 * The MIT License (MIT)
 *
@@ -21,10 +22,10 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include <migraphx/operators.hpp>
+#include <migraphx/make_op.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/generate.hpp>
-#include <migraphx/apply_alpha_beta.hpp>
+#include <migraphx/json.hpp>
 #include "models.hpp"

 namespace migraphx {
@@ -34,173 +35,189 @@ inline namespace MIGRAPHX_INLINE_NS {
 migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size)
 {
    migraphx::program p;
-    auto* mm = p.get_main_module();
-    auto m0 =
-        mm->add_parameter("0", migraphx::shape{migraphx::shape::float_type, {batch, 3, 224, 224}});
-    auto mx0 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000}}, 0));
-    auto mx1 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000, 4096}}, 1));
-    auto mx2 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 2));
-    auto mx3 = mm->add_literal(
+    migraphx::module_ref mmain = p.get_main_module();
+    auto x_main_module_0       = mmain->add_literal(migraphx::abs(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 0)));
+    auto x_main_module_1       = mmain->add_literal(migraphx::abs(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 1)));
+    auto x_main_module_2       = mmain->add_literal(migraphx::abs(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 2)));
+    auto x_input_1             = mmain->add_parameter(
+        "input.1", migraphx::shape{migraphx::shape::float_type, {batch, 3, 224, 224}});
+    auto x_main_module_4 = mmain->add_literal(
        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 4096}}, 3));
-    auto mx4 = mm->add_literal(
+    auto x_main_module_5 = mmain->add_literal(
        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 4));
-    auto mx5 = mm->add_literal(
+    auto x_main_module_6 = mmain->add_literal(
        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 9216}}, 5));
-    auto mx6 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 6));
-    auto mx7 = mm->add_literal(migraphx::generate_literal(
-        migraphx::shape{migraphx::shape::float_type, {256, 256, 3, 3}}, 7));
-    auto mx8 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 8));
-    auto mx9  = mm->add_literal(migraphx::generate_literal(
+    auto x_main_module_7 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 6));
+    auto x_main_module_8 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000, 4096}}, 7));
+    auto x_main_module_9 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000}}, 8));
+    auto x_main_module_10 = mmain->add_literal(migraphx::generate_literal(
        migraphx::shape{migraphx::shape::float_type, {256, 384, 3, 3}}, 9));
-    auto mx10 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {384}}, 10));
-    auto mx11 = mm->add_literal(migraphx::generate_literal(
+    auto x_main_module_11 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 10));
+    auto x_main_module_12 = mmain->add_literal(migraphx::generate_literal(
        migraphx::shape{migraphx::shape::float_type, {384, 192, 3, 3}}, 11));
-    auto mx12 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {192}}, 12));
-    auto mx13 = mm->add_literal(migraphx::generate_literal(
+    auto x_main_module_13 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {384}}, 12));
+    auto x_main_module_14 = mmain->add_literal(migraphx::generate_literal(
        migraphx::shape{migraphx::shape::float_type, {192, 64, 5, 5}}, 13));
-    auto mx14 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {64}}, 14));
-    auto mx15 = mm->add_literal(migraphx::generate_literal(
-        migraphx::shape{migraphx::shape::float_type, {64, 3, 11, 11}}, 15));
-    migraphx::op::convolution convolution16;
-    convolution16.padding  = {2, 2};
-    convolution16.stride   = {4, 4};
-    convolution16.dilation = {1, 1};
-    convolution16.group    = 1;
-    auto mx16              = mm->add_instruction(convolution16, m0, mx15);
-    migraphx::op::broadcast broadcast17;
-    broadcast17.axis           = 1;
-    broadcast17.broadcast_lens = {batch, 64, 55, 55};
-    auto mx17                  = mm->add_instruction(broadcast17, mx14);
-    migraphx::op::add add18;
-    auto mx18 = mm->add_instruction(add18, mx16, mx17);
-    migraphx::op::relu relu19;
-    auto mx19 = mm->add_instruction(relu19, mx18);
-    migraphx::op::pooling pooling20;
-    pooling20.mode    = migraphx::op::pooling_mode::max;
-    pooling20.padding = {0, 0};
-    pooling20.stride  = {2, 2};
-    pooling20.lengths = {3, 3};
-    auto mx20         = mm->add_instruction(pooling20, mx19);
-    migraphx::op::convolution convolution21;
-    convolution21.padding  = {2, 2};
-    convolution21.stride   = {1, 1};
-    convolution21.dilation = {1, 1};
-    convolution21.group    = 1;
-    auto mx21              = mm->add_instruction(convolution21, mx20, mx13);
-    migraphx::op::broadcast broadcast22;
-    broadcast22.axis           = 1;
-    broadcast22.broadcast_lens = {batch, 192, 27, 27};
-    auto mx22                  = mm->add_instruction(broadcast22, mx12);
-    migraphx::op::add add23;
-    auto mx23 = mm->add_instruction(add23, mx21, mx22);
-    migraphx::op::relu relu24;
-    auto mx24 = mm->add_instruction(relu24, mx23);
-    migraphx::op::pooling pooling25;
-    pooling25.mode    = migraphx::op::pooling_mode::max;
-    pooling25.padding = {0, 0};
-    pooling25.stride  = {2, 2};
-    pooling25.lengths = {3, 3};
-    auto mx25         = mm->add_instruction(pooling25, mx24);
-    migraphx::op::convolution convolution26;
-    convolution26.padding  = {1, 1};
-    convolution26.stride   = {1, 1};
-    convolution26.dilation = {1, 1};
-    convolution26.group    = 1;
-    auto mx26              = mm->add_instruction(convolution26, mx25, mx11);
-    migraphx::op::broadcast broadcast27;
-    broadcast27.axis           = 1;
-    broadcast27.broadcast_lens = {batch, 384, 13, 13};
-    auto mx27                  = mm->add_instruction(broadcast27, mx10);
-    migraphx::op::add add28;
-    auto mx28 = mm->add_instruction(add28, mx26, mx27);
-    migraphx::op::relu relu29;
-    auto mx29 = mm->add_instruction(relu29, mx28);
-    migraphx::op::convolution convolution30;
-    convolution30.padding  = {1, 1};
-    convolution30.stride   = {1, 1};
-    convolution30.dilation = {1, 1};
-    convolution30.group    = 1;
-    auto mx30              = mm->add_instruction(convolution30, mx29, mx9);
-    migraphx::op::broadcast broadcast31;
-    broadcast31.axis           = 1;
-    broadcast31.broadcast_lens = {batch, 256, 13, 13};
-    auto mx31                  = mm->add_instruction(broadcast31, mx8);
-    migraphx::op::add add32;
-    auto mx32 = mm->add_instruction(add32, mx30, mx31);
-    migraphx::op::relu relu33;
-    auto mx33 = mm->add_instruction(relu33, mx32);
-    migraphx::op::convolution convolution34;
-    convolution34.padding  = {1, 1};
-    convolution34.stride   = {1, 1};
-    convolution34.dilation = {1, 1};
-    convolution34.group    = 1;
-    auto mx34              = mm->add_instruction(convolution34, mx33, mx7);
-    migraphx::op::broadcast broadcast35;
-    broadcast35.axis           = 1;
-    broadcast35.broadcast_lens = {batch, 256, 13, 13};
-    auto mx35                  = mm->add_instruction(broadcast35, mx6);
-    migraphx::op::add add36;
-    auto mx36 = mm->add_instruction(add36, mx34, mx35);
-    migraphx::op::relu relu37;
-    auto mx37 = mm->add_instruction(relu37, mx36);
-    migraphx::op::pooling pooling38;
-    pooling38.mode    = migraphx::op::pooling_mode::max;
-    pooling38.padding = {0, 0};
-    pooling38.stride  = {2, 2};
-    pooling38.lengths = {3, 3};
-    auto mx38         = mm->add_instruction(pooling38, mx37);
-    migraphx::op::flatten flatten39;
-    flatten39.axis = 1;
-    auto mx39      = mm->add_instruction(flatten39, mx38);
-    migraphx::op::identity identity40;
-    auto mx40 = mm->add_instruction(identity40, mx39);
-    migraphx::op::transpose transpose41;
-    transpose41.dims = {1, 0};
-    auto mx41        = mm->add_instruction(transpose41, mx5);
-    migraphx::op::multibroadcast multibroadcast42;
-    multibroadcast42.output_lens = {batch, 4096};
-    auto mx42                    = mm->add_instruction(multibroadcast42, mx4);
-    float dot43_alpha            = 1;
-    float dot43_beta             = 1;
-    auto mx43                    = migraphx::add_apply_alpha_beta(
-        *mm, {mx40, mx41, mx42}, migraphx::make_op("dot"), dot43_alpha, dot43_beta);
-    migraphx::op::relu relu44;
-    auto mx44 = mm->add_instruction(relu44, mx43);
-    migraphx::op::identity identity45;
-    auto mx45 = mm->add_instruction(identity45, mx44);
-    migraphx::op::transpose transpose46;
-    transpose46.dims = {1, 0};
-    auto mx46        = mm->add_instruction(transpose46, mx3);
-    migraphx::op::multibroadcast multibroadcast47;
-    multibroadcast47.output_lens = {batch, 4096};
-    auto mx47                    = mm->add_instruction(multibroadcast47, mx2);
-    float dot48_alpha            = 1;
-    float dot48_beta             = 1;
-    auto mx48                    = migraphx::add_apply_alpha_beta(
-        *mm, {mx45, mx46, mx47}, migraphx::make_op("dot"), dot48_alpha, dot48_beta);
-    migraphx::op::relu relu49;
-    auto mx49 = mm->add_instruction(relu49, mx48);
-    migraphx::op::transpose transpose50;
-    transpose50.dims = {1, 0};
-    auto mx50        = mm->add_instruction(transpose50, mx1);
-    migraphx::op::multibroadcast multibroadcast51;
-    multibroadcast51.output_lens = {batch, 1000};
-    auto mx51                    = mm->add_instruction(multibroadcast51, mx0);
-    float dot52_alpha            = 1;
-    float dot52_beta             = 1;
-    migraphx::add_apply_alpha_beta(
-        *mm, {mx49, mx50, mx51}, migraphx::make_op("dot"), dot52_alpha, dot52_beta);
+    auto x_main_module_15 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {192}}, 14));
+    auto x_main_module_16 = mmain->add_literal(migraphx::generate_literal(
+        migraphx::shape{migraphx::shape::float_type, {256, 256, 3, 3}}, 15));
+    auto x_main_module_17 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 16));
+    auto x_main_module_18 = mmain->add_literal(migraphx::generate_literal(
+        migraphx::shape{migraphx::shape::float_type, {64, 3, 11, 11}}, 17));
+    auto x_main_module_19 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {64}}, 18));
+    // Layer graph. Shapes that carry the batch dimension are built from `batch`, the same
+    // value used for the "input.1" parameter, so shape checks pass for any batch size.
+    auto x_main_module_20 = mmain->add_instruction(
+        migraphx::make_op(
+            "convolution",
+            migraphx::from_json_string(
+                "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[4,4]}")),
+        x_input_1,
+        x_main_module_18);
+    auto x_main_module_21 = mmain->add_instruction(
+        migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {batch, 64, 55, 55}}}),
+        x_main_module_19);
+    auto x_main_module_22 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_20, x_main_module_21);
+    auto x_main_module_23 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_22);
+    auto x_main_module_24 = mmain->add_instruction(
+        migraphx::make_op(
+            "pooling",
+            migraphx::from_json_string(
+                "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}")),
+        x_main_module_23);
+    auto x_main_module_25 = mmain->add_instruction(
+        migraphx::make_op(
+            "convolution",
+            migraphx::from_json_string(
+                "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[1,1]}")),
+        x_main_module_24,
+        x_main_module_14);
+    auto x_main_module_26 = mmain->add_instruction(
+        migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {batch, 192, 27, 27}}}),
+        x_main_module_15);
+    auto x_main_module_27 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_25, x_main_module_26);
+    auto x_main_module_28 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_27);
+    auto x_main_module_29 = mmain->add_instruction(
+        migraphx::make_op(
+            "pooling",
+            migraphx::from_json_string(
+                "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}")),
+        x_main_module_28);
+    auto x_main_module_30 = mmain->add_instruction(
+        migraphx::make_op(
+            "convolution",
+            migraphx::from_json_string(
+                "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}")),
+        x_main_module_29,
+        x_main_module_12);
+    auto x_main_module_31 = mmain->add_instruction(
+        migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {batch, 384, 13, 13}}}),
+        x_main_module_13);
+    auto x_main_module_32 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_30, x_main_module_31);
+    auto x_main_module_33 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_32);
+    auto x_main_module_34 = mmain->add_instruction(
+        migraphx::make_op(
+            "convolution",
+            migraphx::from_json_string(
+                "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}")),
+        x_main_module_33,
+        x_main_module_10);
+    auto x_main_module_35 = mmain->add_instruction(
+        migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {batch, 256, 13, 13}}}),
+        x_main_module_11);
+    auto x_main_module_36 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_34, x_main_module_35);
+    auto x_main_module_37 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_36);
+    auto x_main_module_38 = mmain->add_instruction(
+        migraphx::make_op(
+            "convolution",
+            migraphx::from_json_string(
+                "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}")),
+        x_main_module_37,
+        x_main_module_16);
+    auto x_main_module_39 = mmain->add_instruction(
+        migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {batch, 256, 13, 13}}}),
+        x_main_module_17);
+    auto x_main_module_40 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_38, x_main_module_39);
+    auto x_main_module_41 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_40);
+    auto x_main_module_42 = mmain->add_instruction(
+        migraphx::make_op(
+            "pooling",
+            migraphx::from_json_string(
+                "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}")),
+        x_main_module_41);
+    // Collapse the last pooling output to {batch, 9216} for the dot products that follow.
+    auto x_main_module_43 = mmain->add_instruction(
+        migraphx::make_op("reshape", {{"dims", {batch, 9216}}}),
+        x_main_module_42);
+    auto x_main_module_44 = mmain->add_instruction(
+        migraphx::make_op("transpose", migraphx::from_json_string("{permutation:[1,0]}")),
+        x_main_module_6);
+    auto x_main_module_45 =
+        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_43, x_main_module_44);
+    // Each bias is multiplied by a single-element literal after both are expanded to {batch, N}.
+    auto x_main_module_46 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", {batch, 4096}}}),
+        x_main_module_7);
+    auto x_main_module_47 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", {batch, 4096}}}),
+        x_main_module_2);
+    auto x_main_module_48 =
+        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_46, x_main_module_47);
+    auto x_main_module_49 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_45, x_main_module_48);
+    auto x_main_module_50 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_49);
+    auto x_main_module_51 = mmain->add_instruction(
+        migraphx::make_op("transpose", migraphx::from_json_string("{permutation:[1,0]}")),
+        x_main_module_4);
+    auto x_main_module_52 =
+        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_50, x_main_module_51);
+    auto x_main_module_53 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", {batch, 4096}}}),
+        x_main_module_5);
+    auto x_main_module_54 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", {batch, 4096}}}),
+        x_main_module_1);
+    auto x_main_module_55 =
+        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_53, x_main_module_54);
+    auto x_main_module_56 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_52, x_main_module_55);
+    auto x_main_module_57 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_56);
+    auto x_main_module_58 = mmain->add_instruction(
+        migraphx::make_op("transpose", migraphx::from_json_string("{permutation:[1,0]}")),
+        x_main_module_8);
+    auto x_main_module_59 =
+        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_57, x_main_module_58);
+    auto x_main_module_60 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", {batch, 1000}}}),
+        x_main_module_9);
+    auto x_main_module_61 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", {batch, 1000}}}),
+        x_main_module_0);
+    auto x_main_module_62 =
+        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_60, x_main_module_61);
+    auto x_main_module_63 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_59, x_main_module_62);
+    // The final add is the program's only return value.
+    mmain->add_return({x_main_module_63});
+
    return p;
 }
-
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace driver
 } // namespace migraphx
--- a/src/driver/inceptionv3.cpp
+++ b/src/driver/inceptionv3.cpp
--- a/src/driver/main.cpp
+++ b/src/driver/main.cpp
@@ -210,6 +210,9 @@ struct loader
            auto last = std::prev(mm->end(), trim);
            mm->remove_instructions(last, mm->end());
        }
+        // Eliminate dead code so the exported cpp has no unused variables
+        if(output_type == "cpp")
+            migraphx::run_passes(*p.get_main_module(), {migraphx::dead_code_elimination{}});
        if(optimize)
        {
            migraphx::run_passes(*p.get_main_module(),

--- a/src/driver/resnet50.cpp
+++ b/src/driver/resnet50.cpp
--- a/src/eliminate_contiguous.cpp
+++ b/src/eliminate_contiguous.cpp
@@ -93,9 +93,11 @@ static bool try_compute_shape(instruction_ref ins,
    return try_compute_shape(ins, inputs, mods);
 }

-void eliminate_contiguous::apply(module& m) const
+template <class F>
+static void remove_contiguous(const std::string& op_name, module& m, F f)
 {
-    std::vector<instruction_ref> const_instruction;
+    auto last = std::prev(m.end());
+    std::vector<instruction_ref> const_instructions;

    for(auto ins : iterator_for(m))
    {
@@ -103,6 +105,12 @@ void eliminate_contiguous::apply(module& m) const
        if(ins->name() == "@return")
            continue;

+        if(ins != last and ins->outputs().empty())
+            continue;
+
+        if(not f(ins))
+            continue;
+
        // Make a copy so we can modify it while we iterate
        auto args     = ins->inputs();
        auto new_args = args;
@@ -110,36 +118,46 @@ void eliminate_contiguous::apply(module& m) const

        for(auto arg : ins->inputs())
        {
-            if(arg->name() == op_name)
+            if(arg->name() != op_name)
+                continue;
+            auto prev = arg->inputs().front();
+            replace(new_args, arg, prev);
+            if(try_compute_shape(ins, new_args, mod_args))
+            {
+                instruction::replace_argument(ins, arg, prev);
+            }
+            else if(prev->can_eval())
            {
-                auto prev = arg->inputs().front();
-                replace(new_args, arg, prev);
-                if(try_compute_shape(ins, new_args, mod_args))
-                {
-                    instruction::replace_argument(ins, arg, prev);
-                }
-                else if(prev->can_eval())
-                {
-                    const_instruction.push_back(arg);
-                }
+                const_instructions.push_back(arg);
            }
        }
    }

    // Perform evaluations in parallel
-    std::vector<argument> literals(const_instruction.size());
-    par_for(const_instruction.size(), 1, [&](const auto i) {
+    std::vector<argument> literals(const_instructions.size());
+    par_for(const_instructions.size(), 1, [&](const auto i) {
        auto c      = op::contiguous{};
-        auto prev   = const_instruction[i]->inputs().front();
+        auto prev   = const_instructions[i]->inputs().front();
        literals[i] = c.compute(c.compute_shape({prev->get_shape()}), {prev->eval()});
    });

-    for(size_t i = 0; i < const_instruction.size(); i++)
+    for(size_t i = 0; i < const_instructions.size(); i++)
    {
        auto l = m.add_literal(literals[i].get_shape(), literals[i].data());
-        m.replace_instruction(const_instruction[i], l);
+        m.replace_instruction(const_instructions[i], l);
    }
 }

+void eliminate_contiguous::apply(module& m) const
+{
+    // Pass 1: a slice is only processed when it is the sole consumer of its input
+    const auto unshared = [](auto ins) {
+        return ins->name() != "slice" or ins->inputs().front()->outputs().size() == 1;
+    };
+    remove_contiguous(op_name, m, unshared);
+    // Pass 2: every instruction is processed
+    remove_contiguous(op_name, m, [](auto) { return true; });
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
--- a/src/fuse_pointwise.cpp
+++ b/src/fuse_pointwise.cpp
@@ -142,7 +142,7 @@ static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
            input_map[input] = map_ins[param];
        }
    }
-    pm->replace_return(pm->insert_module_instructions(last, xm, map_ins));
+    pm->replace_return(pm->insert_instructions(last, xm, map_ins));
    return inputs;
 }


--- a/src/include/migraphx/assignment_options.hpp
+++ b/src/include/migraphx/assignment_options.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_RTGLIB_ASSIGNMENT_OPTIONS_HPP
+#define MIGRAPHX_GUARD_RTGLIB_ASSIGNMENT_OPTIONS_HPP
+
+#include <migraphx/support_metric.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+
+// Option bundle holding the support metric to use.
+struct assignment_options
+{
+    // Defaults to support_metric::latency when not set explicitly.
+    support_metric metric{support_metric::latency};
+};
+
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif // MIGRAPHX_GUARD_RTGLIB_ASSIGNMENT_OPTIONS_HPP
--- a/src/include/migraphx/iota_iterator.hpp
+++ b/src/include/migraphx/iota_iterator.hpp
@@ -81,8 +81,9 @@ struct basic_iota_iterator
        index--;
        return it;
    }
-    // TODO: operator->
    reference operator*() const { return f(index); }
+    pointer operator->() const { return &f(index); }
+    reference operator[](int n) const { return f(index + n); }
 };

 template <class T, class F>