gaoqiong / MIGraphX · Commits

Commit fa4cf4c4, authored Jan 21, 2019 by Khalique

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into pad_op

Parents: e067dce0, 31b2c735

Changes: 23 files
Showing 20 changed files with 437 additions and 51 deletions on this page.

src/include/migraphx/check_context.hpp                 +9  -1
src/include/migraphx/concat_opt.hpp                    +7  -0
src/include/migraphx/context.hpp                       +8  -2
src/include/migraphx/functional.hpp                    +6  -0
src/include/migraphx/instruction.hpp                   +3  -0
src/include/migraphx/operation.hpp                     +85 -4
src/include/migraphx/par_dfor.hpp                      +51 -0
src/include/migraphx/par_for.hpp                       +82 -0
src/include/migraphx/pass.hpp                          +8  -2
src/include/migraphx/program.hpp                       +4  -0
src/include/migraphx/target.hpp                        +6  -0
src/instruction.cpp                                    +16 -10
src/program.cpp                                        +11 -0
src/targets/cpu/lowering.cpp                           +11 -10
src/targets/gpu/convolution.cpp                        +17 -6
src/targets/gpu/fuse_ops.cpp                           +5  -12
src/targets/gpu/include/migraphx/gpu/convolution.hpp   +3  -1
src/targets/gpu/lowering.cpp                           +1  -1
test/eval_test.cpp                                     +97 -1
test/include/test.hpp                                  +7  -1

src/include/migraphx/check_context.hpp

@@ -15,11 +15,19 @@ struct check_context
     std::string name() const { return "check_context"; }
     shape compute_shape(const std::vector<shape>&) const { return {}; }
     argument compute(context& ctx, const shape&, const std::vector<argument>&) const
     {
+        this->check(ctx);
+        return {};
+    }
+    void finalize(context& ctx, const shape&, const std::vector<shape>&) const { this->check(ctx); }
+    void check(context& ctx) const
+    {
         T* x = any_cast<T>(&ctx);
         if(x == nullptr)
             MIGRAPHX_THROW(std::string("Unexpected context type: ") + ctx.type_id().name());
-        return {};
     }
 };

src/include/migraphx/concat_opt.hpp

@@ -119,6 +119,13 @@ struct concat_optimization
         return (*this).private_detail_te_get_handle().get_concat(op);
     }

+    friend bool is_shared(const concat_optimization& private_detail_x,
+                          const concat_optimization& private_detail_y)
+    {
+        return private_detail_x.private_detail_te_handle_mem_var ==
+               private_detail_y.private_detail_te_handle_mem_var;
+    }
+
     private:
     struct private_detail_te_handle_base_type
     {

src/include/migraphx/context.hpp

@@ -95,7 +95,13 @@ struct context
     void finish() const
     {
         assert((*this).private_detail_te_handle_mem_var);
-        return (*this).private_detail_te_get_handle().finish();
+        (*this).private_detail_te_get_handle().finish();
     }

+    friend bool is_shared(const context& private_detail_x, const context& private_detail_y)
+    {
+        return private_detail_x.private_detail_te_handle_mem_var ==
+               private_detail_y.private_detail_te_handle_mem_var;
+    }
+
     private:

@@ -136,7 +142,7 @@ struct context
         const std::type_info& type() const override { return typeid(private_detail_te_value); }

-        void finish() const override { return private_detail_te_value.finish(); }
+        void finish() const override { private_detail_te_value.finish(); }

         PrivateDetailTypeErasedT private_detail_te_value;
     };

src/include/migraphx/functional.hpp

@@ -94,6 +94,12 @@ constexpr void each_args(F)
 {
 }

+template <class F, class T>
+auto unpack(F f, T& x)
+{
+    return sequence_c<std::tuple_size<T>{}>([&](auto... is) { f(std::get<is>(x)...); });
+}
+
 /// Implements a fix-point combinator
 template <class R, class F>
 detail::fix_f<R, F> fix(F f)

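The new unpack helper expands a fixed-size container (anything std::tuple_size and std::get work on, such as std::array) into separate arguments of a callable; par_dfor below uses it to turn a computed index array back into per-dimension loop variables. A minimal usage sketch, with illustrative names that are not part of this commit:

    #include <array>
    #include <migraphx/functional.hpp>

    void example()
    {
        std::array<std::size_t, 3> idx = {{1, 2, 3}};
        // The lambda receives one argument per element: i == 1, j == 2, k == 3.
        migraphx::unpack([](auto i, auto j, auto k) { /* use i, j, k */ }, idx);
    }
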
src/include/migraphx/instruction.hpp

@@ -14,6 +14,7 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

 shape compute_shape(const operation& op, const std::vector<instruction_ref>& args);
+std::vector<shape> to_shapes(const std::vector<instruction_ref>& args);

 struct instruction
 {

@@ -73,6 +74,8 @@ struct instruction
     argument eval() const;

+    void finalize(context& ctx);
+
     static instruction_ref get_output_alias(instruction_ref ins, bool shallow = false);

     private:

src/include/migraphx/operation.hpp

@@ -26,6 +26,8 @@ struct operation
 {
     /// A unique name identifying the operation
     std::string name() const;
+    /// An optional method that can be used to finalize the operator before running
+    void finalize(context& ctx);
     /// This is used to compute the resulting shape from an operation. If an
     /// operation cannot be run with input shapes, then it should throw an
     /// exception.

@@ -55,6 +57,8 @@ struct operation
 /// Returns true if operation does not require a context to run compute
 bool is_context_free(const operation& x);
+/// Returns true if the operation has a finalize method
+bool has_finalize(const operation& x);

 #else

@@ -189,6 +193,44 @@ int output_alias_op(const T& x, const std::vector<shape>& shapes)
     return output_alias_op(rank<1>{}, x, shapes);
 }

+template <class T>
+auto finalize_op(
+    rank<1>, T& x, context& ctx, const shape& output_shape, const std::vector<shape>& input)
+    -> decltype(x.finalize(auto_any_cast(ctx), output_shape, input), void())
+{
+    x.finalize(auto_any_cast(ctx), output_shape, input);
+}
+
+template <class T>
+void finalize_op(rank<0>, T&, context&, const shape&, const std::vector<shape>&)
+{
+}
+
+template <class T>
+void finalize_op(T& x, context& ctx, const shape& output_shape, const std::vector<shape>& input)
+{
+    finalize_op(rank<1>{}, x, ctx, output_shape, input);
+}
+
+template <class T>
+auto has_finalize_op(
+    rank<1>, T& x, context& ctx, const shape& output_shape, const std::vector<shape>& input)
+    -> decltype(x.finalize(auto_any_cast(ctx), output_shape, input), std::true_type{});
+
+template <class T>
+auto has_finalize_op(rank<0>, T&, context&, const shape&, const std::vector<shape>&)
+    -> std::false_type;
+
+template <class T>
+auto has_finalize_op(const T&) -> decltype(has_finalize_op(rank<1>{},
+                                                           std::declval<T&>(),
+                                                           std::declval<context&>(),
+                                                           std::declval<const shape&>(),
+                                                           std::declval<std::vector<shape>>()))
+{
+    return {};
+}
+
 /*
  * Type-erased interface for:
  *

@@ -196,7 +238,9 @@ int output_alias_op(const T& x, const std::vector<shape>& shapes)
 * struct operation
 * {
 *      std::string name() const;
 *      bool is_context_free() const;
+*      bool has_finalize() const;
 *      int output_alias(const std::vector<shape>& input) const;
+*      void finalize(context& ctx,const shape& output,const std::vector<shape>& input) ;
 *      shape compute_shape(const std::vector<shape>& input) const;
 *      argument compute(context& ctx,const shape& output,const std::vector<argument>& input) const;
 *      argument compute(const shape& output,const std::vector<argument>& input) const;

@@ -275,12 +319,24 @@ struct operation
         return (*this).private_detail_te_get_handle().is_context_free();
     }

+    bool has_finalize() const
+    {
+        assert((*this).private_detail_te_handle_mem_var);
+        return (*this).private_detail_te_get_handle().has_finalize();
+    }
+
     int output_alias(const std::vector<shape>& input) const
     {
         assert((*this).private_detail_te_handle_mem_var);
         return (*this).private_detail_te_get_handle().output_alias(input);
     }

+    void finalize(context& ctx, const shape& output, const std::vector<shape>& input)
+    {
+        assert((*this).private_detail_te_handle_mem_var);
+        (*this).private_detail_te_get_handle().finalize(ctx, output, input);
+    }
+
     shape compute_shape(const std::vector<shape>& input) const
     {
         assert((*this).private_detail_te_handle_mem_var);

@@ -311,6 +367,12 @@ struct operation
         return x.private_detail_te_get_handle().operator==(y);
     }

+    friend bool is_shared(const operation& private_detail_x, const operation& private_detail_y)
+    {
+        return private_detail_x.private_detail_te_handle_mem_var ==
+               private_detail_y.private_detail_te_handle_mem_var;
+    }
+
     private:
     struct private_detail_te_handle_base_type
     {

@@ -318,10 +380,13 @@ struct operation
         virtual std::shared_ptr<private_detail_te_handle_base_type> clone() const = 0;
         virtual const std::type_info& type() const = 0;

-        virtual std::string name() const = 0;
-        virtual bool is_context_free() const = 0;
-        virtual int output_alias(const std::vector<shape>& input) const = 0;
-        virtual shape compute_shape(const std::vector<shape>& input) const = 0;
+        virtual std::string name() const = 0;
+        virtual bool is_context_free() const = 0;
+        virtual bool has_finalize() const = 0;
+        virtual int output_alias(const std::vector<shape>& input) const = 0;
+        virtual void finalize(context& ctx, const shape& output, const std::vector<shape>& input) = 0;
+        virtual shape compute_shape(const std::vector<shape>& input) const = 0;
         virtual argument compute(context& ctx, const shape& output, const std::vector<argument>& input) const = 0;
         virtual argument compute(const shape& output, const std::vector<argument>& input) const = 0;

@@ -365,12 +430,20 @@ struct operation
             return is_context_free_op(private_detail_te_value);
         }

+        bool has_finalize() const override { return has_finalize_op(private_detail_te_value); }
+
         int output_alias(const std::vector<shape>& input) const override
         {
             return output_alias_op(private_detail_te_value, input);
         }

+        void finalize(context& ctx, const shape& output, const std::vector<shape>& input) override
+        {
+            finalize_op(private_detail_te_value, ctx, output, input);
+        }
+
         shape compute_shape(const std::vector<shape>& input) const override
         {

@@ -478,6 +551,14 @@ bool is_context_free(const T& x)
     return is_context_free_op(x);
 }

+inline bool has_finalize(const operation& op) { return op.has_finalize(); }
+
+template <class T>
+bool has_finalize(const T& x)
+{
+    return has_finalize_op(x);
+}
+
 #endif

 } // namespace MIGRAPHX_INLINE_NS

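Taken together, these hooks let a concrete operator opt into a finalize step simply by defining a member with the matching signature: has_finalize_op detects it at compile time, and the type-erased operation forwards finalize to it, while operators without the member fall back to the empty rank<0> overload. A hedged sketch of such an operator (the struct and its body are illustrative, not part of this commit):

    #include <migraphx/operation.hpp>

    // Hypothetical operator that prepares per-context state before execution.
    struct cached_op
    {
        std::string name() const { return "cached_op"; }
        migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
        {
            return inputs.front();
        }
        migraphx::argument compute(const migraphx::shape&,
                                   const std::vector<migraphx::argument>& args) const
        {
            return args.front();
        }
        // Because this member exists, has_finalize(cached_op{}) is true and
        // program::compile will invoke it (via instruction::finalize) before the first eval.
        void finalize(migraphx::context&,
                      const migraphx::shape&,
                      const std::vector<migraphx::shape>&)
        {
            // e.g. build descriptors or query workspace sizes here
        }
    };
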
src/include/migraphx/par_dfor.hpp (new file)

#ifndef MIGRAPHX_GUARD_RTGLIB_PAR_DFOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_PAR_DFOR_HPP

#include <migraphx/par_for.hpp>
#include <migraphx/functional.hpp>
#include <array>
#include <numeric>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

template <class... Ts>
auto par_dfor(Ts... xs)
{
    return [=](auto f) {
        using array_type = std::array<std::size_t, sizeof...(Ts)>;
        array_type lens  = {{static_cast<std::size_t>(xs)...}};
        auto n = std::accumulate(lens.begin(), lens.end(), 1, std::multiplies<std::size_t>{});
        const std::size_t min_grain = 8;
        if(n > 2 * min_grain)
        {
            array_type strides;
            strides.fill(1);
            std::partial_sum(lens.rbegin(),
                             lens.rend() - 1,
                             strides.rbegin() + 1,
                             std::multiplies<std::size_t>());
            auto size =
                std::accumulate(lens.begin(), lens.end(), 1, std::multiplies<std::size_t>());
            par_for(size, min_grain, [&](std::size_t i) {
                array_type indices;
                std::transform(strides.begin(),
                               strides.end(),
                               lens.begin(),
                               indices.begin(),
                               [&](size_t stride, size_t len) { return (i / stride) % len; });
                migraphx::unpack(f, indices);
            });
        }
        else
        {
            dfor(xs...)(f);
        }
    };
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif

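par_dfor mirrors the existing dfor helper: each argument gives the extent of one loop dimension, and the returned callable invokes the body once per index combination, dispatching through par_for when the total trip count exceeds 2 * min_grain and falling back to the serial dfor otherwise. A small usage sketch with illustrative names (not from this commit):

    #include <migraphx/par_dfor.hpp>
    #include <vector>

    void scale_grid(std::vector<float>& out, const std::vector<float>& in,
                    std::size_t rows, std::size_t cols)
    {
        // The body may run on several threads, so iterations must be independent.
        migraphx::par_dfor(rows, cols)([&](std::size_t i, std::size_t j) {
            out[i * cols + j] = 2.0f * in[i * cols + j];
        });
    }
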
src/include/migraphx/par_for.hpp (new file)

#ifndef MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP

#include <thread>
#include <cmath>
#include <algorithm>
#include <vector>
#include <cassert>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct joinable_thread : std::thread
{
    template <class... Xs>
    joinable_thread(Xs&&... xs) : std::thread(std::forward<Xs>(xs)...) // NOLINT
    {
    }

    joinable_thread& operator=(joinable_thread&& other) = default;
    joinable_thread(joinable_thread&& other)            = default;

    ~joinable_thread()
    {
        if(this->joinable())
            this->join();
    }
};

template <class F>
void par_for_impl(std::size_t n, std::size_t threadsize, F f)
{
    if(threadsize <= 1)
    {
        for(std::size_t i = 0; i < n; i++)
            f(i);
    }
    else
    {
        std::vector<joinable_thread> threads(threadsize);
// Using const here causes gcc 5 to ICE
#if(!defined(__GNUC__) || __GNUC__ != 5)
        const
#endif
            std::size_t grainsize = std::ceil(static_cast<double>(n) / threads.size());

        std::size_t work = 0;
        std::generate(threads.begin(), threads.end(), [=, &work] {
            auto result = joinable_thread([=] {
                std::size_t start = work;
                std::size_t last  = std::min(n, work + grainsize);
                for(std::size_t i = start; i < last; i++)
                {
                    f(i);
                }
            });
            work += grainsize;
            return result;
        });
        assert(work >= n);
    }
}

template <class F>
void par_for(std::size_t n, std::size_t min_grain, F f)
{
    const auto threadsize =
        std::min<std::size_t>(std::thread::hardware_concurrency(), n / min_grain);
    par_for_impl(n, threadsize, f);
}

template <class F>
void par_for(std::size_t n, F f)
{
    const int min_grain = 8;
    par_for(n, min_grain, f);
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif

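par_for(n, f) splits the index range [0, n) across at most hardware_concurrency() threads, capped at n / min_grain, so ranges shorter than two grains (fewer than 16 iterations with the default grain of 8) simply run serially on the calling thread. A usage sketch with illustrative names (not from this commit):

    #include <migraphx/par_for.hpp>
    #include <vector>

    void double_all(std::vector<float>& data)
    {
        // Each index in [0, data.size()) is visited exactly once, possibly from different threads.
        migraphx::par_for(data.size(), [&](std::size_t i) { data[i] *= 2.0f; });
    }
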
src/include/migraphx/pass.hpp

@@ -105,7 +105,13 @@ struct pass
     void apply(program& p) const
     {
         assert((*this).private_detail_te_handle_mem_var);
-        return (*this).private_detail_te_get_handle().apply(p);
+        (*this).private_detail_te_get_handle().apply(p);
     }

+    friend bool is_shared(const pass& private_detail_x, const pass& private_detail_y)
+    {
+        return private_detail_x.private_detail_te_handle_mem_var ==
+               private_detail_y.private_detail_te_handle_mem_var;
+    }
+
     private:

@@ -149,7 +155,7 @@ struct pass
         std::string name() const override { return private_detail_te_value.name(); }

-        void apply(program& p) const override { return private_detail_te_value.apply(p); }
+        void apply(program& p) const override { private_detail_te_value.apply(p); }

         PrivateDetailTypeErasedT private_detail_te_value;
     };

src/include/migraphx/program.hpp

@@ -91,10 +91,14 @@ struct program
     shape get_shape() const;

+    context& get_context() const;
+
     instruction_ref validate() const;

     void compile(const target& t, tracer trace = tracer{});

+    void finalize();
+
     void perf_report(std::ostream& os, std::size_t n, parameter_map params) const;

     void debug_print() const;

src/include/migraphx/target.hpp

@@ -127,6 +127,12 @@ struct target
         return (*this).private_detail_te_get_handle().get_context();
     }

+    friend bool is_shared(const target& private_detail_x, const target& private_detail_y)
+    {
+        return private_detail_x.private_detail_te_handle_mem_var ==
+               private_detail_y.private_detail_te_handle_mem_var;
+    }
+
     private:
     struct private_detail_te_handle_base_type
     {

src/instruction.cpp

@@ -162,14 +162,6 @@ void instruction::replace_argument(instruction_ref old, instruction_ref new_ins)
     old->remove_output(*this);
 }

-std::vector<shape> compute_shapes(const std::vector<instruction_ref>& args)
-{
-    std::vector<shape> shapes(args.size());
-    std::transform(args.begin(), args.end(), shapes.begin(), [](instruction_ref i) {
-        return i->get_shape();
-    });
-    return shapes;
-}
-
 argument instruction::eval() const
 {
     if(op.name() == "@literal")

@@ -191,9 +183,15 @@ argument instruction::eval() const
     return {};
 }

+void instruction::finalize(context& ctx)
+{
+    if(has_finalize(this->op))
+        this->op.finalize(ctx, this->get_shape(), to_shapes(this->inputs()));
+}
+
 instruction_ref instruction::get_output_alias(instruction_ref ins, bool shallow)
 {
-    auto i = ins->get_operator().output_alias(compute_shapes(ins->inputs()));
+    auto i = ins->get_operator().output_alias(to_shapes(ins->inputs()));
     if(i < 0)
         return ins;
     if(shallow)

@@ -201,9 +199,17 @@ instruction_ref instruction::get_output_alias(instruction_ref ins, bool shallow)
     return get_output_alias(ins->inputs().at(i));
 }

+std::vector<shape> to_shapes(const std::vector<instruction_ref>& args)
+{
+    std::vector<shape> shapes(args.size());
+    std::transform(args.begin(), args.end(), shapes.begin(), [](instruction_ref i) {
+        return i->get_shape();
+    });
+    return shapes;
+}
+
 shape compute_shape(const operation& op, const std::vector<instruction_ref>& args)
 {
-    return op.compute_shape(compute_shapes(args));
+    return op.compute_shape(to_shapes(args));
 }

 } // namespace MIGRAPHX_INLINE_NS

src/program.cpp

@@ -271,6 +271,8 @@ instruction_ref program::end() const { return impl->instructions.end(); }
 shape program::get_shape() const { return impl->instructions.back().get_shape(); }

+context& program::get_context() const { return impl->ctx; }
+
 instruction_ref program::validate() const
 {
     return std::find_if(impl->instructions.begin(),

@@ -309,6 +311,15 @@ void program::compile(const target& t, tracer trace)
         auto index = std::distance(impl->instructions.begin(), invalid);
         MIGRAPHX_THROW("Invalid program from compilation at instruction " + std::to_string(index));
     }
+    this->finalize();
+}
+
+void program::finalize()
+{
+    for(auto ins : iterator_for(*this))
+    {
+        ins->finalize(this->impl->ctx);
+    }
 }

 template <class F>

src/targets/cpu/lowering.cpp

@@ -5,6 +5,7 @@
 #include <migraphx/operators.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/iterator_for.hpp>
+#include <migraphx/par_dfor.hpp>
 #include <migraphx/cpu/gemm.hpp>
 #include <unordered_map>
 #include <utility>

@@ -72,7 +73,7 @@ struct cpu_batch_norm_inference
             visit_all(output, input, mini_batch_mean, mini_batch_variance, arg_gamma, arg_bias)(
                 [&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
-                    dfor(num_batch, num_channels, image_height, image_width)(
+                    par_dfor(num_batch, num_channels, image_height, image_width)(
                         [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
                             assert((variance(c) + epsilon) > 0);
                             result(n, c, h, w) = gamma(c) * (buffer(n, c, h, w) - mean(c)) /

@@ -87,7 +88,7 @@ struct cpu_batch_norm_inference
             visit_all(output, input, mini_batch_mean, mini_batch_mean, arg_gamma, arg_bias)(
                 [&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
-                    dfor(num_batch, num_channels, image_height, image_width)(
+                    par_dfor(num_batch, num_channels, image_height, image_width)(
                         [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
                             assert((variance(c, h, w) + epsilon) > 0);
                             result(n, c, h, w) = gamma(c, h, w) *

@@ -122,10 +123,10 @@ struct cpu_convolution
         auto wei_h = wei[2];
         auto wei_w = wei[3];

-        dfor(output_shape.lens()[0],
-             output_shape.lens()[1],
-             output_shape.lens()[2],
-             output_shape.lens()[3])(
+        par_dfor(output_shape.lens()[0],
+                 output_shape.lens()[1],
+                 output_shape.lens()[2],
+                 output_shape.lens()[3])(
             [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                 const int start_x = i * op.stride[0] - op.padding[0];
                 const int start_y = j * op.stride[1] - op.padding[1];

@@ -245,10 +246,10 @@ struct cpu_pooling
         auto in_h = input.get_shape().lens()[2];
         auto in_w = input.get_shape().lens()[3];

-        dfor(output_shape.lens()[0],
-             output_shape.lens()[1],
-             output_shape.lens()[2],
-             output_shape.lens()[3])(
+        par_dfor(output_shape.lens()[0],
+                 output_shape.lens()[1],
+                 output_shape.lens()[2],
+                 output_shape.lens()[3])(
             [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                 const int start_x0 = i * op.stride[0] - op.padding[0];
                 const int start_y0 = j * op.stride[1] - op.padding[1];

src/targets/gpu/convolution.cpp

@@ -41,11 +41,11 @@ argument miopen_convolution::compute(context& ctx,
 shape miopen_convolution::compile(context& ctx,
                                   const shape& output_shape,
-                                  std::vector<instruction_ref> inputs)
+                                  std::vector<shape> inputs)
 {
     shape workspace_shape{};

-    auto x_desc = make_tensor(inputs[0]->get_shape());
-    auto w_desc = make_tensor(inputs[1]->get_shape());
+    auto x_desc = make_tensor(inputs[0]);
+    auto w_desc = make_tensor(inputs[1]);
     auto y_desc = make_tensor(output_shape);

     std::size_t workspace_size = 0;

@@ -57,8 +57,8 @@ shape miopen_convolution::compile(context& ctx,
                                   &workspace_size);
     workspace_shape = shape{shape::int8_type, {workspace_size}};

-    auto x = to_gpu(generate_argument(inputs[0]->get_shape()));
-    auto w = to_gpu(generate_argument(inputs[1]->get_shape()));
+    auto x         = to_gpu(generate_argument(inputs[0]));
+    auto w         = to_gpu(generate_argument(inputs[1]));
     auto y         = allocate_gpu(output_shape);
     auto workspace = allocate_gpu(workspace_shape);

@@ -80,10 +80,21 @@ shape miopen_convolution::compile(context& ctx,
                                   false);
     if(status != miopenStatusSuccess)
         MIGRAPHX_THROW("Find convolution failed");
-    algo = perf.fwd_algo;
+    handle = ctx.get_stream().get_miopen();
+    algo   = perf.fwd_algo;
     return shape{shape::int8_type, {perf.memory}};
 }

+void miopen_convolution::finalize(context& ctx,
+                                  const shape& output_shape,
+                                  std::vector<shape> inputs)
+{
+    if(handle == ctx.get_stream().get_miopen())
+        return;
+    // TODO: Check that workspace hasn't changed
+    compile(ctx, output_shape, std::move(inputs));
+}
+
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

src/targets/gpu/fuse_ops.cpp

@@ -274,11 +274,8 @@ struct miopen_conv_bias
         return f.execute(ctx, fargs, args[0], args[4]);
     }

-    shape compile(context& ctx)
-    {
-        f.compile(ctx);
-        return f.get_workspace(ctx);
-    }
+    void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
+
+    shape get_workspace(context& ctx) { return f.get_workspace(ctx); }

     int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
 };

@@ -318,12 +315,8 @@ struct miopen_conv_bias_relu
         miopenSetOpArgsActivForward(fargs.get(), relu, &alpha, &beta, 0, 0, 0);
         return f.execute(ctx, fargs, args[0], args[4]);
     }

-    shape compile(context& ctx)
-    {
-        f.compile(ctx);
-        return f.get_workspace(ctx);
-    }
+    void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
+
+    shape get_workspace(context& ctx) { return f.get_workspace(ctx); }

     int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
 };

@@ -350,7 +343,7 @@ void apply_conv_bias(context& ctx, program& p, match::matcher_result r)
     Op cb{conv_op, input_ins->get_shape(), weights_ins->get_shape(), bias_ins->get_shape()};
     // TODO: Insert ws allocation
-    auto ws = cb.compile(ctx);
+    auto ws = cb.get_workspace(ctx);

     p.replace_instruction(ins, cb, input_ins, weights_ins, old_ws_ins, bias_ins, alloc_ins);
 }

src/targets/gpu/include/migraphx/gpu/convolution.hpp

@@ -27,6 +27,7 @@ struct miopen_convolution
     op::convolution op;
     shared<convolution_descriptor> cd;
     miopenConvFwdAlgorithm_t algo{};
+    miopenHandle_t handle = nullptr;

     template <class Self, class F>
     static auto reflect(Self& self, F f)

@@ -39,7 +40,8 @@ struct miopen_convolution
     shape compute_shape(const std::vector<shape>& inputs) const;
     argument compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    shape compile(context& ctx, const shape& output_shape, std::vector<instruction_ref> inputs);
+    shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
+    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);

     int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
 };

src/targets/gpu/lowering.cpp

@@ -132,7 +132,7 @@ struct miopen_apply
         auto&& op = any_cast<op::convolution>(ins->get_operator());

         auto conv = miopen_convolution{op, make_conv(op)};
-        auto ws   = conv.compile(ctx, ins->get_shape(), ins->inputs());
+        auto ws   = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));

         auto workspace = insert_allocation(ins, ws, "workspace");
         auto output    = insert_allocation(ins, ins->get_shape());

test/eval_test.cpp

@@ -8,9 +8,57 @@
 struct id_target
 {
     struct context
     {
+        void finish() const {}
     };
+    migraphx::context ctx = context{};
+
     std::string name() const { return "id"; }
     std::vector<migraphx::pass> get_passes(migraphx::context&) const { return {}; }
-    migraphx::context get_context() const { return {}; }
+    migraphx::context get_context() const { return ctx; }
 };

+struct id_ctx_op
+{
+    std::string name() const { return "id_ctx_op"; }
+    migraphx::argument compute(id_target::context&,
+                               const migraphx::shape&,
+                               std::vector<migraphx::argument> args) const
+    {
+        if(args.empty())
+            return {};
+        return args.front();
+    }
+    migraphx::shape compute_shape(std::vector<migraphx::shape> inputs) const
+    {
+        if(inputs.empty())
+            return {};
+        return inputs.front();
+    }
+    int output_alias(const std::vector<migraphx::shape>&) const { return 0; }
+};
+
+struct id_ctx_final_op
+{
+    std::string name() const { return "id_ctx_final_op"; }
+    migraphx::argument compute(const migraphx::shape&, std::vector<migraphx::argument> args) const
+    {
+        if(args.empty())
+            return {};
+        return args.front();
+    }
+    void finalize(id_target::context&, const migraphx::shape&, const std::vector<migraphx::shape>&)
+    {
+    }
+    migraphx::shape compute_shape(std::vector<migraphx::shape> inputs) const
+    {
+        if(inputs.empty())
+            return {};
+        return inputs.front();
+    }
+    int output_alias(const std::vector<migraphx::shape>&) const { return 0; }
+};
+
 struct reverse_pass

@@ -224,4 +272,52 @@ TEST_CASE(double_reverse_target_test)
     EXPECT(result != migraphx::literal{4});
 }

+// Check that the program doesnt modify the context directly, and only the operators modify the
+// context
+TEST_CASE(eval_context1)
+{
+    migraphx::program p;
+    id_target t{};
+    EXPECT(is_shared(t.ctx, t.get_context()));
+    auto one = p.add_literal(1);
+    auto two = p.add_literal(2);
+    p.add_instruction(sum_op{}, one, two);
+    p.compile(t);
+    EXPECT(is_shared(t.ctx, p.get_context()));
+    p.eval({});
+    EXPECT(is_shared(t.ctx, p.get_context()));
+}
+
+TEST_CASE(eval_context2)
+{
+    migraphx::program p;
+    id_target t{};
+    EXPECT(is_shared(t.ctx, t.get_context()));
+    auto one = p.add_literal(1);
+    auto two = p.add_literal(2);
+    p.add_instruction(id_ctx_op{}, one, two);
+    p.compile(t);
+    EXPECT(is_shared(t.ctx, p.get_context()));
+    p.eval({});
+    // id_ctx_op will modify the context
+    EXPECT(not is_shared(t.ctx, p.get_context()));
+}
+
+TEST_CASE(eval_context3)
+{
+    migraphx::program p;
+    id_target t{};
+    EXPECT(is_shared(t.ctx, t.get_context()));
+    auto one = p.add_literal(1);
+    auto two = p.add_literal(2);
+    p.add_instruction(id_ctx_final_op{}, one, two);
+    p.compile(t);
+    // Finalizer will modify the context
+    EXPECT(not is_shared(t.ctx, p.get_context()));
+    auto ctx = p.get_context();
+    p.eval({});
+    EXPECT(is_shared(ctx, p.get_context()));
+    EXPECT(not is_shared(t.ctx, p.get_context()));
+}
+
 int main(int argc, const char* argv[]) { test::run(argc, argv); }

test/include/test.hpp

@@ -224,7 +224,13 @@ inline void run(int argc, const char* argv[])
         std::unordered_map<std::string, std::function<void()>> m(get_test_cases().begin(),
                                                                  get_test_cases().end());
         for(auto&& name : cases)
-            run_test_case(name, m[name]);
+        {
+            auto f = m.find(name);
+            if(f == m.end())
+                std::cout << "[ ERROR ] Test case '" << name << "' not found." << std::endl;
+            else
+                run_test_case(name, f->second);
+        }
     }
 }
