Preallocate parameters on the CPU and unify preallocations (#840)

* Add preallocate method
* Add preallocate_param pass
* Preallocate buffers on the cpu
* Formatting
* Preallocate on the gpu
* Add missing cpp file
* Formatting
* Add lifetime function
* Formatting
* Always allocate
* Fix tidy warning
* Add const
* Add missing lifetime annotations

Co-authored-by: mvermeulen <5479696+mvermeulen@users.noreply.github.com>

Preallocate parameters on the CPU and unify preallocations (#840)
* Add preallocate method
* Add preallocate_param pass
* Preallocate buffers on the cpu
* Formatting
* Preallocate on the gpu
* Add missing cpp file
* Formatting
* Add lifetime function
* Formatting
* Always allocate
* Fix tidy warning
* Add const
* Add missing lifetime annotations

Co-authored-by: mvermeulen <5479696+mvermeulen@users.noreply.github.com>
427fc25c · Paul Fultz II · GitHub · f60c3815 · 427fc25c · 427fc25c
Unverified Commit 427fc25c authored Jul 08, 2021 by Paul Fultz II Committed by GitHub Jul 08, 2021
11 changed files
--- a/src/targets/cpu/include/migraphx/cpu/allocation_model.hpp
+++ b/src/targets/cpu/include/migraphx/cpu/allocation_model.hpp
@@ -14,6 +14,7 @@ struct cpu_allocation_model
    std::string name() const;
    std::string copy() const;
    operation allocate(const shape& s) const;
+    operation preallocate(const shape& s, const std::string& id) const;
 };

 } // namespace cpu

--- a/src/targets/cpu/lowering.cpp
+++ b/src/targets/cpu/lowering.cpp
@@ -440,7 +440,7 @@ struct cpu_apply
        }
    }

-    instruction_ref apply_pow(instruction_ref ins)
+    instruction_ref apply_pow(instruction_ref ins) const
    {
        auto beta = read_scalar<float>(ins->inputs()[1]);
        if(beta.empty())
@@ -451,7 +451,7 @@ struct cpu_apply
                       {ins->inputs().front()});
    }

-    instruction_ref apply_pooling(instruction_ref ins)
+    instruction_ref apply_pooling(instruction_ref ins) const
    {
        auto&& op = ins->get_operator();
        auto v    = op.to_value();
@@ -479,30 +479,20 @@ struct cpu_apply
        return {r.at<T>()};
    }

-    instruction_ref replace(instruction_ref ins, const operation& op)
+    instruction_ref replace(instruction_ref ins, const operation& op) const
    {
        return replace(ins, op, ins->inputs());
    }

    instruction_ref
-    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs)
+    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs) const
    {
        inputs.push_back(insert_allocation(ins, ins->get_shape()));
        return modl->replace_instruction(ins, op, inputs);
    }

-    instruction_ref insert_allocation(instruction_ref ins, const shape& s)
+    instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
    {
-        auto ins_alias = instruction::get_output_alias(ins);
-        if(last->name() == "@return" and prog_output_names.count(ins_alias) > 0)
-        {
-            return modl->add_parameter(prog_output_names[ins_alias], s);
-        }
-        else if(ins == last)
-        {
-            return modl->add_parameter("output", s);
-        }
-
        return modl->insert_instruction(ins, make_op("cpu::allocate", {{"shape", to_value(s)}}));
    }
 };

--- a/src/targets/cpu/preallocate.cpp
+++ b/src/targets/cpu/preallocate.cpp
+#include <migraphx/config.hpp>
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/argument.hpp>
+#include <migraphx/context.hpp>
+#include <migraphx/cpu/context.hpp>
+#include <migraphx/register_op.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace cpu {
+
+// Operator named "cpu::preallocate". Its output buffer is created once in
+// finalize() and the same stored buffer is returned by every compute() call.
+// NOTE(review): deriving from auto_register_op presumably registers the
+// operator under name() -- confirm against register_op.hpp.
+struct cpu_preallocate : auto_register_op<cpu_preallocate>
+{
+    // Shape reported by compute_shape() and used to construct `data`.
+    shape s;
+    // Identifier exposed through reflect() as "id"; no method in this struct
+    // reads it.
+    std::string id = "";
+    // Buffer returned by compute(). Assigned in finalize(); deliberately left
+    // out of reflect(), so only `s` and `id` are visible as fields.
+    argument data;
+
+    // Exposes the fields `s` (as "shape") and `id` (as "id") to the visitor `f`.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.s, "shape"), f(self.id, "id"));
+    }
+
+    // Name of this operator.
+    std::string name() const { return "cpu::preallocate"; }
+    // Returns the stored shape `s`, independent of `inputs`.
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        // presumably rejects any call that passes input shapes (expects zero
+        // inputs) -- TODO confirm check_shapes::has semantics
+        check_shapes{inputs, *this}.has(0);
+        return s;
+    }
+    // Returns the stored `data`; the context, output shape and arguments are
+    // all ignored.
+    argument compute(context&, const shape&, const std::vector<argument>&) const { return data; }
+    // Builds `data` from `s`. Non-const because it mutates the operator.
+    // NOTE(review): argument(s) presumably allocates storage for the shape --
+    // confirm against argument.hpp.
+    void finalize(context&, const shape&, const std::vector<shape>&) { data = argument(s); }
+    // Reports lifetime::global for the value this operator produces.
+    lifetime get_lifetime() const { return lifetime::global; }
+};
+
+} // namespace cpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/cpu/target.cpp
+++ b/src/targets/cpu/target.cpp
@@ -22,6 +22,7 @@
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/simplify_algebra.hpp>
 #include <migraphx/simplify_reshapes.hpp>
+#include <migraphx/preallocate_param.hpp>
 #include <migraphx/cpu/fuse_ops.hpp>
 #include <migraphx/cpu/write_literals.hpp>
 #include <migraphx/cpu/allocation_model.hpp>
@@ -76,6 +77,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
            write_literals{},
            dead_code_elimination{},
            memory_coloring{"cpu::allocate"},
+            dead_code_elimination{},
+            preallocate_param{"scratch", cpu_allocation_model{}},
            dead_code_elimination{}};
 }


--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -139,7 +139,6 @@ add_library(migraphx_gpu
    pack_int8_args.cpp
    pad.cpp
    pooling.cpp
-    preallocate_param.cpp
    quant_convolution.cpp
    reverse.cpp
    rnn_variable_seq_lens.cpp

--- a/src/targets/gpu/allocation_model.cpp
+++ b/src/targets/gpu/allocation_model.cpp
@@ -11,6 +11,11 @@ operation gpu_allocation_model::allocate(const shape& s) const
    return make_op(name(), {{"shape", to_value(s)}});
 }

+operation gpu_allocation_model::preallocate(const shape& s, const std::string& id) const
+{
+    return make_op("hip::hip_allocate_memory", {{"shape", to_value(s)}, {"id", id}});
+}
+
 std::string gpu_allocation_model::copy() const { return "hip::copy"; }

 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/allocation_model.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/allocation_model.hpp
@@ -14,6 +14,7 @@ struct gpu_allocation_model
    std::string name() const;
    std::string copy() const;
    operation allocate(const shape& s) const;
+    operation preallocate(const shape& s, const std::string& id) const;
 };

 } // namespace gpu

--- a/src/targets/gpu/target.cpp
+++ b/src/targets/gpu/target.cpp
@@ -14,6 +14,7 @@
 #include <migraphx/insert_pad.hpp>
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/normalize_ops.hpp>
+#include <migraphx/preallocate_param.hpp>
 #include <migraphx/propagate_constant.hpp>
 #include <migraphx/register_target.hpp>
 #include <migraphx/remap.hpp>
@@ -31,7 +32,6 @@
 #include <migraphx/gpu/lowering.hpp>
 #include <migraphx/gpu/mlir_conv.hpp>
 #include <migraphx/gpu/pack_int8_args.hpp>
-#include <migraphx/gpu/preallocate_param.hpp>
 #include <migraphx/gpu/schedule_model.hpp>
 #include <migraphx/gpu/sync_device.hpp>
 #include <migraphx/gpu/target.hpp>
@@ -98,7 +98,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
        schedule{gpu::schedule_model{ctx.get_current_device().nstreams()}, not enabled(MIGRAPHX_DISABLE_SCHEDULE_PASS{})},
        memory_coloring{"hip::allocate"},
        sync_device{},
-        preallocate_param{"scratch", &ctx},
+        preallocate_param{"scratch", gpu_allocation_model{}},
        dead_code_elimination{},
        eliminate_workspace{},
        eliminate_allocation{"hip::allocate"},

--- a/test/verify/run_verify.cpp
+++ b/test/verify/run_verify.cpp
@@ -109,7 +109,7 @@ std::pair<migraphx::program, std::vector<migraphx::argument>> run_verify::run_ta
    std::transform(
        tres.begin(), tres.end(), res.begin(), [&](auto& argu) { return t.copy_from(argu); });

-    return std::make_pair(p, res);
+    return std::make_pair(std::move(p), res);
 }

 template <class T>

--- a/tools/include/allocation_model.hpp
+++ b/tools/include/allocation_model.hpp
@@ -26,6 +26,8 @@ struct allocation_model
    std::string copy() const;
    /// Create an allocation operator for the given shape
    operation allocate(const shape& s) const;
+    /// Create a preallocated operator for the given shape
+    operation preallocate(const shape& s, const std::string& id) const;
 };

 #else
@@ -34,7 +36,8 @@ struct allocation_model
 interface('allocation_model',
    virtual('name', returns='std::string', const=True),
    virtual('copy', returns='std::string', const=True),
-    virtual('allocate', s='const shape&', returns='operation', const=True)
+    virtual('allocate', s='const shape&', returns='operation', const=True),
+    virtual('preallocate', s='const shape&', id='std::string', returns='operation', const=True)
 )
 %>


--- a/tools/include/operation.hpp
+++ b/tools/include/operation.hpp
@@ -15,6 +15,7 @@
 #include <migraphx/module_ref.hpp>
 #include <migraphx/serialize.hpp>
 #include <migraphx/auto_any_cast.hpp>
+#include <migraphx/lifetime.hpp>
 #include <migraphx/config.hpp>

 namespace migraphx {
@@ -435,9 +436,9 @@ void from_value_op(T& x, const value& v)
 }

 template <class T>
-bool is_borrowed_op(const T&)
+lifetime get_lifetime_op(const T&)
 {
-    return false;
+    return lifetime::local;
 }

 } // namespace detail
@@ -453,7 +454,8 @@ bool is_borrowed_op(const T&)
             const   = True,
             default = 'detail::need_normalization_op'),
     virtual('has_finalize', returns = 'bool', const = True, default = 'detail::has_finalize_op'),
-     virtual('is_borrowed', returns = 'bool', const = True, default = 'detail::is_borrowed_op'),
+     virtual(
+         'get_lifetime', returns = 'lifetime', const = True, default = 'detail::get_lifetime_op'),
     virtual('output_alias',
             returns = 'std::ptrdiff_t',
             input   = 'const std::vector<shape>&',