Somehow this verify test works

* Changed the allocations to occur in the submodules.
* Incomplete: the use_local_alloc variable in module does not work properly yet.
* Added a hip::sync_stream before the return.
* Not sure why the hip::sync_stream gets rid of the dangling reference error (code-wise, it is because hip::sync_stream's output alias is -1).

Somehow this verify test works
* Changed the allocations to occur in the submodules.
* Incomplete: the use_local_alloc variable in module does not work properly yet.
* Added a hip::sync_stream before the return.
* Not sure why the hip::sync_stream gets rid of the dangling reference error (code-wise, it is because hip::sync_stream's output alias is -1).
996426be · charlie · fe995d05 · 996426be · 996426be · 996426be
Commit 996426be authored Feb 14, 2023 by charlie
5 changed files
--- a/src/include/migraphx/module.hpp
+++ b/src/include/migraphx/module.hpp
@@ -54,6 +54,8 @@ using ins_dep_map   = std::unordered_map<instruction_ref, std::unordered_set<ins
 */
 struct module
 {
+    bool use_local_alloc = true;
    module(const std::string& name = "");
    // move constructor

--- a/src/include/migraphx/op/select_module.hpp
+++ b/src/include/migraphx/op/select_module.hpp
@@ -90,10 +90,11 @@ struct select_module
        auto module_iter =
            std::find_if(submodule_list.cbegin(), submodule_list.cend(), [&](module_ref mr) {
                auto input_param_names = get_input_parameter_names(mr);
-                return std::equal(args.cbegin(),
+                assert(input_param_names.size() <= args.size());
-                                  args.cend(),
+                return std::equal(input_param_names.cbegin(),
-                                  input_param_names.cbegin(),
+                                  input_param_names.cend(),
-                                  [&](auto a, auto p_name) {
+                                  args.cbegin(),
+                                  [&](auto p_name, auto a) {
                                      return a.get_shape() == mr->get_parameter_shape(p_name);
                                  });
            });
@@ -114,15 +115,15 @@ struct select_module
                       std::inserter(params, params.end()),
                       [](auto&& name, auto&& a) { return std::make_pair(name, a); });
-        // add output parameters (none if on ref)
+        // add output parameters from arguments (none if on ref)
-        auto output_param_names = get_output_parameter_names(module_to_run);
+        // auto output_param_names = get_output_parameter_names(module_to_run);
-        std::transform(output_param_names.begin(),
+        // std::transform(output_param_names.begin(),
-                       output_param_names.end(),
+        //               output_param_names.end(),
-                       std::inserter(params, params.end()),
+        //               args.begin() + input_param_names.size(),
-                       [&module_to_run](auto&& name) {
+        //               std::inserter(params, params.end()),
-                           return std::make_pair(
+        //               [](auto&& name, auto&& a) {
-                               name, argument{module_to_run->get_parameter_shape(name)});
+        //                   return std::make_pair(name, a);
-                       });
+        //               });
        auto results = run(module_to_run, params);
        return argument{results};

--- a/src/replace_allocate.cpp
+++ b/src/replace_allocate.cpp
@@ -104,19 +104,17 @@ void replace_allocate::apply(module& m) const
            continue;
        auto s = ins->get_shape();
+        if(not main_offload_copy and not(m.use_local_alloc) and model.needs_out_params() and
-        if(not main_offload_copy and model.needs_out_params() and contains(mod_output_names, ins))
+           contains(mod_output_names, ins))
        {
            auto out_param = m.add_parameter(mod_output_names[ins], s);
            m.replace_instruction(ins, out_param);
-            continue;
        }
+        else
-        m.replace_instruction(
+        {
-            ins,
+            m.replace_instruction(ins,
-            m.insert_instruction(ins,
+                                  make_op(model.name(), migraphx::value{{"shape", to_value(s)}}));
-                                 make_op(model.name(), migraphx::value{{"shape", to_value(s)}})));
+        }
    }
 }

--- a/src/targets/gpu/lowering.cpp
+++ b/src/targets/gpu/lowering.cpp
@@ -361,9 +361,11 @@ struct miopen_apply
        });
    }
+    // This might work, but it's going to have many arguments.
+    // Will need a map between submodules and argument index
+    // Also allocates separate memory for each batch size...
    // void add_select_module_op()
    //{
-    //    // make maximum buffer size allocation for output parameters
    //    apply_map.emplace("select_module", [=](instruction_ref ins) {
    //        std::vector<instruction_ref> inputs = ins->inputs();
    //        auto mod_args = ins->module_inputs();
@@ -380,20 +382,74 @@ struct miopen_apply
    //    });
    //}
+    // Wrong output parameter shape error with this
+    // Try instead to have allocates occur in the submodule
+    // void add_select_module_op()
+    //{
+    //    // make maximum buffer size allocation for output parameters
+    //    apply_map.emplace("select_module", [=](instruction_ref ins) {
+    //        std::vector<instruction_ref> inputs = ins->inputs();
+    //        auto output_sub_shapes              = ins->get_shape().sub_shapes();
+    //        std::transform(output_sub_shapes.begin(),
+    //                       output_sub_shapes.end(),
+    //                       std::back_inserter(inputs),
+    //                       [&](auto s) {
+    //                           shape max_shape{s.type(), s.max_lens()};
+    //                           return insert_allocation(ins, max_shape);
+    //                       });
+    //        return mod->replace_instruction(ins, ins->get_operator(), inputs,
+    //        ins->module_inputs());
+    //    });
+    //}
+    // do a copy to cpu of the submodule alloc
+    // void add_select_module_op()
+    //{
+    //    apply_map.emplace("select_module", [=](instruction_ref ins) {
+    //        std::vector<instruction_ref> inputs = ins->inputs();
+    //        auto mod_args = ins->module_inputs();
+    //        for(auto smod : mod_args)
+    //        {
+    //            auto last_ins = std::prev(smod->end());
+    //            if(last_ins->name() == "@return")
+    //            {
+    //                const auto& prog_outputs = last_ins->inputs();
+    //                std::vector<instruction_ref> outputs_alias(prog_outputs.size());
+    //                std::transform(prog_outputs.begin(),
+    //                        prog_outputs.end(),
+    //                        outputs_alias.begin(),
+    //                        [](const auto& i) { return instruction::get_output_alias(i); });
+    //                for(auto out_ins : outputs_alias)
+    //                {
+    //                    std::cout << "output_alias_ins: " << out_ins->name() << std::endl;
+    //                    smod->insert_instruction(out_ins->outputs()[0],
+    //                    make_op("hip::copy_from_gpu"), out_ins->inputs()[1]);
+    //                }
+    //            }
+    //        }
+    //        return ins;
+    //    });
+    //}
    void add_select_module_op()
    {
-        // make maximum buffer size allocation for output parameters
        apply_map.emplace("select_module", [=](instruction_ref ins) {
            std::vector<instruction_ref> inputs = ins->inputs();
-            auto output_sub_shapes              = ins->get_shape().sub_shapes();
+            auto mod_args                       = ins->module_inputs();
-            std::transform(output_sub_shapes.begin(),
+            for(auto smod : mod_args)
-                           output_sub_shapes.end(),
+            {
-                           std::back_inserter(inputs),
+                auto last_ins = std::prev(smod->end());
-                           [&](auto s) {
+                if(last_ins->name() == "@return")
-                               shape max_shape{s.type(), s.max_lens()};
+                {
-                               return insert_allocation(ins, max_shape);
+                    for(auto out_ins : last_ins->inputs())
-                           });
+                    {
-            return mod->replace_instruction(ins, ins->get_operator(), inputs, ins->module_inputs());
+                        auto sync_out = smod->insert_instruction(
+                            last_ins, make_op("hip::sync_stream"), out_ins);
+                        smod->replace_return({sync_out});
+                    }
+                }
+            }
+            return ins;
        });
    }
 };

--- a/test/verify/test_select_module.cpp
+++ b/test/verify/test_select_module.cpp
@@ -35,7 +35,7 @@ struct test_select_module : verify_program<test_select_module>
        // create batch submodules
        auto create_submodule = [&](std::size_t batch_size, std::string module_name) {
-            auto* submod = p.create_module(module_name);
+            auto submod = p.create_module(module_name);
            migraphx::shape sm_shape{migraphx::shape::float_type, {batch_size, 2, 2}};
            auto sm_input = submod->add_parameter("data", sm_shape);
            auto reduce_ins =