Merge branch 'dyn_dim_onnx_parser' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into dyn_conv

d5636acd · charlie · fff09052 · 5f215b71 · d5636acd · d5636acd
Commit d5636acd authored Jul 08, 2022 by charlie
20 changed files
--- a/src/targets/gpu/code_object_op.cpp
+++ b/src/targets/gpu/code_object_op.cpp
@@ -52,7 +52,7 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>&
    std::transform(
        args.begin(), args.end(), kargs.begin(), [](const argument& a) { return a.data(); });
    k.launch(ctx.get_stream().get(), global, local, std::move(kargs));
-    return args.back();
+    return args[get_output_arg(args.size())];
 }
 void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)
 {

--- a/src/targets/gpu/compile_gen.cpp
+++ b/src/targets/gpu/compile_gen.cpp
@@ -43,6 +43,9 @@ static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs)

 vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs)
 {
+    if(std::all_of(
+           inputs.begin(), inputs.end(), [&](const auto& s) { return s.lens()[axis] == 1; }))
+        return {1, axis};
    auto sizes = vector_sizes(inputs);
    std::vector<std::size_t> max_vec_size;
    std::transform(inputs.begin(),

--- a/src/targets/gpu/compile_hip.cpp
+++ b/src/targets/gpu/compile_hip.cpp
@@ -43,6 +43,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG);
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG_SYM);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_ASM);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_SRC);
@@ -227,6 +228,8 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
    if(params.find("-std=") == std::string::npos)
        params += " --std=c++17";
    params += " -fno-gpu-rdc";
+    if(enabled(MIGRAPHX_GPU_DEBUG_SYM{}))
+        params += " -g";
    params += " -c";
    if(is_hcc_compiler())
    {

--- a/src/targets/gpu/deconvolution.cpp
+++ b/src/targets/gpu/deconvolution.cpp
@@ -59,31 +59,30 @@ argument miopen_deconvolution::compute(context& ctx,
    auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()));
    auto y_desc = make_tensor(reshape_if_1d(output_shape));

-    float alpha = 1;
-    float beta  = 0;
-    auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
-                                           &alpha,
-                                           x_desc.get(),
-                                           args[0].implicit(),
-                                           w_desc.get(),
-                                           args[1].implicit(),
-                                           cd.get(),
-                                           algo,
-                                           &beta,
-                                           y_desc.get(),
-                                           args[3].implicit(),
-                                           args[2].implicit(),
-                                           args[2].get_shape().bytes());
+    if(solution_id == 0)
+        MIGRAPHX_THROW("MIOpen Deconvolution: invalid solution ID");
+
+    auto status = miopenConvolutionForwardImmediate(ctx.get_stream().get_miopen(),
+                                                    w_desc.get(),
+                                                    args[1].implicit(),
+                                                    x_desc.get(),
+                                                    args[0].implicit(),
+                                                    cd.get(),
+                                                    y_desc.get(),
+                                                    args[3].implicit(),
+                                                    args[2].implicit(),
+                                                    args[2].get_shape().bytes(),
+                                                    solution_id);
+
    if(status != miopenStatusSuccess)
-        MIGRAPHX_THROW("Running deconvolution failed");
+        MIGRAPHX_THROW("MIOpen Deconvolution: running convolution failed");
    return args[3];
 }

-shape miopen_deconvolution::compile(context& ctx,
-                                    const shape& output_shape,
-                                    std::vector<shape> inputs)
+shape miopen_deconvolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
 {
    shape workspace_shape{};
+
    auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
    auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
    auto y_desc = make_tensor(reshape_if_1d(output_shape));
@@ -119,9 +118,35 @@ shape miopen_deconvolution::compile(context& ctx,
                                                        workspace_size,
                                                        false);
    if(status != miopenStatusSuccess)
-        MIGRAPHX_THROW("Find deconvolution failed");
-    handle = ctx.get_stream().get_miopen();
-    algo   = perf.fwd_algo;
+        MIGRAPHX_THROW("MIOpen Deconvolution: find convolution failed");
+    algo = perf.fwd_algo;
+
+    size_t solution_count;
+
+    status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
+                                                      w_desc.get(),
+                                                      x_desc.get(),
+                                                      cd.get(),
+                                                      y_desc.get(),
+                                                      &solution_count);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen Deconvolution: get solution count failed");
+
+    std::vector<miopenConvSolution_t> solutions(solution_count);
+
+    status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
+                                                 w_desc.get(),
+                                                 x_desc.get(),
+                                                 cd.get(),
+                                                 y_desc.get(),
+                                                 solution_count,
+                                                 &solution_count,
+                                                 solutions.data());
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen Deconvolution: get solution failed");
+
+    solution_id = solutions.front().solution_id;
+
    return shape{shape::int8_type, {perf.memory}};
 }

@@ -129,13 +154,29 @@ void miopen_deconvolution::finalize(context& ctx,
                                    const shape& output_shape,
                                    std::vector<shape> inputs)
 {
-    if(handle == ctx.get_stream().get_miopen())
-        return;
-    // Check that workspace hasn't changed
-    auto size = inputs.at(2).bytes();
-    auto ws   = compile(ctx, output_shape, std::move(inputs));
-    if(ws.bytes() > size)
-        MIGRAPHX_THROW("Workspace has changed during finalization.");
+    if(cd == nullptr)
+        cd = make_deconv(op);
+    if(solution_id == 0)
+    {
+        // Check that workspace hasn't changed
+        auto size = inputs.at(2).bytes();
+        auto ws   = find(ctx, output_shape, inputs);
+        if(ws.bytes() > size)
+            MIGRAPHX_THROW("MIOpen Deconvolution: workspace has changed during finalization.");
+    }
+
+    auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
+    auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
+    auto y_desc = make_tensor(reshape_if_1d(output_shape));
+
+    auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
+                                                          w_desc.get(),
+                                                          x_desc.get(),
+                                                          cd.get(),
+                                                          y_desc.get(),
+                                                          solution_id);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen Deconvolution: compile solution failed");
 }

 } // namespace gpu

--- a/src/targets/gpu/fuse_mlir.cpp
+++ b/src/targets/gpu/fuse_mlir.cpp
--- a/src/targets/gpu/fuse_ops.cpp
+++ b/src/targets/gpu/fuse_ops.cpp
@@ -336,6 +336,7 @@ void move_standard_front(std::vector<instruction_ref>& args)

 auto gpu_name(const std::string& s) { return match::name("gpu::" + s); }

+namespace {
 struct find_layernorm
 {
    auto matcher() const { return match::layernorm(&gpu_name); }
@@ -836,15 +837,6 @@ inline auto precompile_name(std::string s) // NOLINT
    });
 }

-template <class... Ms>
-auto conv_bias_pointwise(Ms... ms)
-{
-    return precompile_name("pointwise")(
-        match::either_arg(0, 1)(bias_shape(match::used_once()).bind("bias"),
-                                fusable_conv(match::used_once()).bind("conv")),
-        ms...);
-}
-
 struct find_conv_bias
 {
    context* ctx = nullptr;
@@ -1013,6 +1005,7 @@ struct find_commutative_broadcast
        m.replace_instruction(ins, ins->get_operator(), args);
    }
 };
+} // namespace

 struct find_contiguous
 {

--- a/src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
@@ -38,12 +38,13 @@ struct context;

 struct code_object_op
 {
-    value::binary code_object;
-    std::string symbol_name;
-    std::size_t global;
-    std::size_t local;
-    std::vector<shape> expected_inputs;
-    shape output;
+    value::binary code_object{};
+    std::string symbol_name = "";
+    std::size_t global      = 0;
+    std::size_t local       = 0;
+    std::vector<shape> expected_inputs{};
+    shape output{};
+    std::int64_t output_arg = -1;
    kernel k{};

    template <class Self, class F>
@@ -66,9 +67,13 @@ struct code_object_op
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    void finalize(context&, const shape&, const std::vector<shape>&);
+    std::int64_t get_output_arg(std::size_t n) const
+    {
+        return output_arg < 0 ? n + output_arg : output_arg;
+    }
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
-        return shapes.size() - 1;
+        return get_output_arg(shapes.size());
    }

    friend std::ostream& operator<<(std::ostream& os, const code_object_op& op)

--- a/src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
@@ -39,20 +39,20 @@ struct miopen_deconvolution
    op::deconvolution op;
    shared<convolution_descriptor> cd;
    miopenConvFwdAlgorithm_t algo{};
-    miopenHandle_t handle = nullptr;
+    uint64_t solution_id = 0;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
-        // TODO: Add algo
-        return op::convolution::reflect(self.op, f);
+        return pack_join(op::deconvolution::reflect(self.op, f),
+                         pack(f(self.solution_id, "solution_id")));
    }

    std::string name() const { return "gpu::deconv"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
+    shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs);
    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {

--- a/src/targets/gpu/include/migraphx/gpu/mlir_conv.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/mlir_conv.hpp
@@ -21,8 +21,8 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_MLIR_CONV_HPP
-#define MIGRAPHX_GUARD_RTGLIB_MIOPEN_MLIR_CONV_HPP
+#ifndef MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
+#define MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP

 #include <migraphx/config.hpp>
 #include <migraphx/gpu/context.hpp>
@@ -30,18 +30,19 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-struct module;
+struct module_pass_manager;

 namespace gpu {
-struct mlir_conv
+
+struct fuse_mlir
 {
-    context* ctx;
-    std::string name() const { return "mlir::convolution"; }
-    void apply(module& m) const;
+    context* ctx = nullptr;
+    std::string name() const { return "gpu::fuse_mlir"; }
+    void apply(module_pass_manager& mpm) const;
 };

 } // namespace gpu
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
-
-#endif
+#endif // MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
--- a/src/targets/gpu/include/migraphx/gpu/mlir.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/mlir.hpp
--- a/src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
@@ -41,7 +41,7 @@ struct miopen_quant_convolution
    bool int8_x4_format = false;
    shared<convolution_descriptor> cd;
    miopenConvFwdAlgorithm_t algo{};
-    miopenHandle_t handle = nullptr;
+    uint64_t solution_id = 0;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
@@ -55,7 +55,7 @@ struct miopen_quant_convolution
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
+    shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs);
    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {

--- a/src/targets/gpu/jit/mlir.cpp
+++ b/src/targets/gpu/jit/mlir.cpp
--- a/src/targets/gpu/jit/softmax.cpp
+++ b/src/targets/gpu/jit/softmax.cpp
--- a/src/targets/gpu/kernels/include/migraphx/kernels/array.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/array.hpp
--- a/src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp
--- a/src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
--- a/src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
--- a/src/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp
--- a/src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
--- a/src/targets/gpu/lowering.cpp
+++ b/src/targets/gpu/lowering.cpp