Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into refactor_auto_pad_conv

84add0fc · charlie · c1caf40a · f7d987ba · 84add0fc · 84add0fc
Commit 84add0fc authored Oct 06, 2022 by charlie
9 changed files
--- a/test/gpu/stream_sync.cpp
+++ b/test/gpu/stream_sync.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <iostream>
+#include <vector>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/context.hpp>
+#include <migraphx/gpu/compile_hip.hpp>
+#include <migraphx/gpu/kernel.hpp>
+#include <migraphx/gpu/device_name.hpp>
+#include <migraphx/par_for.hpp>
+#include <migraphx/program.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/module.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/gpu/target.hpp>
+#include "test.hpp"
+
+using hip_stream_ptr = MIGRAPHX_MANAGE_PTR(hipStream_t, hipStreamDestroy); // presumably a smart-pointer alias that releases the stream via hipStreamDestroy - confirm against the macro
+
+constexpr uint32_t stream_sync_test_val = 1337; // value the tests fill buffers with; the compare kernel source below hard-codes the same 1337
+
+// NOLINTNEXTLINE
+const std::string compare_numbers = R"__migraphx__(
+#include <hip/hip_runtime.h>
+
+extern "C" {
+__global__ void compare(float* data) 
+{
+    int i = threadIdx.x + blockDim.x * blockIdx.x;
+    if (data[i] != 1337) 
+    {
+        abort();
+    }
+}
+    
+}
+
+int main() {}
+
+)__migraphx__";
+
+// Builds a migraphx::src_file called `name` from an in-memory source string.
+// The pointer pair handed over refers directly into `content` (no copy is made here).
+migraphx::src_file make_src_file(const std::string& name, const std::string& content)
+{
+    const char* first = content.data();
+    const char* last  = first + content.size();
+    return {name, std::make_pair(first, last)};
+}
+
+// Creates a new HIP stream with hipStreamCreate and returns it wrapped in a
+// hip_stream_ptr. Throws through MIGRAPHX_THROW when the call does not report hipSuccess.
+hip_stream_ptr get_stream()
+{
+    hipStream_t raw_stream;
+    if(hipStreamCreate(&raw_stream) != hipSuccess)
+    {
+        MIGRAPHX_THROW("Failed to get stream");
+    }
+
+    return hip_stream_ptr{raw_stream};
+}
+
+// Exercises the compare kernel on its own: a 128-element float buffer filled with
+// stream_sync_test_val is copied to the GPU, checked by the kernel on a freshly
+// created stream, and must compare equal to the host copy after reading it back.
+TEST_CASE(test_stream_sync_compare_kernel)
+{
+    auto binaries = migraphx::gpu::compile_hip_src(
+        {make_src_file("check_stuff.cpp", compare_numbers)}, "", migraphx::gpu::get_device_name());
+    EXPECT(binaries.size() == 1);
+
+    migraphx::gpu::kernel compare_kernel{binaries.front(), "compare"};
+
+    auto host_data =
+        migraphx::fill_argument({migraphx::shape::float_type, {128}}, stream_sync_test_val);
+    auto device_data = migraphx::gpu::to_gpu(host_data);
+
+    hip_stream_ptr stream = get_stream();
+
+    // One launch over every element of the buffer, on the stream created above
+    const auto element_count = host_data.get_shape().elements();
+    compare_kernel.launch(stream.get(), element_count, 1024)(device_data.cast<float>());
+
+    auto round_trip = migraphx::gpu::from_gpu(device_data);
+    EXPECT(round_trip == host_data);
+}
+
+TEST_CASE(test_stream_sync) // evaluates a dot+add program on a user-created stream, then launches the compare kernel on that same stream
+{
+    auto binaries = migraphx::gpu::compile_hip_src(
+        {make_src_file("check_stuff.cpp", compare_numbers)}, "", migraphx::gpu::get_device_name());
+    EXPECT(binaries.size() == 1);
+
+    migraphx::gpu::kernel k1{binaries.front(), "compare"};
+    const unsigned int m = 128;  // rows of x; the result is m x m
+    const unsigned int k = 8192; // inner dimension of the dot
+
+    // Setup empty GPU memory buffer
+    migraphx::shape input_shape{migraphx::shape::float_type, {m, k}};
+    migraphx::shape output_shape{migraphx::shape::float_type, {m, m}};
+    auto input  = migraphx::fill_argument(input_shape, 0); // zero-filled host data for parameter "x"
+    auto ginput = migraphx::gpu::to_gpu(input);
+
+    auto output  = migraphx::fill_argument(output_shape, 0); // zero-filled host data for the "output" buffer
+    auto goutput = migraphx::gpu::to_gpu(output);
+
+    hip_stream_ptr pstream = get_stream();
+
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+
+    auto x = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {m, k}});
+    auto y = mm->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {k, m}}));
+
+    std::vector<float> data(m * m, stream_sync_test_val); // m*m copies of the test value, added to the dot result
+    auto test_val = mm->add_literal(output_shape, data);
+    auto mult_out = mm->add_instruction(migraphx::make_op("dot"), x, y);
+    mm->add_instruction(migraphx::make_op("add"), mult_out, test_val);
+
+    p.compile(migraphx::gpu::target{});
+
+    // Run network and then verify with kernel
+    auto args = p.eval({{"x", ginput}, {"output", goutput}}, {pstream.get(), true}); // second argument carries the stream and `true`; args is not read afterwards
+    k1.launch(pstream.get(), m * m, 1024)(goutput.cast<float>()); // kernel aborts if an element it reads differs from 1337
+
+    output = migraphx::gpu::from_gpu(goutput);
+    EXPECT(output != input); // NOTE(review): output is {m, m} and input is {m, k}, so this may hold regardless of the computed values - confirm intent
+}
+
+// Test driver entry point: hands the command line to test::run.
+int main(int argc, const char* argv[])
+{
+    test::run(argc, argv);
+}
--- a/test/py/CMakeLists.txt
+++ b/test/py/CMakeLists.txt
@@ -56,4 +56,5 @@ add_py_test(gpu_offload test_gpu_offload.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
 add_py_test(gpu test_gpu.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
 add_py_test(array test_array.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
 add_py_test(backend onnx_backend_test.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
+add_py_test(gpu_async test_gpu_async.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
 endif()
--- a/test/py/test_gpu_async.py
+++ b/test/py/test_gpu_async.py
+#####################################################################################
+# The MIT License (MIT)
+#
+# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#####################################################################################
+import migraphx
+import ctypes
+
+
+def test_conv_relu():
+    """Run conv_relu_maxpool_test.onnx through run_async on a HIP stream.
+
+    The model is compiled for the gpu target with offload_copy=False, every
+    parameter is generated and moved to the GPU with to_gpu, and the last
+    result of run_async is copied back with from_gpu and printed.
+
+    Raises:
+        RuntimeError: if creating, synchronizing or destroying the HIP stream
+            returns a non-zero status. Previously these failures were only
+            printed and returned, so the module-level call ignored them and
+            the test still passed.
+    """
+    hip = ctypes.cdll.LoadLibrary("libamdhip64.so")
+
+    # Using default value in api for hipSuccess which is always 0
+    hip_success = 0
+
+    def check(status, call_name):
+        # Fail loudly: a printed message plus a return value is not noticed
+        # by the caller of this test.
+        if status != hip_success:
+            raise RuntimeError("FAILED: {} returned {}".format(
+                call_name, status))
+
+    p = migraphx.parse_onnx("conv_relu_maxpool_test.onnx")
+    print(p)
+    print("Compiling ...")
+    # Need to have offload_copy = False to avoid syncs() back to the host device
+    p.compile(migraphx.get_target("gpu"), offload_copy=False)
+    print(p)
+
+    # Alloc a stream
+    stream = ctypes.c_void_p()
+    check(
+        hip.hipStreamCreateWithFlags(ctypes.byref(stream), ctypes.c_uint(0)),
+        "hipStreamCreateWithFlags")
+
+    try:
+        # Use to_gpu to push generated argument to the GPU before we perform a run
+        params = {}
+        for key, value in p.get_parameter_shapes().items():
+            params[key] = migraphx.to_gpu(migraphx.generate_argument(value))
+
+        result = migraphx.from_gpu(
+            p.run_async(params, stream.value, "ihipStream_t")[-1])
+
+        # Wait for all commands in stream to complete
+        check(hip.hipStreamSynchronize(stream), "hipStreamSynchronize")
+    finally:
+        # Cleanup Stream, also when the run or the synchronize failed
+        check(hip.hipStreamDestroy(stream), "hipStreamDestroy")
+
+    print(result)
+
+
+test_conv_relu()
--- a/test/verify/test_softmax_large3.cpp
+++ b/test/verify/test_softmax_large3.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "verify_program.hpp"
+#include <migraphx/program.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/common.hpp>
+
+// Verify-program case: parameter x (float, {2, 4}) is multiplied by the literal 100
+// through add_common_op and the product is fed to softmax with axis -1.
+struct test_softmax_large3 : verify_program<test_softmax_large3>
+{
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        auto* mm = p.get_main_module();
+
+        auto x     = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {2, 4}});
+        auto scale = mm->add_literal({migraphx::shape{migraphx::shape::float_type}, {100}});
+
+        // The op is "mul", so the intermediate is named for what it holds
+        auto scaled = migraphx::add_common_op(*mm, migraphx::make_op("mul"), {x, scale});
+        mm->add_instruction(migraphx::make_op("softmax", {{"axis", -1}}), scaled);
+        return p;
+    }
+};
--- a/tools/accuracy/accuracy_checker.py
+++ b/tools/accuracy/accuracy_checker.py
@@ -116,6 +116,9 @@ def main():

    model = migraphx.parse_onnx(model_name, default_dim_value=batch)

+    if args.verbose:
+        print(model)
+
    model.compile(migraphx.get_target('gpu'), offload_copy=False)

    params = {}

--- a/tools/api.py
+++ b/tools/api.py
@@ -21,7 +21,10 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 #####################################################################################
-import string, sys, re, runpy
+import string
+import sys
+import re
+import runpy
 from functools import wraps
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union

@@ -308,18 +311,39 @@ class Parameter:
        return self.substitute('${type} ${name}', prefix=prefix)

    def virtual_output_args(self, prefix: Optional[str] = None) -> List[str]:
-        return [
-            '&{prefix}{n}'.format(prefix=prefix or '', n=n)
-            for t, n in self.cparams
-        ]
+        container_type = self.type.remove_generic().basic().str()
+        decl_list: List[str] = []
+        container = (container_type == "std::vector"
+                     or container_type == "vector")
+        for t, n, in self.cparams:
+            if not decl_list and container:
+                decl_list.append('{prefix}{n}.data()'.format(prefix=prefix
+                                                             or '',
+                                                             n=n))
+            else:
+                decl_list.append('&{prefix}{n}'.format(prefix=prefix or '',
+                                                       n=n))
+        return decl_list

    def virtual_output_declarations(self,
                                    prefix: Optional[str] = None) -> List[str]:
-        return [
-            'std::remove_pointer_t<{type}> {prefix}{n};'.format(
-                type=Type(t).str(), prefix=prefix or '', n=n)
-            for t, n in self.cparams
-        ]
+        container_type = self.type.remove_generic().basic().str()
+        container = (container_type == "std::vector"
+                     or container_type == "vector")
+        decl_list: List[str] = []
+        for t, n, in self.cparams:
+            if not decl_list and container:
+                inner_t = self.type.inner_type()
+                if inner_t:
+                    decl_list.append(
+                        'std::array<{inner_t}, 1024> {prefix}{n};'.format(
+                            inner_t=inner_t.str(), prefix=prefix or '', n=n))
+            else:
+                decl_list.append(
+                    'std::remove_pointer_t<{type}> {prefix}{n}'.format(
+                        type=Type(t).str(), prefix=prefix or '', n=n))
+                decl_list[-1] += '=1024;' if container else ';'
+        return decl_list

    def virtual_output(self, prefix: Optional[str] = None) -> str:
        write = self.virtual_write
@@ -694,9 +718,14 @@ def generate_cpp_header() -> str:
                          [c.generate() for c in cpp_classes])


-def cwrap(name: str) -> Callable:
+c_type_map: Dict[str, Type] = {}
+
+
+def cwrap(name: str, c_type: Optional[str] = None) -> Callable:
    def with_cwrap(f):
        type_map[name] = f
+        if c_type:
+            c_type_map[name] = Type(c_type)

        @wraps(f)
        def decorated(*args, **kwargs):
@@ -917,6 +946,9 @@ def vector_c_wrap(p: Parameter) -> None:
    # Not a generic type
    if not inner:
        return
+    if inner.str() in c_type_map:
+        inner = c_type_map[inner.str()]
+
    t = inner.add_pointer()
    if p.type.is_reference():
        if p.type.is_const():
@@ -927,6 +959,12 @@ def vector_c_wrap(p: Parameter) -> None:
            p.add_size_param()
            p.bad_param('${name} == nullptr or ${size} == nullptr',
                        'Null pointer')
+        elif p.virtual:
+            p.add_param(t)
+            p.add_size_param()
+            p.bad_param('${name} == nullptr or ${size} == nullptr',
+                        'Null pointer')
+            p.virtual_write = '{${name}.begin(), ${name}.begin()+${size}}; // cppcheck-suppress returnDanglingLifetime'
        else:
            p.add_param(t)
            p.bad_param('${name} == nullptr', 'Null pointer')
@@ -946,7 +984,7 @@ def vector_c_wrap(p: Parameter) -> None:
        p.write = ['std::copy(${result}.begin(), ${result}.end(), ${name})']


-@cwrap('std::string')
+@cwrap('std::string', 'char*')
 def string_c_wrap(p: Parameter) -> None:
    t = Type('char*')
    if p.returns:
@@ -1061,9 +1099,9 @@ struct ${ctype} {
 c_api_virtual_impl = Template('''
 ${return_type} ${name}(${params}) const
 {
-    ${output_decls}
    if (${fname} == nullptr)
        throw std::runtime_error("${name} function is missing.");
+    ${output_decls}
    std::array<char, 256> exception_msg;
    exception_msg.front() = '\\0';
    auto api_error_result = ${fname}(${args});

--- a/tools/api/api.cpp
+++ b/tools/api/api.cpp
@@ -21,6 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
+#include <migraphx/execution_environment.hpp>
 #include <migraphx/migraphx.h>
 #include <migraphx/rank.hpp>
 #include <migraphx/shape.hpp>
@@ -166,6 +167,13 @@ void set_output_names(tf_options& options, std::vector<const char*> names)
    options.output_node_names = std::vector<std::string>(names.begin(), names.end());
 }

+std::vector<argument>
+run_async(program& p, const parameter_map& params, void* s, std::string_view name)
+{
+    // Pair the raw handle s with its type name and pass it, with `true`, as the environment
+    return p.eval(params, execution_environment{any_ptr(s, name), true});
+}
+
 template <class Value>
 std::vector<const char*> get_names(const std::unordered_map<std::string, Value>& m)
 {
@@ -265,11 +273,18 @@ struct experimental_custom_op
 template <class CustomOp>
 struct custom_operation
 {
+
    template <class Self, class F>
    static auto reflect(Self&, F)
    {
        return pack();
    }
+
+    value attributes() const
+    {
+        // "gpu" when the wrapped op reports it runs on the offload target, otherwise "cpu"
+        const char* target = op.runs_on_offload_target() ? "gpu" : "cpu";
+        return {{"custom_op", true}, {"target", target}};
+    }
+
    CustomOp op;
    std::string name() const { return op.xobject.name; }

@@ -284,6 +299,23 @@ struct custom_operation
    {
        return op.compute(std::move(ctx), std::move(output_shape), std::move(inputs));
    }
+
+    std::ptrdiff_t output_alias(std::vector<shape> inputs) const
+    {
+        // TODO: For now, only support one output alias; an empty result is reported as -1
+        const auto aliases = op.output_alias(std::move(inputs));
+        if(aliases.size() > 1)
+        {
+            MIGRAPHX_THROW("Currently, CustomOps in MIGraphX only supports one output_alias");
+        }
+        if(aliases.empty())
+        {
+            return -1;
+        }
+        return aliases.front();
+    }
+
+    // Forwards the wrapped op's answer unchanged.
+    bool runs_on_offload_target() const
+    {
+        return op.runs_on_offload_target();
+    }
 };

 template <class CustomOp>

--- a/tools/api/migraphx.h
+++ b/tools/api/migraphx.h
@@ -26,7 +26,6 @@

 #include <stdlib.h>
 #include <stdbool.h>
-
 // Add new types here
 // clang-format off
 #define MIGRAPHX_SHAPE_VISIT_TYPES(m) \

--- a/tools/include/context.hpp
+++ b/tools/include/context.hpp
@@ -66,12 +66,21 @@ any_ptr get_queue_context(T&)
 {
    return {};
 }
+// No-op named as the default for the 'wait_for' virtual in the context interface below.
+template <class T>
+void wait_for_context(T&, any_ptr) {}
+
+// No-op named as the default for the 'finish_on' virtual in the context interface below.
+template <class T>
+void finish_on_context(T&, any_ptr)
+{
+}

 <%
 interface('context',
           virtual('to_value', returns = 'value', const = True, default = 'to_value_context'),
           virtual('from_value', v = 'const value&', default = 'from_value_context'),
           virtual('get_queue', returns = 'any_ptr', default = 'get_queue_context'),
+           virtual('wait_for', queue = 'any_ptr', returns = 'void', default = 'wait_for_context'),
+           virtual('finish_on', queue = 'any_ptr', returns = 'void', default = 'finish_on_context'),
           virtual('finish', returns = 'void', const = True)) %>

    inline void migraphx_to_value(value& v, const context& ctx)