Commit 7dc6e3ae authored by Khalique Ahmed's avatar Khalique Ahmed
Browse files

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into mi100_opts

parents f94d77fc a275f590
#ifndef MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP
#define MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP
#include <migraphx/config.hpp>
#include <migraphx/auto_register.hpp>
#include <migraphx/type_name.hpp>
#include <migraphx/gpu/driver/parser.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
using action_function = std::function<void(const parser&, const value&)>;
action_function get_action(const std::string& name);
void register_action(const std::string& name, const action_function& a);
// Registration hook used by auto_register: at static-initialization time,
// registers T::apply as an action under T's unqualified type name.
struct auto_register_action
{
    template <class T>
    static void apply()
    {
        const auto full_name  = get_type_name<T>();
        // Strip any leading namespaces: "migraphx::gpu::driver::foo" -> "foo".
        // (If there is no "::", rfind returns npos and npos + 2 wraps to 1,
        // matching the original behavior.)
        const auto short_name = full_name.substr(full_name.rfind("::") + 2);
        register_action(short_name,
                        [](auto&&... xs) { T::apply(std::forward<decltype(xs)>(xs)...); });
    }
};
template <class T>
using action = auto_register<auto_register_action, T>;
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP
#ifndef MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP
#define MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP
#include <migraphx/value.hpp>
#include <migraphx/shape.hpp>
#include <unordered_map>
#include <functional>
#include <vector>
#include <string>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
[[noreturn]] void error(const std::string& msg);
// Drives the gpu-driver input file: holds the optional "settings" object and
// provides typed lookups that fall back from the per-action value to the
// global settings to a caller-supplied default.
struct parser
{
parser() = default;
// Look up `key` in `v`; if absent there, fall back to the loaded settings,
// and finally to `default_value`.
template <class T>
T get(const value& v, const std::string& key, const T& default_value) const
{
return v.get(key, settings.get(key, default_value));
}
// Build one shape from a value with "type"/"lens"/"strides" keys.
shape parse_shape(const value& v) const;
// Build a vector of shapes from an array value.
std::vector<shape> parse_shapes(const value& v) const;
// Copy the "settings" sub-object out of `v`, if present.
void load_settings(const value& v);
// Entry point: dispatch every top-level key of `v` to its registered action.
static void process(const value& v);
private:
// Global settings object; defaults to an empty object so lookups are safe.
value settings = value::object{};
};
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP
#ifndef MIGRAPHX_GUARD_GPU_DRIVER_PERF_HPP
#define MIGRAPHX_GUARD_GPU_DRIVER_PERF_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/operation.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
double time_op(context& ctx, operation op, const std::vector<shape>& inputs, int n = 100);
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_DRIVER_PERF_HPP
#include <migraphx/gpu/driver/parser.hpp>
#include <migraphx/json.hpp>
#include <migraphx/convert_to_json.hpp>
#include <migraphx/file_buffer.hpp>
#include <iostream>
using namespace migraphx; // NOLINT
using namespace migraphx::gpu; // NOLINT
using namespace migraphx::gpu::driver; // NOLINT
// gpu-driver entry point: reads a JSON(-ish) input file, converts it to a
// migraphx::value, and hands it to parser::process for action dispatch.
int main(int argc, char const* argv[])
{
    std::vector<std::string> args(argv, argv + argc);
    if(args.size() < 2)
    {
        // Usage errors are diagnostics: write to stderr and exit with a
        // nonzero status instead of std::abort(), which raises SIGABRT and
        // may dump core for a simple missing-argument mistake.
        std::cerr << "Usage: gpu-driver <input-file>" << std::endl;
        return 1;
    }
    auto v = from_json_string(convert_to_json(read_string(args[1])));
    parser::process(v);
}
#include <migraphx/gpu/driver/parser.hpp>
#include <migraphx/gpu/driver/action.hpp>
#include <iostream>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
// Report a fatal driver error and terminate.
// Fix: diagnostics go to stderr, not stdout, so they are not interleaved
// with (or lost in) the tool's normal piped output.
[[noreturn]] void error(const std::string& msg)
{
    std::cerr << msg << std::endl;
    std::abort();
}
// Construct a shape from a value: "type" (default "float"), "lens", and an
// optional "strides" array. Without strides the shape is standard/packed.
shape parser::parse_shape(const value& v) const
{
    const auto dims    = get(v, "lens", std::vector<std::size_t>{});
    const auto strides = get(v, "strides", std::vector<std::size_t>{});
    const auto t       = shape::parse_type(get<std::string>(v, "type", "float"));
    return strides.empty() ? shape{t, dims} : shape{t, dims, strides};
}
std::vector<shape> parser::parse_shapes(const value& v) const
{
std::vector<shape> result;
std::transform(
v.begin(), v.end(), std::back_inserter(result), [&](auto&& x) { return parse_shape(x); });
return result;
}
// Capture the optional top-level "settings" object; leave defaults otherwise.
void parser::load_settings(const value& v)
{
    if(not v.contains("settings"))
        return;
    settings = v.at("settings");
}
// Main dispatch loop: every top-level key except "settings" names a
// registered action, which is invoked with the parser and the key's value.
void parser::process(const value& v)
{
    if(not v.is_object())
        error("Input is not an object");
    parser p{};
    p.load_settings(v);
    for(auto&& entry : v)
    {
        const auto& key = entry.get_key();
        if(key == "settings")
            continue;
        auto act = get_action(key);
        act(p, entry.without_key());
    }
}
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/driver/perf.hpp>
#include <migraphx/context.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/time.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsigned long seed = 0)
{
std::vector<argument> args;
std::transform(shapes.begin(), shapes.end(), std::back_inserter(args), [&](auto& s) {
return to_gpu(generate_argument(s, seed++));
});
return args;
}
using milliseconds = std::chrono::duration<double, std::milli>;
// Average wall-clock time (ms) of running `op` on generated inputs,
// measured over `n` timed iterations after one untimed warm-up.
double time_op(context& ctx, operation op, const std::vector<shape>& inputs, int n)
{
    // TODO: Use std::ref
    migraphx::context gctx = ctx;
    auto output            = op.compute_shape(inputs);
    op.finalize(gctx, output, inputs);
    auto args = generate_arguments(inputs);
    auto run  = [&] {
        op.compute(gctx, output, args);
        gctx.finish(); // wait for the GPU so the measurement covers real work
    };
    // Warm-up run: the first launch may include one-time setup costs.
    run();
    double total = 0.0;
    for(int i = 0; i < n; i++)
        total += time<milliseconds>(run);
    return total / n;
}
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/driver/action.hpp>
#include <migraphx/gpu/driver/perf.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
// Action "run_op": builds the named GPU operator, times it on the shapes
// listed under "inputs", and prints the average runtime in milliseconds.
struct run_op : action<run_op>
{
    static void apply(const parser& p, const value& v)
    {
        context ctx;
        auto shapes  = p.parse_shapes(v.at("inputs"));
        auto op_name = v.at("name").to<std::string>();
        // Unqualified operator names default to the gpu namespace.
        if(not contains(op_name, "::"))
            op_name = "gpu::" + op_name;
        auto op = make_op(op_name);
        std::cout << op << ": " << time_op(ctx, op, shapes) << "ms" << std::endl;
    }
};
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -169,12 +169,26 @@ void gpu_copy(context& ctx, const argument& src, const argument& dst)
// Host -> device copy. Fast path: identical packed shapes can use a single
// async hipMemcpy; otherwise fall back to the element-wise device copy with
// the host buffer registered for GPU access.
// Fix: the merged diff left a stale unconditional gpu_copy before the
// conditional, causing the data to be copied twice; remove it.
void copy_to_gpu(context& ctx, const argument& src, const argument& dst)
{
    if(src.get_shape() == dst.get_shape() and dst.get_shape().packed())
    {
        hip_async_copy(ctx, src, dst, hipMemcpyHostToDevice);
    }
    else
    {
        gpu_copy(ctx, register_on_gpu(src), dst);
    }
}
// Device -> host copy, mirroring copy_to_gpu: async hipMemcpy when shapes
// match and the destination is packed, element-wise device copy otherwise.
// Fix: remove the stale unconditional gpu_copy left over from the merged
// diff, which performed the copy a second time.
void copy_from_gpu(context& ctx, const argument& src, const argument& dst)
{
    if(src.get_shape() == dst.get_shape() and dst.get_shape().packed())
    {
        hip_async_copy(ctx, src, dst, hipMemcpyDeviceToHost);
    }
    else
    {
        gpu_copy(ctx, src, register_on_gpu(dst));
    }
}
argument get_preallocation(context& ctx, const std::string& id)
......
......@@ -51,6 +51,7 @@ struct code_object_op
os << "symbol_name=" << op.symbol_name << ",";
os << "global=" << op.global << ",";
os << "local=" << op.local << ",";
os << "]";
return os;
}
};
......
......@@ -14,8 +14,9 @@ struct hip_compile_options
std::size_t local;
std::vector<shape> inputs;
shape output;
std::string kernel_name = "kernel";
std::string params = "";
std::string kernel_name = "kernel";
std::string params = "";
std::vector<shape> reduced_inputs = {};
};
operation compile_hip_code_object(const std::string& content, hip_compile_options options);
......
#ifndef MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP
#define MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP
#include <migraphx/config.hpp>
#include <migraphx/operation.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
operation
compile_pointwise(context& ctx, const std::vector<shape>& inputs, const std::string& lambda);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_FILL_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_FILL_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void fill(hipStream_t stream, const argument& result, unsigned long val);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument topk_smallest(hipStream_t stream,
const argument& val_res,
const argument& ind_res,
const argument& arg,
int64_t k,
int64_t axis);
argument topk_largest(hipStream_t stream,
const argument& val_res,
const argument& ind_res,
const argument& arg,
int64_t k,
int64_t axis);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_WHERE_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_WHERE_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void where(hipStream_t stream,
const argument& result,
const argument& arg0,
const argument& arg1,
const argument& arg2);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_LOOP_HPP
#define MIGRAPHX_GUARD_RTGLIB_LOOP_HPP
#include <migraphx/argument.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/loop.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
// GPU wrapper around op::loop: delegates reflection to the inner CPU loop
// op so serialization/comparison match, while compute() runs submodules on
// the GPU context via the supplied `run` callback.
struct hip_loop
{
// Underlying reference loop operator carrying the loop attributes.
op::loop op;
// Reflect the wrapped op's fields so this wrapper serializes like op::loop.
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::loop"; }
shape compute_shape(std::vector<shape> inputs, std::vector<module_ref> mods) const;
// `run` executes a submodule with a parameter map and returns its outputs;
// defined out-of-line elsewhere in the project.
argument
compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args,
const std::vector<module_ref>& mods,
const std::function<std::vector<argument>(
module_ref&, const std::unordered_map<std::string, argument>&)>& run) const;
// The output aliases the last input buffer (preallocated output argument).
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
File mode changed from 100644 to 100755
#ifndef MIGRAPHX_GUARD_RTGLIB_TOPK_HPP
#define MIGRAPHX_GUARD_RTGLIB_TOPK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/topk.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
// GPU wrapper around op::topk: reflection is forwarded to the inner op so
// attributes (k, axis, largest) serialize identically to the reference op.
struct hip_topk
{
// Underlying reference topk operator carrying k/axis/largest attributes.
op::topk op;
// Reflect the wrapped op's fields so this wrapper serializes like op::topk.
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::topk"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
// The output aliases the last input buffer (preallocated output argument).
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_WHERE_HPP
#define MIGRAPHX_GUARD_RTGLIB_WHERE_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/where.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// Elementwise select dispatched to device::where via the ternary_device CRTP
// base. NOTE(review): presumably result[i] = cond[i] ? a[i] : b[i] — the
// exact argument roles come from ternary_device/device::where; confirm there.
struct hip_where : ternary_device<hip_where, device::where>
{
shape compute_shape(const std::vector<shape>& inputs) const
{
// Four inputs with equal dimensions; presumably the fourth slot is the
// preallocated output buffer (TODO confirm against ternary_device).
check_shapes{inputs, *this}.has(4).same_dims();
auto s1 = inputs.at(1);
auto s2 = inputs.at(2);
if(s1 == s2 and s1.packed())
{
// Identical packed layouts: reuse that layout for the result.
return s1;
}
else if(s1.packed() != s2.packed())
{
// Exactly one operand is packed: prefer the packed layout.
return s1.packed() ? s1 : s2;
}
else if(s1.broadcasted() != s2.broadcasted())
{
// Exactly one operand is broadcast: take the non-broadcast one.
// Dims are equal (same_dims above), so with_lens normalizes layout.
return s1.broadcasted() ? s2.with_lens(s1.lens()) : s1.with_lens(s1.lens());
}
else
{
// Otherwise fall back to a standard densely-packed shape.
return {s1.type(), s1.lens()};
}
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -5,10 +5,25 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// Pad an int8 shape so dimension 1 is a multiple of 4 (int8 packing
// requirement), widening the outermost stride to cover the padded extent.
// Throws for any non-int8 input type.
shape pack_int8_shape(const shape& s)
{
    if(s.type() != shape::int8_type)
    {
        MIGRAPHX_THROW("PACK_INT8_ARGS: only process int8_type");
    }
    auto padded_lens    = s.lens();
    auto padded_strides = s.strides();
    // Round dimension 1 up to the next multiple of 4.
    padded_lens[1]    = (padded_lens[1] + 3) / 4 * 4;
    padded_strides[0] = padded_strides[1] * padded_lens[1];
    return {s.type(), padded_lens, padded_strides};
}
// Output shape of the int8 pack op: the (single, standard) input shape with
// its dimension 1 padded to a multiple of 4.
// Fix: the merged diff left the old `return inputs.at(0);` in place before
// the new return, making the padded-shape return unreachable; remove it.
shape miopen_int8_conv_pack::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{{inputs.at(0)}, *this}.has(1).standard();
    return pack_int8_shape(inputs.at(0));
}
argument
......
......@@ -43,6 +43,59 @@ constexpr bool is_sorted(Iterator first, Iterator last, Compare comp)
return is_sorted_until(first, last, comp) == last;
}
// constexpr-friendly std::for_each equivalent: apply f to every element in
// [first, last) and return the (possibly stateful) function object.
template <class Iterator, class F>
constexpr F for_each(Iterator first, Iterator last, F f)
{
    while(first != last)
    {
        f(*first);
        ++first;
    }
    return f;
}
// constexpr-friendly std::find_if equivalent: first iterator in [first, last)
// whose element satisfies p, or last if none does.
template <class Iterator, class Predicate>
constexpr Iterator find_if(Iterator first, Iterator last, Predicate p)
{
    while(first != last and not p(*first))
        ++first;
    return first;
}
// constexpr-friendly std::find equivalent: first iterator in [first, last)
// whose element compares equal to value, or last if none does.
template <class Iterator, class T>
constexpr Iterator find(Iterator first, Iterator last, const T& value)
{
    for(; first != last; ++first)
    {
        if(*first == value)
            return first;
    }
    return last;
}
// constexpr-friendly std::search equivalent: first occurrence of the needle
// [s_first, s_last) within the haystack [first, last). Returns `first` for
// an empty needle and `last` when no match exists.
template <class Iterator1, class Iterator2>
constexpr Iterator1 search(Iterator1 first, Iterator1 last, Iterator2 s_first, Iterator2 s_last)
{
    while(true)
    {
        Iterator1 it   = first;
        Iterator2 s_it = s_first;
        while(true)
        {
            if(s_it == s_last)
                return first; // whole needle matched at `first`
            if(it == last)
                return last; // haystack exhausted mid-match: no occurrence
            if(!(*it == *s_it))
                break; // mismatch: retry at the next haystack position
            ++it;
            ++s_it;
        }
        ++first;
    }
}
} // namespace migraphx
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment