Unverified commit f28a62ea, authored by Paul Fultz II, committed by GitHub

Add hip clang builds to jenkins (#651)

* Make global variables const

* Tidy fixes

* Disable some lints

* Formatting

* Fix tidy const

* Formatting

* Add missing const keywords

* Formatting

* More fixes

* Fix remaining tidy issues

* Formatting

* Fix rocblas function call

* Formatting

* Fix nodiscard warnings

* Formatting

* Use named parameters

* Remove overload

* Add overload

* Remove noncps

* Use named param for node

* Add auto register header

* Use named parameters

* Refactor jenkinsfile

* Fix shadow

* Add missing body variable

* Add more const methods

* Add hip-clang docker builds

* Remove comments

* Add clang-format

* Add more const

* Formatting

* Rename stage

* Disable check

* Add another const

* Add python 2 dev packages

* Add sphinx to dockerfile
parent 48fa934d
@@ -17,7 +17,7 @@ inline namespace MIGRAPHX_INLINE_NS {
void run_passes(program& prog, const std::vector<pass>& passes, tracer trace)
{
- for(auto& p : passes)
+ for(const auto& p : passes)
{
trace("Pass: ", p.name());
p.apply(prog);
...
@@ -191,7 +191,7 @@ instruction_ref program::insert_instruction(instruction_ref ins,
instruction_ref program::replace_instruction(instruction_ref ins,
const operation& op,
- std::vector<instruction_ref> args)
+ std::vector<instruction_ref> args) MIGRAPHX_TIDY_CONST
{
assert(std::all_of(
args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
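(`MIGRAPHX_TIDY_CONST` presumably expands to `const` only in the clang-tidy build, so tidy's make-member-function-const check passes without changing the method's real signature; that reading is inferred from the macro's name, not shown in this diff.)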
@@ -394,7 +394,7 @@ std::vector<shape> program::get_output_shapes() const
auto last_ins = impl->instructions.back();
if(last_ins.name() == "@return")
{
- auto& output_ins = last_ins.inputs();
+ const auto& output_ins = last_ins.inputs();
std::vector<shape> output_shapes;
std::transform(output_ins.begin(),
output_ins.end(),
...
@@ -6,7 +6,7 @@ inline namespace MIGRAPHX_INLINE_NS {
std::unordered_map<std::string, operation>& op_map()
{
- static std::unordered_map<std::string, operation> m;
+ static std::unordered_map<std::string, operation> m; // NOLINT
return m;
}
void register_op(const operation& op) { op_map()[op.name()] = op; }
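The registries here use the Meyers-singleton pattern: a function-local static is constructed on first use, sidestepping the static initialization order fiasco, and the new `// NOLINT` silences clang-tidy's non-const-global check, which cannot be fixed with `const` because `register_op` mutates the map. A minimal sketch of the same pattern, with hypothetical names:

    #include <string>
    #include <unordered_map>

    struct widget { std::string name; };

    // Constructed on first call, so registration from other translation
    // units' static initializers is always safe.
    std::unordered_map<std::string, widget>& widget_map()
    {
        static std::unordered_map<std::string, widget> m; // NOLINT
        return m;
    }

    void register_widget(const widget& w) { widget_map()[w.name] = w; }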
...
@@ -6,7 +6,7 @@ inline namespace MIGRAPHX_INLINE_NS {
std::unordered_map<std::string, target>& target_map()
{
- static std::unordered_map<std::string, target> m;
+ static std::unordered_map<std::string, target> m; // NOLINT
return m;
}
...
@@ -332,7 +332,7 @@ struct stream_info
}
std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>>
- find_concurrent_instructions(program& p)
+ find_concurrent_instructions(program& p) const
{
std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>> result;
std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>> merge_from;
@@ -350,7 +350,7 @@ struct stream_info
auto streams = this->get_streams(ins);
// Collect concur instructions for each merge point.
- for(auto& merge : merge_from[ins])
+ for(const auto& merge : merge_from[ins])
{
for(auto stream : streams)
{
...
@@ -15,7 +15,7 @@ struct shape_impl
{
static std::shared_ptr<shape_impl> default_shape()
{
- static std::shared_ptr<shape_impl> result = std::make_shared<shape_impl>();
+ static const std::shared_ptr<shape_impl> result = std::make_shared<shape_impl>();
return result;
}
@@ -235,7 +235,7 @@ std::ostream& operator<<(std::ostream& os, const shape& x)
shape::type_t shape::parse_type(const std::string& s)
{
- static std::unordered_map<std::string, shape::type_t> m = {
+ static const std::unordered_map<std::string, shape::type_t> m = {
#define MIGRAPHX_SHAPE_GENERATE_TYPE_STRING_MAP(x, t) {#x, x}, {#t, x},
MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_GENERATE_TYPE_STRING_MAP)};
return m.at(s);
...
@@ -939,7 +939,7 @@ struct cpu_apply
}
}
- void apply_cpu_op(instruction_ref ins)
+ void apply_cpu_op(instruction_ref ins) const
{
prog->replace_instruction(ins, cpu_op{ins->get_operator()}, ins->inputs());
}
@@ -957,7 +957,7 @@ struct cpu_apply
prog->replace_instruction(ins, T{op}, ins->inputs());
}
- void apply_pooling(instruction_ref ins)
+ void apply_pooling(instruction_ref ins) const
{
auto&& op = any_cast<op::pooling>(ins->get_operator());
if(op.mode == "max")
...
@@ -214,6 +214,7 @@ rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_gpu)
# Workaround broken rocblas headers
target_compile_definitions(migraphx_gpu PUBLIC -D__HIP_PLATFORM_HCC__=1)
+ target_compile_options(migraphx_gpu PRIVATE -std=c++17)
target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
target_link_libraries(migraphx_gpu PRIVATE migraphx_device)
...
@@ -12,11 +12,11 @@ namespace device {
argument gather(hipStream_t stream, argument result, argument arg1, argument arg2, int axis)
{
- auto axis_index = (axis < 0) ? (axis + arg1.get_shape().lens().size()) : axis;
- auto& input_shape = arg1.get_shape();
- auto lens = input_shape.lens();
- auto axis_dim_size = lens[axis_index];
- lens[axis_index] = arg2.get_shape().elements();
+ auto axis_index = (axis < 0) ? (axis + arg1.get_shape().lens().size()) : axis;
+ const auto& input_shape = arg1.get_shape();
+ auto lens = input_shape.lens();
+ auto axis_dim_size = lens[axis_index];
+ lens[axis_index] = arg2.get_shape().elements();
shape out_comp_shape{result.get_shape().type(), lens};
std::size_t nelements = result.get_shape().elements();
...
@@ -44,7 +44,7 @@ struct index
template <class F>
__global__ void launcher(F f)
{
- index idx{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x};
+ index idx{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
f(idx);
}
@@ -84,6 +84,9 @@ inline auto gs_launch(hipStream_t stream, index_int n, index_int local = 1024)
};
}
+ #ifdef MIGRAPHX_USE_CLANG_TIDY
+ #define MIGRAPHX_DEVICE_SHARED
+ #else
// Workaround hcc's broken tile_static macro
#ifdef tile_static
#undef tile_static
@@ -91,6 +94,7 @@ inline auto gs_launch(hipStream_t stream, index_int n, index_int local = 1024)
#else
#define MIGRAPHX_DEVICE_SHARED __shared__
#endif
+ #endif
} // namespace device
} // namespace gpu
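The new `MIGRAPHX_USE_CLANG_TIDY` branch defines `MIGRAPHX_DEVICE_SHARED` as empty so that clang-tidy, which parses the sources as ordinary host C++, never sees `tile_static`/`__shared__`. A hypothetical kernel using the macro (not from this diff) might look like:

    // Under hcc/hip the buffer lives in shared memory (LDS); under
    // clang-tidy the macro expands to nothing and it is a plain local array.
    __global__ void block_sum(const float* in, float* out)
    {
        MIGRAPHX_DEVICE_SHARED float buffer[256];
        buffer[threadIdx.x] = in[blockIdx.x * blockDim.x + threadIdx.x];
        __syncthreads();
        if(threadIdx.x == 0)
        {
            float sum = 0;
            for(unsigned int i = 0; i < blockDim.x; i++)
                sum += buffer[i];
            out[blockIdx.x] = sum;
        }
    }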
...
@@ -26,6 +26,15 @@ rocblas_datatype get_type(shape::type_t type)
MIGRAPHX_THROW("ROCBLAS_GEMM: data type not supported!");
}
+ template <class R, class... Ts, class... Us>
+ R rocblas_invoke(R (*f)(Ts...), Us... xs)
+ {
+ if constexpr(sizeof...(Ts) == sizeof...(Us))
+ return f(xs...);
+ else
+ return f(xs..., nullptr, nullptr);
+ }
template <class T>
void gemm_impl(
context& ctx, const shape& output_shape, const std::vector<argument>& args, T alpha, T beta)
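`rocblas_invoke` papers over a rocBLAS signature change: older releases of `rocblas_gemm_ex` apparently took two extra trailing pointer arguments (the `nullptr`s removed below) that newer headers drop. The helper counts the target function's parameters at compile time and appends the two `nullptr`s only when the signature still expects them, which is also why the build now needs `-std=c++17` for `if constexpr`. A self-contained sketch of the trick; the two `api_*` functions are stand-ins, not real rocBLAS symbols:

    #include <iostream>

    int api_new(int x, int y) { return x + y; }
    int api_old(int x, int y, void*, void*) { return x * y; }

    template <class R, class... Ts, class... Us>
    R invoke_compat(R (*f)(Ts...), Us... xs)
    {
        // Forward the arguments as-is when the arity matches; otherwise
        // pad with the legacy trailing nullptrs.
        if constexpr(sizeof...(Ts) == sizeof...(Us))
            return f(xs...);
        else
            return f(xs..., nullptr, nullptr);
    }

    int main()
    {
        std::cout << invoke_compat(&api_new, 2, 3) << "\n"; // prints 5
        std::cout << invoke_compat(&api_old, 2, 3) << "\n"; // prints 6
    }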
@@ -75,67 +84,64 @@ void gemm_impl(
// column-major format. When doing a C = A * B, we actually do
// C^T = (B^T) * (A^T). That is the reason we input args[1] as
// A and args[0] as B in calling the rocblas_gemm.
- rocblas_gemm_ex(ctx.get_stream().get_rocblas(),
- transb ? rocblas_operation_transpose : rocblas_operation_none,
- transa ? rocblas_operation_transpose : rocblas_operation_none,
- n,
- m,
- k,
- &alpha_r,
- to_pointer(args.at(1)),
- arg_type,
- ldb,
- to_pointer(args.at(0)),
- arg_type,
- lda,
- &beta_r,
- to_pointer(args[2]),
- output_type,
- ldc,
- is_3inputs ? to_pointer(args[3]) : to_pointer(args[2]),
- output_type,
- ldc,
- compute_type,
- rocblas_gemm_algo_standard,
- 0,
- 0,
- nullptr,
- nullptr);
+ rocblas_invoke(&rocblas_gemm_ex,
+ ctx.get_stream().get_rocblas(),
+ transb ? rocblas_operation_transpose : rocblas_operation_none,
+ transa ? rocblas_operation_transpose : rocblas_operation_none,
+ n,
+ m,
+ k,
+ &alpha_r,
+ to_pointer(args.at(1)),
+ arg_type,
+ ldb,
+ to_pointer(args.at(0)),
+ arg_type,
+ lda,
+ &beta_r,
+ to_pointer(args[2]),
+ output_type,
+ ldc,
+ is_3inputs ? to_pointer(args[3]) : to_pointer(args[2]),
+ output_type,
+ ldc,
+ compute_type,
+ rocblas_gemm_algo_standard,
+ 0,
+ 0);
}
else
{
- rocblas_gemm_strided_batched_ex(
- ctx.get_stream().get_rocblas(),
- transb ? rocblas_operation_transpose : rocblas_operation_none,
- transa ? rocblas_operation_transpose : rocblas_operation_none,
- n,
- m,
- k,
- &alpha_r,
- to_pointer(args.at(1)),
- arg_type,
- ldb,
- k * n,
- to_pointer(args.at(0)),
- arg_type,
- lda,
- m * k,
- &beta_r,
- to_pointer(args[2]),
- output_type,
- ldc,
- m * n,
- is_3inputs ? to_pointer(args[3]) : to_pointer(args[2]),
- output_type,
- ldc,
- m * n,
- num_matrices,
- compute_type,
- rocblas_gemm_algo_standard,
- 0,
- 0,
- nullptr,
- nullptr);
+ rocblas_invoke(&rocblas_gemm_strided_batched_ex,
+ ctx.get_stream().get_rocblas(),
+ transb ? rocblas_operation_transpose : rocblas_operation_none,
+ transa ? rocblas_operation_transpose : rocblas_operation_none,
+ n,
+ m,
+ k,
+ &alpha_r,
+ to_pointer(args.at(1)),
+ arg_type,
+ ldb,
+ k * n,
+ to_pointer(args.at(0)),
+ arg_type,
+ lda,
+ m * k,
+ &beta_r,
+ to_pointer(args[2]),
+ output_type,
+ ldc,
+ m * n,
+ is_3inputs ? to_pointer(args[3]) : to_pointer(args[2]),
+ output_type,
+ ldc,
+ m * n,
+ num_matrices,
+ compute_type,
+ rocblas_gemm_algo_standard,
+ 0,
+ 0);
}
});
}
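The swap of `args[1]` and `args[0]` in these calls follows from a standard identity: rocBLAS, like classic BLAS, assumes column-major storage, and a row-major m×n matrix is bit-identical to a column-major n×m matrix. Since (AB)ᵀ = BᵀAᵀ, asking rocBLAS for the column-major product Bᵀ·Aᵀ (with n passed before m) yields exactly the row-major C = A·B.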
...
@@ -139,7 +139,12 @@ void set_device(std::size_t id)
MIGRAPHX_THROW("Error setting device");
}
- void gpu_sync() { hipDeviceSynchronize(); }
+ void gpu_sync()
+ {
+ auto status = hipDeviceSynchronize();
+ if(status != hipSuccess)
+ MIGRAPHX_THROW("hip device synchronization failed: " + hip_error(status));
+ }
void hip_async_copy(context& ctx, const argument& src, const argument& dst, hipMemcpyKind kind)
{
...
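`hipDeviceSynchronize` returns a `hipError_t`, which newer HIP headers mark `[[nodiscard]]`; checking the status and throwing through the existing `hip_error` helper both silences that warning (the "Fix nodiscard warnings" item above) and stops synchronization failures from passing silently.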
@@ -37,7 +37,7 @@ struct hip_device
stream(std::size_t device_number) : id(device_number) {}
- void setup() { set_device(id); }
+ void setup() const { set_device(id); }
static hip_stream_ptr create_stream()
{
...
@@ -78,8 +78,8 @@ void arg_op(Op op, hipStream_t stream, const argument& result, const argument& a
migraphx::shape batch_shape{arg_shape.type(), batch_lens};
hip_visit_all(arg, arg_shape, batch_shape)([&](auto input, auto arg_s, auto batch_s) {
- auto output = device_cast(result.get<int64_t>().data());
- using type = device_type<std::remove_cv_t<typename decltype(input)::value_type>>;
+ auto* output = device_cast(result.get<int64_t>().data());
+ using type = device_type<std::remove_cv_t<typename decltype(input)::value_type>>;
// use one block for items in one batch.
const size_t max_block_size = 256;
const std::size_t block_size = compute_block_size(batch_item_num, max_block_size);
...
@@ -48,7 +48,7 @@ struct miopen_apply
instruction_ref last{};
std::unordered_map<instruction_ref, std::string> prog_output_names{};
- context& get_context()
+ context& get_context() const
{
assert(pass != nullptr);
assert(pass->ctx != nullptr);
@@ -67,7 +67,7 @@ struct miopen_apply
this->last = instruction::get_output_alias(std::prev(prog->end()));
if(this->last->name() == "@return")
{
- auto& prog_outputs = last->inputs();
+ const auto& prog_outputs = last->inputs();
std::vector<instruction_ref> outputs_alias(prog_outputs.size());
std::transform(prog_outputs.begin(),
@@ -178,11 +178,11 @@ struct miopen_apply
auto ret = std::prev(prog->end());
if(ret->name() == "@return")
{
- auto& inputs = ret->inputs();
+ const auto& inputs = ret->inputs();
// each input of ret need to be copied from gpu to host, and replace
// output with copy output
- for(auto& in : inputs)
+ for(const auto& in : inputs)
{
auto p_output = prog->insert_instruction(ret, hip_copy_from_gpu{}, in);
instruction::replace_argument(ret, in, p_output);
...
@@ -26,7 +26,7 @@ struct record_event
return {};
}
- void finalize(context& ctx, const shape&, const std::vector<shape>&)
+ void finalize(context& ctx, const shape&, const std::vector<shape>&) const
{
ctx.create_events(event);
}
@@ -66,7 +66,10 @@ struct set_stream
ctx.set_stream(stream);
return {};
}
- void finalize(context& ctx, const shape&, const std::vector<shape>&) { ctx.set_stream(stream); }
+ void finalize(context& ctx, const shape&, const std::vector<shape>&) const
+ {
+ ctx.set_stream(stream);
+ }
};
MIGRAPHX_REGISTER_OP(record_event)
@@ -112,7 +115,7 @@ static std::unordered_map<std::string, std::size_t> create_weight_map()
static const std::unordered_map<std::string, std::size_t>& weight_map()
{
- static std::unordered_map<std::string, std::size_t> m = create_weight_map();
+ static const std::unordered_map<std::string, std::size_t> m = create_weight_map();
return m;
}
...
@@ -170,7 +170,7 @@ value& value::operator=(std::nullptr_t)
bool value::is_array() const { return x ? x->get_type() == array_type : false; }
const std::vector<value>& value::value::get_array() const
{
- auto* r = this->if_array();
+ const auto* r = this->if_array();
assert(r);
return *r;
}
@@ -179,7 +179,7 @@ const std::vector<value>* value::if_array() const { return x ? x->if_array() : n
bool value::is_object() const { return x ? x->get_type() == object_type : false; }
const std::vector<value>& value::get_object() const
{
- auto* r = this->if_object();
+ const auto* r = this->if_object();
assert(r);
return *r;
}
@@ -236,7 +236,7 @@ value* value::find(const std::string& pkey) { return find_impl(x, pkey); }
const value* value::find(const std::string& pkey) const { return find_impl(x, pkey); }
bool value::contains(const std::string& pkey) const
{
- auto it = find(pkey);
+ const auto* it = find(pkey);
if(it == nullptr)
return false;
if(it == end())
@@ -308,7 +308,7 @@ value& value::at(const std::string& pkey)
}
const value& value::at(const std::string& pkey) const
{
- auto* r = find(pkey);
+ const auto* r = find(pkey);
if(r == nullptr)
MIGRAPHX_THROW("Not an object for field: " + pkey);
if(r == end())
...
@@ -33,6 +33,7 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_GPU_COMPILE)
static std::array<std::function<void()>, 2>& handlers()
{
+ // NOLINTNEXTLINE
static std::array<std::function<void()>, 2> r = {};
return r;
};
@@ -57,6 +58,7 @@ struct auto_print
{
static void set_terminate_handler(const std::string& name)
{
+ // NOLINTNEXTLINE
static std::string pname;
pname = name;
std::set_terminate(+[] {
@@ -218,7 +220,7 @@ int auto_register_verify_program()
template <class T>
struct verify_program
{
- static int static_register;
+ static const int static_register;
// This typedef ensures that the static member will be instantiated if
// the class itself is instantiated
using static_register_type =
@@ -226,6 +228,6 @@
};
template <class T>
- int verify_program<T>::static_register = auto_register_verify_program<T>(); // NOLINT
+ const int verify_program<T>::static_register = auto_register_verify_program<T>(); // NOLINT
#endif
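`verify_program` relies on the classic static-member auto-registration idiom: instantiating the class template instantiates the `static_register_type` alias, which odr-uses `static_register`, so its dynamic initializer registers the test before `main` runs; making the member `const` is what satisfies tidy here. A self-contained sketch of the idiom, with hypothetical names:

    #include <functional>
    #include <iostream>
    #include <type_traits>
    #include <vector>

    std::vector<std::function<void()>>& tests()
    {
        static std::vector<std::function<void()>> v; // NOLINT
        return v;
    }

    template <class T>
    int auto_register()
    {
        tests().push_back([] { T{}.run(); });
        return 0;
    }

    template <class T>
    struct registrar
    {
        static const int static_register;
        // Naming &static_register here odr-uses the member, so its
        // definition (and the registration side effect) is instantiated
        // whenever the class template itself is.
        using force_instantiation = std::integral_constant<const int*, &static_register>;
    };

    template <class T>
    const int registrar<T>::static_register = auto_register<T>(); // NOLINT

    struct my_test : registrar<my_test>
    {
        void run() const { std::cout << "my_test ran\n"; }
    };

    int main()
    {
        for(auto&& f : tests())
            f();
    }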
@@ -10,18 +10,22 @@
template <class Tag>
struct stowed
{
+ // NOLINTNEXTLINE
static typename Tag::type value;
};
template <class Tag>
+ // NOLINTNEXTLINE
typename Tag::type stowed<Tag>::value;
template <class Tag, typename Tag::type X>
struct stow_private
{
stow_private() noexcept { stowed<Tag>::value = X; }
+ // NOLINTNEXTLINE
static stow_private instance;
};
template <class Tag, typename Tag::type X>
+ // NOLINTNEXTLINE
stow_private<Tag, X> stow_private<Tag, X>::instance;
template <class C, class T>
...
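`stowed`/`stow_private` implement the well-known explicit-instantiation loophole for reading private members in tests: the standard exempts names in explicit instantiations from access checks, so instantiating `stow_private` with a private member pointer copies that pointer into `stowed<Tag>::value` during static initialization. Hypothetical usage:

    class secretive
    {
        int hidden = 42;
    };

    // Tag naming the member-pointer type to capture.
    struct secretive_hidden
    {
        using type = int secretive::*;
    };

    // Explicit instantiation may name private members; stow_private's
    // constructor stores &secretive::hidden into stowed<secretive_hidden>.
    template struct stow_private<secretive_hidden, &secretive::hidden>;

    int read_hidden(secretive& s) { return s.*stowed<secretive_hidden>::value; }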
@@ -262,6 +262,7 @@ string_map parse(std::vector<std::string> as, Keyword keyword)
inline auto& get_test_cases()
{
+ // NOLINTNEXTLINE
static std::vector<std::pair<std::string, std::function<void()>>> cases;
return cases;
}
...