Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into refine_eliminate_contiguous

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into refine_eliminate_contiguous

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into refine_eliminate_contiguous
Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into refine_eliminate_contiguous
ef5d7092 · Shucai Xiao · 2e04ea47 · b2051bbc · ef5d7092 · ef5d7092
Commit ef5d7092 authored May 02, 2019 by Shucai Xiao
20 changed files
--- a/src/include/migraphx/op/transpose.hpp
+++ b/src/include/migraphx/op/transpose.hpp
@@ -57,7 +57,7 @@ struct transpose
    {
        return {std::move(output_shape), std::move(args.front().data)};
    }
-    int output_alias(const std::vector<shape>&) const { return 0; }
+    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };

 } // namespace op

--- a/src/include/migraphx/op/unsqueeze.hpp
+++ b/src/include/migraphx/op/unsqueeze.hpp
@@ -56,7 +56,7 @@ struct unsqueeze
    {
        return {std::move(output_shape), std::move(args.front().data)};
    }
-    int output_alias(const std::vector<shape>&) const { return 0; }
+    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };

 } // namespace op

--- a/src/include/migraphx/operation.hpp
+++ b/src/include/migraphx/operation.hpp
@@ -49,7 +49,7 @@ struct operation
    argument compute(context& ctx, const shape& output, const std::vector<argument>& input) const;
    /// An optional method to return which argument the output will alias. If
    /// there is no aliased output then -1 can be returned.
-    int output_alias(const std::vector<shape>& input) const;
+    std::ptrdiff_t output_alias(const std::vector<shape>& input) const;
    /// An optional stream operator to print the operation. When this is not
    /// implemented, it will just print the operation's name.
    friend std::ostream& operator<<(std::ostream& os, const operation& op);
@@ -175,7 +175,7 @@ auto is_context_free_op(const T& x) -> decltype(is_context_free_op(
 }

 template <class T>
-int output_alias_op(rank<0>, const T&, const std::vector<shape>&)
+std::ptrdiff_t output_alias_op(rank<0>, const T&, const std::vector<shape>&)
 {
    return -1;
 }
@@ -188,7 +188,7 @@ auto output_alias_op(rank<1>, const T& x, const std::vector<shape>& shapes)
 }

 template <class T>
-int output_alias_op(const T& x, const std::vector<shape>& shapes)
+std::ptrdiff_t output_alias_op(const T& x, const std::vector<shape>& shapes)
 {
    return output_alias_op(rank<1>{}, x, shapes);
 }
@@ -239,7 +239,7 @@ auto has_finalize_op(const T&) -> decltype(has_finalize_op(rank<1>{},
 *      std::string name() const;
 *      bool is_context_free() const;
 *      bool has_finalize() const;
- *      int output_alias(const std::vector<shape>& input) const;
+ *      std::ptrdiff_t output_alias(const std::vector<shape>& input) const;
 *      void finalize(context& ctx,const shape& output,const std::vector<shape>& input) ;
 *      shape compute_shape(const std::vector<shape>& input) const;
 *      argument compute(context& ctx,const shape& output,const std::vector<argument>& input) const;
@@ -325,7 +325,7 @@ struct operation
        return (*this).private_detail_te_get_handle().has_finalize();
    }

-    int output_alias(const std::vector<shape>& input) const
+    std::ptrdiff_t output_alias(const std::vector<shape>& input) const
    {
        assert((*this).private_detail_te_handle_mem_var);
        return (*this).private_detail_te_get_handle().output_alias(input);
@@ -380,10 +380,10 @@ struct operation
        virtual std::shared_ptr<private_detail_te_handle_base_type> clone() const = 0;
        virtual const std::type_info& type() const                                = 0;

-        virtual std::string name() const                                = 0;
-        virtual bool is_context_free() const                            = 0;
-        virtual bool has_finalize() const                               = 0;
-        virtual int output_alias(const std::vector<shape>& input) const = 0;
+        virtual std::string name() const                                           = 0;
+        virtual bool is_context_free() const                                       = 0;
+        virtual bool has_finalize() const                                          = 0;
+        virtual std::ptrdiff_t output_alias(const std::vector<shape>& input) const = 0;
        virtual void
        finalize(context& ctx, const shape& output, const std::vector<shape>& input) = 0;
        virtual shape compute_shape(const std::vector<shape>& input) const           = 0;
@@ -432,7 +432,7 @@ struct operation

        bool has_finalize() const override { return has_finalize_op(private_detail_te_value); }

-        int output_alias(const std::vector<shape>& input) const override
+        std::ptrdiff_t output_alias(const std::vector<shape>& input) const override
        {

            return output_alias_op(private_detail_te_value, input);

--- a/src/include/migraphx/stringutils.hpp
+++ b/src/include/migraphx/stringutils.hpp
@@ -38,8 +38,9 @@ inline std::string join_strings(Strings strings, const std::string& delim)
        return "";

    auto nit = std::next(it);
-    return std::accumulate(
-        nit, strings.end(), *it, [&](std::string x, std::string y) { return x + delim + y; });
+    return std::accumulate(nit, strings.end(), *it, [&](std::string x, std::string y) {
+        return std::move(x) + delim + std::move(y);
+    });
 }

 template <class F>

--- a/src/instruction.cpp
+++ b/src/instruction.cpp
@@ -162,7 +162,24 @@ void instruction::replace_argument(instruction_ref old, instruction_ref new_ins)
    old->remove_output(*this);
 }

-argument instruction::eval() const
+bool instruction::can_eval() const
+{
+    if(op.name() == "@literal")
+    {
+        return true;
+    }
+    else if(is_context_free(op))
+    {
+        return std::all_of(
+            this->inputs().begin(), this->inputs().end(), [](auto arg) { return arg->can_eval(); });
+    }
+    else
+    {
+        return false;
+    }
+}
+
+argument instruction::eval(bool check_eval) const
 {
    if(op.name() == "@literal")
    {
@@ -170,14 +187,13 @@ argument instruction::eval() const
    }
    if(is_context_free(op))
    {
+        if(check_eval and not this->can_eval())
+            return {};
        std::vector<argument> args;
-        for(auto&& arg : this->inputs())
-        {
-            argument a = arg->eval();
-            if(a.empty())
-                return {};
-            args.push_back(a);
-        }
+        std::transform(this->inputs().begin(),
+                       this->inputs().end(),
+                       std::back_inserter(args),
+                       [](auto arg) { return arg->eval(false); });
        return op.compute(result, args);
    }
    return {};

--- a/src/onnx/cifar10.cpp
+++ b/src/onnx/cifar10.cpp
@@ -32,7 +32,7 @@ auto read_cifar10_images(const std::string& full_path)
            labels[i] = *pimage++;
            for(size_t j = 0; j < nbytes_per_image; j++)
            {
-                float v                        = *(pimage + j) / 255.0f;
+                float v                        = float(*(pimage + j)) / 255.0f;
                data[i * nbytes_per_image + j] = v;
            }
        }

--- a/src/onnx/onnx.cpp
+++ b/src/onnx/onnx.cpp
@@ -207,7 +207,7 @@ struct onnx_parser
    template <class T>
    void add_generic_op(std::string name, T x)
    {
-        add_op(name, [this, x](attribute_map, std::vector<instruction_ref> args) {
+        add_op(name, [this, x](const attribute_map&, std::vector<instruction_ref> args) {
            return prog.add_instruction(x, args);
        });
    }
@@ -215,7 +215,7 @@ struct onnx_parser
    template <class T>
    void add_variadic_op(std::string name, T x)
    {
-        add_op(name, [this, x](attribute_map, std::vector<instruction_ref> args) {
+        add_op(name, [this, x](const attribute_map&, std::vector<instruction_ref> args) {
            return std::accumulate(std::next(args.begin()),
                                   args.end(),
                                   args.front(),

--- a/src/opt/memory_coloring_impl.cpp
+++ b/src/opt/memory_coloring_impl.cpp
@@ -63,11 +63,11 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
        }
    }

-    long long offset = 0;
+    std::size_t offset = 0;
    while(!conflict_queue.empty())
    {
-        live_range* range     = conflict_queue.top();
-        long long iter_offset = range->offset;
+        live_range* range       = conflict_queue.top();
+        std::size_t iter_offset = range->offset;
        if(offset > iter_offset)
        {
            offset = std::max(offset, iter_offset + range->size);
@@ -97,7 +97,7 @@ void memory_coloring_impl::build()
    if(num_of_instrs == 0)
        return;

-    int cur_points        = num_of_instrs * 2;
+    auto cur_points       = num_of_instrs * 2;
    instruction_ref iter  = p_program->end();
    instruction_ref begin = p_program->begin();
    std::vector<instruction_ref> dead_instrs;
@@ -193,13 +193,13 @@ void memory_coloring_impl::rewrite()
                continue;

            std::size_t offset = 0;
-            if(interval->get_offset() == invalid_offset)
+            if(interval->get_offset() != invalid_offset)
            {
-                assert(interval->result.bytes() == 0);
+                offset = interval->get_offset();
            }
            else
            {
-                offset = interval->get_offset();
+                assert(interval->result.bytes() == 0);
            }

            if(is_allocate(ins))
@@ -207,15 +207,6 @@ void memory_coloring_impl::rewrite()
                p_program->replace_instruction(
                    ins, op::load{ins->get_shape(), offset}, scratch_param);
            }
-            else if(is_literal(ins))
-            {
-#if 0                
-                auto pre      = p_program->add_literal(ins->lit);
-                bool pre_copy = (interval->get_begin() < earliest_end_point);
-                p_program->replace_instruction(
-                    ins, write_literal{offset, pre_copy}, scratch_param, pre);
-#endif
-            }
        }
    }
    MIGRAPHX_DEBUG(dump("---After rewrite---"));

--- a/src/opt/memory_coloring_impl.hpp
+++ b/src/opt/memory_coloring_impl.hpp
@@ -21,15 +21,15 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-static const int invalid_offset = -1;
+static const std::size_t invalid_offset = std::numeric_limits<std::size_t>::max();

 struct live_range
 {
-    int begin;        // begin point in the instruction stream.
-    int end;          // end point in the instruction stream.
-    long long offset; // offset to base pointer of allocated memory trunk.
-    int vn;           // value number that identifies this live_range.
-    long long size;   // size of required memory in bytes
+    std::size_t begin;  // begin point in the instruction stream.
+    std::size_t end;    // end point in the instruction stream.
+    std::size_t offset; // offset to base pointer of allocated memory trunk.
+    std::size_t vn;     // value number that identifies this live_range.
+    std::size_t size;   // size of required memory in bytes
 #ifdef MIGRAPHX_DEBUG_OPT
    void dump();
 #endif
@@ -45,9 +45,9 @@ struct live_interval
        is_live_on_entry = false;
    }

-    void add_use(int use) { use_points.push_front(use); }
-    int get_begin() const { return segment.begin; }
-    int get_end() const { return segment.end; }
+    void add_use(std::size_t use) { use_points.push_front(use); }
+    std::size_t get_begin() const { return segment.begin; }
+    std::size_t get_end() const { return segment.end; }
    long long get_offset() const { return segment.offset; }

 #ifdef MIGRAPHX_DEBUG_OPT
@@ -55,9 +55,9 @@ struct live_interval
 #endif

    live_range segment;
-    int id;
-    std::list<int> use_points;
-    int def_point;
+    std::size_t id;
+    std::list<std::size_t> use_points;
+    std::size_t def_point;
    shape result;
    bool is_literal;
    bool is_live_on_entry;
@@ -111,8 +111,8 @@ struct memory_coloring_impl
    {
        if((range1.size == 0) || (range2.size == 0))
            return false;
-        long long end1 = range1.offset + range1.size - 1;
-        long long end2 = range2.offset + range2.size - 1;
+        auto end1 = range1.offset + range1.size - 1;
+        auto end2 = range2.offset + range2.size - 1;
        return ((end1 < range2.offset) || (end2 < range1.offset));
    }
    void verify();
@@ -125,8 +125,8 @@ struct memory_coloring_impl
    {
        bool operator()(const interval_ptr i1, const interval_ptr i2) const
        {
-            int len1 = i1->get_end() - i1->get_begin();
-            int len2 = i2->get_end() - i2->get_begin();
+            auto len1 = i1->get_end() - i1->get_begin();
+            auto len2 = i2->get_end() - i2->get_begin();
            if(len1 != len2)
            {
                return (len1 < len2);
@@ -158,7 +158,7 @@ struct memory_coloring_impl

    int num_of_lives;
    int max_value_number;
-    long long required_bytes;
+    std::size_t required_bytes;
    // The earliest program point where an live interval ends.
    int earliest_end_point;
    // The latest program point where an live interval ends.

--- a/src/propagate_constant.cpp
+++ b/src/propagate_constant.cpp
@@ -22,22 +22,32 @@ bool skip_propogate(instruction_ref ins)

 void propagate_constant::apply(program& p) const
 {
-    fix([&](auto self, auto ins) {
-        if(not skip_propogate(ins))
-        {
-            auto r = ins->eval();
-            if(not r.empty())
+    for(auto i : iterator_for(p))
+    {
+        if(i->name() != "@literal")
+            continue;
+        if(i->outputs().empty())
+            continue;
+        fix([&](auto self, auto ins) {
+            std::unordered_set<instruction_ref> children(ins->outputs().begin(),
+                                                         ins->outputs().end());
+            for(auto child : children)
            {
-                assert(r.get_shape() == ins->get_shape());
-                auto l = p.add_literal(r.get_shape(), r.data());
-                p.replace_instruction(ins, l);
-                return;
+                if(skip_propogate(child))
+                {
+                    self(child);
+                    continue;
+                }
+                auto r = child->eval();
+                if(not r.empty())
+                {
+                    assert(r.get_shape() == child->get_shape());
+                    auto l = p.add_literal(r.get_shape(), r.data());
+                    self(p.replace_instruction(child, l));
+                }
            }
-        }
-        std::unordered_set<instruction_ref> children(ins->inputs().begin(), ins->inputs().end());
-        for(auto child : children)
-            self(child);
-    })(std::prev(p.end()));
+        })(i);
+    }
 }

 } // namespace MIGRAPHX_INLINE_NS

--- a/src/targets/cpu/lowering.cpp
+++ b/src/targets/cpu/lowering.cpp
@@ -117,7 +117,7 @@ struct cpu_lrn
            int channels        = output_shape.lens()[1];
            int height          = output_shape.lens()[2];
            int width           = output_shape.lens()[3];
-            float alphaoverarea = op.alpha / op.size;
+            float alphaoverarea = op.alpha / float(op.size);
            int radius          = (op.size - 1) / 2;

            par_dfor(n_batch, height, width)([&](int b, int h, int w) {
@@ -165,15 +165,15 @@ struct cpu_convolution
                     output_shape.lens()[2],
                     output_shape.lens()[3])(
                [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
-                    const int start_x  = i * op.stride[0] - op.padding[0];
-                    const int start_y  = j * op.stride[1] - op.padding[1];
-                    const int group_id = w / (wei_n / op.group);
+                    const auto start_x  = i * op.stride[0] - op.padding[0];
+                    const auto start_y  = j * op.stride[1] - op.padding[1];
+                    const auto group_id = w / (wei_n / op.group);

                    double acc = 0;
                    dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
-                        const int in_x  = start_x + x;
-                        const int in_y  = start_y + y;
-                        const int in_ch = group_id * wei_c + k;
+                        const auto in_x  = start_x + x;
+                        const auto in_y  = start_y + y;
+                        const auto in_ch = group_id * wei_c + k;
                        if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                        {
                            acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
@@ -209,10 +209,8 @@ struct cpu_im2col
            const std::size_t& stride_h = op.stride[0];
            const std::size_t& stride_w = op.stride[1];

-            int kdiv2_h;
-            int kdiv2_w;
-            kdiv2_h = kernel_h / 2;
-            kdiv2_w = kernel_w / 2;
+            auto kdiv2_h = kernel_h / 2;
+            auto kdiv2_w = kernel_w / 2;
            // calculate output sizes
            const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
            const std::size_t col_width  = (width - kernel_w + 2 * pad_w) / stride_w + 1;
@@ -230,8 +228,8 @@ struct cpu_im2col
                    dfor(channels,
                         kernel_h,
                         kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
-                        int idx     = iinput + koffset - kdiv2_h;
-                        int jdx     = jinput + loffset - kdiv2_w;
+                        auto idx    = iinput + koffset - kdiv2_h;
+                        auto jdx    = jinput + loffset - kdiv2_w;
                        col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                          ? input(0, c, idx, jdx)
                                          : 0;
@@ -642,20 +640,20 @@ struct softmax2d
            auto nw          = input.get_shape().lens()[3];
            dfor(nb, nh, nw)([&](std::size_t b, std::size_t i, std::size_t j) {
                value_type cmax = std::numeric_limits<value_type>::lowest();
-                for(int c = 0; c < nc; c++)
+                for(std::size_t c = 0; c < nc; c++)
                {
                    cmax = std::max(cmax, input(b, c, i, j));
                }
-                for(int c = 0; c < nc; c++)
+                for(std::size_t c = 0; c < nc; c++)
                {
                    output(b, c, i, j) = std::exp(input(b, c, i, j) - cmax);
                }
                value_type sum = value_type(0);
-                for(int c = 0; c < nc; c++)
+                for(std::size_t c = 0; c < nc; c++)
                {
                    sum += output(b, c, i, j);
                }
-                for(int c = 0; c < nc; c++)
+                for(std::size_t c = 0; c < nc; c++)
                {
                    output(b, c, i, j) = output(b, c, i, j) / sum;
                }

--- a/src/targets/gpu/device/gather.cpp
+++ b/src/targets/gpu/device/gather.cpp
@@ -16,7 +16,7 @@ argument gather(hipStream_t stream,
                std::vector<migraphx::argument> args,
                int axis)
 {
-    int axis_index = (axis < 0) ? (axis + args[0].get_shape().lens().size()) : axis;
+    auto axis_index = (axis < 0) ? (axis + args[0].get_shape().lens().size()) : axis;
    visit_all(args.back(), args[0])([&](auto output, auto input) {
        std::size_t nelements = output_shape.elements();
        args[1].visit([&](auto indices) {

--- a/src/targets/gpu/fuse_ops.cpp
+++ b/src/targets/gpu/fuse_ops.cpp
@@ -162,7 +162,10 @@ struct hip_triadd
        device::add(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
        return args.at(3);
    }
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 struct hip_triadd_relu
@@ -178,7 +181,10 @@ struct hip_triadd_relu
        device::add_relu(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
        return args.at(3);
    }
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 struct hip_add_relu
@@ -194,7 +200,10 @@ struct hip_add_relu
        device::add_relu(ctx.get_stream().get(), args.at(2), args.at(0), args.at(1));
        return args.at(2);
    }
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 struct find_add_relu
@@ -285,7 +294,10 @@ struct miopen_conv_bias

    void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
    shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 struct miopen_conv_bias_relu
@@ -332,7 +344,10 @@ struct miopen_conv_bias_relu
    }
    void finalize(context& ctx, const shape&, const std::vector<shape>&) { f.compile(ctx); }
    shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 template <class... Ms>

--- a/src/targets/gpu/include/migraphx/gpu/abs.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/abs.hpp
@@ -17,7 +17,10 @@ struct miopen_abs
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
@@ -17,7 +17,10 @@ struct miopen_batch_norm_inference
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/concat.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/concat.hpp
@@ -18,7 +18,10 @@ struct hip_concat
    shape compute_shape(std::vector<shape> inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/contiguous.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/contiguous.hpp
@@ -16,7 +16,10 @@ struct miopen_contiguous
    std::string name() const { return "gpu::contiguous"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument compute(context&, shape output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/convolution.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/convolution.hpp
@@ -31,7 +31,10 @@ struct miopen_convolution
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/elu.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/elu.hpp
@@ -17,7 +17,10 @@ struct miopen_elu
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/gather.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/gather.hpp
@@ -18,7 +18,10 @@ struct hip_gather
    shape compute_shape(std::vector<shape> inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 } // namespace gpu