Commit 94e3a2e4 authored by Shucai Xiao

change size_t to int

parent 26bd92d8
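
The diff below replaces std::size_t with int in loop counters, size variables, and helper signatures across the GPU backend. As a point of reference, here is a minimal, self-contained sketch of that substitution pattern. It is illustrative only: sum_first_n is a hypothetical helper, not part of MIGraphX, and the explicit narrowing cast mirrors the static_cast<int>(item_num) added in the reduce_mean hunk below.

    // Minimal sketch of the size_t -> int substitution; not MIGraphX code.
    #include <iostream>
    #include <vector>

    // Hypothetical helper: the element count is taken as int, the way the
    // patched signatures below do (e.g. enum_params, compute_global).
    int sum_first_n(const std::vector<int>& v, int n)
    {
        int total = 0;
        for(int i = 0; i < n; ++i) // was: for(std::size_t i = 0; ...)
            total += v[i];
        return total;
    }

    int main()
    {
        std::vector<int> v{1, 2, 3, 4};
        // v.size() returns std::size_t; narrowing it explicitly mirrors the
        // static_cast<int>(item_num) change in reduce_mean below.
        int n = static_cast<int>(v.size());
        std::cout << sum_first_n(v, n) << "\n";
        return 0;
    }

In this sketch both operands of the loop comparison stay int; the hunks below apply the same type substitution file by file.
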
......@@ -36,7 +36,7 @@ argument nonzero(hipStream_t stream, const argument& result, const argument& arg
return;
auto index = si.multi(j);
-for(size_t k = 0; k < index.size(); ++k)
+for(int k = 0; k < index.size(); ++k)
{
ptr[k * elem_num + out_loc] = index[k];
}
......
......@@ -15,7 +15,7 @@ namespace device {
argument
pad(hipStream_t stream, argument result, argument arg1, float value, std::vector<std::int64_t> pads)
{
-std::size_t nelements = arg1.get_shape().elements();
+int nelements = arg1.get_shape().elements();
hip_visit_all(result, arg1)([&](auto output, auto input) {
using type = typename decltype(output)::value_type;
using hip_index = typename decltype(output)::hip_index;
......@@ -27,7 +27,7 @@ pad(hipStream_t stream, argument result, argument arg1, float value, std::vector
std::copy(pads.begin(), pads.begin() + offsets.size(), offsets.begin());
gs_launch(stream, nelements)([=](auto i) __device__ {
auto idx = input.get_shape().multi(i);
-for(std::size_t j = 0; j < offsets.size(); j++)
+for(int j = 0; j < offsets.size(); j++)
{
idx[j] += offsets[j];
}
......
......@@ -9,7 +9,7 @@ namespace device {
void reduce_mean(hipStream_t stream, const argument& result, const argument& arg)
{
index_int item_num = arg.get_shape().elements() / result.get_shape().elements();
-reduce(stream, result, arg, sum{}, 0, id{}, mean{item_num});
+reduce(stream, result, arg, sum{}, 0, id{}, mean{static_cast<int>(item_num)});
}
} // namespace device
......
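
For context on the reduce_mean hunk above: item_num is the element count folded into each output value (input elements divided by output elements), and it now reaches mean{} as an int via the added cast. A small worked sketch, with made-up dimensions that are assumptions for illustration only:

    // Assumed illustration: reducing a {2, 3, 4} input over its last axis
    // yields a {2, 3, 1} result, so each output element averages
    // 24 / 6 = 4 input elements.
    #include <iostream>

    int main()
    {
        int arg_elements    = 2 * 3 * 4; // arg.get_shape().elements()
        int result_elements = 2 * 3 * 1; // result.get_shape().elements()
        int item_num        = arg_elements / result_elements;
        std::cout << item_num << "\n"; // prints 4
        return 0;
    }
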
......@@ -16,9 +16,9 @@ reverse(hipStream_t stream, argument result, argument arg1, const std::vector<in
{
auto s = arg1.get_shape();
// auto lens = s.lens();
-std::vector<std::size_t> axis_len(axes.begin(), axes.end());
+std::vector<int> axis_len(axes.begin(), axes.end());
shape sa{shape::float_type, axis_len};
-std::size_t nelements = s.elements();
+int nelements = s.elements();
visit_all(result, arg1)([&](auto output1, auto input1) {
hip_visit_views(output1, input1, s)([&](auto output, auto input, auto hs) {
hip_visit_views(sa)([&](auto daxes) {
......
......@@ -142,7 +142,7 @@ std::vector<argument> topk(hipStream_t stream,
auto comp_lens = in_lens;
comp_lens[axis] = 1;
shape comp_s{in_s.type(), comp_lens};
-std::size_t elem_num = comp_s.elements();
+int elem_num = comp_s.elements();
hip_visit_all(val_res, arg, out_s, in_s, comp_s)(
[&](auto out_val, auto input, auto oss, auto iss, auto css) {
......
......@@ -15,8 +15,8 @@ namespace driver {
shape parser::parse_shape(const value& v) const
{
-auto lens = get(v, "lens", std::vector<std::size_t>{});
-auto strides = get(v, "strides", std::vector<std::size_t>{});
+auto lens = get(v, "lens", std::vector<int>{});
+auto strides = get(v, "strides", std::vector<int>{});
auto type = shape::parse_type(get<std::string>(v, "type", "float"));
if(strides.empty())
return shape{type, lens};
......
......@@ -13,7 +13,7 @@ namespace gpu {
void eliminate_workspace::apply(module& p) const
{
-std::size_t n = 0;
+int n = 0;
std::vector<instruction_ref> allocs;
for(auto ins : iterator_for(p))
{
......
......@@ -64,7 +64,7 @@ struct fusion
bool empty() const { return fp == nullptr; }
-op_t operator[](std::size_t i) const
+op_t operator[](int i) const
{
assert(fp);
op_t result;
......@@ -118,7 +118,7 @@ struct fusion
{
// assert(fp);
// TODO: Use zero workspace for now
-std::size_t ws_size = 0;
+int ws_size = 0;
// int algo_count = 1;
// miopenConvFwdAlgorithm_t algo;
// miopenFusionPlanConvolutionGetAlgo(fp.get(), 1, &algo_count, &algo);
......@@ -596,7 +596,7 @@ struct miopen_fusion
{
// Compensate for allocation
inputs.pop_back();
-std::size_t i = 0;
+int i = 0;
f = fusion(inputs[i]);
i++;
std::vector<std::function<void(const fused_operator_args&, const std::vector<argument>&)>>
......
......@@ -90,7 +90,7 @@ void gemm_impl(context& ctx,
}
auto num_matrices = std::accumulate(
-out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies<std::size_t>());
+out_lens.rbegin() + 2, out_lens.rend(), int{1}, std::multiplies<int>());
if(num_matrices == 1)
{
// the rocblas_gemm API handles inputs and output matrices as
......
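
The gemm hunk above folds every output dimension except the trailing two (the matrix rows and columns) into num_matrices, now accumulated as int rather than std::size_t. A short, self-contained illustration of that fold; the lens values are made up for the example:

    // Illustrative only: 2 x 3 batches of 4x5 output matrices.
    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    int main()
    {
        std::vector<int> out_lens{2, 3, 4, 5};
        // Multiply all dims except the last two (the per-matrix dims).
        auto num_matrices = std::accumulate(
            out_lens.rbegin() + 2, out_lens.rend(), int{1}, std::multiplies<int>());
        std::cout << num_matrices << "\n"; // prints 6 (= 2 * 3)
        return 0;
    }
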
......@@ -27,10 +27,10 @@ using hip_host_ptr = MIGRAPHX_MANAGE_PTR(void, hipHostUnregister);
std::string hip_error(int error) { return hipGetErrorString(static_cast<hipError_t>(error)); }
-std::size_t get_available_gpu_memory()
+int get_available_gpu_memory()
{
-size_t free;
-size_t total;
+std::size_t free;
+std::size_t total;
auto status = hipMemGetInfo(&free, &total);
if(status != hipSuccess)
MIGRAPHX_THROW("Failed getting available memory: " + hip_error(status));
......@@ -46,7 +46,7 @@ void* get_device_ptr(void* hptr)
return result;
}
-hip_ptr allocate_gpu(std::size_t sz, bool host = false)
+hip_ptr allocate_gpu(int sz, bool host = false)
{
if(sz > get_available_gpu_memory())
MIGRAPHX_THROW("Memory not available to allocate buffer: " + std::to_string(sz));
......@@ -62,7 +62,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
return hip_ptr{result};
}
-hip_host_ptr register_on_gpu(void* ptr, std::size_t sz)
+hip_host_ptr register_on_gpu(void* ptr, int sz)
{
auto status = hipHostRegister(ptr, sz, hipHostRegisterMapped);
if(status != hipSuccess)
......@@ -71,7 +71,7 @@ hip_host_ptr register_on_gpu(void* ptr, std::size_t sz)
}
template <class T>
-std::vector<T> read_from_gpu(const void* x, std::size_t sz)
+std::vector<T> read_from_gpu(const void* x, int sz)
{
gpu_sync();
std::vector<T> result(sz);
......@@ -81,7 +81,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
return result;
}
-hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
+hip_ptr write_to_gpu(const void* x, int sz, bool host = false)
{
gpu_sync();
auto result = allocate_gpu(sz, host);
......@@ -133,7 +133,7 @@ argument from_gpu(const argument& arg)
return result;
}
-void set_device(std::size_t id)
+void set_device(int id)
{
auto status = hipSetDevice(id);
if(status != hipSuccess)
......@@ -151,8 +151,8 @@ void gpu_sync(const context& ctx) { ctx.finish(); }
void hip_async_copy(context& ctx, const argument& src, const argument& dst, hipMemcpyKind kind)
{
-std::size_t src_size = src.get_shape().bytes();
-std::size_t dst_size = dst.get_shape().bytes();
+int src_size = src.get_shape().bytes();
+int dst_size = dst.get_shape().bytes();
if(src_size > dst_size)
MIGRAPHX_THROW("Not enough memory available in destination to do copy");
auto status = hipMemcpyAsync(dst.data(), src.data(), src_size, kind, ctx.get_stream().get());
......
......@@ -17,8 +17,8 @@ struct code_object_op
{
value::binary code_object;
std::string symbol_name;
-std::size_t global;
-std::size_t local;
+int global;
+int local;
std::vector<shape> expected_inputs;
shape output;
kernel k{};
......
......@@ -15,9 +15,9 @@ namespace gpu {
std::vector<std::vector<char>>
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch);
-std::string enum_params(std::size_t count, std::string param);
+std::string enum_params(int count, std::string param);
-std::size_t compute_global(std::size_t n, std::size_t local = 1024);
+int compute_global(int n, int local = 1024);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -10,8 +10,8 @@ namespace gpu {
struct hip_compile_options
{
-std::size_t global;
-std::size_t local;
+int global;
+int local;
std::vector<shape> inputs;
shape output;
std::string kernel_name = "kernel";
......
......@@ -29,13 +29,13 @@ struct hip_device
add_stream();
}
-hip_device(std::size_t id, std::size_t n) : device_id(id)
+hip_device(int id, int n) : device_id(id)
{
auto status = hipGetDeviceProperties(&device_props, device_id);
if(status != hipSuccess)
MIGRAPHX_THROW("Failed to allocate stream");
-for(std::size_t i = 0; i < n; i++)
+for(int i = 0; i < n; i++)
add_stream();
}
......@@ -45,7 +45,7 @@ struct hip_device
stream() {}
-stream(std::size_t device_number) : id(device_number) {}
+stream(int device_number) : id(device_number) {}
void setup() const { set_device(id); }
......@@ -124,7 +124,7 @@ struct hip_device
}
private:
-std::size_t id = 0;
+int id = 0;
shared<hip_stream_ptr> s = nullptr;
shared<miopen_handle> mihandle = nullptr;
shared<rocblas_handle_ptr> rbhandle = nullptr;
......@@ -134,29 +134,29 @@ struct hip_device
stream& get_stream() { return streams.at(current_stream); }
-stream& get_stream(std::size_t n) { return streams.at(n); }
+stream& get_stream(int n) { return streams.at(n); }
const stream& get_stream() const { return streams.at(current_stream); }
-const stream& get_stream(std::size_t n) const { return streams.at(n); }
+const stream& get_stream(int n) const { return streams.at(n); }
-void set_stream(std::size_t n) { current_stream = n; }
+void set_stream(int n) { current_stream = n; }
-std::size_t nstreams() const { return streams.size(); }
+int nstreams() const { return streams.size(); }
-std::size_t stream_id() const { return current_stream; }
+int stream_id() const { return current_stream; }
std::string get_device_name() const { return device_props.gcnArchName; }
-std::size_t get_device_major() const { return device_props.major; }
+int get_device_major() const { return device_props.major; }
-std::size_t get_device_minor() const { return device_props.minor; }
+int get_device_minor() const { return device_props.minor; }
-std::size_t get_cu_count() const { return device_props.multiProcessorCount; }
+int get_cu_count() const { return device_props.multiProcessorCount; }
private:
-std::size_t device_id = 0;
-std::size_t current_stream = 0;
+int device_id = 0;
+int current_stream = 0;
std::vector<stream> streams;
hipDeviceProp_t device_props;
......@@ -166,7 +166,7 @@ struct hip_device
struct context
{
-context(std::size_t device_id = 0, std::size_t n = value_of(MIGRAPHX_NSTREAMS{}, 1))
+context(int device_id = 0, int n = value_of(MIGRAPHX_NSTREAMS{}, 1))
: current_device(std::make_shared<hip_device>(device_id, n))
{
}
......@@ -184,23 +184,23 @@ struct context
}
hip_device::stream& get_stream() { return get_current_device().get_stream(); }
-hip_device::stream& get_stream(std::size_t n) { return get_current_device().get_stream(n); }
+hip_device::stream& get_stream(int n) { return get_current_device().get_stream(n); }
const hip_device::stream& get_stream() const { return get_current_device().get_stream(); }
-const hip_device::stream& get_stream(std::size_t n) const
+const hip_device::stream& get_stream(int n) const
{
return get_current_device().get_stream(n);
}
-void set_stream(std::size_t n) { get_current_device().set_stream(n); }
+void set_stream(int n) { get_current_device().set_stream(n); }
-void create_events(std::size_t num_of_events)
+void create_events(int num_of_events)
{
-for(std::size_t i = events.size(); i < num_of_events + 1; ++i)
+for(int i = events.size(); i < num_of_events + 1; ++i)
events.emplace_back(create_event());
}
-hipEvent_t get_event(std::size_t i) const { return events.at(i).get(); }
+hipEvent_t get_event(int i) const { return events.at(i).get(); }
std::vector<argument> literals{};
void finish() const { get_stream().wait(); }
......@@ -226,11 +226,11 @@ struct context
void from_value(const value& v)
{
auto v_events = v.at("events");
-std::size_t n_events = v_events.without_key().to<std::size_t>();
+int n_events = v_events.without_key().to<int>();
this->create_events(n_events - 1);
auto v_streams = v.at("streams");
-std::size_t n_streams = v_streams.without_key().to<std::size_t>();
+int n_streams = v_streams.without_key().to<int>();
this->current_device = std::make_shared<hip_device>(0, n_streams);
}
......
......@@ -73,7 +73,7 @@ void arg_op(Op op, hipStream_t stream, const argument& result, const argument& a
{
auto arg_shape = arg.get_shape();
auto batch_lens = arg_shape.lens();
-size_t batch_item_num = batch_lens[axis];
+int batch_item_num = batch_lens[axis];
batch_lens[axis] = 1;
migraphx::shape batch_shape{arg_shape.type(), batch_lens};
migraphx::shape std_arg_shape{arg_shape.type(), arg_shape.lens()};
......@@ -82,8 +82,8 @@ void arg_op(Op op, hipStream_t stream, const argument& result, const argument& a
auto* output = device_cast(result.get<int64_t>().data());
using type = device_type<std::remove_cv_t<typename decltype(input)::value_type>>;
// use one block for items in one batch.
-const size_t max_block_size = 256;
-const std::size_t block_size = compute_block_size(batch_item_num, max_block_size);
+const int max_block_size = 256;
+const int block_size = compute_block_size(batch_item_num, max_block_size);
gs_launch(stream,
batch_shape.elements() * block_size,
block_size)([=](auto i, auto idx) __device__ {
......
......@@ -13,7 +13,7 @@ namespace device {
argument concat(hipStream_t stream,
const shape& output_shape,
std::vector<argument> args,
-std::vector<std::size_t> offsets);
+std::vector<int> offsets);
} // namespace device
} // namespace gpu
......
......@@ -89,13 +89,13 @@ struct rocblas_gemm
return args.back();
}
-void batch_not_transposed(const std::vector<std::size_t>& strides) const
+void batch_not_transposed(const std::vector<int>& strides) const
{
if(strides.size() <= 2)
return;
auto dim_0 = strides.size() - 2;
auto matrix_size = std::max(strides[dim_0], strides[dim_0 + 1]);
-std::vector<std::size_t> batch(strides.begin(), strides.begin() + dim_0);
+std::vector<int> batch(strides.begin(), strides.begin() + dim_0);
if(std::all_of(batch.begin(), batch.end(), [&](auto i) { return (i < matrix_size); }))
{
MIGRAPHX_THROW("GPU_GEMM: matrix size and batch size {" + to_string_range(strides) +
......
......@@ -22,7 +22,7 @@ argument to_gpu(const argument& arg, bool host = false);
argument from_gpu(const argument& arg);
-void set_device(std::size_t id);
+void set_device(int id);
void gpu_sync();
void gpu_sync(const context& ctx);
......
......@@ -25,16 +25,16 @@ struct kernel
}
void launch(hipStream_t stream,
-std::size_t global,
-std::size_t local,
+int global,
+int local,
const std::vector<kernel_argument>& args) const;
void launch(hipStream_t stream,
-std::size_t global,
-std::size_t local,
+int global,
+int local,
std::vector<void*> args) const;
-auto launch(hipStream_t stream, std::size_t global, std::size_t local) const
+auto launch(hipStream_t stream, int global, int local) const
{
return [=](auto&&... xs) {
launch(stream, global, local, std::vector<kernel_argument>{xs...});
......
......@@ -16,7 +16,7 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
-template <class Derived, std::size_t N>
+template <class Derived, int N>
struct device_base : oper<Derived>
{
template <class Self, class F>
......@@ -32,7 +32,7 @@ struct device_base : oper<Derived>
reduce_shapes = reduce_dims(inputs);
}
-argument get_arg(const std::vector<argument>& args, std::size_t i) const
+argument get_arg(const std::vector<argument>& args, int i) const
{
if(reduce_shapes.empty())
return args[i];
......