Commit 94e3a2e4 authored by Shucai Xiao
Browse files

change size_t to int

parent 26bd92d8
...@@ -18,8 +18,8 @@ struct kernel_argument ...@@ -18,8 +18,8 @@ struct kernel_argument
kernel_argument(T&& x) : size(sizeof(U)), align(alignof(U)), data(&x) // NOLINT kernel_argument(T&& x) : size(sizeof(U)), align(alignof(U)), data(&x) // NOLINT
{ {
} }
std::size_t size; int size;
std::size_t align; int align;
void* data; void* data;
}; };
......
...@@ -15,12 +15,12 @@ namespace gpu { ...@@ -15,12 +15,12 @@ namespace gpu {
struct schedule_model struct schedule_model
{ {
std::size_t streams = 0; int streams = 0;
std::size_t concurrency() const; int concurrency() const;
void sched(module& p, instruction_ref ins, std::size_t n) const; void sched(module& p, instruction_ref ins, int n) const;
void wait(module& p, instruction_ref ins, std::size_t wait_id) const; void wait(module& p, instruction_ref ins, int wait_id) const;
void record(module& p, instruction_ref ins, std::size_t wait_id) const; void record(module& p, instruction_ref ins, int wait_id) const;
std::size_t weight(const operation& op) const; int weight(const operation& op) const;
}; };
} // namespace gpu } // namespace gpu
......
...@@ -12,7 +12,7 @@ extern hipError_t hipExtModuleLaunchKernel(hipFunction_t, // NOLINT ...@@ -12,7 +12,7 @@ extern hipError_t hipExtModuleLaunchKernel(hipFunction_t, // NOLINT
uint32_t, uint32_t,
uint32_t, uint32_t,
uint32_t, uint32_t,
size_t, int,
hipStream_t, hipStream_t,
void**, void**,
void**, void**,
...@@ -54,10 +54,10 @@ kernel::kernel(const char* image, const std::string& name) : impl(std::make_shar ...@@ -54,10 +54,10 @@ kernel::kernel(const char* image, const std::string& name) : impl(std::make_shar
void launch_kernel(hipFunction_t fun, void launch_kernel(hipFunction_t fun,
hipStream_t stream, hipStream_t stream,
std::size_t global, int global,
std::size_t local, int local,
void* kernargs, void* kernargs,
std::size_t size) int size)
{ {
void* config[] = { void* config[] = {
// HIP_LAUNCH_PARAM_* are macros that do horrible things // HIP_LAUNCH_PARAM_* are macros that do horrible things
...@@ -79,25 +79,25 @@ void launch_kernel(hipFunction_t fun, ...@@ -79,25 +79,25 @@ void launch_kernel(hipFunction_t fun,
} }
void kernel::launch(hipStream_t stream, void kernel::launch(hipStream_t stream,
std::size_t global, int global,
std::size_t local, int local,
std::vector<void*> args) const std::vector<void*> args) const
{ {
assert(impl != nullptr); assert(impl != nullptr);
void* kernargs = args.data(); void* kernargs = args.data();
std::size_t size = args.size() * sizeof(void*); int size = args.size() * sizeof(void*);
launch_kernel(impl->fun, stream, global, local, kernargs, size); launch_kernel(impl->fun, stream, global, local, kernargs, size);
} }
void kernel::launch(hipStream_t stream, void kernel::launch(hipStream_t stream,
std::size_t global, int global,
std::size_t local, int local,
const std::vector<kernel_argument>& args) const const std::vector<kernel_argument>& args) const
{ {
assert(impl != nullptr); assert(impl != nullptr);
std::vector<char> kernargs = pack_args(args); std::vector<char> kernargs = pack_args(args);
std::size_t size = kernargs.size(); int size = kernargs.size();
launch_kernel(impl->fun, stream, global, local, kernargs.data(), size); launch_kernel(impl->fun, stream, global, local, kernargs.data(), size);
} }
......
...@@ -34,7 +34,7 @@ struct id ...@@ -34,7 +34,7 @@ struct id
struct mean struct mean
{ {
size_t item_num = 1; int item_num = 1;
template <class T> template <class T>
constexpr auto operator()(T x) const constexpr auto operator()(T x) const
{ {
......
...@@ -26,7 +26,7 @@ struct swallow ...@@ -26,7 +26,7 @@ struct swallow
} }
}; };
template <size_t N> template <int N>
struct print_buffer struct print_buffer
{ {
char buffer[N + 1] = {0}; char buffer[N + 1] = {0};
...@@ -43,7 +43,7 @@ struct print_buffer ...@@ -43,7 +43,7 @@ struct print_buffer
} }
} }
template <size_t M> template <int M>
constexpr void append(const char (&array)[M]) constexpr void append(const char (&array)[M])
{ {
for(int i = 0; i < M; i++) for(int i = 0; i < M; i++)
......
...@@ -19,7 +19,7 @@ struct max_pool ...@@ -19,7 +19,7 @@ struct max_pool
} }
template <class T> template <class T>
MIGRAPHX_DEVICE_CONSTEXPR T final(T x, std::size_t) MIGRAPHX_DEVICE_CONSTEXPR T final(T x, int)
{ {
return (x); return (x);
} }
...@@ -36,7 +36,7 @@ struct avg_pool ...@@ -36,7 +36,7 @@ struct avg_pool
} }
template <class T> template <class T>
MIGRAPHX_DEVICE_CONSTEXPR T final(T x, std::size_t y) MIGRAPHX_DEVICE_CONSTEXPR T final(T x, int y)
{ {
return (y == 0) ? 0.0 : (x / y); return (y == 0) ? 0.0 : (x / y);
} }
...@@ -44,13 +44,13 @@ struct avg_pool ...@@ -44,13 +44,13 @@ struct avg_pool
template <class T, class Op> template <class T, class Op>
MIGRAPHX_DEVICE_CONSTEXPR T bilinear_interpolate(const T* data, MIGRAPHX_DEVICE_CONSTEXPR T bilinear_interpolate(const T* data,
const array<std::size_t, 2>& dims, const array<int, 2>& dims,
array<float, 2> xy, array<float, 2> xy,
Op pooling) Op pooling)
{ {
array<int, 2> low{}; array<int, 2> low{};
array<int, 2> high{}; array<int, 2> high{};
for(std::size_t ii = 0; ii < xy.size(); ++ii) for(int ii = 0; ii < xy.size(); ++ii)
{ {
if(xy[ii] < -1.0f or xy[ii] > dims[ii]) if(xy[ii] < -1.0f or xy[ii] > dims[ii])
{ {
...@@ -65,7 +65,7 @@ MIGRAPHX_DEVICE_CONSTEXPR T bilinear_interpolate(const T* data, ...@@ -65,7 +65,7 @@ MIGRAPHX_DEVICE_CONSTEXPR T bilinear_interpolate(const T* data,
xy[ii] = high[ii] = low[ii] = dims[ii] - 1; xy[ii] = high[ii] = low[ii] = dims[ii] - 1;
} }
} }
array<std::size_t, 4> locs = {low[0] * dims[1] + low[1], array<int, 4> locs = {low[0] * dims[1] + low[1],
low[0] * dims[1] + high[1], low[0] * dims[1] + high[1],
high[0] * dims[1] + low[1], high[0] * dims[1] + low[1],
high[0] * dims[1] + high[1]}; high[0] * dims[1] + high[1]};
...@@ -86,15 +86,15 @@ MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data, ...@@ -86,15 +86,15 @@ MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data,
const array<float, 2>& roi_starts, const array<float, 2>& roi_starts,
const array<float, 2>& bin_size, const array<float, 2>& bin_size,
const array<int, 2>& idx, const array<int, 2>& idx,
const array<std::size_t, 2>& bin_grid_size, const array<int, 2>& bin_grid_size,
const array<std::size_t, 2>& dims, const array<int, 2>& dims,
float roi_offset, float roi_offset,
Op op) Op op)
{ {
T output_val = op.init(); T output_val = op.init();
const int64_t count = bin_grid_size[0] * bin_grid_size[1]; const int64_t count = bin_grid_size[0] * bin_grid_size[1];
dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) {
array<std::size_t, 2> id = {iy, ix}; array<int, 2> id = {iy, ix};
array<float, 2> locs = array<float, 2> locs =
roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size + roi_offset; roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size + roi_offset;
...@@ -134,7 +134,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -134,7 +134,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
auto channel_num = x_lens[1]; auto channel_num = x_lens[1];
// input dims of height and width, in all 2-dim arrays, the first dim // input dims of height and width, in all 2-dim arrays, the first dim
// is for height and second dim is for width // is for height and second dim is for width
array<std::size_t, 2> in_dims = {x_lens[2], x_lens[3]}; array<int, 2> in_dims = {x_lens[2], x_lens[3]};
const auto stride = index.nglobal(); const auto stride = index.nglobal();
auto out_s = y_t.get_shape(); auto out_s = y_t.get_shape();
...@@ -143,7 +143,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -143,7 +143,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
// output dims of height and width, in all 2-dim arrays, the first dim // output dims of height and width, in all 2-dim arrays, the first dim
// is for height and second dim is for width // is for height and second dim is for width
const auto& out_lens = out_s.lens; const auto& out_lens = out_s.lens;
array<std::size_t, 2> out_dims = {out_lens[2], out_lens[3]}; array<int, 2> out_dims = {out_lens[2], out_lens[3]};
for(index_int i = index.global; i < out_s.elements(); i += stride) for(index_int i = index.global; i < out_s.elements(); i += stride)
{ {
...@@ -163,9 +163,9 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -163,9 +163,9 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
array<float, 2> roi_size{}; array<float, 2> roi_size{};
array<float, 2> bin_size{}; array<float, 2> bin_size{};
array<std::size_t, 2> bin_grid_size{}; array<int, 2> bin_grid_size{};
for(std::size_t ii = 0; ii < roi_size.size(); ++ii) for(int ii = 0; ii < roi_size.size(); ++ii)
{ {
roi_size[ii] = roi_ends[ii] - roi_starts[ii]; roi_size[ii] = roi_ends[ii] - roi_starts[ii];
roi_size[ii] = max(roi_size[ii], 1.0f); roi_size[ii] = max(roi_size[ii], 1.0f);
......
...@@ -88,7 +88,7 @@ struct miopen_apply ...@@ -88,7 +88,7 @@ struct miopen_apply
outputs_alias.begin(), outputs_alias.begin(),
[](const auto& i) { return instruction::get_output_alias(i); }); [](const auto& i) { return instruction::get_output_alias(i); });
std::size_t index = 0; int index = 0;
for(auto ins : outputs_alias) for(auto ins : outputs_alias)
{ {
prog_output_names[ins] = mod->name() + ":#output_" + std::to_string(index++); prog_output_names[ins] = mod->name() + ":#output_" + std::to_string(index++);
......
...@@ -45,9 +45,9 @@ struct mlir_apply ...@@ -45,9 +45,9 @@ struct mlir_apply
struct execution_spec struct execution_spec
{ {
migraphx::value::binary binary; migraphx::value::binary binary;
size_t global_size; int global_size;
size_t local_size; int local_size;
execution_spec(migraphx::value::binary&& binary_m, size_t global_s, size_t local_s) execution_spec(migraphx::value::binary&& binary_m, int global_s, int local_s)
: binary(std::move(binary_m)), global_size(global_s), local_size(local_s) : binary(std::move(binary_m)), global_size(global_s), local_size(local_s)
{ {
} }
...@@ -152,7 +152,7 @@ struct mlir_apply ...@@ -152,7 +152,7 @@ struct mlir_apply
auto bin_i = binary_map.find(mlir_options); auto bin_i = binary_map.find(mlir_options);
if(bin_i == binary_map.end()) if(bin_i == binary_map.end())
{ {
size_t bin_size = 0; int bin_size = 0;
using mlir_handle = MIGRAPHX_MANAGE_PTR(MiirHandle, miirDestroyHandle); using mlir_handle = MIGRAPHX_MANAGE_PTR(MiirHandle, miirDestroyHandle);
auto handle = mlir_handle(miirCreateHandle(mlir_options.c_str())); auto handle = mlir_handle(miirCreateHandle(mlir_options.c_str()));
...@@ -164,8 +164,8 @@ struct mlir_apply ...@@ -164,8 +164,8 @@ struct mlir_apply
if(miirBufferGet(handle.get(), reinterpret_cast<char*>(bin.data()), &bin_size) == if(miirBufferGet(handle.get(), reinterpret_cast<char*>(bin.data()), &bin_size) ==
MIIR_SUCCESS) MIIR_SUCCESS)
{ {
size_t global_size; int global_size;
size_t block_size; int block_size;
if(miirGetExecutionDims(handle.get(), &global_size, &block_size) == if(miirGetExecutionDims(handle.get(), &global_size, &block_size) ==
MIIR_SUCCESS) MIIR_SUCCESS)
{ {
...@@ -224,7 +224,7 @@ struct mlir_apply ...@@ -224,7 +224,7 @@ struct mlir_apply
void add_memref_descriptor(std::vector<instruction_ref>& refs, instruction_ref inst) void add_memref_descriptor(std::vector<instruction_ref>& refs, instruction_ref inst)
{ {
const size_t offset = 0; const int offset = 0;
auto inst_t = inst->get_shape(); auto inst_t = inst->get_shape();
refs.push_back(inst); refs.push_back(inst);
refs.push_back(inst); refs.push_back(inst);
......
...@@ -10,10 +10,10 @@ std::vector<char> pack_args(const std::vector<kernel_argument>& args) ...@@ -10,10 +10,10 @@ std::vector<char> pack_args(const std::vector<kernel_argument>& args)
std::vector<char> kernargs; std::vector<char> kernargs;
for(auto&& arg : args) for(auto&& arg : args)
{ {
std::size_t n = arg.size; int n = arg.size;
const auto* p = static_cast<const char*>(arg.data); const auto* p = static_cast<const char*>(arg.data);
// Insert padding // Insert padding
std::size_t padding = (arg.align - (kernargs.size() % arg.align)) % arg.align; int padding = (arg.align - (kernargs.size() % arg.align)) % arg.align;
kernargs.insert(kernargs.end(), padding, 0); kernargs.insert(kernargs.end(), padding, 0);
kernargs.insert(kernargs.end(), p, p + n); kernargs.insert(kernargs.end(), p, p + n);
} }
......
...@@ -19,7 +19,7 @@ inline void reshape_if_1d(shape& input) ...@@ -19,7 +19,7 @@ inline void reshape_if_1d(shape& input)
if(dims.size() == 3) if(dims.size() == 3)
{ {
std::vector<size_t> new_dims = dims; std::vector<int> new_dims = dims;
new_dims.insert(new_dims.begin() + 2, 1); new_dims.insert(new_dims.begin() + 2, 1);
input = shape{input.type(), new_dims}; input = shape{input.type(), new_dims};
} }
......
...@@ -60,7 +60,7 @@ shape miopen_quant_convolution::compile(context& ctx, ...@@ -60,7 +60,7 @@ shape miopen_quant_convolution::compile(context& ctx,
cd.get(), cd.get(),
y_desc.get(), y_desc.get(),
&workspace_size); &workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}}; workspace_shape = shape{shape::int8_type, {static_cast<int>(workspace_size)}};
auto arg_vec4_x = to_gpu(generate_argument(pack_int8_shape(inputs[0]))); auto arg_vec4_x = to_gpu(generate_argument(pack_int8_shape(inputs[0])));
auto arg_vec4_w = to_gpu(generate_argument(pack_int8_shape(inputs[1]))); auto arg_vec4_w = to_gpu(generate_argument(pack_int8_shape(inputs[1])));
...@@ -89,7 +89,7 @@ shape miopen_quant_convolution::compile(context& ctx, ...@@ -89,7 +89,7 @@ shape miopen_quant_convolution::compile(context& ctx,
} }
handle = ctx.get_stream().get_miopen(); handle = ctx.get_stream().get_miopen();
algo = perf.fwd_algo; algo = perf.fwd_algo;
return shape{shape::int8_type, {perf.memory}}; return shape{shape::int8_type, {static_cast<int>(perf.memory)}};
} }
void miopen_quant_convolution::finalize(context& ctx, void miopen_quant_convolution::finalize(context& ctx,
......
...@@ -11,7 +11,7 @@ namespace gpu { ...@@ -11,7 +11,7 @@ namespace gpu {
struct record_event struct record_event
{ {
std::size_t event = 0; int event = 0;
template <class Self, class F> template <class Self, class F>
static auto reflect(Self& self, F f) static auto reflect(Self& self, F f)
{ {
...@@ -34,7 +34,7 @@ struct record_event ...@@ -34,7 +34,7 @@ struct record_event
struct wait_event struct wait_event
{ {
std::size_t event = 0; int event = 0;
template <class Self, class F> template <class Self, class F>
static auto reflect(Self& self, F f) static auto reflect(Self& self, F f)
{ {
...@@ -52,7 +52,7 @@ struct wait_event ...@@ -52,7 +52,7 @@ struct wait_event
struct set_stream struct set_stream
{ {
std::size_t stream = 0; int stream = 0;
template <class Self, class F> template <class Self, class F>
static auto reflect(Self& self, F f) static auto reflect(Self& self, F f)
{ {
...@@ -76,8 +76,8 @@ MIGRAPHX_REGISTER_OP(record_event) ...@@ -76,8 +76,8 @@ MIGRAPHX_REGISTER_OP(record_event)
MIGRAPHX_REGISTER_OP(wait_event) MIGRAPHX_REGISTER_OP(wait_event)
MIGRAPHX_REGISTER_OP(set_stream) MIGRAPHX_REGISTER_OP(set_stream)
std::size_t schedule_model::concurrency() const { return streams; } int schedule_model::concurrency() const { return streams; }
void schedule_model::sched(module& p, instruction_ref ins, std::size_t n) const void schedule_model::sched(module& p, instruction_ref ins, int n) const
{ {
auto last_stream = std::find_if(std::make_reverse_iterator(ins), auto last_stream = std::find_if(std::make_reverse_iterator(ins),
std::make_reverse_iterator(p.begin()), std::make_reverse_iterator(p.begin()),
...@@ -92,16 +92,16 @@ void schedule_model::sched(module& p, instruction_ref ins, std::size_t n) const ...@@ -92,16 +92,16 @@ void schedule_model::sched(module& p, instruction_ref ins, std::size_t n) const
p.insert_instruction(ins, set_stream{n}); p.insert_instruction(ins, set_stream{n});
} }
void schedule_model::wait(module& p, instruction_ref ins, std::size_t wait_id) const void schedule_model::wait(module& p, instruction_ref ins, int wait_id) const
{ {
p.insert_instruction(ins, wait_event{wait_id}); p.insert_instruction(ins, wait_event{wait_id});
} }
void schedule_model::record(module& p, instruction_ref ins, std::size_t wait_id) const void schedule_model::record(module& p, instruction_ref ins, int wait_id) const
{ {
p.insert_instruction(std::next(ins), record_event{wait_id}); p.insert_instruction(std::next(ins), record_event{wait_id});
} }
static std::unordered_map<std::string, std::size_t> create_weight_map() static std::unordered_map<std::string, int> create_weight_map()
{ {
return {{"hip::load_literal", 0}, return {{"hip::load_literal", 0},
{"hip::hip_allocate_memory", 0}, {"hip::hip_allocate_memory", 0},
...@@ -113,13 +113,13 @@ static std::unordered_map<std::string, std::size_t> create_weight_map() ...@@ -113,13 +113,13 @@ static std::unordered_map<std::string, std::size_t> create_weight_map()
{"gpu::gemm", 4}}; {"gpu::gemm", 4}};
} }
static const std::unordered_map<std::string, std::size_t>& weight_map() static const std::unordered_map<std::string, int>& weight_map()
{ {
static const std::unordered_map<std::string, std::size_t> m = create_weight_map(); static const std::unordered_map<std::string, int> m = create_weight_map();
return m; return m;
} }
std::size_t schedule_model::weight(const operation& op) const int schedule_model::weight(const operation& op) const
{ {
if(weight_map().count(op.name()) == 0) if(weight_map().count(op.name()) == 0)
{ {
......
...@@ -14,7 +14,7 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_COPY_LITERALS) ...@@ -14,7 +14,7 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_COPY_LITERALS)
void write_literals::apply(module& p) const void write_literals::apply(module& p) const
{ {
assert(ctx != nullptr); assert(ctx != nullptr);
std::size_t n = 0; int n = 0;
for(auto ins : iterator_for(p)) for(auto ins : iterator_for(p))
{ {
if(ins->name() == "@literal") if(ins->name() == "@literal")
......
...@@ -20,8 +20,8 @@ static auto make_mat(tensor_view<T> x) ...@@ -20,8 +20,8 @@ static auto make_mat(tensor_view<T> x)
int dim_0 = n_dims - 2; int dim_0 = n_dims - 2;
int dim_1 = n_dims - 1; int dim_1 = n_dims - 1;
if(s.transposed()) if(s.transposed())
return matrix<T>{x.data(), s.lens()[dim_1], s.lens()[dim_0], s.strides()[dim_1]}; return matrix<T>{x.data(), static_cast<std::size_t>(s.lens()[dim_1]), static_cast<std::size_t>(s.lens()[dim_0]), static_cast<std::size_t>(s.strides()[dim_1])};
return matrix<T>{x.data(), s.lens()[dim_0], s.lens()[dim_1], s.strides()[dim_0]}; return matrix<T>{x.data(), static_cast<std::size_t>(s.lens()[dim_0]), static_cast<std::size_t>(s.lens()[dim_1]), static_cast<std::size_t>(s.strides()[dim_0])};
} }
template <class T, class F> template <class T, class F>
......
...@@ -218,7 +218,7 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>> ...@@ -218,7 +218,7 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
auto wei_lens = weights.get_shape().lens(); auto wei_lens = weights.get_shape().lens();
auto wei_n = wei_lens[0]; auto wei_n = wei_lens[0];
auto wei_c = wei_lens[1]; auto wei_c = wei_lens[1];
std::vector<std::size_t> win_size(wei_lens.begin() + 1, wei_lens.end()); std::vector<int> win_size(wei_lens.begin() + 1, wei_lens.end());
par_for(output_shape.elements(), [&](auto i) { par_for(output_shape.elements(), [&](auto i) {
auto idx_o = output_shape.multi(i); auto idx_o = output_shape.multi(i);
...@@ -226,7 +226,7 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>> ...@@ -226,7 +226,7 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
auto n_dim = idx_o.size(); auto n_dim = idx_o.size();
std::vector<std::ptrdiff_t> win_start; std::vector<std::ptrdiff_t> win_start;
for(std::size_t dim = 2; dim < n_dim; ++dim) for(int dim = 2; dim < n_dim; ++dim)
{ {
auto d_2 = dim - 2; auto d_2 = dim - 2;
win_start.push_back(std::ptrdiff_t(idx_o[dim] * op.stride[d_2]) - win_start.push_back(std::ptrdiff_t(idx_o[dim] * op.stride[d_2]) -
...@@ -291,35 +291,35 @@ struct ref_im2col ...@@ -291,35 +291,35 @@ struct ref_im2col
auto input_shape = args[0].get_shape(); auto input_shape = args[0].get_shape();
auto weights_shape = args[1].get_shape(); auto weights_shape = args[1].get_shape();
visit_all(result, args[0])([&](auto col, auto input) { visit_all(result, args[0])([&](auto col, auto input) {
const std::size_t& height = input_shape.lens()[2]; const int& height = input_shape.lens()[2];
const std::size_t& width = input_shape.lens()[3]; const int& width = input_shape.lens()[3];
const std::size_t& channels = weights_shape.lens()[1]; const int& channels = weights_shape.lens()[1];
const std::size_t& kernel_h = weights_shape.lens()[2]; const int& kernel_h = weights_shape.lens()[2];
const std::size_t& kernel_w = weights_shape.lens()[3]; const int& kernel_w = weights_shape.lens()[3];
const std::size_t& pad_h = op.padding[0]; const int& pad_h = op.padding[0];
const std::size_t& pad_w = op.padding[1]; const int& pad_w = op.padding[1];
const std::size_t& stride_h = op.stride[0]; const int& stride_h = op.stride[0];
const std::size_t& stride_w = op.stride[1]; const int& stride_w = op.stride[1];
long kdiv2_h = long(kernel_h) / 2; long kdiv2_h = long(kernel_h) / 2;
long kdiv2_w = long(kernel_w) / 2; long kdiv2_w = long(kernel_w) / 2;
// calculate output sizes // calculate output sizes
const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1; const int col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
const std::size_t col_width = (width - kernel_w + 2 * pad_w) / stride_w + 1; const int col_width = (width - kernel_w + 2 * pad_w) / stride_w + 1;
// account for padding for the starting position of the input pixels // account for padding for the starting position of the input pixels
long iinput = kdiv2_h - long(pad_h); long iinput = kdiv2_h - long(pad_h);
// loop over output pixels (ioutput, joutput) // loop over output pixels (ioutput, joutput)
for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h) for(int ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
{ {
long jinput = kdiv2_w - long(pad_w); long jinput = kdiv2_w - long(pad_w);
for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w) for(int joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
{ {
// compute linear index for output // compute linear index for output
std::size_t ldx = ioutput * col_width + joutput; int ldx = ioutput * col_width + joutput;
std::size_t p = 0; int p = 0;
dfor(channels, dfor(channels,
kernel_h, kernel_h,
kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) { kernel_w)([&](int c, int koffset, int loffset) {
auto idx = iinput + long(koffset) - kdiv2_h; auto idx = iinput + long(koffset) - kdiv2_h;
auto jdx = jinput + long(loffset) - kdiv2_w; auto jdx = jinput + long(loffset) - kdiv2_w;
col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width)) col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
...@@ -350,7 +350,7 @@ struct max_pool ...@@ -350,7 +350,7 @@ struct max_pool
return (m); return (m);
} }
static double final(double x, std::size_t) { return (x); } static double final(double x, int) { return (x); }
}; };
struct avg_pool struct avg_pool
...@@ -365,7 +365,7 @@ struct avg_pool ...@@ -365,7 +365,7 @@ struct avg_pool
static double apply(double x, double y) { return x + y; } static double apply(double x, double y) { return x + y; }
static double final(double x, std::size_t y) { return (y == 0) ? 0.0 : (x / y); } static double final(double x, int y) { return (y == 0) ? 0.0 : (x / y); }
}; };
template <class Op> template <class Op>
...@@ -395,14 +395,14 @@ struct ref_pooling : auto_register_op<ref_pooling<Op>> ...@@ -395,14 +395,14 @@ struct ref_pooling : auto_register_op<ref_pooling<Op>>
using type = typename decltype(output)::value_type; using type = typename decltype(output)::value_type;
auto in_s = input.get_shape(); auto in_s = input.get_shape();
auto in_lens = in_s.lens(); auto in_lens = in_s.lens();
std::vector<std::size_t> vec_len(in_lens.begin() + 2, in_lens.end()); std::vector<int> vec_len(in_lens.begin() + 2, in_lens.end());
par_for(output_shape.elements(), [&](auto i) { par_for(output_shape.elements(), [&](auto i) {
auto idx_o = output_shape.multi(i); auto idx_o = output_shape.multi(i);
auto n_dim = idx_o.size(); auto n_dim = idx_o.size();
std::vector<std::size_t> win_start; std::vector<int> win_start;
std::vector<std::size_t> win_size; std::vector<int> win_size;
for(std::size_t dim = 2; dim < n_dim; ++dim) for(int dim = 2; dim < n_dim; ++dim)
{ {
auto d_2 = dim - 2; auto d_2 = dim - 2;
int start = static_cast<int>(idx_o[dim] * op.stride[d_2]) - int start = static_cast<int>(idx_o[dim] * op.stride[d_2]) -
...@@ -494,7 +494,7 @@ struct ref_pad ...@@ -494,7 +494,7 @@ struct ref_pad
visit_all(result, args[0])([&](auto output, auto input) { visit_all(result, args[0])([&](auto output, auto input) {
shape_for_each(input.get_shape(), [&](const auto& idx) { shape_for_each(input.get_shape(), [&](const auto& idx) {
std::vector<std::size_t> new_idx(idx.size()); std::vector<int> new_idx(idx.size());
std::transform( std::transform(
idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) { idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
return i + j; return i + j;
...@@ -650,7 +650,7 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>> ...@@ -650,7 +650,7 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
argument result{output_shape}; argument result{output_shape};
auto batch_lens = output_shape.lens(); auto batch_lens = output_shape.lens();
int64_t tuned_axis = tune_axis(args[0].get_shape().lens().size(), op.axis, op.name()); int64_t tuned_axis = tune_axis(args[0].get_shape().lens().size(), op.axis, op.name());
std::size_t n_dims = batch_lens[tuned_axis]; int n_dims = batch_lens[tuned_axis];
batch_lens[tuned_axis] = 1; batch_lens[tuned_axis] = 1;
shape batch_shape{shape::int32_type, batch_lens}; shape batch_shape{shape::int32_type, batch_lens};
...@@ -661,27 +661,27 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>> ...@@ -661,27 +661,27 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
std::vector<value_type> batch_sum(batch_shape.elements(), value_type(0)); std::vector<value_type> batch_sum(batch_shape.elements(), value_type(0));
par_for(batch_shape.elements(), [&](auto i) { par_for(batch_shape.elements(), [&](auto i) {
auto idx = batch_shape.multi(i); auto idx = batch_shape.multi(i);
for(std::size_t j = 0; j < n_dims; ++j) for(int j = 0; j < n_dims; ++j)
{ {
idx[tuned_axis] = j; idx[tuned_axis] = j;
batch_max[i] = batch_max[i] =
std::max<value_type>(batch_max[i], input(idx.begin(), idx.end())); std::max<value_type>(batch_max[i], input(idx.begin(), idx.end()));
} }
for(std::size_t j = 0; j < n_dims; ++j) for(int j = 0; j < n_dims; ++j)
{ {
idx[tuned_axis] = j; idx[tuned_axis] = j;
std::size_t index = output_shape.index(idx); int index = output_shape.index(idx);
output[index] = std::exp(input[index] - batch_max[i]); output[index] = std::exp(input[index] - batch_max[i]);
} }
for(std::size_t j = 0; j < n_dims; ++j) for(int j = 0; j < n_dims; ++j)
{ {
idx[tuned_axis] = j; idx[tuned_axis] = j;
batch_sum[i] += output(idx.begin(), idx.end()); batch_sum[i] += output(idx.begin(), idx.end());
} }
for(std::size_t j = 0; j < n_dims; ++j) for(int j = 0; j < n_dims; ++j)
{ {
idx[tuned_axis] = j; idx[tuned_axis] = j;
output(idx.begin(), idx.end()) = output(idx.begin(), idx.end()) =
......
...@@ -60,8 +60,8 @@ struct tf_parser ...@@ -60,8 +60,8 @@ struct tf_parser
module* mm = prog.get_main_module(); module* mm = prog.get_main_module();
bool is_nhwc = true; bool is_nhwc = true;
unsigned int batch_size = 1; unsigned int batch_size = 1;
std::size_t default_dim_value = 1; int default_dim_value = 1;
std::unordered_map<std::string, std::vector<std::size_t>> map_input_dims; std::unordered_map<std::string, std::vector<int>> map_input_dims;
std::unordered_map<std::string, op_func> ops; std::unordered_map<std::string, op_func> ops;
...@@ -73,7 +73,7 @@ struct tf_parser ...@@ -73,7 +73,7 @@ struct tf_parser
instruction_ref to_kcxy(instruction_ref ins) const; instruction_ref to_kcxy(instruction_ref ins) const;
std::vector<instruction_ref> to_nchw(const std::vector<instruction_ref>& args) const; std::vector<instruction_ref> to_nchw(const std::vector<instruction_ref>& args) const;
std::vector<instruction_ref> to_nhwc(const std::vector<instruction_ref>& args) const; std::vector<instruction_ref> to_nhwc(const std::vector<instruction_ref>& args) const;
int64_t parse_axis(int64_t dim, size_t num_dims) const; int64_t parse_axis(int64_t dim, int num_dims) const;
// tf stores certain attributes such as strides, dilations, as a 4D input. // tf stores certain attributes such as strides, dilations, as a 4D input.
// The first and last dims are equal to 1, and the relevant data is in dims 2 and 3. // The first and last dims are equal to 1, and the relevant data is in dims 2 and 3.
// This helper function reorders the data to store for the respective operator member variables. // This helper function reorders the data to store for the respective operator member variables.
...@@ -81,7 +81,7 @@ struct tf_parser ...@@ -81,7 +81,7 @@ struct tf_parser
void reorder_data(std::vector<T>& prev_data) const void reorder_data(std::vector<T>& prev_data) const
{ {
std::vector<T> new_data(prev_data.size()); std::vector<T> new_data(prev_data.size());
for(size_t i = 0; i < new_data.size(); i++) for(int i = 0; i < new_data.size(); i++)
{ {
auto new_idx = parse_axis(i, new_data.size()); auto new_idx = parse_axis(i, new_data.size());
new_data.at(new_idx) = prev_data.at(i); new_data.at(new_idx) = prev_data.at(i);
...@@ -91,7 +91,7 @@ struct tf_parser ...@@ -91,7 +91,7 @@ struct tf_parser
void parse_undefined(module* mm, const std::string& name); void parse_undefined(module* mm, const std::string& name);
void parse_from(std::istream& is); void parse_from(std::istream& is);
void parse_from(const void* data, std::size_t size); void parse_from(const void* data, int size);
void parse_graph(const tensorflow::GraphDef& graph); void parse_graph(const tensorflow::GraphDef& graph);
void parse_node(const std::string& name); void parse_node(const std::string& name);
literal parse_tensor(const tensorflow::TensorProto& t) const; literal parse_tensor(const tensorflow::TensorProto& t) const;
...@@ -99,7 +99,7 @@ struct tf_parser ...@@ -99,7 +99,7 @@ struct tf_parser
std::vector<std::string> find_outputs() const; std::vector<std::string> find_outputs() const;
}; };
std::vector<int64_t> get_axes_from_mask(size_t num_axes, uint32_t mask); std::vector<int64_t> get_axes_from_mask(int num_axes, uint32_t mask);
} // namespace tf } // namespace tf
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -18,7 +18,7 @@ struct parse_concat : op_parser<parse_concat> ...@@ -18,7 +18,7 @@ struct parse_concat : op_parser<parse_concat>
std::vector<instruction_ref> args) const std::vector<instruction_ref> args) const
{ {
// get index for axis within args // get index for axis within args
size_t axis_idx = info.attributes.at("N").i(); int axis_idx = info.attributes.at("N").i();
int64_t axis = args[axis_idx]->eval().at<int64_t>(); int64_t axis = args[axis_idx]->eval().at<int64_t>();
auto op = make_op("concat", {{"axis", axis}}); auto op = make_op("concat", {{"axis", axis}});
// return only first N arguments (assuming last index is the axis value) // return only first N arguments (assuming last index is the axis value)
......
...@@ -23,7 +23,7 @@ struct parse_pack : op_parser<parse_pack> ...@@ -23,7 +23,7 @@ struct parse_pack : op_parser<parse_pack>
int64_t axis = 0; int64_t axis = 0;
if(contains(info.attributes, "axis")) if(contains(info.attributes, "axis"))
axis = info.attributes.at("axis").i(); axis = info.attributes.at("axis").i();
size_t input_size = args.front()->get_shape().lens().size(); int input_size = args.front()->get_shape().lens().size();
if(axis > input_size) if(axis > input_size)
{ {
MIGRAPHX_THROW("TF_PARSER: axis value of " + to_string(axis) + MIGRAPHX_THROW("TF_PARSER: axis value of " + to_string(axis) +
......
...@@ -18,13 +18,13 @@ struct parse_pad : op_parser<parse_pad> ...@@ -18,13 +18,13 @@ struct parse_pad : op_parser<parse_pad>
const tf_parser::node_info& info, const tf_parser::node_info& info,
std::vector<instruction_ref> args) const std::vector<instruction_ref> args) const
{ {
size_t ndims = args.front()->get_shape().lens().size(); int ndims = args.front()->get_shape().lens().size();
// in tf, the paddings are arranged as a 2d shape (ndims, 2), // in tf, the paddings are arranged as a 2d shape (ndims, 2),
// the last dim contains the left padding and right padding respectively // the last dim contains the left padding and right padding respectively
std::vector<std::pair<int32_t, int32_t>> pad_per_dim(ndims); std::vector<std::pair<int32_t, int32_t>> pad_per_dim(ndims);
auto tf_padding = args[1]->eval().get<int32_t>().to_vector(); auto tf_padding = args[1]->eval().get<int32_t>().to_vector();
for(size_t i = 0; i < 2 * ndims; i += 2) for(int i = 0; i < 2 * ndims; i += 2)
{ {
pad_per_dim[i / 2].first = tf_padding[i]; pad_per_dim[i / 2].first = tf_padding[i];
pad_per_dim[i / 2].second = tf_padding[i + 1]; pad_per_dim[i / 2].second = tf_padding[i + 1];
...@@ -32,7 +32,7 @@ struct parse_pad : op_parser<parse_pad> ...@@ -32,7 +32,7 @@ struct parse_pad : op_parser<parse_pad>
parser.reorder_data(pad_per_dim); parser.reorder_data(pad_per_dim);
std::vector<int64_t> pads(ndims * 2); std::vector<int64_t> pads(ndims * 2);
for(size_t i = 0; i < ndims; i++) for(int i = 0; i < ndims; i++)
{ {
pads[i] = pad_per_dim[i].first; pads[i] = pad_per_dim[i].first;
pads[i + ndims] = pad_per_dim[i].second; pads[i + ndims] = pad_per_dim[i].second;
......
...@@ -22,14 +22,14 @@ struct parse_slice : op_parser<parse_slice> ...@@ -22,14 +22,14 @@ struct parse_slice : op_parser<parse_slice>
auto starts = args[1]->eval().get<int32_t>().to_vector(); auto starts = args[1]->eval().get<int32_t>().to_vector();
auto size = args[2]->eval().get<int32_t>().to_vector(); auto size = args[2]->eval().get<int32_t>().to_vector();
auto axes = args[0]->get_shape().lens(); auto axes = args[0]->get_shape().lens();
size_t num_axes = axes.size(); int num_axes = axes.size();
std::vector<int64_t> axes_int64(axes.begin(), axes.end()); std::vector<int64_t> axes_int64(axes.begin(), axes.end());
std::vector<int64_t> starts_int64(starts.begin(), starts.end()); std::vector<int64_t> starts_int64(starts.begin(), starts.end());
std::vector<int64_t> ends(num_axes); std::vector<int64_t> ends(num_axes);
std::vector<int64_t> op_axes(num_axes); std::vector<int64_t> op_axes(num_axes);
std::iota(op_axes.begin(), op_axes.end(), 0); std::iota(op_axes.begin(), op_axes.end(), 0);
for(size_t i = 0; i < num_axes; i++) for(int i = 0; i < num_axes; i++)
{ {
if(size[i] == -1) if(size[i] == -1)
ends[i] = axes_int64[i]; ends[i] = axes_int64[i];
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment