Unverified commit 70d9faf7, authored by Chris Austen, committed by GitHub

Merge branch 'develop' into mi200

parents a56c531c a60bdb67
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -37,20 +37,22 @@ namespace op {
 * Static allocate:
 * No inputs: `allocate()`
 * `this.s` attribute set to the static output shape of the buffer.
 * `this.s` attribute can be set to a dynamic output shape; however, this will allocate the
 * maximum buffer size for that case.
 *
 * Dynamic allocate:
 * One input: `allocate(output_dims)`
 * `output_dims` are the output buffer dimensions and have a static shape.
 * Either `this.s` or `this.buf_type` (but not both) must be set to calculate the dynamic output
 * shape at compute time. If `this.buf_type` is set, the compute_shape() of allocate at compile time
 * will have dynamic_dimensions from {0, max_int} with rank = output_dims.ndim(). If `this.s` is set
 * then the compute_shape() will output `this.s`; `this.s` should be a dynamic shape.
 */
struct allocate
{
    optional<shape> s;
    // for dynamic allocate to set the buffer type
    optional<shape::type_t> buf_type;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
@@ -62,8 +64,12 @@ struct allocate
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        if(s.has_value())
        {
            if(buf_type.has_value())
            {
                MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both set");
            }
            if(inputs.size() == 1)
            {
                migraphx::check_shapes{inputs, *this, false}.only_dims(1);
@@ -72,16 +78,20 @@ struct allocate
            {
                migraphx::check_shapes{inputs, *this, false}.has(0);
            }
            return s.value();
        }
        else
        {
            if(not buf_type.has_value())
            {
                MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both not set");
            }
            migraphx::check_shapes{inputs, *this, false}.has(1).only_dims(1);
            const auto& out_dims = inputs.at(0);
            std::size_t max_val  = std::numeric_limits<std::size_t>::max();
            std::vector<shape::dynamic_dimension> dyn_dims(out_dims.lens().at(0),
                                                           shape::dynamic_dimension{0, max_val});
            return {buf_type.value(), dyn_dims};
        }
    }

    argument compute(const shape& output_shape, const std::vector<argument>& args) const
@@ -94,7 +104,11 @@ struct allocate
        {
            std::vector<std::size_t> output_dims(output_shape.ndim());
            args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
            if(s)
            {
                return argument{shape{s->type(), output_dims}};
            }
            return argument{shape{buf_type.value(), output_dims}};
        }
    }
};
...
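The mutually exclusive `s`/`buf_type` contract above is the crux of this change. A minimal standalone sketch of the same either/or validation, using plain `std::optional` stand-ins rather than the MIGraphX `shape` types (all names here are illustrative):

```cpp
#include <iostream>
#include <optional>
#include <stdexcept>

// Hypothetical stand-ins for the operator's two optional attributes.
struct allocate_config
{
    std::optional<int> static_shape; // plays the role of `s`
    std::optional<int> buf_type;     // plays the role of `buf_type`
};

// Mirrors the checks in compute_shape(): exactly one of the two must be set.
void validate(const allocate_config& cfg)
{
    if(cfg.static_shape.has_value() and cfg.buf_type.has_value())
        throw std::runtime_error("shape and buf_type attributes both set");
    if(not cfg.static_shape.has_value() and not cfg.buf_type.has_value())
        throw std::runtime_error("shape and buf_type attributes both not set");
}

int main()
{
    validate({std::nullopt, 1}); // ok: dynamic allocate via buf_type
    validate({4, std::nullopt}); // ok: static allocate via s
    try
    {
        validate({4, 1}); // both set: rejected
    }
    catch(const std::exception& e)
    {
        std::cout << e.what() << "\n";
    }
}
```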
@@ -29,6 +29,7 @@
#include <migraphx/argument.hpp>
#include <migraphx/value.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/par.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
@@ -95,11 +96,11 @@ struct binary : op_name<Derived>
    {
        argument result{dyn_out.computed_shape};
        visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
            par_transform(input1.begin(),
                          input1.end(),
                          input2.begin(),
                          output.begin(),
                          static_cast<const Derived&>(*this).apply());
        });
        return result;
    }
...
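`par_transform` comes from the new `<migraphx/par.hpp>` header and its implementation is not shown in this diff. Assuming it behaves like a parallel `std::transform`, a rough standard-library analogue of the serial-to-parallel switch (C++17 execution policies) is:

```cpp
#include <algorithm>
#include <execution>
#include <functional>
#include <iostream>
#include <vector>

int main()
{
    std::vector<float> input1 = {1, 2, 3, 4};
    std::vector<float> input2 = {10, 20, 30, 40};
    std::vector<float> output(input1.size());

    // Serial form, as the old code did with std::transform:
    std::transform(input1.begin(), input1.end(), input2.begin(), output.begin(), std::plus<>{});

    // Parallel form: std::execution::par lets the library split the elementwise
    // work across threads, which is the effect par_transform aims for.
    std::transform(std::execution::par,
                   input1.begin(),
                   input1.end(),
                   input2.begin(),
                   output.begin(),
                   std::plus<>{});

    for(auto v : output)
        std::cout << v << " ";
    std::cout << "\n";
}
```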
@@ -72,8 +72,8 @@ struct dequantizelinear
        visit_all(x, x_zero_point)([&](auto input, auto zero_pts) {
            visit_all(result, x_scale)([&](auto output, auto scales) {
                par_for(output_shape.elements(), [&](auto i) {
                    output[i] = static_cast<double>(static_cast<double>(input[i]) -
                                                    static_cast<double>(zero_pts[i])) *
                                scales[i];
                });
            });
...
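The switch from `int64_t` to `double` intermediates matters once quantized values can be non-integral (for example fp8): the old casts truncated the value before subtracting the zero point. A small sketch of the divergence, with illustrative numbers:

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    // Dequantize: (x - zero_point) * scale
    float x = 2.5f, zero_pt = 0.0f, scale = 0.1f;

    // Old int64-based path truncates non-integral quantized values:
    auto old_val =
        static_cast<double>(static_cast<int64_t>(x) - static_cast<int64_t>(zero_pt)) * scale;
    // New double-based path keeps fractional quantized inputs exact:
    auto new_val =
        static_cast<double>(static_cast<double>(x) - static_cast<double>(zero_pt)) * scale;

    std::cout << old_val << " vs " << new_val << "\n"; // 0.2 vs 0.25
}
```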
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,31 +21,32 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef MIGRAPHX_GUARD_OPERATORS_ISINF_HPP
#define MIGRAPHX_GUARD_OPERATORS_ISINF_HPP

#include <migraphx/op/unary.hpp>
#include <migraphx/config.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct isinf : unary<isinf>
{
    auto apply() const
    {
        return [](auto x) { return std::isinf(static_cast<double>(x)); };
    }

    std::string name() const { return "isinf"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        return unary<isinf>::compute_shape(std::move(inputs)).with_type(shape::bool_type);
    }
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,11 +21,52 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
/**
 * Multinomial or categorical distribution. Performs a sampling of random input
 * and returns the category, or bucket, that each sample falls into. This does
 * not require the standard multinomial distribution but instead takes a
 * probability distribution, i.e. cumulative distribution function (CDF),
 * as its first input.
 *
 * Inputs: args[0] - a tensor of probabilities for each category. Values are
 *               cumulative distribution function totals as provided by the
 *               prefix_scan_sum operation; i.e. start with any set of numbers
 *               > 0 and then apply prefix_scan_sum. Values do not need to be
 *               normalized to sum to 1; this is done in runtime computation.
 *
 *               This input has rank 2. Dimension 0 is the batch number, so that
 *               there can be a different CDF for each iteration in the batch.
 *               The size of dimension 1 is the number of categories.
 *
 *         args[1] - a tensor of random numbers. The last dimension is the
 *               sample size, i.e. the number of random samples in each
 *               iteration of the batch. Nominally has two dimensions where the
 *               first dimension is the batch size, but any reshaping such that
 *               the total number of elements is (batch_size * sample_size)
 *               is legal.
 *
 *               Values as created by a std::mt19937 like this:
 *
 *                   size_t sample_size = 100000;
 *                   size_t seed = 0;
 *                   std::mt19937 gen(seed);
 *                   std::uniform_real_distribution<> dis(0.0, 1.0);
 *                   std::vector<float> rand_samples(sample_size);
 *                   std::generate(rand_samples.begin(), rand_samples.end(),
 *                                 [&]() { return dis(gen); });
 *
 * Output: A 2D tensor of the sampled category index for each random sample.
 *         Dimensions are (dimension 0 of Input 1, last dimension of Input 2).
 */
#ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP

#include <migraphx/argument.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/reflect.hpp>
#include <random>
@@ -47,22 +88,35 @@ struct multinomial
    std::string name() const { return "multinomial"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this, true}.has(2).only_dims(2);

        if(inputs.back().ndim() < 1)
            MIGRAPHX_THROW("Multinomial: Second input shape (sample) has no dimensions");
        if(dtype == shape::bool_type)
            MIGRAPHX_THROW("Multinomial: boolean output type invalid.");

        // Output takes one dimension from each of the two input shapes. If they are both fixed,
        // return a static shape
        if((not inputs.front().dynamic()) or (inputs.front().dyn_dims().front().is_fixed()))
        {
            if((not inputs.back().dynamic()) or (inputs.back().dyn_dims().back().is_fixed()))
            {
                size_t batch{inputs.front().max_lens().front()};
                size_t sample_size{inputs.back().max_lens().back()};
                return {dtype, {batch, sample_size}};
            }
        }
        return {dtype,
                {inputs.front().to_dynamic().dyn_dims().front(),
                 inputs.back().to_dynamic().dyn_dims().back()}};
    }
    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
        argument result{dyn_out.computed_shape};
        size_t batch_size  = dyn_out.computed_shape.lens().front();
        size_t class_size  = args[0].get_shape().lens().back();
        size_t sample_size = dyn_out.computed_shape.lens().back();

        visit_all(args[0], args[1])([&](auto cdf, auto dist) {
            result.visit([&](auto output) {
@@ -70,13 +124,16 @@ struct multinomial
                auto idx       = args[1].get_shape().multi(i);
                auto cdf_begin = cdf.begin() + (idx[0] * class_size);
                auto cdf_end   = cdf_begin + class_size;

                // std::upper_bound returns an iterator to the bucket the value belongs in,
                // when normalized by the probability distribution dist
                auto sample_iter =
                    std::upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
                // convert iterator to an integer index
                output[i] = std::distance(cdf_begin, sample_iter);
                });
            });
        });
        return result;
    }
};
...
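The sampling step in `compute` is a binary search into an unnormalized CDF. A standalone sketch of one batch iteration (weights and names are illustrative, not the MIGraphX API):

```cpp
#include <algorithm>
#include <iostream>
#include <random>
#include <vector>

int main()
{
    // Unnormalized CDF over 3 categories with weights {1, 2, 1},
    // i.e. the output of a prefix sum over the weights.
    std::vector<float> cdf = {1.0f, 3.0f, 4.0f};

    std::mt19937 gen(0); // integer seed, as std::mt19937 expects
    std::uniform_real_distribution<float> dis(0.0f, 1.0f);

    std::vector<int> counts(cdf.size(), 0);
    for(int i = 0; i < 100000; ++i)
    {
        // Scale the uniform sample by the CDF total (cdf.back()); upper_bound
        // then finds the first bucket whose running total exceeds it.
        auto it = std::upper_bound(cdf.begin(), cdf.end(), dis(gen) * cdf.back());
        counts[std::distance(cdf.begin(), it)]++;
    }
    // Expect roughly 25% / 50% / 25%.
    for(auto c : counts)
        std::cout << c << " ";
    std::cout << "\n";
}
```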
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,25 +21,29 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP
#define MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP

#include <migraphx/op/unary.hpp>
#include <migraphx/config.hpp>
#include <fenv.h>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct nearbyint : unary<nearbyint>
{
    auto apply() const
    {
        return [](auto x) {
            auto rounding_mode = fegetround();
            fesetround(FE_TONEAREST);
            // Save the result before restoring the caller's rounding mode;
            // a fesetround placed after `return` would never execute.
            auto result = std::nearbyint(x);
            fesetround(rounding_mode);
            return result;
        };
    }
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...
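The point of pinning `FE_TONEAREST` around `std::nearbyint` is round-half-to-even behavior, which differs from `std::round`'s round-half-away-from-zero. A quick demonstration (subject to the usual floating-environment caveats about compile-time constant folding):

```cpp
#include <cfenv>
#include <cmath>
#include <iostream>

int main()
{
    auto saved = std::fegetround();
    std::fesetround(FE_TONEAREST);
    // nearbyint under FE_TONEAREST rounds ties to the even neighbor...
    std::cout << std::nearbyint(0.5) << " " << std::nearbyint(1.5) << "\n"; // 0 2
    // ...while std::round always rounds ties away from zero.
    std::cout << std::round(0.5) << " " << std::round(1.5) << "\n"; // 1 2
    std::fesetround(saved);
}
```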
@@ -40,6 +40,8 @@ namespace op {
 * 2. use_rank (default) vs use_len:
 * `use_rank` sets the max value/index of the attribute as the rank of lens.
 * `use_lens` sets the max value/index as the corresponding value in lens at the axes index.
 * Uses the dynamic_dimension.max value for dynamic shapes. Returns the original vector
 * (no normalization) if any of dynamic_dimension[axes] are not fixed.
 * 3. `clip_min` vs. `not_clip_min` (default):
 * Clip values less than the minimum to the minimum or not.
 * 4. `include_min` vs. `exclude_min` (default):
...
@@ -70,7 +70,8 @@ struct pooling
    // 2 smaller than the input tensor rank (NCHW layout)
    std::vector<std::size_t> lengths = {1, 1};

    // Spacing between the elements of the pooling kernel. Must be the same ndim as lengths.
    std::vector<std::size_t> dilations = {1, 1};

    // ceiling mode is a flag affecting output size
    // or equivalently, placements of the pooling kernel.
@@ -99,6 +100,7 @@ struct pooling
                 f(self.padding_mode, "padding_mode"),
                 f(self.stride, "stride"),
                 f(self.lengths, "lengths"),
                 f(self.dilations, "dilations"),
                 f(self.ceil_mode, "ceil_mode"),
                 f(self.lp_order, "lp_order"),
                 f(self.dyn_global, "dyn_global"));
@@ -112,14 +114,17 @@ struct pooling
            return;
        if((padding_mode != default_ and padding.size() != stride.size() and
            (padding.size()) != stride.size() * 2) or
           stride.size() != lengths.size() or dilations.size() != lengths.size())
        {
            MIGRAPHX_THROW("POOLING: inconsistent attribute sizes");
        }

        const auto is_zero = [](auto el) { return el == 0; };
        if(std::any_of(lengths.begin(), lengths.end(), is_zero) or
           std::any_of(stride.begin(), stride.end(), is_zero) or
           std::any_of(dilations.begin(), dilations.end(), is_zero))
        {
            MIGRAPHX_THROW("POOLING: size 0 pooling kernel or stride or dilations");
        }

        // TODO: update lowering to run the reference
@@ -142,6 +147,11 @@ struct pooling
    value attributes() const { return {{"normalize_padding", "padding"}}; }

    inline std::size_t dilate_dim(std::size_t dim, std::size_t dilation) const
    {
        return 1 + dilation * (dim - 1);
    }

    std::vector<std::size_t> calc_spatial_dim_out(const std::vector<std::size_t>& input_lens,
                                                  std::size_t kdims) const
    {
@@ -151,8 +161,9 @@ struct pooling
            std::size_t padding_factor = 2 * padding[i];
            if(padding.size() == 2 * kdims)
                padding_factor = padding[i] + padding[i + kdims];
            std::size_t dilated_length = dilate_dim(lengths[i], dilations[i]);
            std::size_t dim_size;
            if(input_lens[i + 2] + padding_factor < dilated_length)
            {
                if(padding_mode == default_)
                    MIGRAPHX_THROW("POOLING: not enough padding for the given kernel size");
@@ -162,7 +173,7 @@ struct pooling
            }
            else
            {
                dim_size = input_lens[i + 2] + padding_factor - dilated_length;
            }
            std::size_t len =
                (ceil_mode)
@@ -331,6 +342,7 @@ struct pooling
                int start = static_cast<int>(idx_o[dim] * stride[d_2]) -
                            static_cast<int>(padding_vals[d_2]);
                int end;
                std::size_t dilated_kernel_dim = dilate_dim(kernel_dims[d_2], dilations[d_2]);
                // NOLINT
                if(count_include_pad and ceil_mode and (mode != pooling_mode::max))
                {
@@ -340,15 +352,14 @@ struct pooling
                    // padding. Clip out-of-bounds indexes but not padding.

                    // Check if this kernel extends beyond the padding at end of dimension
                    end = std::min(start + static_cast<int>(dilated_kernel_dim),
                                   in_lens[dim] + static_cast<int>(padding_vals[d_2]));
                }
                else
                {
                    // In non-ceiling mode, when
                    // count_include_pad is false, or for max pooling, clip off padding.
                    end = std::min(start + static_cast<int>(dilated_kernel_dim), in_lens[dim]);
                }
                win_start.push_back(start);
                if(end < start)
@@ -366,6 +377,16 @@ struct pooling
                // for each element in the window...
                shape_for_each(win_shape, [&](const auto& idx_w) {
                    // Skip elements that belong to the dilated area
                    for(size_t axis = 0; axis < idx_w.size(); ++axis)
                    {
                        if(idx_w[axis] % dilations[axis] != 0)
                        {
                            pool_size -= 1;
                            return;
                        }
                    }

                    // the coordinates of this element
                    auto idx = idx_o;
@@ -390,7 +411,15 @@ struct pooling
                        // this is a padding element. Padding locations
                        // don't contribute to average or max pooling total but can play in
                        // lpnorm pooling.
                        if(mode == pooling_mode::lpnorm)
                        {
                            output_val = op(output_val, op.template init<Type>());
                        }
                        if(mode == pooling_mode::average)
                        {
                            // Ignore padding
                            pool_size -= 1;
                        }
                    }
                });
                output[i] = Type(op.final(output_val, pool_size));
...
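The dilation arithmetic is compact enough to check by hand: a kernel of `dim` taps with spacing `dilation` spans `1 + dilation * (dim - 1)` input positions, and that dilated length replaces the raw kernel length in the output-size formula. A small sketch, assuming stride 1 and no padding:

```cpp
#include <cstddef>
#include <iostream>

std::size_t dilate_dim(std::size_t dim, std::size_t dilation)
{
    return 1 + dilation * (dim - 1);
}

int main()
{
    // A length-3 kernel with dilation 2 covers 5 input positions.
    std::cout << dilate_dim(3, 2) << "\n"; // 5

    // Output length for input 10, kernel 3, dilation 2, stride 1, no padding:
    // (input - dilated_length) / stride + 1 = (10 - 5) / 1 + 1 = 6
    std::size_t input = 10, stride = 1;
    std::size_t dim_size = input - dilate_dim(3, 2);
    std::cout << dim_size / stride + 1 << "\n"; // 6
}
```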
@@ -22,6 +22,12 @@
 * THE SOFTWARE.
 */

/**
 * Parent struct for prefix scan ops. A prefix scan is a mathematical entity useful
 * in parallelizing various computations. Given a list of numbers, a prefix scan
 * op returns an equal-size list of running totals of the values. Other operations
 * besides addition can be supported by child ops.
 */
#ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
...
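For concreteness, the running-total behavior described above is what the standard library calls an inclusive prefix sum; a sketch with `std::partial_sum`:

```cpp
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    std::vector<int> in = {1, 2, 3, 4};
    std::vector<int> out(in.size());
    // Inclusive prefix sum: each element is the running total so far.
    std::partial_sum(in.begin(), in.end(), out.begin());
    for(auto v : out)
        std::cout << v << " "; // 1 3 6 10
    std::cout << "\n";
}
```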
@@ -27,6 +27,7 @@
#include <migraphx/op/common.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/config.hpp>
#include <migraphx/convolution.hpp>
#include <migraphx/value.hpp>
@@ -87,11 +88,13 @@ struct quant_convolution
        }

        // input and weights must be int8 or fp8e4m3fnuz; int8 outputs int32 and fp8 outputs float
        std::set<migraphx::shape::type_t> supported_types = {shape::int8_type,
                                                             shape::fp8e4m3fnuz_type};
        if(not contains(supported_types, t))
        {
            MIGRAPHX_THROW("QUANT_CONVOLUTION: only accept input and weights of type int8_t or "
                           "fp8e4m3fnuz_type");
        }

        std::vector<size_t> output_lens{input.lens()[0], weights.lens()[0]};
        auto padding_size = padding.size();
@@ -107,8 +110,11 @@ struct quant_convolution
                        stride[i] +
                    1)));
        }
        if(t == shape::int8_type)
        {
            return inputs[0].with_lens(shape::int32_type, output_lens);
        } // else fp8 conv
        return inputs[0].with_lens(shape::float_type, output_lens);
    }

    size_t kdims() const
...
@@ -44,9 +44,11 @@ struct quant_dot
        const shape& a = inputs.at(0);
        const shape& b = inputs.at(1);
        auto t         = a.type();
        std::set<migraphx::shape::type_t> supported_types = {shape::int8_type,
                                                             shape::fp8e4m3fnuz_type};
        if(not contains(supported_types, t))
        {
            MIGRAPHX_THROW("QUANT_DOT: only support data type int8_t and fp8e4m3fnuz_type");
        }

        if(not std::all_of(
@@ -73,6 +75,10 @@ struct quant_dot
        auto out_lens   = a.lens();
        out_lens[dim_1] = b.lens()[dim_1];
        if(t == shape::fp8e4m3fnuz_type)
        {
            return {shape::float_type, out_lens};
        } // else int8 gemm
        return {shape::int32_type, out_lens};
    }
};
...
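Both quantized ops now share one output-type rule: int8 inputs accumulate into int32 and fp8e4m3fnuz inputs into float. A tiny sketch of that mapping (the enum is an illustrative stand-in for `migraphx::shape::type_t`):

```cpp
#include <iostream>
#include <stdexcept>

// Illustrative stand-in for migraphx::shape::type_t.
enum class type_t { int8_type, fp8e4m3fnuz_type, int32_type, float_type };

// Mirrors the branches in quant_convolution/quant_dot compute_shape().
type_t quant_output_type(type_t input)
{
    if(input == type_t::int8_type)
        return type_t::int32_type; // int8 gemm/conv accumulates in int32
    if(input == type_t::fp8e4m3fnuz_type)
        return type_t::float_type; // fp8 gemm/conv accumulates in float
    throw std::runtime_error("unsupported quantized input type");
}

int main()
{
    std::cout << (quant_output_type(type_t::int8_type) == type_t::int32_type) << "\n";      // 1
    std::cout << (quant_output_type(type_t::fp8e4m3fnuz_type) == type_t::float_type) << "\n"; // 1
}
```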
@@ -30,11 +30,11 @@
#include <migraphx/par_for.hpp>
#include <migraphx/value.hpp>
#include <cmath>
#include <fenv.h>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct quantizelinear
{
    std::string name() const { return "quantizelinear"; }
@@ -71,26 +71,26 @@ struct quantizelinear
        {
            y_zero_point = args.at(2);
        }

        argument result{output_shape};
        auto rounding_mode = fegetround();
        fesetround(FE_TONEAREST);
        visit_all(result, y_zero_point)([&](auto output, auto zero_pts) {
            visit_all(x, y_scale)([&](auto input, auto scales) {
                using quant_type = typename decltype(output)::value_type;
                auto min_value   = std::numeric_limits<quant_type>::min();
                auto max_value   = std::numeric_limits<quant_type>::max();
                par_for(output_shape.elements(), [&](auto i) {
                    double quantized = static_cast<double>(std::nearbyint(input[i] / scales[i])) +
                                       static_cast<double>(zero_pts[i]);
                    output[i] = std::max(static_cast<double>(min_value),
                                         std::min(static_cast<double>(max_value), quantized));
                });
            });
        });
        fesetround(rounding_mode);
        return result;
    }
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...
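Numerically, the new path computes `clamp(nearbyint(x / scale) + zero_point, min, max)` in double for the target quant type. A standalone sketch for int8 output, with illustrative values:

```cpp
#include <algorithm>
#include <cfenv>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>

int8_t quantize(float x, float scale, int8_t zero_point)
{
    auto min_value = static_cast<double>(std::numeric_limits<int8_t>::min());
    auto max_value = static_cast<double>(std::numeric_limits<int8_t>::max());
    double q       = std::nearbyint(x / scale) + zero_point;
    // Clamp into the representable range of the quant type.
    return static_cast<int8_t>(std::max(min_value, std::min(max_value, q)));
}

int main()
{
    auto saved = std::fegetround();
    std::fesetround(FE_TONEAREST); // same rounding mode the op now pins
    std::cout << int(quantize(2.5f, 0.1f, 0)) << "\n";    // 25
    std::cout << int(quantize(1000.0f, 0.1f, 0)) << "\n"; // clamped to 127
    std::fesetround(saved);
}
```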
@@ -65,11 +65,10 @@ struct random_uniform
        return inputs.at(1);
    }

    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
        argument result{dyn_out.computed_shape};

        uint64_t local_seed = args[0].at<uint64_t>(0);
        std::mt19937 gen(local_seed);
...
@@ -112,84 +112,6 @@ struct reshape
        return {s0.type(), output_dyn_dims};
    }

    shape static_compute_shape(std::vector<shape> inputs, std::size_t n_neg_dims) const
    {
        check_shapes{inputs, *this}.has(1);
@@ -219,14 +141,14 @@ struct reshape
            }
        }

        auto s = shape{inputs.front().type(), rdims};
        if(s.elements() != inputs.front().elements())
            MIGRAPHX_THROW("reshape: Wrong number of elements for reshape: reshape has " +
                           std::to_string(s.elements()) + " elements whereas the input has " +
                           std::to_string(inputs.front().elements()));

        return s;
    }

    shape compute_shape(std::vector<shape> inputs) const
...
@@ -110,22 +110,69 @@ struct reshape_lazy
        return it;
    }

    template <class OptionalPair>
    static OptionalPair try_merge_pairs(OptionalPair p2, OptionalPair p1)
    {
        if(not p1.has_value())
            return nullopt;
        if(not p2.has_value())
            return nullopt;
        auto dim1     = p1->first;
        auto dim2     = p2->first;
        auto stride1  = p1->second;
        auto stride2  = p2->second;
        auto elements = dim1 * dim2;
        // Transposed
        if(stride2 > stride1)
            return nullopt;
        // Broadcasted check to avoid division by zero
        if(stride2 == 0)
        {
            if(stride1 == 0)
                return {{elements, 0}};
            return nullopt;
        }
        if(stride1 % stride2 != 0)
            return nullopt;
        auto space = (stride1 * dim1 + stride2 * dim2 - stride1) / stride2;
        // Nonpacked
        if(space != elements)
            return nullopt;
        return {{elements, stride2}};
    }

    template <class DimIterator, class StrideIterator>
    static optional<std::size_t> merge_strides(DimIterator dim_start,
                                               DimIterator dim_last,
                                               StrideIterator stride_start,
                                               StrideIterator stride_last)
    {
        if(dim_start == dim_last)
            return nullopt;
        (void)stride_start; // Is only used in the assert
        assert(std::distance(dim_start, dim_last) == std::distance(stride_start, stride_last));
        auto make_pair_optional = [&](auto dim, auto stride) {
            return std::make_optional(std::make_pair(dim, stride));
        };
        auto dim_stride_pair =
            std::inner_product(std::make_reverse_iterator(dim_last - 1),
                               std::make_reverse_iterator(dim_start),
                               std::make_reverse_iterator(stride_last - 1),
                               make_pair_optional(*std::prev(dim_last), *std::prev(stride_last)),
                               MIGRAPHX_LIFT(try_merge_pairs),
                               make_pair_optional);
        if(not dim_stride_pair.has_value())
            return nullopt;
        return dim_stride_pair->second;
    }

    template <class DimIterator, class StrideIterator>
    static auto can_strides_merge(DimIterator dim_start,
                                  DimIterator dim_last,
                                  StrideIterator stride_start,
                                  StrideIterator stride_last)
    {
        return merge_strides(dim_start, dim_last, stride_start, stride_last).has_value();
    }

    // This will attempt to alias the dimensions of the input shape to the lens of
...
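To see what `try_merge_pairs` accepts, fold two adjacent dimensions right-to-left: dims `{2, 3}` with strides `{3, 1}` cover a contiguous region (`space = (3*2 + 1*3 - 3)/1 = 6`, which equals the `2*3` elements), so they merge into one dimension of 6 with stride 1. A hand-rolled sketch of that packing test, ignoring the broadcast special case:

```cpp
#include <cstddef>
#include <iostream>
#include <optional>
#include <utility>

using dim_stride = std::pair<std::size_t, std::size_t>;

// Same packing test as try_merge_pairs: outer (dim1, stride1) and inner
// (dim2, stride2) merge only if together they cover a contiguous region.
std::optional<dim_stride> merge(dim_stride outer, dim_stride inner)
{
    auto [dim1, stride1] = outer;
    auto [dim2, stride2] = inner;
    auto elements        = dim1 * dim2;
    if(stride2 > stride1 or stride2 == 0)
        return std::nullopt; // transposed, or broadcast inner dim (simplified here)
    if(stride1 % stride2 != 0)
        return std::nullopt;
    auto space = (stride1 * dim1 + stride2 * dim2 - stride1) / stride2;
    if(space != elements)
        return std::nullopt; // non-packed
    return dim_stride{elements, stride2};
}

int main()
{
    auto ok = merge({2, 3}, {3, 1}); // contiguous: merges to {6, 1}
    auto no = merge({2, 4}, {3, 1}); // stride 4 leaves a gap: no merge
    std::cout << ok.has_value() << " " << no.has_value() << "\n"; // 1 0
}
```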
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,25 +21,26 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef MIGRAPHX_GUARD_OPERATORS_SCATTERND_MAX_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCATTERND_MAX_HPP

#include <migraphx/op/scatternd_op.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct scatternd_max : scatternd_op<scatternd_max>
{
    scatternd_max() {}

    auto reduction() const
    {
        return [](auto& x, const auto& y) { x = std::max(x, y); };
    }
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,23 +21,26 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef MIGRAPHX_GUARD_OPERATORS_SCATTERND_MIN_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCATTERND_MIN_HPP

#include <migraphx/op/scatternd_op.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct scatternd_min : scatternd_op<scatternd_min>
{
    scatternd_min() {}

    auto reduction() const
    {
        return [](auto& x, const auto& y) { x = std::min(x, y); };
    }
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...
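Each scatternd variant differs only in its `reduction()` functor, which mutates the output element in place. A minimal sketch of driving updates through a max-style reduction on a flat buffer:

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

int main()
{
    std::vector<int> data    = {0, 5, 0, 9};
    std::vector<int> indices = {1, 3};
    std::vector<int> updates = {7, 2};

    // scatternd_max-style reduction: keep the larger of old and new values.
    auto reduction = [](auto& x, const auto& y) { x = std::max(x, y); };

    for(std::size_t i = 0; i < indices.size(); ++i)
        reduction(data[indices[i]], updates[i]);

    for(auto v : data)
        std::cout << v << " "; // 0 7 0 9
    std::cout << "\n";
}
```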
@@ -121,7 +121,8 @@ struct scatternd_op : op_name<Derived>
                auto k = indices_shape.lens().back();
                auto q = indices_shape.ndim();
                auto r = dyn_out.computed_shape.ndim();
                for(auto i = 0u; i < updates_shape.elements(); ++i)
                {
                    auto updates_idx = updates_std.multi(i);
                    std::vector<std::size_t> indices_idx(q, 0);
                    std::copy(
@@ -135,7 +136,7 @@ struct scatternd_op : op_name<Derived>
                    std::copy(updates_idx.begin() + q - 1, updates_idx.end(), out_idx.begin() + k);

                    self.reduction()(output[dyn_out.computed_shape.index(out_idx)], updates[i]);
                }
            });
        });
...
@@ -31,6 +31,7 @@
#include <migraphx/dyn_output.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <migraphx/normalize_attributes.hpp>
#include <array>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
@@ -38,6 +39,18 @@ namespace op {

/**
 * Slice operator that accepts variable axes, starts and ends.
 * All of `starts`, `ends`, and `axes` must be supplied by either
 * their attribute or an input (but not both).
 *
 * Valid calls:
 * slice(input); axes, starts, ends set
 * slice(input, starts); axes, ends set
 * slice(input, ends); starts, axes set
 * slice(input, axes); starts, ends set
 * slice(input, starts, ends); axes set
 * slice(input, starts, axes); ends set
 * slice(input, ends, axes); starts set
 * slice(input, starts, ends, axes); none set
 *
 * Attributes:
 * axes: constant axes to slice over (optional)
@@ -46,8 +59,8 @@ namespace op {
 *
 * Parameters:
 * data: the input tensor to slice (dynamic or static shape)
 * input_starts: starting indices of slice (optional, static shape)
 * input_ends: ending indices of slice (optional, static shape)
 * input_axes: axes to slice over (optional, static shape)
 */
struct slice
@@ -56,6 +69,18 @@ struct slice
    std::vector<int64_t> starts{};
    std::vector<int64_t> ends{};

    /**
     * Named arrays for the set attribute possibilities.
     */
    static constexpr std::array<bool, 3> all_set     = {true, true, true};
    static constexpr std::array<bool, 3> ends_axes   = {false, true, true};
    static constexpr std::array<bool, 3> starts_axes = {true, false, true};
    static constexpr std::array<bool, 3> starts_ends = {true, true, false};
    static constexpr std::array<bool, 3> axes_only   = {false, false, true};
    static constexpr std::array<bool, 3> ends_only   = {false, true, false};
    static constexpr std::array<bool, 3> starts_only = {true, false, false};
    static constexpr std::array<bool, 3> none_set    = {false, false, false};

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
@@ -63,24 +88,26 @@ struct slice
    }
    /**
     * Ensure that attribute axes is within limits.
     * Will attempt to normalize starts and ends, but will use the dynamic_dimension.max
     * values for dynamic shapes. This makes it so you have to renormalize for
     * non-fixed dynamic_dimensions.
     */
    value attributes() const
    {
        value normalize_axes     = value::object{};
        normalize_axes["axes"]   = value::array{normalize_attribute::include_min};
        normalize_axes["starts"] = value::array{normalize_attribute::clip_max,
                                                normalize_attribute::clip_min,
                                                normalize_attribute::include_max,
                                                normalize_attribute::use_len,
                                                normalize_attribute::include_min};
        normalize_axes["ends"]   = value::array{normalize_attribute::clip_max,
                                                normalize_attribute::clip_min,
                                                normalize_attribute::include_max,
                                                normalize_attribute::use_len,
                                                normalize_attribute::include_min};
        return {{"normalize_axes", normalize_axes}};
    }

    std::string name() const { return "slice"; }
@@ -88,7 +115,7 @@ struct slice
    /**
     * Computes the slice output shape dimensions for given starts, ends, and axes.
     * Templated to also handle tensor views.
     * Possibly different type between [in_starts, in_ends] and [in_axes] if in_axes is this
     * object's axes attribute. Assumes in_starts and in_ends are normalized; in_axes are valid.
     */
    template <class A, class B>
@@ -104,62 +131,160 @@ struct slice
        return new_lens;
    }
    /// Get the attributes that are non-empty
    std::array<bool, 3> get_set_attributes() const
    {
        std::array<std::vector<int64_t>, 3> attrs = {this->starts, this->ends, this->axes};
        std::array<bool, 3> bool_vec;
        std::transform(
            attrs.cbegin(), attrs.cend(), bool_vec.begin(), [](auto a) { return not a.empty(); });
        return bool_vec;
    }

    /// Helper function for normalize_compute_shape()
    shape compute_two_or_more(std::vector<shape> inputs) const
    {
        auto input_shape    = inputs[0];
        auto set_attributes = get_set_attributes();
        // check that inputs [1, end) are all 1D, have the same
        // dimension, and are static
        check_shapes{inputs.begin() + 1,
                     inputs.end(),
                     std::string("SLICE: inputs (starts, ends, and input_axes)"),
                     false}
            .only_dims(1)
            .same_dims();
        auto dds = input_shape.to_dynamic().dyn_dims();
        if(inputs.size() == 2)
        {
            if(set_attributes == ends_axes)
            {
                // attr ends and axes set; inputs are (data, input_starts)
                if(inputs[1].lens().at(0) != axes.size())
                {
                    MIGRAPHX_THROW("SLICE: 2 input and attributes mismatch");
                }
                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
                    dds.at(axis) = {0, dds.at(axis).max};
                });
            }
            else if(set_attributes == starts_axes)
            {
                // attr starts and axes set; inputs are (data, input_ends)
                if(inputs[1].lens().at(0) != axes.size())
                {
                    MIGRAPHX_THROW("SLICE: 2 input and attributes mismatch");
                }
                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
                    dds.at(axis) = {0, dds.at(axis).max};
                });
            }
            else if(set_attributes == starts_ends)
            {
                // attr starts and ends set; inputs are (data, input_axes)
                if(inputs[1].lens().at(0) != starts.size())
                {
                    MIGRAPHX_THROW("SLICE: 2 input and attributes mismatch");
                }
                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
                    return shape::dynamic_dimension{0, dd.max};
                });
            }
            else
            {
                MIGRAPHX_THROW("SLICE: Invalid 2 input and attributes configuration");
            }
        }
        else if(inputs.size() == 3)
        {
            if(set_attributes == axes_only)
            {
                // attr axes set; inputs are (data, input_starts, input_ends)
                if(inputs[1].lens().at(0) != axes.size())
                {
                    MIGRAPHX_THROW("SLICE: 3 input and attributes mismatch");
                }
                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
                    dds.at(axis) = {0, dds.at(axis).max};
                });
            }
            else if(set_attributes == ends_only)
            {
                // attr ends set; inputs are (data, input_starts, input_axes)
                if(inputs[1].lens().at(0) != ends.size())
                {
                    MIGRAPHX_THROW("SLICE: 3 input and attributes mismatch");
                }
                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
                    return shape::dynamic_dimension{0, dd.max};
                });
            }
            else if(set_attributes == starts_only)
            {
                // attr starts set; inputs are (data, input_ends, input_axes)
                if(inputs[1].lens().at(0) != starts.size())
                {
                    MIGRAPHX_THROW("SLICE: 3 input and attributes mismatch");
                }
                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
                    return shape::dynamic_dimension{0, dd.max};
                });
            }
            else
            {
                MIGRAPHX_THROW("SLICE: Invalid 3 input and attributes configuration");
            }
        }
        else
        {
            // all 4 inputs (data, input_starts, input_ends, input_axes)
            std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
                return shape::dynamic_dimension{0, dd.max};
            });
        }
        return shape{input_shape.type(), dds};
    }
    // uses the normalize_axes flag to normalize axes, starts, and ends
    shape normalize_compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this, true}.has(1, 2, 3, 4);
        if(inputs.size() == 1)
        {
            auto input_shape    = inputs[0];
            auto set_attributes = get_set_attributes();
            if(set_attributes != all_set)
            {
                MIGRAPHX_THROW("SLICE 1_arg: Invalid 1 input and attributes configuration");
            }
            // NOTE: make sure to update how normalization works here if this type of slicing is
            // changed to be allowed
            if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
                   return not input_shape.dyn_dims()[axis].is_fixed();
               }))
            {
                MIGRAPHX_THROW(
                    "SLICE 1_arg: slicing is not allowed on non-fixed dynamic input axis");
            }
            if(input_shape.dynamic())
            {
                return shape{
                    input_shape.type(),
                    lens_calc(input_shape.min_lens(), this->starts, this->ends, this->axes),
                    lens_calc(input_shape.max_lens(), this->starts, this->ends, this->axes),
                    {}};
            }
            else
            {
                return shape{input_shape.type(),
                             lens_calc(input_shape.lens(), this->starts, this->ends, this->axes),
                             input_shape.strides()};
            }
        }
        else
        {
            return compute_two_or_more(inputs);
        }
    }
@@ -194,14 +319,14 @@ struct slice
    /**
     * Calculates the starting offset for the sliced tensor (for aliasing).
     * Used for 2-4 inputs to `slice`.
     *
     * \param s static input shape
     * \param input_starts starting indices of slice
     * \param ax_vec axes to slice on
     */
    template <class T>
    auto compute_offset(const shape& s, const T& input_starts, const T& ax_vec) const
    {
        auto ret = 0;
        for(std::size_t i = 0; i < ax_vec.size(); ++i)
@@ -212,106 +337,168 @@ struct slice
        return ret * s.type_size();
    }
    /**
     * If given, normalize the inputs. Otherwise get from operator attributes.
     * Return the values in a map.
     *
     * Parameters
     * input_shape: static shape of the input
     * input_starts: optional
     * input_ends: optional
     * input_axes: optional
     */
    std::unordered_map<std::string, std::vector<int64_t>>
    normalize_starts_ends_axes(shape input_shape,
                               const optional<std::vector<int64_t>>& input_starts,
                               const optional<std::vector<int64_t>>& input_ends,
                               const optional<std::vector<int64_t>>& input_axes) const
    {
        auto axes_attrs = this->attributes().at("normalize_axes");
        std::vector<int64_t> norm_starts;
        std::vector<int64_t> norm_ends;
        std::vector<int64_t> norm_axes;
        if(input_axes)
        {
            norm_axes = normalize_axes(input_axes.value(),
                                       input_shape,
                                       axes_attrs.at("axes"),
                                       "Slice variable input_axes");
        }
        else
        {
            norm_axes = this->axes;
        }
        if(input_starts)
        {
            norm_starts = normalize_indices(input_starts.value(),
                                            norm_axes,
                                            input_shape,
                                            axes_attrs.at("starts"),
                                            "Slice variable input_starts");
        }
        else
        {
            norm_starts = this->starts;
        }
        if(input_ends)
        {
            norm_ends = normalize_indices(input_ends.value(),
                                          norm_axes,
                                          input_shape,
                                          axes_attrs.at("ends"),
                                          "Slice variable input_ends");
        }
        else
        {
            norm_ends = this->ends;
        }
        return {{"norm_starts", norm_starts}, {"norm_ends", norm_ends}, {"norm_axes", norm_axes}};
    }
    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
        auto input       = args[0];
        auto input_shape = input.get_shape();
        if(args.size() == 1)
        {
            std::size_t offset = compute_offset(input_shape);
            return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
        }
        else
        {
            // Note that we re-normalize both the attributes and inputs because of the non-fixed
            // dynamic input shape case. It's possible to only re-normalize if slicing over
            // non-fixed dynamic_dimensions.
            auto set_attributes = get_set_attributes();
            std::unordered_map<std::string, std::vector<int64_t>> norm_inputs;
            if(set_attributes == ends_axes)
            {
                // attr ends and axes set; inputs are (data, input_starts)
                args[1].visit([&](auto input_starts) {
                    norm_inputs =
                        normalize_starts_ends_axes(input_shape,
                                                   input_starts.template to_vector<int64_t>(),
                                                   this->ends,
                                                   this->axes);
                });
            }
            else if(set_attributes == starts_axes)
            {
                // attr starts and axes set; inputs are (data, input_ends)
                args[1].visit([&](auto input_ends) {
                    norm_inputs =
                        normalize_starts_ends_axes(input_shape,
                                                   this->starts,
                                                   input_ends.template to_vector<int64_t>(),
                                                   this->axes);
                });
            }
            else if(set_attributes == starts_ends)
            {
                // attr starts and ends set; inputs are (data, input_axes)
                args[1].visit([&](auto input_axes) {
                    norm_inputs =
                        normalize_starts_ends_axes(input_shape,
                                                   this->starts,
                                                   this->ends,
                                                   input_axes.template to_vector<int64_t>());
                });
            }
            else if(set_attributes == axes_only)
            {
                // attr axes set; inputs are (data, input_starts, input_ends)
                visit_all(args[1], args[2])([&](auto input_starts, auto input_ends) {
                    norm_inputs =
                        normalize_starts_ends_axes(input_shape,
                                                   input_starts.template to_vector<int64_t>(),
                                                   input_ends.template to_vector<int64_t>(),
                                                   this->axes);
                });
            }
            else if(set_attributes == ends_only)
            {
                // attr ends set; inputs are (data, input_starts, input_axes)
                visit_all(args[1], args[2])([&](auto input_starts, auto input_axes) {
                    norm_inputs =
                        normalize_starts_ends_axes(input_shape,
                                                   input_starts.template to_vector<int64_t>(),
                                                   this->ends,
                                                   input_axes.template to_vector<int64_t>());
                });
            }
            else if(set_attributes == starts_only)
            {
                // attr starts set; inputs are (data, input_ends, input_axes)
                visit_all(args[1], args[2])([&](auto input_ends, auto input_axes) {
                    norm_inputs =
                        normalize_starts_ends_axes(input_shape,
                                                   this->starts,
                                                   input_ends.template to_vector<int64_t>(),
                                                   input_axes.template to_vector<int64_t>());
                });
            }
            else
            {
                // no attr set, all inputs
                visit_all(args[1], args[2], args[3])(
                    [&](auto input_starts, auto input_ends, auto input_axes) {
                        norm_inputs =
                            normalize_starts_ends_axes(input_shape,
                                                       input_starts.template to_vector<int64_t>(),
                                                       input_ends.template to_vector<int64_t>(),
                                                       input_axes.template to_vector<int64_t>());
                    });
            }
            auto offset = compute_offset(
                input_shape, norm_inputs.at("norm_starts"), norm_inputs.at("norm_axes"));
            shape calc_shape = shape{input_shape.type(),
                                     lens_calc(input_shape.lens(),
                                               norm_inputs.at("norm_starts"),
                                               norm_inputs.at("norm_ends"),
                                               norm_inputs.at("norm_axes")),
                                     input_shape.strides()};
            return {calc_shape, [=] { return input.data() + offset; }};
        }
    }

    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
...
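The aliasing math in `compute` reduces to: the result keeps the input's strides, `starts` shift the data pointer by a byte offset, and `ends - starts` shrink the lens on the sliced axes. A worked sketch on a static 4x3 float tensor with `starts={1}`, `ends={3}`, `axes={0}` (mirroring `compute_offset` and `lens_calc` rather than calling them):

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    // Row-major 4x3 tensor: lens {4, 3}, strides {3, 1}.
    std::vector<std::size_t> lens    = {4, 3};
    std::vector<std::size_t> strides = {3, 1};
    std::size_t type_size            = sizeof(float);

    // slice(starts={1}, ends={3}, axes={0})
    std::vector<std::int64_t> starts = {1}, ends = {3}, axes = {0};

    // Offset: sum of start * stride over the sliced axes, in bytes.
    std::size_t offset = 0;
    for(std::size_t i = 0; i < axes.size(); ++i)
        offset += starts[i] * strides[axes[i]];
    offset *= type_size;

    // New lens: ends - starts on the sliced axes, other dims unchanged.
    auto new_lens = lens;
    for(std::size_t i = 0; i < axes.size(); ++i)
        new_lens[axes[i]] = ends[i] - starts[i];

    std::cout << "offset(bytes)=" << offset << " lens={" << new_lens[0] << ","
              << new_lens[1] << "}\n"; // offset(bytes)=12 lens={2,3}
}
```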
@@ -31,6 +31,7 @@
#include <migraphx/stringutils.hpp>
#include <migraphx/value.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/par.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
@@ -84,10 +85,10 @@ struct unary : op_name<Derived>
        argument result{dyn_out.computed_shape};
        result.visit([&](auto output) {
            args[0].visit([&](auto input) {
                par_transform(input.begin(),
                              input.end(),
                              output.begin(),
                              static_cast<const Derived&>(*this).apply());
            });
        });
        return result;
...