Unverified Commit 70d9faf7 authored by Chris Austen, committed by GitHub

Merge branch 'develop' into mi200

parents a56c531c a60bdb67
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -37,20 +37,22 @@ namespace op {
  * Static allocate:
  * No inputs: `allocate()`
  * `this.s` attribute set to the static output shape of the buffer.
+ * `this.s` attribute can be set to a dynamic output shape; however this will allocate the maximum
+ * buffer size for that case
  *
  * Dynamic allocate:
  * One input: `allocate(output_dims)`
  * `output_dims` are the output buffer dimensions and has a static shape.
- * Either `this.s` or `this.buf_type` must be set to calculate the dynamic output shape at compute
- * time. If `this.buf_type` is set, the compute_shape() of allocate at compile time will have
- * dynamic_dimensions from {0, max_int} with rank = output_dims.ndim(). If `this.s` is set then the
- * compute_shape() will output `this.s`; `this.s` should be a dynamic shape.
+ * Either `this.s` or `this.buf_type` (but not both) must be set to calculate the dynamic output
+ * shape at compute time. If `this.buf_type` is set, the compute_shape() of allocate at compile time
+ * will have dynamic_dimensions from {0, max_int} with rank = output_dims.ndim(). If `this.s` is set
+ * then the compute_shape() will output `this.s`; `this.s` should be a dynamic shape.
  */
 struct allocate
 {
-    shape s{};
+    optional<shape> s;
     // for dynamic allocate to set the buffer type
-    shape::type_t buf_type = shape::half_type;
+    optional<shape::type_t> buf_type;
     template <class Self, class F>
     static auto reflect(Self& self, F f)
@@ -62,8 +64,12 @@ struct allocate
     shape compute_shape(const std::vector<shape>& inputs) const
     {
-        if(s != shape())
+        if(s.has_value())
         {
+            if(buf_type.has_value())
+            {
+                MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both set");
+            }
             if(inputs.size() == 1)
             {
                 migraphx::check_shapes{inputs, *this, false}.only_dims(1);
@@ -72,16 +78,20 @@ struct allocate
             {
                 migraphx::check_shapes{inputs, *this, false}.has(0);
             }
-            return s;
+            return s.value();
         }
         else
         {
+            if(not buf_type.has_value())
+            {
+                MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both not set");
+            }
             migraphx::check_shapes{inputs, *this, false}.has(1).only_dims(1);
             const auto& out_dims = inputs.at(0);
             std::size_t max_val = std::numeric_limits<std::size_t>::max();
             std::vector<shape::dynamic_dimension> dyn_dims(out_dims.lens().at(0),
                                                            shape::dynamic_dimension{0, max_val});
-            return {buf_type, dyn_dims};
+            return {buf_type.value(), dyn_dims};
         }
     }
     argument compute(const shape& output_shape, const std::vector<argument>& args) const
@@ -94,7 +104,11 @@ struct allocate
         {
             std::vector<std::size_t> output_dims(output_shape.ndim());
             args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
-            return argument{shape{buf_type, output_dims}};
+            if(s)
+            {
+                return argument{shape{s->type(), output_dims}};
+            }
+            return argument{shape{buf_type.value(), output_dims}};
         }
     }
 };
...
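The reworked allocate attributes make `s` and `buf_type` mutually exclusive optionals: exactly one must be set for compute_shape() to succeed. Below is a minimal standalone sketch of that "exactly one of two optionals" validation pattern, using only the standard library and illustrative stand-in types rather than the MIGraphX `shape` API (all names here are hypothetical):

    #include <cstddef>
    #include <iostream>
    #include <limits>
    #include <optional>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // Illustrative stand-in for migraphx::shape.
    struct toy_shape
    {
        std::string type;
        std::vector<std::size_t> dims;
    };

    // Mirrors the validation added to allocate::compute_shape(): a static shape `s`
    // and a dynamic `buf_type` are mutually exclusive, and one of them must be set.
    toy_shape toy_allocate_shape(const std::optional<toy_shape>& s,
                                 const std::optional<std::string>& buf_type,
                                 std::size_t output_rank)
    {
        if(s.has_value())
        {
            if(buf_type.has_value())
                throw std::runtime_error("allocate: shape and buf_type both set");
            return s.value();
        }
        if(not buf_type.has_value())
            throw std::runtime_error("allocate: neither shape nor buf_type set");
        // Dynamic case: dimensions are unknown until runtime, so report maximal bounds.
        std::size_t max_val = std::numeric_limits<std::size_t>::max();
        return toy_shape{buf_type.value(), std::vector<std::size_t>(output_rank, max_val)};
    }

    int main()
    {
        auto dyn = toy_allocate_shape(std::nullopt, std::string{"half"}, 2);
        std::cout << dyn.type << " rank=" << dyn.dims.size() << "\n"; // half rank=2
    }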
@@ -29,6 +29,7 @@
 #include <migraphx/argument.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/dyn_output.hpp>
+#include <migraphx/par.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -95,11 +96,11 @@ struct binary : op_name<Derived>
     {
         argument result{dyn_out.computed_shape};
         visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
-            std::transform(input1.begin(),
+            par_transform(input1.begin(),
                           input1.end(),
                           input2.begin(),
                           output.begin(),
                           static_cast<const Derived&>(*this).apply());
        });
        return result;
    }
...
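`par_transform` comes from the newly included `par.hpp` header. As an assumption about its intent rather than its actual implementation, the standard-library equivalent of a parallel elementwise binary transform looks like this:

    #include <algorithm>
    #include <execution>
    #include <functional>
    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<float> a(1 << 20, 1.5f);
        std::vector<float> b(1 << 20, 2.0f);
        std::vector<float> out(a.size());

        // Elementwise binary op applied across the whole buffer, possibly in
        // parallel; this is the effect a par_transform-style helper would provide.
        std::transform(std::execution::par,
                       a.begin(), a.end(), b.begin(), out.begin(), std::plus<float>{});

        std::cout << out.front() << "\n"; // 3.5
    }

With libstdc++ the parallel execution policies are typically backed by TBB, so linking against it may be required for the policy to actually run in parallel.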
@@ -72,8 +72,8 @@ struct dequantizelinear
         visit_all(x, x_zero_point)([&](auto input, auto zero_pts) {
             visit_all(result, x_scale)([&](auto output, auto scales) {
                 par_for(output_shape.elements(), [&](auto i) {
-                    output[i] = static_cast<double>(static_cast<int64_t>(input[i]) -
-                                                    static_cast<int64_t>(zero_pts[i])) *
+                    output[i] = static_cast<double>(static_cast<double>(input[i]) -
+                                                    static_cast<double>(zero_pts[i])) *
                                 scales[i];
                 });
             });
...
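The subtraction now happens in double rather than int64_t, which also covers non-integer quantized inputs; the underlying formula is output = (x - zero_point) * scale. A small worked example with arbitrary values:

    #include <cstdint>
    #include <iostream>

    int main()
    {
        // Quantized int8 value, its zero point, and the per-tensor scale.
        int8_t x          = -56;
        int8_t zero_point = -128;
        float scale       = 0.05f;

        // (x - zero_point) * scale, with the subtraction done in double as in the diff.
        double dequantized = (static_cast<double>(x) - static_cast<double>(zero_point)) * scale;
        std::cout << dequantized << "\n"; // 3.6
    }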
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -21,31 +21,32 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
-#define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
-#include <migraphx/argument.hpp>
+#ifndef MIGRAPHX_GUARD_OPERATORS_ISINF_HPP
+#define MIGRAPHX_GUARD_OPERATORS_ISINF_HPP
+#include <migraphx/op/unary.hpp>
 #include <migraphx/config.hpp>
+#include <utility>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-struct context;
-struct miopen_int8_conv_pack
+namespace op {
+struct isinf : unary<isinf>
 {
-    std::string name() const { return "gpu::int8_conv_pack"; }
-    shape compute_shape(const std::vector<shape>& inputs) const;
-    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const;
-    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    auto apply() const
+    {
+        return [&](auto x) { return std::isinf(static_cast<double>(x)); };
+    }
+    std::string name() const { return "isinf"; }
+    shape compute_shape(std::vector<shape> inputs) const
     {
-        return shapes.size() - 1;
+        return unary<isinf>::compute_shape(std::move(inputs)).with_type(shape::bool_type);
     }
 };
-} // namespace gpu
+} // namespace op
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
...
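The new `isinf` op applies `std::isinf` elementwise and forces the output shape to `bool_type`. A standalone sketch of that elementwise behaviour using only the standard library (not the MIGraphX operator itself):

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <limits>
    #include <vector>

    int main()
    {
        std::vector<float> in = {1.0f,
                                 std::numeric_limits<float>::infinity(),
                                 -std::numeric_limits<float>::infinity(),
                                 std::numeric_limits<float>::quiet_NaN()};
        std::vector<bool> out(in.size());

        // Same predicate the op's apply() lambda uses, widened to double first.
        std::transform(in.begin(), in.end(), out.begin(), [](float x) {
            return std::isinf(static_cast<double>(x));
        });

        for(bool b : out)
            std::cout << b << ' '; // 0 1 1 0
        std::cout << '\n';
    }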
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -21,11 +21,52 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+/**
+ * Multinomial or categorical distribution. Performs a sampling of random input
+ * and returns a count of each category, or bucket. This does not require the
+ * standard multinomial distribution but instead takes a probability distribution,
+ * i.e. cumulative distribution function (CDF) as its first input.
+ *
+ * Inputs: args[0] - a tensor of probabilities for each category. Values are
+ *         cumulative density function totals as provided by operation
+ *         prefix_scan_sum. Values are cumulative probabilities (i.e. start with
+ *         any set of numbers > 0 and then apply prefix_scan_sum). Values do not
+ *         need to be normalized to sum to 1; this is done in runtime computation.
+ *
+ *         This input has Rank 2. Dimension 0 is batch #, so that there can be
+ *         a different CDF for each iteration in the batch. The size of dimension
+ *         1 is the number of categories.
+ *
+ *         args[1] - a tensor of random numbers. The last dimension is the sample
+ *         size, i.e. the number of random samples in each iteration of the batch.
+ *         Nominally has two dimensions where the first dimension is batch size,
+ *         but any reshaping such that the total number of elements is
+ *         (batch_size * sample_size) is legal.
+ *
+ *         Values as created by a std::mt19937 like this:
+ *
+ *             size_t sample_size = 100000;
+ *             float seed = 0.0f;
+ *             std::mt19937 gen(seed);
+ *             std::uniform_real_distribution<> dis(0.0, 1.0);
+ *             std::vector<float> rand_samples(sample_size);
+ *             std::generate(rand_samples.begin(), rand_samples.end(),
+ *                           [&]() { return dis(gen); });
+ *
+ * Output: A 2D vector of category each input. Dimensions are (Input 1[first], Input 2[last]).
+ *
+ */
 #ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
 #define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
-#include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <migraphx/par_for.hpp>
 #include <migraphx/reflect.hpp>
 #include <random>
@@ -47,22 +88,35 @@ struct multinomial
     std::string name() const { return "multinomial"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(2).only_dims(2);
-        size_t sample_size = inputs.back().lens().back();
-        if(not contains({shape::int32_type, shape::int64_type}, dtype))
-            MIGRAPHX_THROW(
-                "Multinomial: Invalid output type. Valid types are int32_type and int64_type.");
-        return {dtype, {inputs.front().lens().front(), sample_size}};
+        check_shapes{inputs, *this, true}.has(2).only_dims(2);
+        if(inputs.back().ndim() < 1)
+            MIGRAPHX_THROW("Multinomial: Second input shape (sample) has no dimensions");
+        if(dtype == shape::bool_type)
+            MIGRAPHX_THROW("Multinomial: boolean output type invalid.");
+        // Output takes one dimension from each of the two input shapes. If they are both fixed,
+        // return a static shape
+        if((not inputs.front().dynamic()) or (inputs.front().dyn_dims().front().is_fixed()))
+        {
+            if((not inputs.back().dynamic()) or (inputs.back().dyn_dims().back().is_fixed()))
+            {
+                size_t batch = {inputs.front().max_lens().front()};
+                size_t sample_size{inputs.back().max_lens().back()};
+                return {dtype, {batch, sample_size}};
+            }
+        }
+        return {dtype,
                {inputs.front().to_dynamic().dyn_dims().front(),
                 inputs.back().to_dynamic().dyn_dims().back()}};
     }
-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
-        size_t batch_size = output_shape.lens().front();
+        argument result{dyn_out.computed_shape};
+        size_t batch_size = dyn_out.computed_shape.lens().front();
         size_t class_size = args[0].get_shape().lens().back();
-        size_t sample_size = output_shape.lens().back();
+        size_t sample_size = dyn_out.computed_shape.lens().back();
         visit_all(args[0], args[1])([&](auto cdf, auto dist) {
             result.visit([&](auto output) {
@@ -70,13 +124,16 @@ struct multinomial
                 auto idx = args[1].get_shape().multi(i);
                 auto cdf_begin = cdf.begin() + (idx[0] * class_size);
                 auto cdf_end = cdf_begin + class_size;
+                // std::upper_bound returns an iterator to the bucket the value belongs in,
+                // when normalized by the probability distribution dist
                 auto sample_iter =
                     std::upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
+                // convert iterator to an integer index
                 output[i] = std::distance(cdf_begin, sample_iter);
                 });
             });
         });
         return result;
     }
 };
...
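Taken together, the new doc comment and compute() describe the sampling scheme: the first input is an unnormalized per-batch CDF, each uniform draw is scaled by the CDF's last (total) value, and `std::upper_bound` picks the bucket index. A self-contained sketch of that scheme for a single batch (weights and seed chosen arbitrarily):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <numeric>
    #include <random>
    #include <vector>

    int main()
    {
        // Unnormalized category weights, turned into a running-total CDF
        // (the role prefix_scan_sum plays in the graph).
        std::vector<float> weights = {1.0f, 3.0f, 2.0f, 4.0f};
        std::vector<float> cdf(weights.size());
        std::partial_sum(weights.begin(), weights.end(), cdf.begin()); // {1, 4, 6, 10}

        std::mt19937 gen(0);
        std::uniform_real_distribution<float> dis(0.0f, 1.0f);

        size_t sample_size = 5;
        std::vector<int64_t> samples(sample_size);
        for(auto& s : samples)
        {
            // Scale the uniform draw by the CDF total, then find its bucket.
            float r   = dis(gen) * cdf.back();
            auto iter = std::upper_bound(cdf.begin(), cdf.end(), r);
            s         = std::distance(cdf.begin(), iter);
        }

        for(auto s : samples)
            std::cout << s << ' '; // five category indices in [0, 3]
        std::cout << '\n';
    }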
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -21,25 +21,29 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_GEMM_HPP
-#define MIGRAPHX_GUARD_RTGLIB_CPU_GEMM_HPP
-#include <migraphx/argument.hpp>
+#ifndef MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP
+#define MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP
+#include <migraphx/op/unary.hpp>
 #include <migraphx/config.hpp>
+#include <fenv.h>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-namespace ref {
-void migemm(
-    const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta);
-void migemm(const argument& c_arg,
-            const argument& a_arg,
-            const argument& b_arg,
-            int32_t alpha,
-            int32_t beta);
-} // namespace ref
+namespace op {
+struct nearbyint : unary<nearbyint>
+{
+    auto apply() const
+    {
+        return [](auto x) {
+            auto rounding_mode = fegetround();
+            fesetround(FE_TONEAREST);
+            // Round first, restore the caller's rounding mode, then return the result.
+            auto rounded = std::nearbyint(x);
+            fesetround(rounding_mode);
+            return rounded;
+        };
+    }
+};
+} // namespace op
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
...
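`std::nearbyint` honours the current floating-point rounding mode, which is why the lambda above pins the mode to FE_TONEAREST and restores the caller's mode afterwards. A minimal standalone sketch of that save/round/restore pattern:

    #include <cfenv>
    #include <cmath>
    #include <iostream>

    // Some toolchains additionally want:
    // #pragma STDC FENV_ACCESS ON

    double round_to_nearest_even(double x)
    {
        int saved = std::fegetround();  // remember the caller's rounding mode
        std::fesetround(FE_TONEAREST);  // round to nearest, ties to even
        double r = std::nearbyint(x);
        std::fesetround(saved);         // restore before returning
        return r;
    }

    int main()
    {
        std::cout << round_to_nearest_even(2.5) << ' '   // 2 (ties go to even)
                  << round_to_nearest_even(3.5) << '\n'; // 4
    }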
@@ -40,6 +40,8 @@ namespace op {
 * 2. use_rank (default) vs use_len:
 *      `use_rank` sets the max value/index of the attribute as the rank of lens.
 *      `use_lens` sets the max value/index as the corresponding value in lens at the axes index.
+*      Uses the dynamic_dimension.max value for dynamic shapes. Returns the original vector
+*      (no normalization) if any of dynamic_dimension[axes] are not fixed.
 * 3. `clip_min` vs. `not_clip_min` (default):
 *      Clip values less than the minimum to the minimum or not.
 * 4. `include_min` vs. `exclude_min` (default):
...
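The `use_rank`/`use_lens` options above concern how attribute values are normalized against a shape; the most common instance is mapping a possibly negative axis into [0, rank). A small sketch of that idea, independent of MIGraphX's normalize_attribute machinery (the helper name is illustrative):

    #include <cstdint>
    #include <iostream>
    #include <stdexcept>

    // Map an axis in [-rank, rank) onto [0, rank), the usual "use_rank" style
    // normalization; values outside that range are rejected.
    int64_t normalize_axis(int64_t axis, int64_t rank)
    {
        if(axis < -rank or axis >= rank)
            throw std::out_of_range("axis out of range");
        return axis < 0 ? axis + rank : axis;
    }

    int main()
    {
        std::cout << normalize_axis(-1, 4) << ' '  // 3
                  << normalize_axis(2, 4) << '\n'; // 2
    }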
@@ -70,7 +70,8 @@ struct pooling
     // 2 smaller than the input tensor rank (NCHW layout)
     std::vector<std::size_t> lengths = {1, 1};
-    // Dilations are not supported at this time.
+    // Spacing between the elements of the pooling kernel. Must be the same ndim as lengths.
+    std::vector<std::size_t> dilations = {1, 1};
     // ceiling mode is a flag affecting output size
     // or equivalently, placements of the pooling kernel.
@@ -99,6 +100,7 @@ struct pooling
                     f(self.padding_mode, "padding_mode"),
                     f(self.stride, "stride"),
                     f(self.lengths, "lengths"),
+                    f(self.dilations, "dilations"),
                     f(self.ceil_mode, "ceil_mode"),
                     f(self.lp_order, "lp_order"),
                     f(self.dyn_global, "dyn_global"));
@@ -112,14 +114,17 @@ struct pooling
             return;
         if((padding_mode != default_ and padding.size() != stride.size() and
             (padding.size()) != stride.size() * 2) or
-           stride.size() != lengths.size())
+           stride.size() != lengths.size() or dilations.size() != lengths.size())
         {
             MIGRAPHX_THROW("POOLING: inconsistent attribute sizes");
         }
-        if(std::any_of(lengths.begin(), lengths.end(), [&](auto i) { return (i == 0); }) or
-           std::any_of(stride.begin(), stride.end(), [&](auto i) { return (i == 0); }))
+        const auto is_zero = [](auto el) { return el == 0; };
+        if(std::any_of(lengths.begin(), lengths.end(), is_zero) or
+           std::any_of(stride.begin(), stride.end(), is_zero) or
+           std::any_of(dilations.begin(), dilations.end(), is_zero))
         {
-            MIGRAPHX_THROW("POOLING: size 0 pooling kernel or stride");
+            MIGRAPHX_THROW("POOLING: size 0 pooling kernel or stride or dilations");
         }
         // TODO: update lowering to run the reference
@@ -142,6 +147,11 @@ struct pooling
     value attributes() const { return {{"normalize_padding", "padding"}}; }
+    inline std::size_t dilate_dim(std::size_t dim, std::size_t dilation) const
+    {
+        return 1 + dilation * (dim - 1);
+    }
+
     std::vector<std::size_t> calc_spatial_dim_out(const std::vector<std::size_t>& input_lens,
                                                   std::size_t kdims) const
     {
@@ -151,8 +161,9 @@ struct pooling
             std::size_t padding_factor = 2 * padding[i];
             if(padding.size() == 2 * kdims)
                 padding_factor = padding[i] + padding[i + kdims];
+            std::size_t dilated_length = dilate_dim(lengths[i], dilations[i]);
             std::size_t dim_size;
-            if(input_lens[i + 2] + padding_factor < lengths[i])
+            if(input_lens[i + 2] + padding_factor < dilated_length)
             {
                 if(padding_mode == default_)
                     MIGRAPHX_THROW("POOLING: not enough padding for the given kernel size");
@@ -162,7 +173,7 @@ struct pooling
             }
             else
             {
-                dim_size = input_lens[i + 2] + padding_factor - lengths[i];
+                dim_size = input_lens[i + 2] + padding_factor - dilated_length;
             }
             std::size_t len =
                 (ceil_mode)
@@ -331,6 +342,7 @@ struct pooling
                 int start = static_cast<int>(idx_o[dim] * stride[d_2]) -
                             static_cast<int>(padding_vals[d_2]);
                 int end;
+                std::size_t dilated_kernel_dim = dilate_dim(kernel_dims[d_2], dilations[d_2]);
                 // NOLINT
                 if(count_include_pad and ceil_mode and (mode != pooling_mode::max))
                 {
@@ -340,15 +352,14 @@ struct pooling
                     // padding. Clip out-of-bounds indexes but not padding.
                     // Check if this kernel extends beyond the padding at end of dimension
-                    end = std::min(start + kernel_dims[d_2],
+                    end = std::min(start + dilated_kernel_dim,
                                    in_lens[dim] + static_cast<int>(padding_vals[d_2]));
                 }
                 else
                 {
                     // In non-ceiling mode, when
                     // count_include_pad is false, or for max pooling, clip off padding.
-                    end = std::min(start + kernel_dims[d_2], in_lens[dim]);
-                    start = std::max(start, 0);
+                    end = std::min(start + dilated_kernel_dim, in_lens[dim]);
                 }
                 win_start.push_back(start);
                 if(end < start)
@@ -366,6 +377,16 @@ struct pooling
             // for each element in the window...
             shape_for_each(win_shape, [&](const auto& idx_w) {
+                // Skip elements that belong to the dilated area
+                for(size_t axis = 0; axis < idx_w.size(); ++axis)
+                {
+                    if(idx_w[axis] % dilations[axis])
+                    {
+                        pool_size -= 1;
+                        return;
+                    }
+                }
+
                 // the coordinates of this element
                 auto idx = idx_o;
@@ -390,7 +411,15 @@ struct pooling
                     // this is a padding element. Padding locations
                     // don't contribute to average or max pooling total but can play in
                     // lpnorm pooling.
-                    output_val = op(output_val, 0);
+                    if(mode == pooling_mode::lpnorm)
+                    {
+                        output_val = op(output_val, op.template init<Type>());
+                    }
+                    if(mode == pooling_mode::average)
+                    {
+                        // Ignore padding
+                        pool_size -= 1;
+                    }
                 }
             });
             output[i] = Type(op.final(output_val, pool_size));
...
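With dilations, the effective kernel extent becomes 1 + dilation * (k - 1) via dilate_dim(), and the spatial output size uses that dilated extent in the usual pooling formula. A worked floor-mode sketch with arbitrary numbers:

    #include <cstddef>
    #include <iostream>

    // Effective kernel extent once dilation is applied, matching dilate_dim() in the diff.
    std::size_t dilate_dim(std::size_t dim, std::size_t dilation)
    {
        return 1 + dilation * (dim - 1);
    }

    // Floor-mode output length for one spatial dimension; assumes the input plus
    // padding is at least as large as the dilated kernel.
    std::size_t pool_out_len(std::size_t in, std::size_t pad_total, std::size_t kernel,
                             std::size_t stride, std::size_t dilation)
    {
        std::size_t dilated = dilate_dim(kernel, dilation);
        return (in + pad_total - dilated) / stride + 1;
    }

    int main()
    {
        // A 3-wide kernel with dilation 2 covers 1 + 2*(3-1) = 5 input positions.
        std::cout << dilate_dim(3, 2) << '\n';             // 5
        // 32-long input, no padding, stride 1: 32 - 5 + 1 = 28 output positions.
        std::cout << pool_out_len(32, 0, 3, 1, 2) << '\n'; // 28
    }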
@@ -22,6 +22,12 @@
  * THE SOFTWARE.
  */
+/**
+ * Parent struct for prefix scan ops. A prefix scan is a mathematical entity useful
+ * in parallelizing various computations. Given a list of numbers, a prefix scan
+ * op returns an equal size list of running totals of the values. Other operations
+ * besides addition can be supported by child ops.
+ */
 #ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
 #define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
...
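The added comment defines a prefix scan as running totals over a list, with other binary operations possible in child ops. A quick standard-library illustration of both cases:

    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    int main()
    {
        std::vector<int> v = {1, 2, 3, 4};

        // Additive prefix scan: running totals {1, 3, 6, 10}.
        std::vector<int> sums(v.size());
        std::inclusive_scan(v.begin(), v.end(), sums.begin());

        // Same scan structure with a different binary op: running products {1, 2, 6, 24}.
        std::vector<int> prods(v.size());
        std::inclusive_scan(v.begin(), v.end(), prods.begin(), std::multiplies<int>{});

        for(int x : sums)
            std::cout << x << ' ';
        std::cout << '\n';
        for(int x : prods)
            std::cout << x << ' ';
        std::cout << '\n';
    }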
@@ -44,9 +44,11 @@ struct quant_dot
         const shape& a = inputs.at(0);
         const shape& b = inputs.at(1);
         auto t = a.type();
-        if(t != shape::int8_type)
+        std::set<migraphx::shape::type_t> suppported_types = {shape::int8_type,
+                                                              shape::fp8e4m3fnuz_type};
+        if(not contains(suppported_types, t))
         {
-            MIGRAPHX_THROW("QUANT_DOT: only support data type int8_t");
+            MIGRAPHX_THROW("QUANT_DOT: only support data type int8_t and fp8e4m3fnuz_type");
         }
         if(not std::all_of(
@@ -73,6 +75,10 @@ struct quant_dot
         auto out_lens = a.lens();
         out_lens[dim_1] = b.lens()[dim_1];
+        if(t == shape::fp8e4m3fnuz_type)
+        {
+            return {shape::float_type, out_lens};
+        } // else int8 gemm
         return {shape::int32_type, out_lens};
     }
 };
...
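The updated compute_shape() picks the accumulator/output type from the input type: int8 inputs accumulate into int32, while fp8e4m3fnuz accumulates into float. A tiny sketch of that selection logic (the enum is a stand-in, not MIGraphX's shape::type_t):

    #include <iostream>
    #include <set>
    #include <stdexcept>

    // Stand-in for shape::type_t.
    enum class dtype { int8, fp8e4m3fnuz, half, int32, float32 };

    // Mirrors quant_dot::compute_shape()'s type handling: validate the input type,
    // then choose the widened output/accumulator type.
    dtype quant_dot_output_type(dtype t)
    {
        static const std::set<dtype> supported = {dtype::int8, dtype::fp8e4m3fnuz};
        if(supported.count(t) == 0)
            throw std::runtime_error("quant_dot: only int8 and fp8e4m3fnuz are supported");
        return t == dtype::fp8e4m3fnuz ? dtype::float32 : dtype::int32;
    }

    int main()
    {
        std::cout << (quant_dot_output_type(dtype::int8) == dtype::int32) << ' '
                  << (quant_dot_output_type(dtype::fp8e4m3fnuz) == dtype::float32) << '\n'; // 1 1
    }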