Unverified commit 70d9faf7 authored by Chris Austen, committed by GitHub

Merge branch 'develop' into mi200

parents a56c531c a60bdb67
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -37,20 +37,22 @@ namespace op {
* Static allocate:
* No inputs: `allocate()`
* `this.s` attribute set to the static output shape of the buffer.
* `this.s` attribute can be set to a dynamic output shape; however this will allocate the maximum
* buffer size for that case
*
* Dynamic allocate:
* One input: `allocate(output_dims)`
* `output_dims` are the output buffer dimensions and has a static shape.
* Either `this.s` or `this.buf_type` must be set to calculate the dynamic output shape at compute
* time. If `this.buf_type` is set, the compute_shape() of allocate at compile time will have
* dynamic_dimensions from {0, max_int} with rank = output_dims.ndim(). If `this.s` is set then the
* compute_shape() will output `this.s`; `this.s` should be a dynamic shape.
* Either `this.s` or `this.buf_type` (but not both) must be set to calculate the dynamic output
* shape at compute time. If `this.buf_type` is set, the compute_shape() of allocate at compile time
* will have dynamic_dimensions from {0, max_int} with rank = output_dims.ndim(). If `this.s` is set
* then the compute_shape() will output `this.s`; `this.s` should be a dynamic shape.
*/
struct allocate
{
shape s{};
optional<shape> s;
// for dynamic allocate to set the buffer type
shape::type_t buf_type = shape::half_type;
optional<shape::type_t> buf_type;
template <class Self, class F>
static auto reflect(Self& self, F f)
@@ -62,8 +64,12 @@ struct allocate
shape compute_shape(const std::vector<shape>& inputs) const
{
if(s != shape())
if(s.has_value())
{
if(buf_type.has_value())
{
MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both set");
}
if(inputs.size() == 1)
{
migraphx::check_shapes{inputs, *this, false}.only_dims(1);
@@ -72,16 +78,20 @@ struct allocate
{
migraphx::check_shapes{inputs, *this, false}.has(0);
}
return s;
return s.value();
}
else
{
if(not buf_type.has_value())
{
MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both not set");
}
migraphx::check_shapes{inputs, *this, false}.has(1).only_dims(1);
const auto& out_dims = inputs.at(0);
std::size_t max_val = std::numeric_limits<std::size_t>::max();
std::vector<shape::dynamic_dimension> dyn_dims(out_dims.lens().at(0),
shape::dynamic_dimension{0, max_val});
return {buf_type, dyn_dims};
return {buf_type.value(), dyn_dims};
}
}
argument compute(const shape& output_shape, const std::vector<argument>& args) const
@@ -94,7 +104,11 @@
{
std::vector<std::size_t> output_dims(output_shape.ndim());
args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
return argument{shape{buf_type, output_dims}};
if(s)
{
return argument{shape{s->type(), output_dims}};
}
return argument{shape{buf_type.value(), output_dims}};
}
}
};
......
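For context on the two allocate modes described above, here is a minimal sketch of driving them through the internal program-building API. It assumes the attributes are reflected under the names `shape` and `buf_type` and that `migraphx::to_value` serializes them; treat it as an illustration, not exact test-suite usage:

```cpp
#include <migraphx/program.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/serialize.hpp>

void allocate_modes_sketch()
{
    migraphx::program p;
    auto* mm = p.get_main_module();

    // Static allocate: no inputs, output shape fixed by the "shape" attribute.
    migraphx::shape static_s{migraphx::shape::float_type, {2, 3}};
    mm->add_instruction(
        migraphx::make_op("allocate", {{"shape", migraphx::to_value(static_s)}}));

    // Dynamic allocate: one input carrying the output dims; only "buf_type" is
    // set, so compute_shape() reports dynamic_dimensions of {0, max_int}.
    auto dims = mm->add_parameter("out_dims", migraphx::shape{migraphx::shape::int64_type, {4}});
    mm->add_instruction(
        migraphx::make_op("allocate",
                          {{"buf_type", migraphx::to_value(migraphx::shape::float_type)}}),
        dims);
}
```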
@@ -29,6 +29,7 @@
#include <migraphx/argument.hpp>
#include <migraphx/value.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/par.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
@@ -95,11 +96,11 @@ struct binary : op_name<Derived>
{
argument result{dyn_out.computed_shape};
visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
std::transform(input1.begin(),
input1.end(),
input2.begin(),
output.begin(),
static_cast<const Derived&>(*this).apply());
par_transform(input1.begin(),
input1.end(),
input2.begin(),
output.begin(),
static_cast<const Derived&>(*this).apply());
});
return result;
}
......
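The change above swaps `std::transform` for `par_transform` from the newly included `migraphx/par.hpp`. That header's implementation is not shown in this diff; as a rough mental model only, a drop-in parallel wrapper with the same call signature could look like this sketch built on the C++17 parallel algorithms (the real header may use a different backend):

```cpp
#include <algorithm>
#include <execution>

// Hypothetical stand-in for par_transform: same argument order as the call
// site above, delegating to std::transform with a parallel execution policy.
template <class In1, class In2, class Out, class BinOp>
Out par_transform(In1 first1, In1 last1, In2 first2, Out out, BinOp op)
{
    return std::transform(std::execution::par, first1, last1, first2, out, op);
}
```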
@@ -72,8 +72,8 @@ struct dequantizelinear
visit_all(x, x_zero_point)([&](auto input, auto zero_pts) {
visit_all(result, x_scale)([&](auto output, auto scales) {
par_for(output_shape.elements(), [&](auto i) {
output[i] = static_cast<double>(static_cast<int64_t>(input[i]) -
static_cast<int64_t>(zero_pts[i])) *
output[i] = static_cast<double>(static_cast<double>(input[i]) -
static_cast<double>(zero_pts[i])) *
scales[i];
});
});
......
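The edit widens the intermediate subtraction from `int64_t` to `double`, so non-integral inputs and zero points (e.g. fp8 types) dequantize correctly. A worked instance of the formula y = (x - zero_point) * scale:

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    int8_t x          = -60;
    int8_t zero_point = -64;
    float scale       = 0.25f;
    // (-60 - -64) * 0.25 = 1.0; the double cast also covers fp8/float inputs
    double y = (static_cast<double>(x) - static_cast<double>(zero_point)) * scale;
    std::cout << y << "\n"; // prints 1
}
```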
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,31 +21,32 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#ifndef MIGRAPHX_GUARD_OPERATORS_ISINF_HPP
#define MIGRAPHX_GUARD_OPERATORS_ISINF_HPP
#include <migraphx/argument.hpp>
#include <migraphx/op/unary.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace op {
struct context;
struct miopen_int8_conv_pack
struct isinf : unary<isinf>
{
std::string name() const { return "gpu::int8_conv_pack"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
auto apply() const
{
return [&](auto x) { return std::isinf(static_cast<double>(x)); };
}
std::string name() const { return "isinf"; }
shape compute_shape(std::vector<shape> inputs) const
{
return unary<isinf>::compute_shape(std::move(inputs)).with_type(shape::bool_type);
}
};
} // namespace gpu
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
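A standalone check of the elementwise behavior the new op's `apply()` defines, using the same widen-to-double lambda:

```cpp
#include <cmath>
#include <iostream>
#include <limits>

int main()
{
    auto apply = [](auto x) { return std::isinf(static_cast<double>(x)); };
    std::cout << apply(std::numeric_limits<float>::infinity()) << "\n";  // 1
    std::cout << apply(1.0f) << "\n";                                    // 0
    std::cout << apply(std::numeric_limits<float>::quiet_NaN()) << "\n"; // 0 (NaN is not inf)
}
```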
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,11 +21,52 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
 * Multinomial or categorical distribution. Performs a sampling of random input
 * and returns the sampled category (bucket) index for each random value. This
 * does not require the standard multinomial distribution; instead it takes a
 * probability distribution, i.e. cumulative distribution function (CDF), as
 * its first input.
 *
 * Inputs: args[0] - a tensor of probabilities for each category. Values are
 *             cumulative distribution function totals as provided by the
 *             prefix_scan_sum operation, i.e. start with any set of numbers
 *             > 0 and then apply prefix_scan_sum. Values do not need to be
 *             normalized to sum to 1; this is done in runtime computation.
 *
 *             This input has rank 2. Dimension 0 is the batch number, so
 *             there can be a different CDF for each iteration in the batch.
 *             The size of dimension 1 is the number of categories.
 *
 *         args[1] - a tensor of random numbers. The last dimension is the
 *             sample size, i.e. the number of random samples in each
 *             iteration of the batch. Nominally has two dimensions where the
 *             first dimension is the batch size, but any reshaping such that
 *             the total number of elements is (batch_size * sample_size) is
 *             legal.
 *
 *             Values as created by a std::mt19937 like this:
 *
 *                 size_t sample_size = 100000;
 *                 unsigned seed = 0;
 *                 std::mt19937 gen(seed);
 *                 std::uniform_real_distribution<> dis(0.0, 1.0);
 *                 std::vector<float> rand_samples(sample_size);
 *                 std::generate(rand_samples.begin(), rand_samples.end(),
 *                               [&]() { return dis(gen); });
 *
 * Output: A 2D tensor of sampled category indexes, one per random sample.
 *         Dimensions are (first dimension of input 0, last dimension of
 *         input 1), i.e. (batch size, sample size).
 */
#ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/reflect.hpp>
#include <random>
@@ -47,22 +88,35 @@ struct multinomial
std::string name() const { return "multinomial"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2).only_dims(2);
size_t sample_size = inputs.back().lens().back();
if(dtype == shape::bool_type)
MIGRAPHX_THROW("Multinomial: boolean output type invalid.");
return {dtype, {inputs.front().lens().front(), sample_size}};
check_shapes{inputs, *this, true}.has(2).only_dims(2);
if(not contains({shape::int32_type, shape::int64_type}, dtype))
MIGRAPHX_THROW(
"Multinomial: Invalid output type. Valid types are int32_type and int64_type.");
if(inputs.back().ndim() < 1)
MIGRAPHX_THROW("Multinomial: Second input shape (sample) has no dimensions");
// Output takes one dimension from each of the two input shapes. If they are both fixed,
// return a static shape
if((not inputs.front().dynamic()) or (inputs.front().dyn_dims().front().is_fixed()))
{
if((not inputs.back().dynamic()) or (inputs.back().dyn_dims().back().is_fixed()))
{
size_t batch = {inputs.front().max_lens().front()};
size_t sample_size{inputs.back().max_lens().back()};
return {dtype, {batch, sample_size}};
}
}
return {dtype,
{inputs.front().to_dynamic().dyn_dims().front(),
inputs.back().to_dynamic().dyn_dims().back()}};
}
argument compute(const shape& output_shape, std::vector<argument> args) const
argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
{
argument result{output_shape};
size_t batch_size = output_shape.lens().front();
argument result{dyn_out.computed_shape};
size_t batch_size = dyn_out.computed_shape.lens().front();
size_t class_size = args[0].get_shape().lens().back();
size_t sample_size = output_shape.lens().back();
size_t sample_size = dyn_out.computed_shape.lens().back();
visit_all(args[0], args[1])([&](auto cdf, auto dist) {
result.visit([&](auto output) {
@@ -70,13 +124,16 @@
auto idx = args[1].get_shape().multi(i);
auto cdf_begin = cdf.begin() + (idx[0] * class_size);
auto cdf_end = cdf_begin + class_size;
// std::upper_bound returns an iterator to the bucket the value belongs in,
// when normalized by the probability distribution dist
auto sample_iter =
std::upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
// convert iterator to an integer index
output[i] = std::distance(cdf_begin, sample_iter);
});
});
});
return result;
}
};
......
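The sampling step above reduces to a binary search over an unnormalized CDF: rather than normalizing the CDF to sum to 1, the uniform random draw is scaled by the CDF total. A standalone illustration:

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

int main()
{
    // prefix_scan_sum of weights {1, 3, 6} -> unnormalized CDF {1, 4, 10}
    std::vector<float> cdf = {1.0f, 4.0f, 10.0f};
    float r = 0.35f; // a draw from uniform_real_distribution<>(0.0, 1.0)
    // scale the draw by the CDF total, then locate its bucket
    auto it = std::upper_bound(cdf.begin(), cdf.end(), r * cdf.back());
    std::cout << std::distance(cdf.begin(), it) << "\n"; // 3.5 falls in bucket 1
}
```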
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,25 +21,29 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_GEMM_HPP
#define MIGRAPHX_GUARD_RTGLIB_CPU_GEMM_HPP
#ifndef MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP
#define MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP
#include <migraphx/argument.hpp>
#include <migraphx/op/unary.hpp>
#include <migraphx/config.hpp>
#include <fenv.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace ref {
void migemm(
const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta);
void migemm(const argument& c_arg,
const argument& a_arg,
const argument& b_arg,
int32_t alpha,
int32_t beta);
} // namespace ref
namespace op {
struct nearbyint : unary<nearbyint>
{
auto apply() const
{
return [](auto x) {
auto rounding_mode = fegetround();
fesetround(FE_TONEAREST);
auto y = std::nearbyint(x);
fesetround(rounding_mode);
return y;
};
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
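The `FE_TONEAREST` save/set/restore dance matters because `std::nearbyint` honors the current rounding mode, and round-to-nearest-even differs from `std::round`'s round-half-away-from-zero on halfway cases:

```cpp
#include <cfenv>
#include <cmath>
#include <iostream>

int main()
{
    std::fesetround(FE_TONEAREST);
    // nearbyint rounds halves to even; round rounds halves away from zero
    std::cout << std::nearbyint(0.5) << " " << std::nearbyint(1.5) << "\n"; // 0 2
    std::cout << std::round(0.5) << " " << std::round(1.5) << "\n";         // 1 2
}
```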
@@ -40,6 +40,8 @@ namespace op {
* 2. use_rank (default) vs use_lens:
* `use_rank` sets the max value/index of the attribute as the rank of lens.
* `use_lens` sets the max value/index as the corresponding value in lens at the axes index.
* Uses the dynamic_dimension.max value for dynamic shapes. Returns the original vector
* (no normalization) if any of dynamic_dimension[axes] are not fixed.
* 3. `clip_min` vs. `not_clip_min` (default):
* Clip values less than the minimum to the minimum or not.
* 4. `include_min` vs. `exclude_min` (default):
......
@@ -70,7 +70,8 @@ struct pooling
// 2 smaller than the input tensor rank (NCHW layout)
std::vector<std::size_t> lengths = {1, 1};
// Dilations are not supported at this time.
// Spacing between the elements of the pooling kernel. Must be the same ndim as lengths.
std::vector<std::size_t> dilations = {1, 1};
// ceiling mode is a flag affecting output size
// or equivalently, placements of the pooling kernel.
@@ -99,6 +100,7 @@
f(self.padding_mode, "padding_mode"),
f(self.stride, "stride"),
f(self.lengths, "lengths"),
f(self.dilations, "dilations"),
f(self.ceil_mode, "ceil_mode"),
f(self.lp_order, "lp_order"),
f(self.dyn_global, "dyn_global"));
@@ -112,14 +114,17 @@
return;
if((padding_mode != default_ and padding.size() != stride.size() and
(padding.size()) != stride.size() * 2) or
stride.size() != lengths.size())
stride.size() != lengths.size() or dilations.size() != lengths.size())
{
MIGRAPHX_THROW("POOLING: inconsistent attribute sizes");
}
if(std::any_of(lengths.begin(), lengths.end(), [&](auto i) { return (i == 0); }) or
std::any_of(stride.begin(), stride.end(), [&](auto i) { return (i == 0); }))
const auto is_zero = [](auto el) { return el == 0; };
if(std::any_of(lengths.begin(), lengths.end(), is_zero) or
std::any_of(stride.begin(), stride.end(), is_zero) or
std::any_of(dilations.begin(), dilations.end(), is_zero))
{
MIGRAPHX_THROW("POOLING: size 0 pooling kernel or stride");
MIGRAPHX_THROW("POOLING: size 0 pooling kernel or stride or dilations");
}
// TODO: update lowering to run the reference
@@ -142,6 +147,11 @@
value attributes() const { return {{"normalize_padding", "padding"}}; }
inline std::size_t dilate_dim(std::size_t dim, std::size_t dilation) const
{
return 1 + dilation * (dim - 1);
}
std::vector<std::size_t> calc_spatial_dim_out(const std::vector<std::size_t>& input_lens,
std::size_t kdims) const
{
@@ -151,8 +161,9 @@
std::size_t padding_factor = 2 * padding[i];
if(padding.size() == 2 * kdims)
padding_factor = padding[i] + padding[i + kdims];
std::size_t dilated_length = dilate_dim(lengths[i], dilations[i]);
std::size_t dim_size;
if(input_lens[i + 2] + padding_factor < lengths[i])
if(input_lens[i + 2] + padding_factor < dilated_length)
{
if(padding_mode == default_)
MIGRAPHX_THROW("POOLING: not enough padding for the given kernel size");
@@ -162,7 +173,7 @@
}
else
{
dim_size = input_lens[i + 2] + padding_factor - lengths[i];
dim_size = input_lens[i + 2] + padding_factor - dilated_length;
}
std::size_t len =
(ceil_mode)
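A worked instance of the computation above, in floor (non-ceiling) mode:

```cpp
#include <cstddef>
#include <iostream>

int main()
{
    std::size_t input = 10, kernel = 3, dilation = 2, stride = 1, padding = 0;
    // a kernel of length 3 with dilation 2 spans 1 + 2 * (3 - 1) = 5 inputs
    std::size_t dilated = 1 + dilation * (kernel - 1);
    // (10 + 0 - 5) / 1 + 1 = 6 output positions
    std::size_t len = (input + 2 * padding - dilated) / stride + 1;
    std::cout << dilated << " " << len << "\n"; // 5 6
}
```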
@@ -331,6 +342,7 @@ struct pooling
int start = static_cast<int>(idx_o[dim] * stride[d_2]) -
static_cast<int>(padding_vals[d_2]);
int end;
std::size_t dilated_kernel_dim = dilate_dim(kernel_dims[d_2], dilations[d_2]);
// NOLINT
if(count_include_pad and ceil_mode and (mode != pooling_mode::max))
{
@@ -340,15 +352,14 @@
// padding. Clip out-of-bounds indexes but not padding.
// Check if this kernel extends beyond the padding at end of dimension
end = std::min(start + kernel_dims[d_2],
end = std::min(start + dilated_kernel_dim,
in_lens[dim] + static_cast<int>(padding_vals[d_2]));
}
else
{
// In non-ceiling mode, when
// count_include_pad is false, or for max pooling, clip off padding.
end = std::min(start + kernel_dims[d_2], in_lens[dim]);
start = std::max(start, 0);
end = std::min(start + dilated_kernel_dim, in_lens[dim]);
}
win_start.push_back(start);
if(end < start)
@@ -366,6 +377,16 @@
// for each element in the window...
shape_for_each(win_shape, [&](const auto& idx_w) {
// Skip elements that belong to the dilated area
for(size_t axis = 0; axis < idx_w.size(); ++axis)
{
if(idx_w[axis] % dilations[axis])
{
pool_size -= 1;
return;
}
}
// the coordinates of this element
auto idx = idx_o;
@@ -390,7 +411,15 @@
// this is a padding element. Padding locations
// don't contribute to average or max pooling total but can play in
// lpnorm pooling.
output_val = op(output_val, 0);
if(mode == pooling_mode::lpnorm)
{
output_val = op(output_val, op.template init<Type>());
}
if(mode == pooling_mode::average)
{
// Ignore padding
pool_size -= 1;
}
}
});
output[i] = Type(op.final(output_val, pool_size));
......
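The net effect of the two `pool_size -= 1` adjustments above for average pooling: positions that fall on padding or in a dilation hole are dropped from the divisor instead of being averaged in as zeros. A toy 1-D case:

```cpp
#include <iostream>

int main()
{
    // input {1, 3}, kernel length 2, one padding element on the left;
    // the left-edge window covers {pad, 1}
    double window_total = 1.0;   // padding contributes nothing to the total
    int pool_size       = 2 - 1; // the padding position is excluded
    std::cout << window_total / pool_size << "\n"; // 1, not 0.5
}
```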
@@ -22,6 +22,12 @@
* THE SOFTWARE.
*/
/**
* Parent struct for prefix scan ops. A prefix scan is a mathematical operation
* useful in parallelizing various computations. Given a list of numbers, a prefix
* scan op returns an equal-sized list of running totals of the values. Operations
* other than addition can be supported by child ops.
*/
#ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
......
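The running totals the comment describes are exactly what `std::partial_sum` computes; child ops swap in other binary operations for addition:

```cpp
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    std::vector<int> v = {1, 2, 3, 4};
    std::partial_sum(v.begin(), v.end(), v.begin()); // running totals in place
    for(int x : v)
        std::cout << x << " "; // 1 3 6 10
}
```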
@@ -44,9 +44,11 @@ struct quant_dot
const shape& a = inputs.at(0);
const shape& b = inputs.at(1);
auto t = a.type();
if(t != shape::int8_type)
std::set<migraphx::shape::type_t> supported_types = {shape::int8_type,
shape::fp8e4m3fnuz_type};
if(not contains(supported_types, t))
{
MIGRAPHX_THROW("QUANT_DOT: only support data type int8_t");
MIGRAPHX_THROW("QUANT_DOT: only support data type int8_t and fp8e4m3fnuz_type");
}
if(not std::all_of(
@@ -73,6 +75,10 @@ struct quant_dot
auto out_lens = a.lens();
out_lens[dim_1] = b.lens()[dim_1];
if(t == shape::fp8e4m3fnuz_type)
{
return {shape::float_type, out_lens};
} // else int8 gemm
return {shape::int32_type, out_lens};
}
};
......
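The result-type rule added above, restated in one place: int8 operands keep the int32 accumulator type, while the new fp8e4m3fnuz path accumulates into float. A condensed, illustrative restatement of that dispatch (using the type-tag names as plain strings):

```cpp
#include <iostream>
#include <string>

// Condensed restatement of quant_dot::compute_shape's result-type rule.
std::string quant_dot_result_type(const std::string& operand_type)
{
    if(operand_type == "fp8e4m3fnuz_type")
        return "float_type"; // fp8 gemm accumulates into float
    return "int32_type";     // int8 gemm accumulates into int32
}

int main()
{
    std::cout << quant_dot_result_type("int8_type") << "\n";        // int32_type
    std::cout << quant_dot_result_type("fp8e4m3fnuz_type") << "\n"; // float_type
}
```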