Unverified Commit 056acb80 authored by Brian Pickrell's avatar Brian Pickrell Committed by GitHub
Browse files

Multinomial parse (#2003)

parent 947cbec7
/* /*
* The MIT License (MIT) * The MIT License (MIT)
* *
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
...@@ -21,11 +21,52 @@ ...@@ -21,11 +21,52 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
/**
* Multinomial or categorical distribution. Performs a sampling of random input
* and returns the index of the category, or bucket, selected for each sample.
* This does not require the standard multinomial distribution but instead takes
* a probability distribution, i.e. cumulative distribution function (CDF), as
* its first input.
*
* Inputs: args[0] - a tensor of probabilities for each category. Values are
* cumulative distribution function totals as provided by operation
* prefix_scan_sum, i.e. cumulative probabilities (start with any set
* of numbers > 0 and then apply prefix_scan_sum). Values do not
* need to be normalized to sum to 1; this is done in runtime
* computation.
*
* This input has Rank 2. Dimension 0 is batch #, so that there can be
* a different CDF for each iteration in the batch. The size of dimension
* 1 is the number of categories.
*
* args[1] - a tensor of random numbers. The last dimension is the sample
* size, i.e. the number of
* random samples in each iteration of the batch. Nominally
* has two dimensions where the first dimension is batch size, but
* any reshaping such that the total
* number of elements is (batch_size * sample_size) is legal.
*
* Values as created by a std::mt19937 like this:
*
* size_t sample_size = 100000;
* float seed = 0.0f;
* std::mt19937 gen(seed);
* std::uniform_real_distribution<> dis(0.0, 1.0);
* std::vector<float> rand_samples(sample_size);
* std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return
* dis(gen); });
*
* Output: A 2D tensor holding the category selected for each random input value.
* Dimensions are (first dimension of args[0], last dimension of args[1]).
*
*/
#ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP #ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP #define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/par_for.hpp> #include <migraphx/par_for.hpp>
#include <migraphx/reflect.hpp> #include <migraphx/reflect.hpp>
#include <random> #include <random>
...@@ -47,22 +88,35 @@ struct multinomial ...@@ -47,22 +88,35 @@ struct multinomial
std::string name() const { return "multinomial"; } std::string name() const { return "multinomial"; }
shape compute_shape(std::vector<shape> inputs) const shape compute_shape(std::vector<shape> inputs) const
{ {
check_shapes{inputs, *this}.has(2).only_dims(2); check_shapes{inputs, *this, true}.has(2).only_dims(2);
size_t sample_size = inputs.back().lens().back();
if(not contains({shape::int32_type, shape::int64_type}, dtype)) if(inputs.back().ndim() < 1)
MIGRAPHX_THROW( MIGRAPHX_THROW("Multinomial: Second input shape (sample) has no dimensions");
"Multinomial: Invalid output type. Valid types are int32_type and int64_type."); if(dtype == shape::bool_type)
MIGRAPHX_THROW("Multinomial: boolean output type invalid.");
return {dtype, {inputs.front().lens().front(), sample_size}}; // Output takes one dimension from each of the two input shapes. If they are both fixed,
// return a static shape
if((not inputs.front().dynamic()) or (inputs.front().dyn_dims().front().is_fixed()))
{
if((not inputs.back().dynamic()) or (inputs.back().dyn_dims().back().is_fixed()))
{
size_t batch = {inputs.front().max_lens().front()};
size_t sample_size{inputs.back().max_lens().back()};
return {dtype, {batch, sample_size}};
}
}
return {dtype,
{inputs.front().to_dynamic().dyn_dims().front(),
inputs.back().to_dynamic().dyn_dims().back()}};
} }
argument compute(const shape& output_shape, std::vector<argument> args) const argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
{ {
argument result{output_shape}; argument result{dyn_out.computed_shape};
size_t batch_size = output_shape.lens().front(); size_t batch_size = dyn_out.computed_shape.lens().front();
size_t class_size = args[0].get_shape().lens().back(); size_t class_size = args[0].get_shape().lens().back();
size_t sample_size = output_shape.lens().back(); size_t sample_size = dyn_out.computed_shape.lens().back();
visit_all(args[0], args[1])([&](auto cdf, auto dist) { visit_all(args[0], args[1])([&](auto cdf, auto dist) {
result.visit([&](auto output) { result.visit([&](auto output) {
...@@ -70,13 +124,16 @@ struct multinomial ...@@ -70,13 +124,16 @@ struct multinomial
auto idx = args[1].get_shape().multi(i); auto idx = args[1].get_shape().multi(i);
auto cdf_begin = cdf.begin() + (idx[0] * class_size); auto cdf_begin = cdf.begin() + (idx[0] * class_size);
auto cdf_end = cdf_begin + class_size; auto cdf_end = cdf_begin + class_size;
// std::upper_bound returns an iterator to the bucket the value belongs in,
// when normalized by the probability distribution dist
auto sample_iter = auto sample_iter =
std::upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end))); std::upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
// convert iterator to an integer index
output[i] = std::distance(cdf_begin, sample_iter); output[i] = std::distance(cdf_begin, sample_iter);
}); });
}); });
}); });
return result; return result;
} }
}; };
......
...@@ -22,6 +22,12 @@ ...@@ -22,6 +22,12 @@
* THE SOFTWARE. * THE SOFTWARE.
*/ */
/**
* Parent struct for prefix scan ops. A prefix scan is a mathematical entity useful
* in parallelizing various computations. Given a list of numbers, a prefix scan
* op returns an equal size list of running totals of the values. Other operations
* besides addition can be supported by child ops.
*/
#ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP #ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP #define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
......
...@@ -65,11 +65,10 @@ struct random_uniform ...@@ -65,11 +65,10 @@ struct random_uniform
return inputs.at(1); return inputs.at(1);
} }
argument compute(const shape&, std::vector<argument> args) const argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
{ {
// Output goes into the passed buffer, not the shape output. // Output goes into the passed buffer, not the shape output.
auto result = args[1]; argument result{dyn_out.computed_shape};
uint64_t local_seed = args[0].at<uint64_t>(0); uint64_t local_seed = args[0].at<uint64_t>(0);
std::mt19937 gen(local_seed); std::mt19937 gen(local_seed);
......
/* /*
* The MIT License (MIT) * The MIT License (MIT)
* *
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
......
/* /*
* The MIT License (MIT) * The MIT License (MIT)
* *
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
...@@ -41,6 +41,9 @@ struct parse_multinomial : op_parser<parse_multinomial> ...@@ -41,6 +41,9 @@ struct parse_multinomial : op_parser<parse_multinomial>
const onnx_parser::node_info& info, const onnx_parser::node_info& info,
std::vector<instruction_ref> args) const std::vector<instruction_ref> args) const
{ {
if(args.empty())
MIGRAPHX_THROW("PARSE_MULTINOMIAL: no arguments given");
int dtype = 6; int dtype = 6;
if(contains(info.attributes, "dtype")) if(contains(info.attributes, "dtype"))
dtype = info.attributes.at("dtype").i(); dtype = info.attributes.at("dtype").i();
...@@ -49,35 +52,90 @@ struct parse_multinomial : op_parser<parse_multinomial> ...@@ -49,35 +52,90 @@ struct parse_multinomial : op_parser<parse_multinomial>
size_t sample_size = 1; size_t sample_size = 1;
if(contains(info.attributes, "sample_size")) if(contains(info.attributes, "sample_size"))
sample_size = info.attributes.at("sample_size").i(); sample_size = info.attributes.at("sample_size").i();
else
MIGRAPHX_THROW("PARSE_MULTINOMIAL: sample_size not given");
// Use logarithmic math to scale probabilities while avoiding division by very
// small numbers. Scaling by the maximum makes very tiny ranges more
// tractable; any constant factor gives an equivalent distribution, since the
// multinomial op normalizes at runtime.
// Subtract the per-batch maximum log-probability, making the per-batch max 0 // Subtract the per-batch maximum log-probability, making the per-batch max 0
auto maxes = auto maxes =
info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), args[0]); info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), args[0]);
auto mb_maxes = info.add_instruction( auto cdf = info.add_common_op("sub", args[0], maxes);
migraphx::make_op("multibroadcast", {{"out_lens", args[0]->get_shape().lens()}}),
maxes);
auto cdf = info.add_instruction(migraphx::make_op("sub"), args[0], mb_maxes);
// Take the element-wise exponent to get probabilities in the range (0, 1] // Take the element-wise exponent to get probabilities in the range (0, 1]
cdf = info.add_instruction(migraphx::make_op("exp"), cdf); cdf = info.add_instruction(migraphx::make_op("exp"), cdf);
// Compute the cumulative density function // Compute the cumulative distribution function
cdf = info.add_instruction( cdf = info.add_instruction(
migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf); migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);
// Pre-compute random distribution instruction_ref seed_input;
std::mt19937 gen(std::chrono::high_resolution_clock::now().time_since_epoch().count());
if(contains(info.attributes, "seed")) if(contains(info.attributes, "seed"))
gen.seed(info.attributes.at("seed").f()); {
float seed = info.attributes.at("seed").f();
migraphx::shape s{migraphx::shape::float_type, {1}};
std::vector<float> data = {seed};
seed_input = info.add_literal(migraphx::literal(s, data));
}
else
{
seed_input = info.add_instruction(migraphx::make_op("random_seed"));
}
instruction_ref randoms;
shape s0 = args[0]->get_shape();
if(s0.dynamic())
{
// Dynamic batch_size will be taken from args[0]. The input argument to this should
// have a second dimension of sample_size.
std::vector<shape::dynamic_dimension> dyn_dim_set;
dyn_dim_set.emplace_back(s0.dyn_dims().front());
dyn_dim_set.emplace_back(shape::dynamic_dimension{sample_size, sample_size});
// read the input dimensions
auto dim_of =
info.add_instruction(migraphx::make_op("dimensions_of", {{"end", 2}}), args[0]);
// The next two operations insert the value sample_size into the second array position
// make an argument of (1, 0)
shape s(shape::int64_type, {2});
std::vector<int64_t> data1{1, 0};
auto l1 = info.add_literal(s, data1);
auto batch_arg = info.add_instruction(migraphx::make_op("mul"), dim_of, l1);
std::vector<int64_t> data2(2, 0);
// make an argument of (0, sample_size)
data2[1] = sample_size;
auto l2 = info.add_literal(s, data2);
auto alloc_shape = info.add_instruction(migraphx::make_op("add"), batch_arg, l2);
// alloc_shape should contain the input-based shape dimensions as its values at runtime,
// and its own shape is {2}
// compile_shape is the shape used when compiling the Allocate op, and may be dynamic
migraphx::shape compile_shape =
migraphx::shape(s0.type(), {s0.dyn_dims().front(), {sample_size, sample_size}});
std::uniform_real_distribution<> dis(0.0, 1.0); // Allocate on-device storage for the random values
size_t batch_size = args[0]->get_shape().lens().front(); auto alloc = info.add_instruction(
migraphx::shape dist_shape{migraphx::shape::float_type, {batch_size, sample_size}}; migraphx::make_op("allocate", {{"shape", to_value(compile_shape)}}), alloc_shape);
randoms = info.add_instruction(migraphx::make_op("random_uniform"), seed_input, alloc);
}
else
{
// Use a literal. The array populated by random_uniform may have any shape, as long as its
// number of elements is batch_size * sample_size.
size_t batch_size = s0.lens().front();
auto rand_dummy = info.add_literal(
migraphx::literal{migraphx::shape::float_type, {batch_size * sample_size}});
std::vector<float> random_dist(batch_size * sample_size); randoms =
std::generate(random_dist.begin(), random_dist.end(), [&]() { return dis(gen); }); info.add_instruction(migraphx::make_op("random_uniform"), seed_input, rand_dummy);
auto dist_lit = info.add_literal(migraphx::literal{dist_shape, random_dist}); }
return info.add_instruction( return info.add_instruction(
migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, dist_lit); migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, randoms);
} }
}; };
......
...@@ -4883,9 +4883,9 @@ def mod_test_fmod_different_dtypes(): ...@@ -4883,9 +4883,9 @@ def mod_test_fmod_different_dtypes():
@onnx_test() @onnx_test()
def multinomial_test(): def multinomial_test():
sample_size = 10 sample_size = 13
seed = 0.0 seed = 0.
input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10]) input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [3, 10])
output = helper.make_tensor_value_info("output", TensorProto.INT32, output = helper.make_tensor_value_info("output", TensorProto.INT32,
[1, 10]) [1, 10])
...@@ -4898,6 +4898,44 @@ def multinomial_test(): ...@@ -4898,6 +4898,44 @@ def multinomial_test():
return ([node], [input], [output]) return ([node], [input], [output])
# Builds a single-node ONNX graph containing a Multinomial op whose input has a
# dynamic (None) batch dimension and a fixed number of categories, with an
# explicit seed attribute and a float output dtype.
# Returns the ([nodes], [inputs], [outputs]) triple handed to the onnx_test decorator.
@onnx_test()
def multinomial_dyn_test():
sample_size = 100000
seed = 1.3
categories = 5
input = helper.make_tensor_value_info("input", TensorProto.FLOAT,
[None, categories])
# NOTE(review): the declared output shape is [None, categories], but the
# multinomial op's output takes its last dimension from the sample size --
# confirm whether this value_info is intentional / ignored by the parser.
output = helper.make_tensor_value_info("output", TensorProto.FLOAT,
[None, categories])
node = onnx.helper.make_node(
'Multinomial',
inputs=['input'],
sample_size=sample_size,
dtype=1, # shape::float_type
seed=seed,
outputs=['output'])
return ([node], [input], [output])
# Builds a single-node ONNX graph containing a Multinomial op with a dynamic
# (None) batch dimension and NO seed or dtype attribute.
# Returns the ([nodes], [inputs], [outputs]) triple handed to the onnx_test decorator.
@onnx_test()
def multinomial_autoseed_dyn_test():
# If seed attribute is not given, device should auto generate one at runtime
sample_size = 12
input = helper.make_tensor_value_info("input", TensorProto.FLOAT,
[None, 10])
# NOTE(review): declared output shape is [None, 10] while sample_size is 12 --
# confirm whether this value_info is intentional / ignored by the parser.
output = helper.make_tensor_value_info("output", TensorProto.INT32,
[None, 10])
node = onnx.helper.make_node('Multinomial',
inputs=['input'],
sample_size=sample_size,
outputs=['output'])
return ([node], [input], [output])
@onnx_test() @onnx_test()
def multinomial_generated_seed_test(): def multinomial_generated_seed_test():
sample_size = 10 sample_size = 10
......
...@@ -4679,32 +4679,140 @@ TEST_CASE(multinomial_test) ...@@ -4679,32 +4679,140 @@ TEST_CASE(multinomial_test)
{ {
migraphx::program p; migraphx::program p;
auto* mm = p.get_main_module(); auto* mm = p.get_main_module();
size_t sample_size = 10; size_t sample_size = 13;
float seed = 0.0f; size_t batch_size = 3;
size_t categories = 10;
float seed = 0;
auto input = mm->add_parameter("input", migraphx::shape{migraphx::shape::float_type, {1, 10}}); auto input = mm->add_parameter(
auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input); "input", migraphx::shape{migraphx::shape::float_type, {batch_size, categories}});
auto mb_maxes = auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input);
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 10}}}), maxes); auto mb_maxes = mm->add_instruction(
migraphx::make_op("multibroadcast", {{"out_lens", {batch_size, 10}}}), maxes);
auto cdf = mm->add_instruction(migraphx::make_op("sub"), input, mb_maxes); auto cdf = mm->add_instruction(migraphx::make_op("sub"), input, mb_maxes);
cdf = mm->add_instruction(migraphx::make_op("exp"), cdf); cdf = mm->add_instruction(migraphx::make_op("exp"), cdf);
cdf = mm->add_instruction( cdf = mm->add_instruction(
migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf); migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);
std::mt19937 gen(seed); migraphx::shape s{migraphx::shape::float_type, {1}};
std::uniform_real_distribution<> dis(0.0, 1.0); std::vector<float> seed_data = {seed};
std::vector<float> rand_samples(sample_size); auto seed_input = mm->add_literal(migraphx::literal(s, seed_data));
std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return dis(gen); }); auto rand_dummy =
migraphx::shape rs{migraphx::shape::float_type, {1, sample_size}}; mm->add_literal(migraphx::literal{migraphx::shape::float_type, {batch_size * sample_size}});
auto rs_lit = mm->add_literal(migraphx::literal{rs, rand_samples});
mm->add_instruction(migraphx::make_op("multinomial"), cdf, rs_lit);
auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, rand_dummy);
mm->add_instruction(migraphx::make_op("multinomial"), cdf, randoms);
auto prog = optimize_onnx("multinomial_test.onnx"); auto prog = optimize_onnx("multinomial_test.onnx");
EXPECT(p == prog); EXPECT(p == prog);
} }
// Parser test: builds by hand the program expected from parsing
// multinomial_dyn_test.onnx (dynamic batch dimension, seed supplied as an
// attribute) and checks that it equals the program produced by parse_onnx.
// The instructions are added in a specific sequence; keep the order in step
// with the parser when editing (presumably program equality is order-sensitive
// -- verify before reordering).
TEST_CASE(multinomial_dyn_test)
{
    // compile-time random seed
    migraphx::program p;
    auto* mm = p.get_main_module();
    size_t sample_size = 100000;
    size_t categories = 5;
    float seed = 1.3f;

    // Input parameter: first dimension ranges over {1, categories}, second is fixed
    // at categories. The first range matches default_dyn_dim_value set below.
    auto input = mm->add_parameter(
        "input",
        migraphx::shape{migraphx::shape::float_type, {{1, categories}, {categories, categories}}});

    // Subtract the per-row maximum, exponentiate, then take a running sum along axis 1
    auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input);
    auto cdf = add_common_op(*mm, migraphx::make_op("sub"), {input, maxes});
    cdf = mm->add_instruction(migraphx::make_op("exp"), cdf);
    cdf = mm->add_instruction(
        migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);

    // The seed attribute becomes a one-element float literal
    migraphx::shape s{migraphx::shape::float_type, {1}};
    std::vector<float> seed_data = {seed};
    auto seed_input = mm->add_literal(migraphx::literal(s, seed_data));

    // dynamic input only: must calculate alloc_shape as (batch_size, sample_size)
    // read the runtime input dimensions
    auto dim_of = mm->add_instruction(migraphx::make_op("dimensions_of", {{"end", 2}}), input);
    // make an argument of (1, 0)
    migraphx::shape lit_shape(migraphx::shape::int64_type, {2});
    std::vector<int64_t> data1{1, 0};
    auto l1 = mm->add_literal(lit_shape, data1);
    // multiplying by (1, 0) keeps the first dimension and zeroes the second
    auto batch_arg = mm->add_instruction(migraphx::make_op("mul"), dim_of, l1);
    std::vector<int64_t> data2(2, 0);
    // make an argument of (0, sample_size)
    data2[1] = sample_size;
    auto l2 = mm->add_literal(lit_shape, data2);
    // adding (0, sample_size) puts sample_size into the second position
    auto alloc_shape = mm->add_instruction(migraphx::make_op("add"), batch_arg, l2);

    // Shape attribute given to the allocate op: dynamic first dimension taken from
    // the input, fixed second dimension of sample_size
    migraphx::shape compile_shape =
        migraphx::shape(migraphx::shape::float_type,
                        {input->get_shape().dyn_dims().front(), {sample_size, sample_size}});
    auto alloc = mm->add_instruction(
        migraphx::make_op("allocate", {{"shape", to_value(compile_shape)}}), alloc_shape);
    auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, alloc);
    auto ret = mm->add_instruction(
        migraphx::make_op("multinomial", {{"dtype", migraphx::shape::float_type}}), cdf, randoms);
    mm->add_return({ret});

    // Parse the ONNX file with the same dynamic-dimension default and compare
    migraphx::onnx_options options;
    options.default_dyn_dim_value = {1, categories};
    options.print_program_on_error = true;
    auto prog = migraphx::parse_onnx("multinomial_dyn_test.onnx", options);
    EXPECT(p == prog);
}
// Parser test: builds by hand the program expected from parsing
// multinomial_autoseed_dyn_test.onnx (dynamic batch dimension, no seed
// attribute) and checks that it equals the program produced by parse_onnx.
// Here the seed comes from a random_seed instruction instead of a literal.
TEST_CASE(multinomial_autoseed_dyn_test)
{
    // runtime random seed
    migraphx::program p;
    auto* mm = p.get_main_module();
    size_t sample_size = 12;
    size_t categories = 10;

    // Input parameter: first dimension ranges over {1, 10}, second is fixed at 10.
    // NOTE(review): the literal 10s presumably stand for `categories` -- confirm.
    auto input = mm->add_parameter(
        "input", migraphx::shape{migraphx::shape::float_type, {{1, 10}, {10, 10}}});

    // Subtract the per-row maximum, exponentiate, then take a running sum along axis 1
    auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input);
    auto cdf = add_common_op(*mm, migraphx::make_op("sub"), {input, maxes});
    cdf = mm->add_instruction(migraphx::make_op("exp"), cdf);
    cdf = mm->add_instruction(
        migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);

    // No seed attribute in the model, so the seed is an instruction rather than a literal
    auto seed_input = mm->add_instruction(migraphx::make_op("random_seed"));

    // dynamic input only: must calculate alloc_shape as (batch_size, sample_size)
    // read the runtime input dimensions
    auto dim_of = mm->add_instruction(migraphx::make_op("dimensions_of", {{"end", 2}}), input);
    // make an argument of (1, 0)
    migraphx::shape lit_shape(migraphx::shape::int64_type, {2});
    std::vector<int64_t> data1{1, 0};
    auto l1 = mm->add_literal(lit_shape, data1);
    // multiplying by (1, 0) keeps the first dimension and zeroes the second
    auto batch_arg = mm->add_instruction(migraphx::make_op("mul"), dim_of, l1);
    std::vector<int64_t> data2(2, 0);
    // make an argument of (0, sample_size)
    data2[1] = sample_size;
    auto l2 = mm->add_literal(lit_shape, data2);
    // adding (0, sample_size) puts sample_size into the second position
    auto alloc_shape = mm->add_instruction(migraphx::make_op("add"), batch_arg, l2);

    // Shape attribute given to the allocate op: dynamic first dimension taken from
    // the input, fixed second dimension of sample_size
    migraphx::shape compile_shape =
        migraphx::shape(migraphx::shape::float_type,
                        {input->get_shape().dyn_dims().front(), {sample_size, sample_size}});
    auto alloc = mm->add_instruction(
        migraphx::make_op("allocate", {{"shape", to_value(compile_shape)}}), alloc_shape);
    auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, alloc);
    // multinomial is created without a dtype attribute, i.e. with the op's default
    auto ret = mm->add_instruction(migraphx::make_op("multinomial"), cdf, randoms);
    mm->add_return({ret});

    // Parse the ONNX file with the same dynamic-dimension default and compare
    migraphx::onnx_options options;
    options.default_dyn_dim_value = {1, categories};
    options.print_program_on_error = true;
    auto prog = migraphx::parse_onnx("multinomial_autoseed_dyn_test.onnx", options);
    EXPECT(p == prog);
}
TEST_CASE(multinomial_dtype_error_test) TEST_CASE(multinomial_dtype_error_test)
{ {
EXPECT(test::throws([&] { migraphx::parse_onnx("multinomial_dtype_error_test.onnx"); })); EXPECT(test::throws([&] { migraphx::parse_onnx("multinomial_dtype_error_test.onnx"); }));
...@@ -4712,10 +4820,11 @@ TEST_CASE(multinomial_dtype_error_test) ...@@ -4712,10 +4820,11 @@ TEST_CASE(multinomial_dtype_error_test)
TEST_CASE(multinomial_generated_seed_test) TEST_CASE(multinomial_generated_seed_test)
{ {
// multinomial op. no longer generates its own randoms
auto p1 = optimize_onnx("multinomial_generated_seed_test.onnx"); auto p1 = optimize_onnx("multinomial_generated_seed_test.onnx");
auto p2 = optimize_onnx("multinomial_generated_seed_test.onnx"); auto p2 = optimize_onnx("multinomial_generated_seed_test.onnx");
EXPECT(p1 != p2); EXPECT(p1 == p2);
} }
TEST_CASE(multinomial_int64_test) TEST_CASE(multinomial_int64_test)
...@@ -4723,27 +4832,27 @@ TEST_CASE(multinomial_int64_test) ...@@ -4723,27 +4832,27 @@ TEST_CASE(multinomial_int64_test)
migraphx::program p; migraphx::program p;
auto* mm = p.get_main_module(); auto* mm = p.get_main_module();
size_t sample_size = 10; size_t sample_size = 10;
float seed = 1.0f; float seed = 1.0;
uint32_t batch_size = 1;
migraphx::shape::type_t dtype = migraphx::shape::type_t::int64_type; migraphx::shape::type_t dtype = migraphx::shape::type_t::int64_type;
auto input = mm->add_parameter("input", migraphx::shape{migraphx::shape::float_type, {1, 10}}); auto input = mm->add_parameter("input", migraphx::shape{migraphx::shape::float_type, {1, 10}});
auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input); auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input);
auto mb_maxes =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 10}}}), maxes); auto cdf = add_common_op(*mm, migraphx::make_op("sub"), {input, maxes});
auto cdf = mm->add_instruction(migraphx::make_op("sub"), input, mb_maxes);
cdf = mm->add_instruction(migraphx::make_op("exp"), cdf); cdf = mm->add_instruction(migraphx::make_op("exp"), cdf);
cdf = mm->add_instruction( cdf = mm->add_instruction(
migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf); migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);
std::mt19937 gen(seed); migraphx::shape s{migraphx::shape::float_type, {1}};
std::uniform_real_distribution<> dis(0.0, 1.0); std::vector<float> data = {seed};
std::vector<float> rand_samples(sample_size); auto seed_input = mm->add_literal(migraphx::literal(s, data));
std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return dis(gen); });
migraphx::shape rs{migraphx::shape::float_type, {1, sample_size}};
auto rs_lit = mm->add_literal(migraphx::literal{rs, rand_samples});
mm->add_instruction(migraphx::make_op("multinomial", {{"dtype", dtype}}), cdf, rs_lit);
// static size
auto rand_dummy =
mm->add_literal(migraphx::literal{migraphx::shape::float_type, {batch_size * sample_size}});
auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, rand_dummy);
mm->add_instruction(migraphx::make_op("multinomial", {{"dtype", dtype}}), cdf, randoms);
auto prog = optimize_onnx("multinomial_int64_test.onnx"); auto prog = optimize_onnx("multinomial_int64_test.onnx");
EXPECT(p == prog); EXPECT(p == prog);
......
...@@ -1434,6 +1434,77 @@ TEST_CASE(mod_test_fmod_different_types) ...@@ -1434,6 +1434,77 @@ TEST_CASE(mod_test_fmod_different_types)
EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
} }
// End-to-end check of the Multinomial ONNX op with a dynamic batch dimension:
// parses multinomial_dyn_test.onnx, runs it on the ref target with a batch of
// two known distributions, and verifies that the histogram of returned
// category indices approximates each input distribution.
TEST_CASE(multinomial_dyn_test)
{
    migraphx::onnx_options options;
    options.default_dyn_dim_value = {1, 4};
    auto p = migraphx::parse_onnx("multinomial_dyn_test.onnx", options);
    const size_t batch_size(2);
    const size_t categories(5);
    const size_t sample_size(100000);
    p.compile(migraphx::make_target("ref"));

    // Distribution function (2 distributions of 5 categories each)
    std::vector<int> dist{15, 25, 15, 25, 20, 20, 20, 10, 25, 25};
    EXPECT(dist.size() == categories * batch_size);
    // The model input is the logarithm of each (unnormalized) probability
    std::vector<float> data(categories * batch_size);
    std::transform(dist.begin(), dist.end(), data.begin(), [&](auto d) { return log(d); });

    // Shape of the probability distribution, which also defines the number of categories
    migraphx::shape s{migraphx::shape::float_type, {batch_size, categories}};
    migraphx::parameter_map pp;
    pp["input"] = migraphx::argument(s, data.data());
    auto result = p.eval(pp).back();

    // result_vec holds one category index per sample, batch after batch
    std::vector<int32_t> result_vec(batch_size * sample_size);
    result.visit([&](auto output) { result_vec.assign(output.begin(), output.end()); });
    // Guard the per-batch indexing below against an unexpected output size
    EXPECT(result_vec.size() == batch_size * sample_size);

    using diff_t = std::vector<int>::difference_type;
    for(size_t b = 0; b < batch_size; b++)
    {
        // Slice of the original distribution belonging to this batch entry
        const auto dist_first = dist.begin() + static_cast<diff_t>(b * categories);
        const auto dist_last  = dist_first + static_cast<diff_t>(categories);

        // Make a categorical histogram of the output for this batch entry
        std::vector<int> res_dist(categories, 0);
        for(size_t r = b * sample_size; r < (b + 1) * sample_size; r++)
            res_dist[result_vec[r]]++;

        // normalizing factors for original and measured distributions
        auto dist_sum     = std::accumulate(dist_first, dist_last, 0);
        auto res_dist_sum = std::accumulate(res_dist.begin(), res_dist.end(), 0);

        // Normalize both so they can be compared directly
        std::vector<float> norm(categories);
        std::vector<float> res_norm(categories);
        std::transform(dist_first, dist_last, norm.begin(), [&](auto n) {
            return static_cast<double>(n) / dist_sum;
        });
        std::transform(res_dist.begin(), res_dist.end(), res_norm.begin(), [&](auto n) {
            return static_cast<double>(n) / res_dist_sum;
        });

        // Measured values approximate the distribution in dist. The sampled result is
        // always the tested range and the known distribution is always the expected one.
        EXPECT(migraphx::verify::verify_range_with_tolerance(
            res_norm, migraphx::verify::expected{norm}, migraphx::verify::tolerance{0.01}));
    }
}
TEST_CASE(nonzero_test) TEST_CASE(nonzero_test)
{ {
migraphx::program p = migraphx::parse_onnx("nonzero_dynamic_test.onnx"); migraphx::program p = migraphx::parse_onnx("nonzero_dynamic_test.onnx");
......
...@@ -1957,12 +1957,42 @@ TEST_CASE(multibroadcast_3in_dyn_dyn) ...@@ -1957,12 +1957,42 @@ TEST_CASE(multibroadcast_3in_dyn_dyn)
expect_shape(expected_shape, migraphx::make_op("multibroadcast"), c_shape, a_shape, b_shape); expect_shape(expected_shape, migraphx::make_op("multibroadcast"), c_shape, a_shape, b_shape);
} }
TEST_CASE(multinomial) TEST_CASE(multinomial_bool_type)
{ {
migraphx::shape s{migraphx::shape::float_type, {2, 5}}; migraphx::shape s1{migraphx::shape::float_type, {1, 2}};
migraphx::shape s2{migraphx::shape::float_type, {3, 4}};
int dtype = 0; int dtype = 0;
throws_shape(migraphx::make_op("multinomial", {{"dtype", dtype}}), s, s); throws_shape(migraphx::make_op("multinomial", {{"dtype", dtype}}), s1, s2);
}
// Static shapes: the output takes its first dimension from the first input and
// its last dimension from the second input.
TEST_CASE(multinomial)
{
    migraphx::shape cdf_shape{migraphx::shape::float_type, {1, 2}};
    migraphx::shape samples_shape{migraphx::shape::float_type, {3, 4}};
    migraphx::shape expected_shape{migraphx::shape::float_type, {1, 4}};
    // presumably 2 is the numeric value of float_type, matching expected_shape -- confirm
    int out_type = 2;
    auto op      = migraphx::make_op("multinomial", {{"dtype", out_type}});
    expect_shape(expected_shape, op, cdf_shape, samples_shape);
}
// A second (sample) input constructed with an empty dimension list must be
// rejected by shape computation.
TEST_CASE(multinomial_0size_input)
{
    migraphx::shape cdf_shape{migraphx::shape::float_type, {1, 2}};
    migraphx::shape empty_shape{migraphx::shape::float_type, {}};
    int out_type = 2;
    auto op      = migraphx::make_op("multinomial", {{"dtype", out_type}});
    throws_shape(op, cdf_shape, empty_shape);
}
TEST_CASE(multinomial_dyn)
{
migraphx::shape s1{migraphx::shape::int32_type, {{2, 3}, {5, 6}}};
migraphx::shape s2{migraphx::shape::int32_type, {{7, 8}, {9, 10}}};
migraphx::shape s3{migraphx::shape::int32_type, {{2, 3}, {9, 10}}};
expect_shape(
s3, migraphx::make_op("multinomial", {{"dtype", migraphx::shape::int32_type}}), s1, s2);
} }
TEST_CASE(nms_shape) TEST_CASE(nms_shape)
......
...@@ -24,9 +24,10 @@ ...@@ -24,9 +24,10 @@
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/literal.hpp> #include <migraphx/literal.hpp>
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
#include <migraphx/program.hpp> #include <migraphx/onnx.hpp>
#include <migraphx/register_target.hpp> #include <migraphx/register_target.hpp>
#include <migraphx/verify.hpp> #include <migraphx/verify.hpp>
#include <numeric>
#include <random> #include <random>
#include <test.hpp> #include <test.hpp>
...@@ -48,27 +49,37 @@ TEST_CASE(multinomial_test) ...@@ -48,27 +49,37 @@ TEST_CASE(multinomial_test)
migraphx::shape s{migraphx::shape::float_type, {1, 5}}; migraphx::shape s{migraphx::shape::float_type, {1, 5}};
std::vector<int> dist{15, 25, 15, 25, 20}; std::vector<int> dist{15, 25, 15, 25, 20};
std::vector<float> data(5); std::vector<float> data(5);
std::transform(dist.begin(), dist.end(), data.begin(), [&](auto d) { return std::log(d); }); std::vector<float> sum(5);
auto input = mm->add_literal(migraphx::literal(s, data)); // convert to float
std::transform(dist.begin(), dist.end(), data.begin(), [&](auto d) { return d; });
// take cumulative sum
std::partial_sum(data.begin(), data.end(), sum.begin(), std::plus<float>());
// scale probabilities arbitrarily
float odd_scale = 10000.;
std::transform(sum.begin(), sum.end(), data.begin(), [&](auto d) { return d * odd_scale; });
auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input); auto input = mm->add_literal(migraphx::literal(s, data));
auto mb_maxes =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 5}}}), maxes);
auto cdf = mm->add_instruction(migraphx::make_op("sub"), input, mb_maxes);
cdf = mm->add_instruction(migraphx::make_op("exp"), cdf);
cdf = mm->add_instruction(
migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);
mm->add_instruction(migraphx::make_op("multinomial"), cdf, rs_lit); mm->add_instruction(migraphx::make_op("multinomial"), input, rs_lit);
p.compile(migraphx::make_target("ref")); p.compile(migraphx::make_target("ref"));
auto result = p.eval({}).back(); auto result = p.eval({}).back();
// result_vec contains an index, or category label, for each random input value
std::vector<int32_t> result_vec(sample_size); std::vector<int32_t> result_vec(sample_size);
result.visit([&](auto output) { result_vec.assign(output.begin(), output.end()); }); result.visit([&](auto output) { result_vec.assign(output.begin(), output.end()); });
// res_dist is a count, or histogram, of the number of samples in each category. This is the
// sampled distribution.
std::vector<int> res_dist(5, 0); std::vector<int> res_dist(5, 0);
for(const auto& r : result_vec) for(const auto& r : result_vec)
res_dist[r]++; res_dist[r]++;
auto dist_sum = std::accumulate(dist.begin(), dist.end(), 0);
// To check the result, normalize the original probability distribution dist
// and the sampling result res_dist; they should be close
// Total the unnormalized probabilities
auto dist_sum = std::accumulate(dist.begin(), dist.end(), 0);
// Total the number of values returned
auto res_dist_sum = std::accumulate(res_dist.begin(), res_dist.end(), 0); auto res_dist_sum = std::accumulate(res_dist.begin(), res_dist.end(), 0);
std::vector<float> norm(5); std::vector<float> norm(5);
std::vector<float> res_norm(5); std::vector<float> res_norm(5);
...@@ -78,6 +89,204 @@ TEST_CASE(multinomial_test) ...@@ -78,6 +89,204 @@ TEST_CASE(multinomial_test)
std::transform(res_dist.begin(), res_dist.end(), res_norm.begin(), [&](auto n) { std::transform(res_dist.begin(), res_dist.end(), res_norm.begin(), [&](auto n) {
return static_cast<double>(n) / res_dist_sum; return static_cast<double>(n) / res_dist_sum;
}); });
EXPECT(migraphx::verify::verify_range_with_tolerance(
res_norm, migraphx::verify::expected{norm}, migraphx::verify::tolerance{0.01}));
}
// Runs random_uniform and multinomial together on the ref target with dynamic-shape
// parameters, then checks that the sampled category frequencies of each batch row are
// within 0.01 of the normalized input distribution for that row.
TEST_CASE(multinomial_dyn_test)
{
    // Invokes random_uniform and multinomial ops together, to verify the interface.
    // Dynamic batch dimension input of 2 means there are 2 different probability
    // distribution functions contained in Input_2.
    migraphx::program p;
    auto* mm = p.get_main_module();

    const size_t sample_size    = 100000;
    const size_t batch_size     = 2;
    const size_t num_categories = 5;

    // Shape of the random data
    migraphx::shape rs{migraphx::shape::float_type, {{1, 2}, {2, sample_size + 1}}};
    auto input = mm->add_parameter("Input_1", rs);

    // Runtime randomization seed
    // To seed the random_uniform, we can provide a value by literal or input,
    // or ask the system to auto-seed with random_seed op.
    migraphx::shape seed_shape{migraphx::shape::uint32_type,
                               {migraphx::shape::dynamic_dimension{0, 1}}};
    auto seed_input = mm->add_parameter("Seed", seed_shape);

    // Shape of the probability distribution, which also defines the number of categories
    migraphx::shape s{migraphx::shape::float_type, {{2, 2}, {5, 6}}};

    // Unnormalized distributions for batch size 2:
    //   15, 25, 15, 25, 20
    //   20, 20, 10, 25, 25
    std::vector<int> dist{15, 25, 15, 25, 20, 20, 20, 10, 25, 25};

    // Hard-coded accumulated (prefix-summed) distributions, one row per batch entry.
    // The first row tops out at 1.0 and the second at 100, so the two rows are on
    // different scales; the op is documented as normalizing the CDF at runtime.
    std::vector<float> data{.15f, .40f, .55f, .80f, 1.0f, 20.f, 40.f, 50.f, 75.f, 100.f};
    auto input2 = mm->add_parameter("Input_2", s);

    auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, input);
    mm->add_instruction(migraphx::make_op("multinomial"), input2, randoms);
    p.compile(migraphx::make_target("ref"));

    // Create a dummy input in the shape we want for the random data. The buffer must
    // hold every element of the {batch_size, sample_size} shape it is wrapped in;
    // the argument does not own or resize the storage.
    std::vector<float> dummy(batch_size * sample_size, 0);
    migraphx::shape input_fixed_shape1{migraphx::shape::float_type, {batch_size, sample_size}};
    migraphx::shape input_fixed_shape2{migraphx::shape::float_type, {batch_size, num_categories}};

    migraphx::parameter_map params0;
    params0["Input_1"] = migraphx::argument(input_fixed_shape1, dummy.data());

    migraphx::shape seed_fixed_shape{migraphx::shape::uint32_type, {1}};
    std::vector<uint32_t> seed_data = {4};
    params0["Seed"]    = migraphx::argument(seed_fixed_shape, seed_data.data());
    params0["Input_2"] = migraphx::argument(input_fixed_shape2, data.data());

    auto result = p.eval(params0).back();

    // result_vec contains a category label for each random sample, batch row after
    // batch row. Stored as int32_t, matching how multinomial_test reads the output
    // of a multinomial op created without a dtype attribute.
    std::vector<int32_t> result_vec;
    result.visit([&](auto output) { result_vec.assign(output.begin(), output.end()); });

    // Histogram of the category labels produced for one batch row. at() makes an
    // out-of-range label fail loudly instead of writing outside the histogram.
    auto histogram = [&](size_t batch) {
        std::vector<int> counts(num_categories, 0);
        const size_t per_batch = result_vec.size() / batch_size;
        for(size_t i = batch * per_batch; i < (batch + 1) * per_batch; i++)
            counts.at(result_vec[i])++;
        return counts;
    };

    // Rescales a vector of counts so that its entries sum to 1.
    auto normalize = [](const std::vector<int>& counts) {
        auto total = std::accumulate(counts.begin(), counts.end(), 0);
        std::vector<float> scaled(counts.size());
        std::transform(counts.begin(), counts.end(), scaled.begin(), [&](auto n) {
            return static_cast<double>(n) / total;
        });
        return scaled;
    };

    // For each batch row, normalize both the input probability distribution and the
    // output histogram, and compare. Should be close but not identical.
    for(size_t b = 0; b < batch_size; b++)
    {
        std::vector<int> row_dist(dist.begin() + b * num_categories,
                                  dist.begin() + (b + 1) * num_categories);
        auto norm     = normalize(row_dist);
        auto res_norm = normalize(histogram(b));

        EXPECT(migraphx::verify::verify_range_with_tolerance(
            res_norm, migraphx::verify::expected{norm}, migraphx::verify::tolerance{0.01}));
    }
}
// Same flow as the other dynamic multinomial test (random_uniform feeding multinomial
// on the ref target), but the random-data parameter is declared with an integer type
// and multinomial is asked for float output via its dtype attribute. Checks that the
// sampled category frequencies of each batch row are within 0.01 of the normalized
// input distribution for that row.
TEST_CASE(multinomial_float_dyn_test)
{
    // int data type for random_uniform op and float data type for multinomial.
    migraphx::program p;
    auto* mm = p.get_main_module();

    const size_t sample_size    = 100000;
    const size_t batch_size     = 2;
    const size_t num_categories = 5;

    // Shape of the random data
    migraphx::shape rs{migraphx::shape::int32_type, {{1, 2}, {2, sample_size + 1}}};
    auto input = mm->add_parameter("Input_1", rs);

    // Runtime randomization seed
    // To seed the random_uniform, we can provide a value by literal or input,
    // or ask the system to auto-seed with random_seed op.
    migraphx::shape seed_shape{migraphx::shape::uint32_type,
                               {migraphx::shape::dynamic_dimension{0, 1}}};
    auto seed_input = mm->add_parameter("Seed", seed_shape);

    // Shape of the probability distribution, which also defines the number of categories
    migraphx::shape s{migraphx::shape::float_type, {{2, 2}, {5, 6}}};

    // Unnormalized distributions for batch size 2:
    //   15, 25, 15, 25, 20
    //   20, 20, 10, 25, 25
    std::vector<int> dist{15, 25, 15, 25, 20, 20, 20, 10, 25, 25};

    // Hard-coded normalized, accumulated (prefix-summed) distributions, one row per
    // batch entry; each row ends at 1.0.
    std::vector<float> data{.15f, .40f, .55f, .80f, 1.0f, .20f, .40f, .50f, .75f, 1.0f};
    auto input2 = mm->add_parameter("Input_2", s);

    auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, input);
    mm->add_instruction(migraphx::make_op("multinomial", {{"dtype", migraphx::shape::float_type}}),
                        input2,
                        randoms);
    p.compile(migraphx::make_target("ref"));

    // Create a dummy input in the shape we want for the random data. The buffer must
    // hold every element of the {batch_size, sample_size} shape it is wrapped in;
    // the argument does not own or resize the storage.
    // NOTE(review): Input_1 is declared as int32_type above, but the argument bound to
    // it here is float_type. Kept exactly as before -- confirm this mismatch is intended.
    std::vector<float> dummy(batch_size * sample_size, 0);
    migraphx::shape input_fixed_shape1{migraphx::shape::float_type, {batch_size, sample_size}};
    migraphx::shape input_fixed_shape2{migraphx::shape::float_type, {batch_size, num_categories}};

    migraphx::parameter_map params0;
    params0["Input_1"] = migraphx::argument(input_fixed_shape1, dummy.data());

    migraphx::shape seed_fixed_shape{migraphx::shape::uint32_type, {1}};
    std::vector<uint32_t> seed_data = {4};
    params0["Seed"]    = migraphx::argument(seed_fixed_shape, seed_data.data());
    params0["Input_2"] = migraphx::argument(input_fixed_shape2, data.data());

    auto result = p.eval(params0).back();

    // result_vec contains a category label for each random sample, batch row after
    // batch row. Kept as float because the op was created with dtype float_type.
    std::vector<float> result_vec;
    result.visit([&](auto output) { result_vec.assign(output.begin(), output.end()); });

    // Histogram of the category labels produced for one batch row. The float label is
    // converted to an index explicitly, and at() makes an out-of-range label fail
    // loudly instead of writing outside the histogram.
    auto histogram = [&](size_t batch) {
        std::vector<int> counts(num_categories, 0);
        const size_t per_batch = result_vec.size() / batch_size;
        for(size_t i = batch * per_batch; i < (batch + 1) * per_batch; i++)
            counts.at(static_cast<size_t>(result_vec[i]))++;
        return counts;
    };

    // Rescales a vector of counts so that its entries sum to 1.
    auto normalize = [](const std::vector<int>& counts) {
        auto total = std::accumulate(counts.begin(), counts.end(), 0);
        std::vector<float> scaled(counts.size());
        std::transform(counts.begin(), counts.end(), scaled.begin(), [&](auto n) {
            return static_cast<double>(n) / total;
        });
        return scaled;
    };

    // For each batch row, normalize both the input probability distribution and the
    // output histogram, and compare. Should be close but not identical.
    for(size_t b = 0; b < batch_size; b++)
    {
        std::vector<int> row_dist(dist.begin() + b * num_categories,
                                  dist.begin() + (b + 1) * num_categories);
        auto norm     = normalize(row_dist);
        auto res_norm = normalize(histogram(b));

        EXPECT(migraphx::verify::verify_range_with_tolerance(
            res_norm, migraphx::verify::expected{norm}, migraphx::verify::tolerance{0.01}));
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment