Commit f8761f3f authored by Umang Yadav

Merge remote-tracking branch 'origin/blas_tuning' into fp8_rocblas

parents 6fb61ded 1ea4a08a
@@ -80,6 +80,10 @@ ADD rbuild.ini /rbuild.ini
 # Temporarily install a new cmake until switching to ubuntu 22.04
 RUN pip3 install cmake==3.22.1
+# Location where onnx unit tests models are cached
+ENV ONNX_HOME=/.onnx
+RUN mkdir -p $ONNX_HOME/models && chmod 777 $ONNX_HOME/models
 COPY ./tools/install_prereqs.sh /
 RUN /install_prereqs.sh /usr/local / && rm /install_prereqs.sh
 RUN test -f /usr/local/hash || exit 1
@@ -91,11 +95,6 @@ RUN pip3 install yapf==0.28.0
 ADD docs/.sphinx/requirements.txt /doc-requirements.txt
 RUN pip3 install -r /doc-requirements.txt
-# Download real models to run onnx unit tests
-ENV ONNX_HOME=/.onnx
-COPY ./tools/download_models.sh /
-RUN /download_models.sh && rm /download_models.sh
 # Install latest ccache version
 RUN cget -p $PREFIX install facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
 RUN cget -p $PREFIX install ccache@v4.1 -DENABLE_TESTING=OFF
......
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,11 +21,52 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
+/**
+ * Multinomial or categorical distribution. Performs a sampling of random input
+ * and returns the chosen category, or bucket, for each sample. This does not
+ * require the standard multinomial distribution, but instead takes a
+ * probability distribution, i.e. a cumulative distribution function (CDF), as
+ * its first input.
+ *
+ * Inputs: args[0] - a tensor of probabilities for each category. Values are
+ *                   cumulative distribution function totals as provided by the
+ *                   operation prefix_scan_sum; i.e. start with any set of
+ *                   numbers > 0 and then apply prefix_scan_sum. Values do not
+ *                   need to be normalized to sum to 1; this is done in runtime
+ *                   computation.
+ *
+ *                   This input has rank 2. Dimension 0 is the batch number, so
+ *                   that there can be a different CDF for each iteration in
+ *                   the batch. The size of dimension 1 is the number of
+ *                   categories.
+ *
+ *         args[1] - a tensor of random numbers. The last dimension is the
+ *                   sample size, i.e. the number of random samples in each
+ *                   iteration of the batch. Nominally has two dimensions where
+ *                   the first dimension is the batch size, but any reshaping
+ *                   such that the total number of elements is
+ *                   (batch_size * sample_size) is legal.
+ *
+ *                   Values as created by a std::mt19937, like this:
+ *
+ *                       size_t sample_size = 100000;
+ *                       float seed         = 0.0f;
+ *                       std::mt19937 gen(seed);
+ *                       std::uniform_real_distribution<> dis(0.0, 1.0);
+ *                       std::vector<float> rand_samples(sample_size);
+ *                       std::generate(rand_samples.begin(), rand_samples.end(),
+ *                                     [&]() { return dis(gen); });
+ *
+ * Output: A 2D tensor of sampled category indices, one for each random input
+ *         sample. Its dimensions are (first dimension of Input 1, last
+ *         dimension of Input 2).
+ */
 #ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
 #define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP

-#include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <migraphx/par_for.hpp>
 #include <migraphx/reflect.hpp>
 #include <random>
@@ -47,22 +88,35 @@ struct multinomial
     std::string name() const { return "multinomial"; }

     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(2).only_dims(2);
-        size_t sample_size = inputs.back().lens().back();
-        if(not contains({shape::int32_type, shape::int64_type}, dtype))
-            MIGRAPHX_THROW(
-                "Multinomial: Invalid output type. Valid types are int32_type and int64_type.");
-        return {dtype, {inputs.front().lens().front(), sample_size}};
+        check_shapes{inputs, *this, true}.has(2).only_dims(2);
+        if(inputs.back().ndim() < 1)
+            MIGRAPHX_THROW("Multinomial: Second input shape (sample) has no dimensions");
+        if(dtype == shape::bool_type)
+            MIGRAPHX_THROW("Multinomial: boolean output type invalid.");
+        // Output takes one dimension from each of the two input shapes. If they are both
+        // fixed, return a static shape
+        if((not inputs.front().dynamic()) or (inputs.front().dyn_dims().front().is_fixed()))
+        {
+            if((not inputs.back().dynamic()) or (inputs.back().dyn_dims().back().is_fixed()))
+            {
+                size_t batch = {inputs.front().max_lens().front()};
+                size_t sample_size{inputs.back().max_lens().back()};
+                return {dtype, {batch, sample_size}};
+            }
+        }
+        return {dtype,
+                {inputs.front().to_dynamic().dyn_dims().front(),
+                 inputs.back().to_dynamic().dyn_dims().back()}};
     }

-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
-        size_t batch_size  = output_shape.lens().front();
+        argument result{dyn_out.computed_shape};
+        size_t batch_size  = dyn_out.computed_shape.lens().front();
         size_t class_size  = args[0].get_shape().lens().back();
-        size_t sample_size = output_shape.lens().back();
+        size_t sample_size = dyn_out.computed_shape.lens().back();

         visit_all(args[0], args[1])([&](auto cdf, auto dist) {
             result.visit([&](auto output) {
@@ -70,13 +124,16 @@ struct multinomial
                 auto idx       = args[1].get_shape().multi(i);
                 auto cdf_begin = cdf.begin() + (idx[0] * class_size);
                 auto cdf_end   = cdf_begin + class_size;
+                // std::upper_bound returns an iterator to the bucket the value belongs in,
+                // when normalized by the probability distribution dist
                 auto sample_iter =
                     std::upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
+                // convert iterator to an integer index
                 output[i] = std::distance(cdf_begin, sample_iter);
                 });
             });
         });

         return result;
     }
 };
......
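To make the sampling scheme above concrete, here is a minimal standalone sketch (not MIGraphX code) of the same std::upper_bound search over an unnormalized CDF; the 1:2:3:4 weights and the sample count are arbitrary choices for illustration:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <random>
    #include <vector>

    int main()
    {
        // Unnormalized CDF over 4 categories with weights 1, 2, 3, 4
        // (running totals, as prefix_scan_sum would produce)
        std::vector<float> cdf = {1.0f, 3.0f, 6.0f, 10.0f};

        std::mt19937 gen(0);
        std::uniform_real_distribution<> dis(0.0, 1.0);

        std::vector<std::size_t> counts(cdf.size(), 0);
        for(std::size_t i = 0; i < 100000; ++i)
        {
            // Scale the random sample by the CDF total, then find its bucket
            float r   = dis(gen) * cdf.back();
            auto iter = std::upper_bound(cdf.begin(), cdf.end(), r);
            ++counts[std::distance(cdf.begin(), iter)];
        }
        for(auto c : counts)
            std::cout << c << "\n"; // roughly proportional to 1:2:3:4
    }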
@@ -22,6 +22,12 @@
 * THE SOFTWARE.
 */
+/**
+ * Parent struct for prefix scan ops. A prefix scan is a mathematical entity useful
+ * in parallelizing various computations. Given a list of numbers, a prefix scan
+ * op returns an equal-size list of running totals of the values. Other operations
+ * besides addition can be supported by child ops.
+ */
 #ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
 #define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
......
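For reference, the running-totals behavior described in that comment is what std::partial_sum computes serially; a minimal sketch, with multiplication standing in for a non-addition child op:

    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    int main()
    {
        std::vector<int> in = {3, 1, 4, 1, 5};
        std::vector<int> out(in.size());

        // Inclusive prefix sum (running totals): 3 4 8 9 14
        std::partial_sum(in.begin(), in.end(), out.begin());

        // Any associative binary op works, e.g. a running product: 3 3 12 12 60
        std::partial_sum(in.begin(), in.end(), out.begin(), std::multiplies<>{});

        for(auto v : out)
            std::cout << v << ' ';
    }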
@@ -65,11 +65,10 @@ struct random_uniform
         return inputs.at(1);
     }

-    argument compute(const shape&, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
         // Output goes into the passed buffer, not the shape output.
-        auto result = args[1];
+        argument result{dyn_out.computed_shape};
         uint64_t local_seed = args[0].at<uint64_t>(0);
         std::mt19937 gen(local_seed);
......
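The compute() above reads the seed from the first argument at runtime and fills a buffer shaped by dyn_out. A rough standalone sketch of that fill loop, with an illustrative helper name that is not the library's API:

    #include <cstddef>
    #include <cstdint>
    #include <random>
    #include <vector>

    // Hypothetical helper: the seed arrives as runtime data, not as an attribute
    std::vector<float> fill_uniform(uint64_t local_seed, std::size_t n)
    {
        std::mt19937 gen(local_seed);
        std::uniform_real_distribution<> dis(0.0, 1.0);
        std::vector<float> result(n);
        for(auto& v : result)
            v = dis(gen);
        return result;
    }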
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
......
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -41,6 +41,9 @@ struct parse_multinomial : op_parser<parse_multinomial>
                           const onnx_parser::node_info& info,
                           std::vector<instruction_ref> args) const
     {
+        if(args.empty())
+            MIGRAPHX_THROW("PARSE_MULTINOMIAL: no arguments given");
         int dtype = 6;
         if(contains(info.attributes, "dtype"))
             dtype = info.attributes.at("dtype").i();
@@ -49,35 +52,90 @@ struct parse_multinomial : op_parser<parse_multinomial>
         size_t sample_size = 1;
         if(contains(info.attributes, "sample_size"))
             sample_size = info.attributes.at("sample_size").i();
+        else
+            MIGRAPHX_THROW("PARSE_MULTINOMIAL: sample_size not given");

+        // Use logarithmic math to scale probabilities while avoiding division by very
+        // small numbers. Scaling by the maximum makes very tiny ranges more tractable;
+        // any constant factor gives an equivalent distribution, since the multinomial
+        // op normalizes at runtime.
         // Subtract the per-batch maximum log-probability, making the per-batch max 0
         auto maxes =
             info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), args[0]);
-        auto mb_maxes = info.add_instruction(
-            migraphx::make_op("multibroadcast", {{"out_lens", args[0]->get_shape().lens()}}),
-            maxes);
-        auto cdf = info.add_instruction(migraphx::make_op("sub"), args[0], mb_maxes);
+        auto cdf = info.add_common_op("sub", args[0], maxes);
         // Take the element-wise exponent to get probabilities in the range (0, 1]
         cdf = info.add_instruction(migraphx::make_op("exp"), cdf);
-        // Compute the cumulative density function
+        // Compute the cumulative distribution function
         cdf = info.add_instruction(
             migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);

-        // Pre-compute random distribution
-        std::mt19937 gen(std::chrono::high_resolution_clock::now().time_since_epoch().count());
+        instruction_ref seed_input;
         if(contains(info.attributes, "seed"))
-            gen.seed(info.attributes.at("seed").f());
+        {
+            float seed = info.attributes.at("seed").f();
+            migraphx::shape s{migraphx::shape::float_type, {1}};
+            std::vector<float> data = {seed};
+            seed_input              = info.add_literal(migraphx::literal(s, data));
+        }
+        else
+        {
+            seed_input = info.add_instruction(migraphx::make_op("random_seed"));
+        }
+        instruction_ref randoms;
+        shape s0 = args[0]->get_shape();
+        if(s0.dynamic())
+        {
+            // Dynamic batch_size will be taken from args[0]. The input argument to this
+            // should have a second dimension of sample_size.
+            std::vector<shape::dynamic_dimension> dyn_dim_set;
+            dyn_dim_set.emplace_back(s0.dyn_dims().front());
+            dyn_dim_set.emplace_back(shape::dynamic_dimension{sample_size, sample_size});
+            // read the input dimensions
+            auto dim_of =
+                info.add_instruction(migraphx::make_op("dimensions_of", {{"end", 2}}), args[0]);
+            // The next two operations insert the value sample_size into the second array position
+            // make an argument of (1, 0)
+            shape s(shape::int64_type, {2});
+            std::vector<int64_t> data1{1, 0};
+            auto l1        = info.add_literal(s, data1);
+            auto batch_arg = info.add_instruction(migraphx::make_op("mul"), dim_of, l1);
+            std::vector<int64_t> data2(2, 0);
+            // make an argument of (0, sample_size)
+            data2[1]         = sample_size;
+            auto l2          = info.add_literal(s, data2);
+            auto alloc_shape = info.add_instruction(migraphx::make_op("add"), batch_arg, l2);
+            // alloc_shape should contain the input-based shape dimensions as its values at
+            // runtime, and its own shape is {2}
+            // compile_shape is the shape used when compiling the Allocate op, and may be dynamic
+            migraphx::shape compile_shape =
+                migraphx::shape(s0.type(), {s0.dyn_dims().front(), {sample_size, sample_size}});
+            // Allocate on-device storage for the random values
+            auto alloc = info.add_instruction(
+                migraphx::make_op("allocate", {{"shape", to_value(compile_shape)}}), alloc_shape);
+            randoms = info.add_instruction(migraphx::make_op("random_uniform"), seed_input, alloc);
+        }
+        else
+        {
+            // use literal. The array populated by random_uniform may have any shape, as long
+            // as its number of elements is batch_size * sample_size.
+            size_t batch_size = s0.lens().front();
+            auto rand_dummy   = info.add_literal(
+                migraphx::literal{migraphx::shape::float_type, {batch_size * sample_size}});
+            randoms =
+                info.add_instruction(migraphx::make_op("random_uniform"), seed_input, rand_dummy);
+        }
-        std::uniform_real_distribution<> dis(0.0, 1.0);
-        size_t batch_size = args[0]->get_shape().lens().front();
-        migraphx::shape dist_shape{migraphx::shape::float_type, {batch_size, sample_size}};
-        std::vector<float> random_dist(batch_size * sample_size);
-        std::generate(random_dist.begin(), random_dist.end(), [&]() { return dis(gen); });
-        auto dist_lit = info.add_literal(migraphx::literal{dist_shape, random_dist});
         return info.add_instruction(
-            migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, dist_lit);
+            migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, randoms);
     }
 };
......
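The log-scaling comment in this parser is the standard max-subtraction trick: shifting log-probabilities by any constant leaves the resulting distribution unchanged once the multinomial op normalizes, and subtracting the row maximum keeps exp() in (0, 1]. A plain-C++ sketch of the same pipeline (subtract max, exponentiate, prefix sum):

    #include <algorithm>
    #include <cmath>
    #include <numeric>
    #include <vector>

    // Build an unnormalized CDF from one row of log-probabilities
    std::vector<double> cdf_from_logits(const std::vector<double>& logits)
    {
        double mx = *std::max_element(logits.begin(), logits.end());
        std::vector<double> probs(logits.size());
        // exp(x - mx) lies in (0, 1], so large logits cannot overflow
        std::transform(logits.begin(), logits.end(), probs.begin(),
                       [&](double x) { return std::exp(x - mx); });
        // running totals, as prefix_scan_sum does along axis 1
        std::partial_sum(probs.begin(), probs.end(), probs.begin());
        return probs; // unnormalized; the multinomial op divides by probs.back()
    }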
@@ -68,13 +68,34 @@ struct parse_split : op_parser<parse_split>
         // no split attribute, input is equally divided
         else
         {
-            if((lens[tuned_axis] % info.num_outputs) != 0)
-            {
-                MIGRAPHX_THROW("PARSE_SPLIT: input cannot be equally divided into " +
-                               std::to_string(info.num_outputs) + " splits!");
-            }
-            auto dl = lens[tuned_axis] / info.num_outputs;
-            vec_splits.resize(info.num_outputs, dl);
+            std::size_t num_outputs = info.num_outputs;
+            // the num_outputs attribute seems to be redundant since we already have
+            // node_info::num_outputs, but we can still perform an error check
+            if(contains(info.attributes, "num_outputs"))
+            {
+                num_outputs =
+                    parser.parse_value(info.attributes.at("num_outputs")).at<std::size_t>();
+                if(num_outputs != info.num_outputs)
+                {
+                    MIGRAPHX_THROW("PARSE_SPLIT: num_outputs attribute " +
+                                   std::to_string(num_outputs) +
+                                   " doesn't match actual number of outputs " +
+                                   std::to_string(info.num_outputs) + "!");
+                }
+            }
+            if(lens[tuned_axis] % num_outputs == 0)
+            {
+                std::size_t chunk_size = lens[tuned_axis] / num_outputs;
+                vec_splits.resize(num_outputs, chunk_size);
+            }
+            else
+            {
+                std::size_t chunk_size      = lens[tuned_axis] / num_outputs + 1;
+                std::size_t last_chunk_size = lens[tuned_axis] - chunk_size * (num_outputs - 1);
+                vec_splits.resize(num_outputs - 1, chunk_size);
+                vec_splits.push_back(last_chunk_size);
+            }
         }
         if(std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) !=
......
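The uneven-split rule above gives every output the ceiling chunk size except the last, which takes the remainder: 11 rows over 4 outputs yields {3, 3, 3, 2}, and 12 rows over 5 yields {3, 3, 3, 3, 0}, matching the split_test_uneven cases below. A minimal sketch of that computation:

    #include <cstddef>
    #include <vector>

    std::vector<std::size_t> split_sizes(std::size_t len, std::size_t num_outputs)
    {
        if(len % num_outputs == 0)
            return std::vector<std::size_t>(num_outputs, len / num_outputs);
        // ceiling chunk for all but the last output, remainder for the last
        std::size_t chunk_size = len / num_outputs + 1;
        std::vector<std::size_t> vec_splits(num_outputs - 1, chunk_size);
        vec_splits.push_back(len - chunk_size * (num_outputs - 1));
        return vec_splits;
    }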
@@ -253,7 +253,7 @@ get_target_property(ROCBLAS_LOCATION roc::rocblas LOCATION)
 check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API)
 check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
 # Beta API for automated GEMM tuning
-check_library_exists(roc::rocblas "rocblas_gemm_ex_get_solutions" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_BETA_FEATURES_API)
+check_library_exists(roc::rocblas "rocblas_gemm_ex_get_solutions" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_TUNING_BETA_FEATURE_API)
 set(MIGRAPHX_USE_FIND_2_API "${HAS_FIND_2_API}" CACHE BOOL "")
@@ -276,11 +276,11 @@ else()
     message(STATUS "MIOpen does not have find mode api")
 endif()

-if(HAS_ROCBLAS_BETA_FEATURES_API)
+if(HAS_ROCBLAS_TUNING_BETA_FEATURE_API)
     target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_TUNING_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS)
     message(STATUS "MIGraphx is using Beta API of rocBLAS")
 else()
-    message(STATUS "rocBLAS does not have Beta API")
+    message(STATUS "rocBLAS does not have User Tuning Beta API")
 endif()

 target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
......
@@ -22,24 +22,14 @@
 * THE SOFTWARE.
 */
+/**
+ * Contains a templated struct implementation that wraps several rocBLAS API calls
+ * used by the GEMM operator. These are accessed by methods declared in gemm_impl.hpp
+ *
+ */
 #include <rocblas/rocblas.h>
 #include <migraphx/gpu/gemm_impl.hpp>
-#include <migraphx/reduce_dims.hpp>
-#include <migraphx/generate.hpp>
 #include <migraphx/time.hpp>

 using microseconds = std::chrono::duration<double, std::micro>;

-#if ROCBLAS_VERSION_MAJOR > 2 or (ROCBLAS_VERSION_MAJOR == 2 and ROCBLAS_VERSION_MINOR >= 38)
-using flag_type = rocblas_gemm_flags;
-#else
-using flag_type = int;
-#endif

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
@@ -230,7 +220,7 @@ struct gemm_impl
         auto common_args = create_strided_batched_args_common(ctx, input_args);
         rocblas_invoke(&rocblas_gemm_strided_batched_ex,
                        common_args,
-                       rocblas_gemm_algo_standard,
+                       rocblas_gemm_algo_solution_index,
                        solution_idx,
                        gemm_flags);
     }
@@ -239,7 +229,7 @@ struct gemm_impl
         auto common_args = create_gemm_ex_args_common(ctx, input_args);
         rocblas_invoke(&rocblas_gemm_ex,
                        common_args,
-                       rocblas_gemm_algo_standard,
+                       rocblas_gemm_algo_solution_index,
                        solution_idx,
                        gemm_flags);
     }
@@ -450,9 +440,6 @@ struct gemm_impl
             ctx.finish();
         });
-        // todo: Measured time dropped from 20 us to about 6.7 us when I raised hot_calls from
-        // 1 to 11. The higher the hot_calls value, the faster per-call time up to at least 25,
-        // and increasing cold_calls makes little or no difference. Why?
         host_time /= hot_calls;

         // dev/evaluation only: track time for first solution.
@@ -554,17 +541,16 @@ int32_t gemm_finalize(context& ctx,
     if(solution_idx == 0)
     {
         auto gemm_item = gemm_impl<float>(output_shape, input_shapes, alpha, beta, compute_fp32);
         solution_idx   = gemm_item.tune(ctx, input_shapes);
     }
     else
     {
         // If a tuned solution index is already given, don't tune again but validate
         // in case the data was tuned with a different rocBLAS version
         auto gemm_item = gemm_impl<float>(output_shape, input_shapes, alpha, beta, compute_fp32);
         solution_idx   = gemm_item.validate(ctx, input_shapes, solution_idx);
     }
 #else
-    // suppress compiler warnings
     (void)ctx, (void)output_shape, (void)input_shapes;
     (void)alpha, (void)beta, (void)compute_fp32;
 #endif
@@ -584,23 +570,19 @@ int32_t gemm_finalize(context& ctx,
                       int32_t solution_idx)
 {
 #ifdef MIGRAPHX_USE_ROCBLAS_TUNING_API
-    // This code should be called only if either the environment var.
-    // MIGRAPHX_ENABLE_GEMM_TUNING, or option --exhaustive-tune, is set
     if(solution_idx == 0)
     {
         auto gemm_item = gemm_impl<int32_t>(output_shape, input_shapes, alpha, beta, compute_fp32);
         solution_idx   = gemm_item.tune(ctx, input_shapes);
     }
     else
     {
         // If a tuned solution index is already given, don't tune again but validate
         // in case the data was tuned with a different rocBLAS version
         auto gemm_item = gemm_impl<int32_t>(output_shape, input_shapes, alpha, beta, compute_fp32);
         solution_idx   = gemm_item.validate(ctx, input_shapes, solution_idx);
     }
 #else
-    // suppress compiler warnings
     (void)ctx, (void)output_shape, (void)input_shapes;
     (void)alpha, (void)beta, (void)compute_fp32;
 #endif
......
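The two finalize paths above share one pattern: tune only when no solution index has been recorded, otherwise just validate the cached index, since it may have been produced against a different rocBLAS version. A schematic of that dispatch, using placeholder template parameters rather than the actual MIGraphX signatures:

    #include <cstdint>

    template <class Gemm, class Context, class Shapes>
    int32_t finalize_solution(Gemm& gemm_item,
                              Context& ctx,
                              const Shapes& input_shapes,
                              int32_t solution_idx)
    {
        if(solution_idx == 0)
            return gemm_item.tune(ctx, input_shapes); // full tuning sweep
        // cheap re-check that the cached index is still usable
        return gemm_item.validate(ctx, input_shapes, solution_idx);
    }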
@@ -27,11 +27,7 @@
 #include <iterator>
 #include <migraphx/shape.hpp>
 #include <migraphx/argument.hpp>
-#include <migraphx/generate.hpp>
 #include <migraphx/gpu/context.hpp>
-#include <migraphx/reduce_dims.hpp>
-#include <migraphx/gpu/hip.hpp>
-#include <migraphx/time.hpp>

 // Set this environment variable to "true" to perform GEMM tuning even when the
 // --exhaustive-tune option isn't set. Can be used to skip slow convolution tuning.
@@ -44,12 +40,6 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

-#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
-using flag_type = rocblas_gemm_flags;
-#else
-using flag_type = int;
-#endif

 /**
 * @brief Templated implementations of the compute() and finalize() methods of the Gemm operator.
 * For each function there are overloads using either float or int32_t for the arguments
......
@@ -25,7 +25,6 @@
 #define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
 #include <migraphx/manage_ptr.hpp>
 #include <migraphx/gpu/config.hpp>
-// ROCBLAS_BETA_FEATURES_API is defined by CMake, if available.
 #include <rocblas/rocblas.h>

 namespace migraphx {
......
@@ -30,6 +30,9 @@ function(add_api_test TEST_NAME TEST_SRC TEST_DIR)
     add_test(NAME ${NAME} COMMAND $<TARGET_FILE:${NAME}> WORKING_DIRECTORY ${TEST_DIR})
     add_dependencies(tests ${NAME})
     add_dependencies(check ${NAME})
+    if(WIN32)
+        target_compile_definitions(${NAME} PRIVATE _CRT_SECURE_NO_WARNINGS)
+    endif()
 endfunction()

 # Workaround: C file dont work with clang-tidy right now, need a fix in rocm-cmake
@@ -41,6 +44,9 @@ function(add_c_api_test TEST_NAME TEST_SRC TEST_DIR)
     add_test(NAME ${NAME} COMMAND $<TARGET_FILE:${NAME}> WORKING_DIRECTORY ${TEST_DIR})
     add_dependencies(tests ${NAME})
     add_dependencies(check ${NAME})
+    if(WIN32)
+        target_compile_definitions(${NAME} PRIVATE _CRT_SECURE_NO_WARNINGS)
+    endif()
 endfunction()

 add_api_test(array_base test_array_base.cpp ${TEST_ONNX_DIR})
@@ -57,10 +63,6 @@ add_api_test(custom_op test_custom_op.cpp ${TEST_ONNX_DIR})
 add_api_test(tf_parser test_tf_parser.cpp ${TEST_TF_DIR})

 # GPU-based tests
 if(MIGRAPHX_ENABLE_GPU)
-    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
-    find_package(hip)
     add_api_test(gpu test_gpu.cpp ${TEST_ONNX_DIR})
-    target_link_libraries(test_api_gpu)
     add_api_test(custom_op_gpu test_custom_op_gpu.cpp ${TEST_ONNX_DIR})
-    target_link_libraries(test_api_custom_op_gpu)
 endif()
@@ -4883,9 +4883,9 @@ def mod_test_fmod_different_dtypes():
 @onnx_test()
 def multinomial_test():
-    sample_size = 10
-    seed = 0.0
-    input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10])
+    sample_size = 13
+    seed = 0.
+    input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [3, 10])
     output = helper.make_tensor_value_info("output", TensorProto.INT32,
                                            [1, 10])
@@ -4898,6 +4898,44 @@ def multinomial_test():
     return ([node], [input], [output])

+@onnx_test()
+def multinomial_dyn_test():
+    sample_size = 100000
+    seed = 1.3
+    categories = 5
+    input = helper.make_tensor_value_info("input", TensorProto.FLOAT,
+                                          [None, categories])
+    output = helper.make_tensor_value_info("output", TensorProto.FLOAT,
+                                           [None, categories])
+
+    node = onnx.helper.make_node(
+        'Multinomial',
+        inputs=['input'],
+        sample_size=sample_size,
+        dtype=1,  # shape::float_type
+        seed=seed,
+        outputs=['output'])
+
+    return ([node], [input], [output])
+
+
+@onnx_test()
+def multinomial_autoseed_dyn_test():
+    # If seed attribute is not given, device should auto generate one at runtime
+    sample_size = 12
+    input = helper.make_tensor_value_info("input", TensorProto.FLOAT,
+                                          [None, 10])
+    output = helper.make_tensor_value_info("output", TensorProto.INT32,
+                                           [None, 10])
+
+    node = onnx.helper.make_node('Multinomial',
+                                 inputs=['input'],
+                                 sample_size=sample_size,
+                                 outputs=['output'])
+
+    return ([node], [input], [output])
+
+
 @onnx_test()
 def multinomial_generated_seed_test():
     sample_size = 10
@@ -8042,6 +8080,42 @@ def split_test_no_attribute():
     return ([const_node, node], [x], [y1, y2, y3, y4])

+@onnx_test()
+def split_test_uneven():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [12, 15])
+    y1 = helper.make_tensor_value_info('y1', TensorProto.FLOAT, [3, 15])
+    y2 = helper.make_tensor_value_info('y2', TensorProto.FLOAT, [3, 15])
+    y3 = helper.make_tensor_value_info('y3', TensorProto.FLOAT, [3, 15])
+    y4 = helper.make_tensor_value_info('y4', TensorProto.FLOAT, [3, 15])
+    y5 = helper.make_tensor_value_info('y5', TensorProto.FLOAT, [0, 15])
+
+    node = onnx.helper.make_node(
+        'Split',
+        inputs=['x'],
+        outputs=['y1', 'y2', 'y3', 'y4', 'y5'],
+    )
+
+    return ([node], [x], [y1, y2, y3, y4, y5])
+
+
+@onnx_test()
+def split_test_uneven_num_outputs():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [11, 15])
+    y1 = helper.make_tensor_value_info('y1', TensorProto.FLOAT, [3, 15])
+    y2 = helper.make_tensor_value_info('y2', TensorProto.FLOAT, [3, 15])
+    y3 = helper.make_tensor_value_info('y3', TensorProto.FLOAT, [3, 15])
+    y4 = helper.make_tensor_value_info('y4', TensorProto.FLOAT, [2, 15])
+
+    node = onnx.helper.make_node(
+        'Split',
+        inputs=['x'],
+        outputs=['y1', 'y2', 'y3', 'y4'],
+        num_outputs=4,
+    )
+
+    return ([node], [x], [y1, y2, y3, y4])
+
+
 @onnx_test()
 def split_test_no_attribute_invalid_split():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [300, 15])
@@ -8101,6 +8175,24 @@ def split_test_no_attribute_invalid_input_split():
     return ([node], [x], [y1, y2, y3])

+@onnx_test()
+def split_test_invalid_num_outputs():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [11, 15])
+    y1 = helper.make_tensor_value_info('y1', TensorProto.FLOAT, [3, 15])
+    y2 = helper.make_tensor_value_info('y2', TensorProto.FLOAT, [3, 15])
+    y3 = helper.make_tensor_value_info('y3', TensorProto.FLOAT, [3, 15])
+    y4 = helper.make_tensor_value_info('y4', TensorProto.FLOAT, [2, 15])
+
+    node = onnx.helper.make_node(
+        'Split',
+        inputs=['x'],
+        outputs=['y1', 'y2', 'y3', 'y4'],
+        num_outputs=5,
+    )
+
+    return ([node], [x], [y1, y2, y3, y4])
+
+
 @onnx_test()
 def sqrt_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 15])
......
@@ -4679,32 +4679,140 @@ TEST_CASE(multinomial_test)
 {
     migraphx::program p;
     auto* mm = p.get_main_module();
-    size_t sample_size = 10;
-    float seed         = 0.0f;
+    size_t sample_size = 13;
+    size_t batch_size  = 3;
+    size_t categories  = 10;
+    float seed         = 0;

-    auto input = mm->add_parameter("input", migraphx::shape{migraphx::shape::float_type, {1, 10}});
-    auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input);
-    auto mb_maxes =
-        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 10}}}), maxes);
+    auto input = mm->add_parameter(
+        "input", migraphx::shape{migraphx::shape::float_type, {batch_size, categories}});
+    auto maxes    = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input);
+    auto mb_maxes = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", {batch_size, 10}}}), maxes);
     auto cdf = mm->add_instruction(migraphx::make_op("sub"), input, mb_maxes);
     cdf      = mm->add_instruction(migraphx::make_op("exp"), cdf);
     cdf      = mm->add_instruction(
         migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);

-    std::mt19937 gen(seed);
-    std::uniform_real_distribution<> dis(0.0, 1.0);
-    std::vector<float> rand_samples(sample_size);
-    std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return dis(gen); });
-    migraphx::shape rs{migraphx::shape::float_type, {1, sample_size}};
-    auto rs_lit = mm->add_literal(migraphx::literal{rs, rand_samples});
-    mm->add_instruction(migraphx::make_op("multinomial"), cdf, rs_lit);
+    migraphx::shape s{migraphx::shape::float_type, {1}};
+    std::vector<float> seed_data = {seed};
+    auto seed_input              = mm->add_literal(migraphx::literal(s, seed_data));
+    auto rand_dummy              = mm->add_literal(
+        migraphx::literal{migraphx::shape::float_type, {batch_size * sample_size}});
+    auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, rand_dummy);
+    mm->add_instruction(migraphx::make_op("multinomial"), cdf, randoms);

     auto prog = optimize_onnx("multinomial_test.onnx");
     EXPECT(p == prog);
 }

+TEST_CASE(multinomial_dyn_test)
+{
+    // compile-time random seed
+    migraphx::program p;
+    auto* mm           = p.get_main_module();
+    size_t sample_size = 100000;
+    size_t categories  = 5;
+    float seed         = 1.3f;
+
+    auto input = mm->add_parameter(
+        "input",
+        migraphx::shape{migraphx::shape::float_type, {{1, categories}, {categories, categories}}});
+
+    auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input);
+    auto cdf   = add_common_op(*mm, migraphx::make_op("sub"), {input, maxes});
+    cdf        = mm->add_instruction(migraphx::make_op("exp"), cdf);
+    cdf        = mm->add_instruction(
+        migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);
+
+    migraphx::shape s{migraphx::shape::float_type, {1}};
+    std::vector<float> seed_data = {seed};
+    auto seed_input              = mm->add_literal(migraphx::literal(s, seed_data));
+
+    // dynamic input only: must calculate alloc_shape as (batch_size, sample_size)
+    // read the runtime input dimensions
+    auto dim_of = mm->add_instruction(migraphx::make_op("dimensions_of", {{"end", 2}}), input);
+    // make an argument of (1, 0)
+    migraphx::shape lit_shape(migraphx::shape::int64_type, {2});
+    std::vector<int64_t> data1{1, 0};
+    auto l1        = mm->add_literal(lit_shape, data1);
+    auto batch_arg = mm->add_instruction(migraphx::make_op("mul"), dim_of, l1);
+    std::vector<int64_t> data2(2, 0);
+    // make an argument of (0, sample_size)
+    data2[1]         = sample_size;
+    auto l2          = mm->add_literal(lit_shape, data2);
+    auto alloc_shape = mm->add_instruction(migraphx::make_op("add"), batch_arg, l2);
+
+    migraphx::shape compile_shape =
+        migraphx::shape(migraphx::shape::float_type,
+                        {input->get_shape().dyn_dims().front(), {sample_size, sample_size}});
+    auto alloc = mm->add_instruction(
+        migraphx::make_op("allocate", {{"shape", to_value(compile_shape)}}), alloc_shape);
+
+    auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, alloc);
+    auto ret     = mm->add_instruction(
+        migraphx::make_op("multinomial", {{"dtype", migraphx::shape::float_type}}), cdf, randoms);
+    mm->add_return({ret});
+
+    migraphx::onnx_options options;
+    options.default_dyn_dim_value  = {1, categories};
+    options.print_program_on_error = true;
+    auto prog = migraphx::parse_onnx("multinomial_dyn_test.onnx", options);
+
+    EXPECT(p == prog);
+}
+
+TEST_CASE(multinomial_autoseed_dyn_test)
+{
+    // runtime random seed
+    migraphx::program p;
+    auto* mm           = p.get_main_module();
+    size_t sample_size = 12;
+    size_t categories  = 10;
+
+    auto input = mm->add_parameter(
+        "input", migraphx::shape{migraphx::shape::float_type, {{1, 10}, {10, 10}}});
+    auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input);
+    auto cdf   = add_common_op(*mm, migraphx::make_op("sub"), {input, maxes});
+    cdf        = mm->add_instruction(migraphx::make_op("exp"), cdf);
+    cdf        = mm->add_instruction(
+        migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);
+
+    auto seed_input = mm->add_instruction(migraphx::make_op("random_seed"));
+    // dynamic input only: must calculate alloc_shape as (batch_size, sample_size)
+    // read the runtime input dimensions
+    auto dim_of = mm->add_instruction(migraphx::make_op("dimensions_of", {{"end", 2}}), input);
+    // make an argument of (1, 0)
+    migraphx::shape lit_shape(migraphx::shape::int64_type, {2});
+    std::vector<int64_t> data1{1, 0};
+    auto l1        = mm->add_literal(lit_shape, data1);
+    auto batch_arg = mm->add_instruction(migraphx::make_op("mul"), dim_of, l1);
+    std::vector<int64_t> data2(2, 0);
+    // make an argument of (0, sample_size)
+    data2[1]         = sample_size;
+    auto l2          = mm->add_literal(lit_shape, data2);
+    auto alloc_shape = mm->add_instruction(migraphx::make_op("add"), batch_arg, l2);
+
+    migraphx::shape compile_shape =
+        migraphx::shape(migraphx::shape::float_type,
+                        {input->get_shape().dyn_dims().front(), {sample_size, sample_size}});
+    auto alloc = mm->add_instruction(
+        migraphx::make_op("allocate", {{"shape", to_value(compile_shape)}}), alloc_shape);
+
+    auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, alloc);
+    auto ret     = mm->add_instruction(migraphx::make_op("multinomial"), cdf, randoms);
+    mm->add_return({ret});
+
+    migraphx::onnx_options options;
+    options.default_dyn_dim_value  = {1, categories};
+    options.print_program_on_error = true;
+    auto prog = migraphx::parse_onnx("multinomial_autoseed_dyn_test.onnx", options);
+
+    EXPECT(p == prog);
+}
+
 TEST_CASE(multinomial_dtype_error_test)
 {
     EXPECT(test::throws([&] { migraphx::parse_onnx("multinomial_dtype_error_test.onnx"); }));
@@ -4712,10 +4820,11 @@ TEST_CASE(multinomial_dtype_error_test)

 TEST_CASE(multinomial_generated_seed_test)
 {
+    // multinomial op. no longer generates its own randoms
     auto p1 = optimize_onnx("multinomial_generated_seed_test.onnx");
     auto p2 = optimize_onnx("multinomial_generated_seed_test.onnx");

-    EXPECT(p1 != p2);
+    EXPECT(p1 == p2);
 }

 TEST_CASE(multinomial_int64_test)
@@ -4723,27 +4832,27 @@ TEST_CASE(multinomial_int64_test)
     migraphx::program p;
     auto* mm           = p.get_main_module();
     size_t sample_size = 10;
-    float seed         = 1.0f;
+    float seed          = 1.0;
+    uint32_t batch_size = 1;
     migraphx::shape::type_t dtype = migraphx::shape::type_t::int64_type;

     auto input = mm->add_parameter("input", migraphx::shape{migraphx::shape::float_type, {1, 10}});
     auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input);
-    auto mb_maxes =
-        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 10}}}), maxes);
-    auto cdf = mm->add_instruction(migraphx::make_op("sub"), input, mb_maxes);
+    auto cdf   = add_common_op(*mm, migraphx::make_op("sub"), {input, maxes});
     cdf        = mm->add_instruction(migraphx::make_op("exp"), cdf);
     cdf        = mm->add_instruction(
         migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);

-    std::mt19937 gen(seed);
-    std::uniform_real_distribution<> dis(0.0, 1.0);
-    std::vector<float> rand_samples(sample_size);
-    std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return dis(gen); });
-    migraphx::shape rs{migraphx::shape::float_type, {1, sample_size}};
-    auto rs_lit = mm->add_literal(migraphx::literal{rs, rand_samples});
-    mm->add_instruction(migraphx::make_op("multinomial", {{"dtype", dtype}}), cdf, rs_lit);
+    migraphx::shape s{migraphx::shape::float_type, {1}};
+    std::vector<float> data = {seed};
+    auto seed_input         = mm->add_literal(migraphx::literal(s, data));
+    // static size
+    auto rand_dummy = mm->add_literal(
+        migraphx::literal{migraphx::shape::float_type, {batch_size * sample_size}});
+    auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, rand_dummy);
+    mm->add_instruction(migraphx::make_op("multinomial", {{"dtype", dtype}}), cdf, randoms);

     auto prog = optimize_onnx("multinomial_int64_test.onnx");
     EXPECT(p == prog);
@@ -7671,6 +7780,46 @@ TEST_CASE(split_test_default)
     EXPECT(p == prog);
 }

+TEST_CASE(split_test_uneven)
+{
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+
+    auto input = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {12, 15}});
+    auto r1    = mm->add_instruction(
+        migraphx::make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {3}}}), input);
+    auto r2 = mm->add_instruction(
+        migraphx::make_op("slice", {{"axes", {0}}, {"starts", {3}}, {"ends", {6}}}), input);
+    auto r3 = mm->add_instruction(
+        migraphx::make_op("slice", {{"axes", {0}}, {"starts", {6}}, {"ends", {9}}}), input);
+    auto r4 = mm->add_instruction(
+        migraphx::make_op("slice", {{"axes", {0}}, {"starts", {9}}, {"ends", {12}}}), input);
+    auto r5 = mm->add_instruction(
+        migraphx::make_op("slice", {{"axes", {0}}, {"starts", {12}}, {"ends", {12}}}), input);
+    mm->add_return({r1, r2, r3, r4, r5});
+
+    auto prog = migraphx::parse_onnx("split_test_uneven.onnx");
+
+    EXPECT(p == prog);
+}
+
+TEST_CASE(split_test_uneven_num_outputs)
+{
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+
+    auto input = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {11, 15}});
+    auto r1    = mm->add_instruction(
+        migraphx::make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {3}}}), input);
+    auto r2 = mm->add_instruction(
+        migraphx::make_op("slice", {{"axes", {0}}, {"starts", {3}}, {"ends", {6}}}), input);
+    auto r3 = mm->add_instruction(
+        migraphx::make_op("slice", {{"axes", {0}}, {"starts", {6}}, {"ends", {9}}}), input);
+    auto r4 = mm->add_instruction(
+        migraphx::make_op("slice", {{"axes", {0}}, {"starts", {9}}, {"ends", {11}}}), input);
+    mm->add_return({r1, r2, r3, r4});
+
+    auto prog = migraphx::parse_onnx("split_test_uneven_num_outputs.onnx");
+
+    EXPECT(p == prog);
+}
+
 TEST_CASE(split_test_no_attribute_invalid_split)
 {
     EXPECT(
@@ -7688,6 +7837,11 @@ TEST_CASE(split_test_no_attribute_invalid_input_split)
         [&] { migraphx::parse_onnx("split_test_no_attribute_invalid_input_split.onnx"); }));
 }

+TEST_CASE(split_test_invalid_num_outputs)
+{
+    EXPECT(test::throws([&] { migraphx::parse_onnx("split_test_invalid_num_outputs.onnx"); }));
+}
+
 TEST_CASE(sqrt_test)
 {
     migraphx::program p;
......
(new binary file: test/onnx/split_test_invalid_num_outputs.onnx — a serialized ONNX graph for the split_test_invalid_num_outputs case: a Split node with input x, outputs y1–y4, and a num_outputs attribute of 5 that deliberately mismatches the four declared outputs)